<?php
/**
 * Pattern Detector
 *
 * Finds registered patterns in normalized HTML using XPath queries
 * converted from CSS selectors. Patterns are checked in priority order
 * with confidence scoring.
 *
 * v1.3.2: Added nested pattern removal to prevent double-counting
 *         when child elements match lower-priority patterns.
 *
 * @package STCWHeadlessAssistant
 * @since 2.0.0
 */

namespace STCW\Headless\Engine\Detector;

// Abort when ABSPATH is undefined, so this file does nothing if it is executed
// outside the application that defines that constant (presumably WordPress — confirm).
if (!defined('ABSPATH')) exit;

/**
 * Detects registered patterns in an HTML fragment.
 *
 * Each pattern supplies a list of selectors (CSS, or raw XPath when the
 * selector starts with "/"), optional validators and an optional extractor.
 * Matches are collected per pattern, nested matches are pruned, and the
 * remaining matches are returned in document order.
 */
class PatternDetector {

    /**
     * DOMDocument instance
     * @var \DOMDocument
     */
    private $dom;

    /**
     * DOMXPath instance bound to $dom
     * @var \DOMXPath
     */
    private $xpath;

    /**
     * Matches produced by the most recent detect() call
     * @var array
     */
    private $detected = [];

    /**
     * Fallback sequence number, used only when a document-order index
     * cannot be computed for a node
     * @var int
     */
    private $position_counter = 0;

    /**
     * Detect all registered patterns in HTML
     *
     * Each returned entry has the keys: pattern, node, xpath, html,
     * extracted, position, confidence and priority. Entries are ordered by
     * their position in the document; when two entries sit on the same node
     * the higher priority (then higher confidence) one comes first.
     *
     * @param string $html Normalized HTML content
     * @return array Detected patterns with DOM nodes
     */
    public function detect($html) {
        $this->load_dom($html);

        $this->detected = [];
        $this->position_counter = 0;

        // Get patterns sorted by priority (highest first)
        $patterns = PatternRegistry::get_patterns_by_priority();

        foreach ($patterns as $name => $config) {
            if (!($config['enabled'] ?? true)) {
                continue;
            }

            foreach ($this->detect_pattern($name, $config) as $match) {
                // The extractor runs before the node is serialised, so the
                // stored HTML reflects anything the extractor did to the node.
                $extracted = $this->run_extractor($name, $config, $match['node']);

                $this->detected[] = [
                    'pattern' => $name,
                    'node' => $match['node'],
                    'xpath' => $match['xpath'],
                    'html' => $this->dom->saveHTML($match['node']),
                    'extracted' => $extracted,
                    'position' => $this->get_node_position($match['node']),
                    'confidence' => $config['confidence'] ?? 1.0,
                    'priority' => $config['priority'] ?? 5,
                ];
            }
        }

        // v1.3.2: Remove nested patterns (lower-priority children)
        $this->remove_nested_patterns();

        // Sort by document position. The tie-breakers keep the order
        // deterministic when several patterns matched the very same node.
        usort($this->detected, function ($a, $b) {
            return ($a['position'] <=> $b['position'])
                ?: ($b['priority'] <=> $a['priority'])
                ?: ($b['confidence'] <=> $a['confidence']);
        });

        return $this->detected;
    }

    /**
     * Run a pattern's extractor on a matched node
     *
     * Exceptions thrown by the extractor are logged (when WP_DEBUG is on)
     * and otherwise ignored, so one failing extractor does not abort detection.
     *
     * @param string $name Pattern name (used in the log message)
     * @param array $config Pattern configuration
     * @param \DOMNode $node Matched node
     * @return mixed Extractor result, or null when there is no callable extractor or it threw
     */
    private function run_extractor($name, $config, $node) {
        if (!isset($config['extractor']) || !is_callable($config['extractor'])) {
            return null;
        }

        try {
            return call_user_func($config['extractor'], $node);
        } catch (\Exception $e) {
            // Log extraction errors but don't fail
            if (defined('WP_DEBUG') && WP_DEBUG) {
                // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log
                error_log(
                    sprintf(
                        /* translators: 1: Pattern name, 2: Error message */
                        esc_html('Error extracting pattern "%1$s": %2$s'),
                        esc_html($name),
                        esc_html($e->getMessage())
                    )
                );
            }
        }

        return null;
    }

    /**
     * Remove matches that are nested inside a match that ranks before them
     *
     * This prevents double-counting when a specific pattern (e.g., code_block_pro)
     * contains generic child elements that match lower-priority patterns (e.g., code).
     *
     * Matches are ranked by priority (high to low), then confidence (high to
     * low). A match is dropped when its node is a descendant of the node of
     * any match that was ranked earlier and kept. Note that this also drops a
     * match nested inside an equal-priority match that was ranked earlier.
     *
     * Examples:
     * - code_block_pro (priority 6) contains <pre class="wp-block-code"> (code, priority 5)
     *   → Keep code_block_pro, remove nested code
     *
     * - Multiple accordions side-by-side (same priority)
     *   → Keep both (not nested, just siblings)
     *
     * @return void
     */
    private function remove_nested_patterns() {
        $filtered = [];

        // Spaceship comparisons keep the ordering correct for float
        // priorities, where subtraction would be truncated by usort().
        usort($this->detected, function ($a, $b) {
            return ($b['priority'] <=> $a['priority'])
                ?: ($b['confidence'] <=> $a['confidence']);
        });

        foreach ($this->detected as $match) {
            $is_nested = false;

            // Check if this node is a descendant of any match already kept
            foreach ($filtered as $higher_priority_match) {
                if (!$this->is_descendant_of($match['node'], $higher_priority_match['node'])) {
                    continue;
                }

                $is_nested = true;

                // Debug logging
                if (defined('WP_DEBUG') && WP_DEBUG && defined('WP_DEBUG_LOG') && WP_DEBUG_LOG) {
                    // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log
                    error_log(
                        sprintf(
                            /* translators: 1: Nested pattern name, 2: Parent pattern name */
                            esc_html('[STCW Headless] Removing nested pattern "%1$s" inside "%2$s"'),
                            esc_html($match['pattern']),
                            esc_html($higher_priority_match['pattern'])
                        )
                    );
                }

                break;
            }

            if (!$is_nested) {
                $filtered[] = $match;
            }
        }

        $this->detected = $filtered;
    }

    /**
     * Check if a node is a descendant of another node
     *
     * Walks up the DOM tree from $node to see if $ancestor is encountered.
     * A node is not considered a descendant of itself.
     *
     * @param \DOMNode $node Potential descendant
     * @param \DOMNode $ancestor Potential ancestor
     * @return bool True if node is inside ancestor
     */
    private function is_descendant_of($node, $ancestor) {
        for ($parent = $node->parentNode; $parent; $parent = $parent->parentNode) {
            if ($parent === $ancestor) {
                return true;
            }
        }

        return false;
    }

    /**
     * Detect specific pattern using its selectors
     *
     * Selectors starting with "/" are used as XPath verbatim; everything
     * else is converted from CSS. A node matched by several selectors of the
     * same pattern is reported once, for the first selector that matched it.
     *
     * @param string $name Pattern name
     * @param array $config Pattern configuration
     * @return array Matched nodes, each as ['node' => DOMNode, 'xpath' => string]
     */
    private function detect_pattern($name, $config) {
        $matches = [];
        $seen = [];
        $selectors = $config['selectors'] ?? [];

        foreach ($selectors as $selector) {
            try {
                // Convert CSS selector to XPath if needed
                $xpath_query = (strpos($selector, '/') === 0)
                    ? $selector
                    : $this->css_to_xpath($selector);

                $nodes = $this->xpath->query($xpath_query);

                if ($nodes === false) {
                    continue;
                }

                foreach ($nodes as $node) {
                    // Only retained nodes are recorded in $seen: they stay
                    // referenced from $matches, so their hash cannot be reused.
                    $node_key = spl_object_hash($node);
                    if (isset($seen[$node_key])) {
                        continue;
                    }

                    // Apply validators if defined
                    if (!empty($config['validators'])
                        && !$this->validate_pattern($node, $config['validators'])
                    ) {
                        continue;
                    }

                    $seen[$node_key] = true;
                    $matches[] = [
                        'node' => $node,
                        'xpath' => $xpath_query,
                    ];
                }
            } catch (\Exception $e) {
                // Log XPath errors but continue
                if (defined('WP_DEBUG') && WP_DEBUG) {
                    // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log
                    error_log(
                        sprintf(
                            /* translators: 1: Pattern name, 2: Error message */
                            esc_html('Error detecting pattern "%1$s": %2$s'),
                            esc_html($name),
                            esc_html($e->getMessage())
                        )
                    );
                }
            }
        }

        return $matches;
    }

    /**
     * Validate pattern match using custom validators
     *
     * A validator is either a callable or the name of a method on this
     * class. Entries that are neither are skipped (treated as passing).
     *
     * @param \DOMNode $node Matched node
     * @param array $validators Validator callables or method names
     * @return bool True if every applicable validator accepted the node
     */
    private function validate_pattern($node, $validators) {
        foreach ($validators as $validator) {
            if (is_callable($validator)) {
                if (!call_user_func($validator, $node)) {
                    return false;
                }
            } elseif (is_string($validator) && method_exists($this, $validator)) {
                // is_string() guard: method_exists() rejects non-string names.
                if (!$this->$validator($node)) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Validator: Check if node has accordion structure
     *
     * Accepts the node when it has a descendant whose class attribute
     * contains "accordion-pane" or "accordion-item", or a <details> descendant.
     *
     * @param \DOMNode $node Node to validate
     * @return bool True if valid accordion
     */
    private function has_accordion_structure($node) {
        $xpath = new \DOMXPath($node->ownerDocument);

        // Check for accordion panes/items
        $panes = $xpath->query('.//*[contains(@class, "accordion-pane") or contains(@class, "accordion-item")]', $node);

        if ($panes !== false && $panes->length > 0) {
            return true;
        }

        // Check for details/summary structure
        $details = $xpath->query('.//details', $node);

        return $details !== false && $details->length > 0;
    }

    /**
     * Convert CSS selector to XPath
     *
     * Supported syntax:
     * - Element selectors (div, p, h1) and the universal selector (*)
     * - Class selectors (.my-class), including several on one element
     *   (.class1.class2, div.class1.class2)
     * - ID selectors (#my-id)
     * - Attribute selectors ([attr="value"], [attr*="value"], [attr])
     * - Any combination of the above on one element (div#id.cls[attr])
     * - Descendant combinators (space) and child combinators (>), mixed freely
     *
     * Any part that cannot be parsed (pseudo-classes, sibling combinators,
     * bare words such as "wp-block-code", ...) falls back to a substring
     * test against the class attribute. An empty selector yields an XPath
     * that matches nothing.
     *
     * @param string $css CSS selector
     * @return string XPath query anchored at the document ("//...")
     */
    private function css_to_xpath($css) {
        $match_nothing = '//*[false()]';
        $css = trim($css);

        if ($css === '') {
            return $match_nothing;
        }

        // Split on combinators. The lookahead rejects whitespace or ">" that
        // sits inside an attribute selector such as [data-x="a b"].
        $tokens = preg_split(
            '/(\s*>\s*|\s+)(?![^\[\]]*\])/',
            $css,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        if ($tokens === false) {
            $tokens = [$css];
        }

        $xpath = '';
        $axis = '//';

        foreach ($tokens as $token) {
            $token = trim($token);

            if ($token === '') {
                // Whitespace delimiter: descendant combinator
                $axis = '//';
                continue;
            }

            if ($token === '>') {
                $axis = '/';
                continue;
            }

            // The first step always searches the whole document, even when
            // the selector starts with a stray combinator.
            $xpath .= ($xpath === '' ? '//' : $axis) . $this->css_to_xpath_single($token);
            $axis = '//';
        }

        return $xpath === '' ? $match_nothing : $xpath;
    }

    /**
     * Convert single CSS selector part to XPath
     *
     * Helper for css_to_xpath(): turns one compound selector (no
     * combinators) into a single XPath step WITHOUT a leading axis, e.g.
     * "div.a.b" becomes "div[<class a test> and <class b test>]".
     *
     * @param string $css Single CSS selector part
     * @return string XPath step (node test plus optional predicate)
     */
    private function css_to_xpath_single($css) {
        $css = trim($css);

        // Remove leading // if present
        if (strpos($css, '//') === 0) {
            $css = (string) substr($css, 2);
        }

        $fallback = '*[contains(@class, ' . $this->xpath_literal($css) . ')]';

        $tag = '*';
        $rest = $css;

        // Optional element name. Hyphens are deliberately not accepted here so
        // that bare words like "wp-block-code" keep using the class fallback.
        if (preg_match('/^(\*|[a-z][a-z0-9]*)/i', $rest, $m)) {
            // Names are lower-cased to match the parsed HTML, since XPath name
            // tests are case-sensitive.
            $tag = strtolower($m[1]);
            $rest = (string) substr($rest, strlen($m[0]));
        }

        $predicates = [];

        while ($rest !== '') {
            if (preg_match('/^\.([a-z0-9_-]+)/i', $rest, $m)) {
                // Whole-token class match
                $predicates[] = "contains(concat(' ', normalize-space(@class), ' '), ' {$m[1]} ')";
            } elseif (preg_match('/^#([a-z0-9_-]+)/i', $rest, $m)) {
                $predicates[] = "@id='{$m[1]}'";
            } elseif (preg_match('/^\[\s*([a-z][a-z0-9_-]*)\s*\]/i', $rest, $m)) {
                // Attribute exists
                $predicates[] = "@{$m[1]}";
            } elseif (preg_match('/^\[\s*([a-z][a-z0-9_-]*)\s*(\*?)=\s*["\']([^"\']+)["\']\s*\]/i', $rest, $m)) {
                // The value cannot contain quotes (see the pattern), so it is
                // safe inside a single-quoted XPath literal.
                $predicates[] = ($m[2] === '*')
                    ? "contains(@{$m[1]}, '{$m[3]}')"
                    : "@{$m[1]}='{$m[3]}'";
            } else {
                // Unsupported syntax: treat the whole part as a class substring
                return $fallback;
            }

            $rest = (string) substr($rest, strlen($m[0]));
        }

        if (!$predicates) {
            return $tag;
        }

        return $tag . '[' . implode(' and ', $predicates) . ']';
    }

    /**
     * Quote a string as an XPath 1.0 string literal
     *
     * XPath 1.0 has no escape character, so a value containing both quote
     * styles is assembled with concat().
     *
     * @param string $value Raw string
     * @return string XPath expression evaluating to that string
     */
    private function xpath_literal($value) {
        if (strpos($value, "'") === false) {
            return "'{$value}'";
        }

        if (strpos($value, '"') === false) {
            return "\"{$value}\"";
        }

        return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
    }

    /**
     * Get node's position in document (for sorting)
     *
     * The index is the number of nodes that start before $node in document
     * order (its ancestors plus everything preceding it). If that cannot be
     * computed, a running counter is used instead.
     *
     * @param \DOMNode $node Node to get position for
     * @return int Position index
     */
    private function get_node_position($node) {
        if ($this->xpath instanceof \DOMXPath && $node instanceof \DOMNode) {
            $index = $this->xpath->evaluate(
                'count(preceding::node()) + count(ancestor::node())',
                $node
            );

            if (is_float($index) || is_int($index)) {
                return (int) $index;
            }
        }

        return $this->position_counter++;
    }

    /**
     * Load HTML into DOMDocument
     *
     * Parser warnings are collected internally and discarded; the caller's
     * libxml error-handling mode is restored afterwards.
     *
     * @param string $html HTML content
     * @return void
     */
    private function load_dom($html) {
        $this->dom = new \DOMDocument('1.0', 'UTF-8');

        // Suppress HTML5 errors, remembering the previous setting
        $previous_setting = libxml_use_internal_errors(true);

        // Load HTML with UTF-8 encoding
        $this->dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

        // Clear errors and hand the global libxml state back unchanged
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);

        // Create XPath instance
        $this->xpath = new \DOMXPath($this->dom);
    }

    /**
     * Get detection statistics
     *
     * Summarises the matches of the most recent detect() call.
     *
     * @return array Statistics: total_patterns, by_pattern (name => count)
     *               and by_confidence (high/medium/low counts)
     */
    public function get_stats() {
        $stats = [
            'total_patterns' => count($this->detected),
            'by_pattern' => [],
            'by_confidence' => [
                'high' => 0,    // >= 0.95
                'medium' => 0,  // 0.85 - 0.94
                'low' => 0,     // < 0.85
            ],
        ];

        foreach ($this->detected as $match) {
            $pattern_name = $match['pattern'];
            $confidence = $match['confidence'];

            // Count by pattern
            $stats['by_pattern'][$pattern_name] = ($stats['by_pattern'][$pattern_name] ?? 0) + 1;

            // Count by confidence
            if ($confidence >= 0.95) {
                $stats['by_confidence']['high']++;
            } elseif ($confidence >= 0.85) {
                $stats['by_confidence']['medium']++;
            } else {
                $stats['by_confidence']['low']++;
            }
        }

        return $stats;
    }
}
