<?php
/**
 * Smart Matching Algorithm
 * 
 * This file contains the AI-powered matching logic for finding relevant pages
 * based on URL keywords and typo detection.
 */

// Stop immediately unless ABSPATH is defined, so the file does nothing when it
// is requested directly instead of being loaded by the host application
// (presumably the WordPress bootstrap, which normally defines ABSPATH).
if (!defined('ABSPATH')) {
    exit;
}

/**
 * Find the best matching page for a 404 URL using smart algorithms.
 *
 * Strategies are tried in priority order and the first one that produces a
 * match wins:
 *   1. typo detection   - only when $options['typo_detection_enabled'] === '1'
 *   2. keyword matching - only when $options['smart_matching_enabled'] === '1'
 *   3. taxonomy matching - always attempted
 *
 * @param string $requested_url The 404 URL
 * @param array $options Plugin options
 * @return array|false Array with 'url', 'type', 'score' or false if no match
 */
function nandann_smart_ai_404_find_smart_match($requested_url, $options) {
    if (!is_array($options)) {
        $options = array();
    }

    // wp_parse_url() yields null/false when the URL has no path or cannot be
    // parsed; there is nothing to match against in that case.
    $path = wp_parse_url($requested_url, PHP_URL_PATH);
    if (!is_string($path) || $path === '') {
        return false;
    }

    $slug = nandann_smart_ai_404_extract_slug_from_path($path);
    if (empty($slug)) {
        return false;
    }

    // A missing option key is treated as "disabled" instead of raising an
    // undefined-index notice.
    $typo_enabled = isset($options['typo_detection_enabled'])
        && $options['typo_detection_enabled'] === '1';
    $keywords_enabled = isset($options['smart_matching_enabled'])
        && $options['smart_matching_enabled'] === '1';

    // 1. Near-identical slug (typo detection)
    if ($typo_enabled) {
        $typo_match = nandann_smart_ai_404_find_typo_match($slug, $options);
        if ($typo_match) {
            return $typo_match;
        }
    }

    // 2. Keyword-based matching
    if ($keywords_enabled) {
        $keyword_match = nandann_smart_ai_404_find_keyword_match($slug, $options);
        if ($keyword_match) {
            return $keyword_match;
        }
    }

    // 3. Category/tag matching
    $taxonomy_match = nandann_smart_ai_404_find_taxonomy_match($slug);
    if ($taxonomy_match) {
        return $taxonomy_match;
    }

    return false;
}

/**
 * Find matches based on typo detection using Levenshtein distance.
 *
 * Compares the requested slug against the slugs of up to 500 published
 * posts/pages (plus products when WooCommerce is active) and, when WooCommerce
 * is active, up to 200 non-empty product categories. The candidate with the
 * smallest edit distance that is within the configured threshold wins; on a
 * tie the candidate seen first is kept.
 *
 * @param string $slug Sanitized slug taken from the 404 URL
 * @param array $options Plugin options; 'typo_threshold' is the maximum
 *                       accepted edit distance (treated as 0 when missing)
 * @return array|null Array with 'url', 'type', 'score' and 'post_id' or
 *                    'term_id', or null if nothing is close enough
 */
function nandann_smart_ai_404_find_typo_match($slug, $options) {
    $threshold = isset($options['typo_threshold']) ? intval($options['typo_threshold']) : 0;
    $needle = strtolower((string) $slug);
    $best_match = null;
    $best_distance = PHP_INT_MAX;

    // Build post types array - include 'product' only if WooCommerce is active
    $post_types = array('post', 'page');
    if (class_exists('WooCommerce')) {
        $post_types[] = 'product';
    }

    // Get all published posts, pages, and products
    $args = array(
        'post_type' => $post_types,
        'post_status' => 'publish',
        'posts_per_page' => 500, // Limit for performance
        'fields' => 'ids',
        'no_found_rows' => true,
        'update_post_meta_cache' => false,
        'update_post_term_cache' => false
    );

    $posts = get_posts($args);

    // An ID-only query does not load the post rows; fetch them in one query
    // so get_post_field() below does not hit the database once per post.
    if (!empty($posts)) {
        _prime_post_caches($posts, false, false);
    }

    foreach ($posts as $post_id) {
        $distance = nandann_smart_ai_404_bounded_levenshtein(
            $needle,
            get_post_field('post_name', $post_id),
            $threshold
        );

        if ($distance < 0 || $distance >= $best_distance) {
            continue;
        }

        $url = get_permalink($post_id);
        if (!$url) {
            continue; // get_permalink() returns false for a missing post
        }

        $best_distance = $distance;
        $best_match = array(
            'url' => $url,
            'type' => 'typo_match',
            // Clamp so a large threshold can never yield a negative score
            'score' => max(0, 100 - ($distance * 10)),
            'post_id' => $post_id
        );

        if ($distance === 0) {
            return $best_match; // Exact slug; nothing can beat it
        }
    }

    // Also check WooCommerce product categories if WooCommerce is active
    if (class_exists('WooCommerce') && taxonomy_exists('product_cat')) {
        $terms = get_terms(array(
            'taxonomy' => 'product_cat',
            'hide_empty' => true,
            'number' => 200,
        ));

        if (!is_wp_error($terms) && !empty($terms)) {
            foreach ($terms as $term) {
                $distance = nandann_smart_ai_404_bounded_levenshtein(
                    $needle,
                    $term->slug,
                    $threshold
                );

                if ($distance < 0 || $distance >= $best_distance) {
                    continue;
                }

                // get_term_link() returns a WP_Error on failure; never hand
                // that back as a redirect target.
                $url = get_term_link($term);
                if (is_wp_error($url)) {
                    continue;
                }

                $best_distance = $distance;
                $best_match = array(
                    'url' => $url,
                    'type' => 'typo_match',
                    'score' => max(0, 100 - ($distance * 10)),
                    'term_id' => $term->term_id
                );

                if ($distance === 0) {
                    return $best_match;
                }
            }
        }
    }

    return $best_match;
}

/**
 * Case-insensitive Levenshtein distance that only reports usable results.
 *
 * @param string $needle Already lower-cased slug to compare
 * @param string $candidate Candidate slug (lower-cased here)
 * @param int $threshold Maximum accepted distance
 * @return int The distance when it is within the threshold, otherwise -1
 */
function nandann_smart_ai_404_bounded_levenshtein($needle, $candidate, $threshold) {
    $candidate = strtolower((string) $candidate);

    // The edit distance is never smaller than the difference in length, so
    // candidates that differ too much in length can be rejected without
    // running the comparison at all.
    if (abs(strlen($needle) - strlen($candidate)) > $threshold) {
        return -1;
    }

    $distance = levenshtein($needle, $candidate);

    // Before PHP 8.0 levenshtein() returns -1 when an argument is longer than
    // 255 characters; that must not be mistaken for a perfect match.
    if ($distance < 0 || $distance > $threshold) {
        return -1;
    }

    return $distance;
}

/**
 * Find matches based on keyword similarity.
 *
 * Keywords are extracted from the slug and scored against up to 100 published
 * items of each searched post type and, when WooCommerce is active, up to 100
 * non-empty product categories. The highest-scoring candidate that reaches
 * the configured threshold wins; on a tie the candidate seen first is kept.
 *
 * @param string $slug Sanitized slug taken from the 404 URL
 * @param array $options Plugin options; 'keyword_threshold' is the minimum
 *                       accepted score (treated as 0 when missing)
 * @return array|null|false Array with 'url', 'type', 'score' and 'post_id' or
 *                          'term_id'; false when the slug has no usable
 *                          keywords; null when nothing scores high enough
 */
function nandann_smart_ai_404_find_keyword_match($slug, $options) {
    $threshold = isset($options['keyword_threshold']) ? intval($options['keyword_threshold']) : 0;

    // Extract keywords from the slug
    $keywords = nandann_smart_ai_404_extract_keywords($slug);

    if (empty($keywords)) {
        return false;
    }

    $keyword_count = count($keywords);
    $best_match = null;
    $best_score = 0;

    // Build post types array - include 'product' only if WooCommerce is active
    $post_types = array('post', 'page');
    if (class_exists('WooCommerce')) {
        $post_types[] = 'product';
    }

    foreach ($post_types as $post_type) {
        $args = array(
            'post_type' => $post_type,
            'post_status' => 'publish',
            'posts_per_page' => 100,
            'fields' => 'ids',
            'no_found_rows' => true,
            'update_post_meta_cache' => false,
            'update_post_term_cache' => false
        );

        $posts = get_posts($args);

        // Scoring reads each post plus its categories and tags. Load the
        // posts and their terms in bulk so that does not cost several
        // queries per candidate.
        if (!empty($posts)) {
            _prime_post_caches($posts, true, false);
        }

        foreach ($posts as $post_id) {
            $score = nandann_smart_ai_404_calculate_match_score($post_id, $keywords);

            if ($score < $threshold || $score <= $best_score) {
                continue;
            }

            $url = get_permalink($post_id);
            if (!$url) {
                continue; // get_permalink() returns false for a missing post
            }

            $best_score = $score;
            $best_match = array(
                'url' => $url,
                'type' => 'keyword_match',
                'score' => $score,
                'post_id' => $post_id
            );
        }
    }

    // Also check WooCommerce product categories if WooCommerce is active
    if (class_exists('WooCommerce') && taxonomy_exists('product_cat')) {
        $terms = get_terms(array(
            'taxonomy' => 'product_cat',
            'hide_empty' => true,
            'number' => 100,
        ));

        if (!is_wp_error($terms) && !empty($terms)) {
            foreach ($terms as $term) {
                // Score = percentage of keywords found in the term name or description
                $term_text = strtolower($term->name . ' ' . $term->description);
                $matches = 0;

                foreach ($keywords as $keyword) {
                    if (strpos($term_text, strtolower($keyword)) !== false) {
                        $matches++;
                    }
                }

                $score = ($matches / $keyword_count) * 100;

                if ($score < $threshold || $score <= $best_score) {
                    continue;
                }

                // get_term_link() returns a WP_Error on failure; never hand
                // that back as a redirect target.
                $url = get_term_link($term);
                if (is_wp_error($url)) {
                    continue;
                }

                $best_score = $score;
                $best_match = array(
                    'url' => $url,
                    'type' => 'keyword_match',
                    'score' => $score,
                    'term_id' => $term->term_id
                );
            }
        }
    }

    return $best_match;
}

/**
 * Find matches in taxonomies (categories, tags, product categories).
 *
 * Exact slug lookups are tried first, in this order: category (score 95),
 * post tag (score 90) and, when WooCommerce is active, product category
 * (score 95). If none match, categories and tags are searched for the slug
 * text and the first result with a usable link is returned (score 85).
 *
 * @param string $slug Sanitized slug taken from the 404 URL
 * @return array|false Array with 'url', 'type', 'score' or false if no match
 */
function nandann_smart_ai_404_find_taxonomy_match($slug) {
    // Clean the slug
    $search_term = sanitize_text_field($slug);

    // An empty search would make the term search below match every term and
    // return an arbitrary one.
    if ($search_term === '') {
        return false;
    }

    // taxonomy, result type, score - in priority order
    $exact_lookups = array(
        array('category', 'category_match', 95),
        array('post_tag', 'tag_match', 90),
    );
    if (class_exists('WooCommerce') && taxonomy_exists('product_cat')) {
        $exact_lookups[] = array('product_cat', 'product_category_match', 95);
    }

    foreach ($exact_lookups as $lookup) {
        list($taxonomy, $type, $score) = $lookup;

        $term = get_term_by('slug', $search_term, $taxonomy);
        if (!$term) {
            continue;
        }

        // get_term_link() returns a WP_Error on failure; skip rather than
        // return an error object as the redirect target.
        $url = get_term_link($term);
        if (is_wp_error($url)) {
            continue;
        }

        return array(
            'url' => $url,
            'type' => $type,
            'score' => $score
        );
    }

    // Search for similar taxonomy terms
    $terms = get_terms(array(
        'taxonomy' => array('category', 'post_tag'),
        'hide_empty' => true,
        'search' => $search_term
    ));

    if (!empty($terms) && !is_wp_error($terms)) {
        foreach ($terms as $term) {
            $url = get_term_link($term);
            if (is_wp_error($url)) {
                continue;
            }

            return array(
                'url' => $url,
                'type' => 'taxonomy_similar',
                'score' => 85
            );
        }
    }

    return false;
}

/**
 * Extract slug from URL path.
 *
 * Takes the last path segment, drops a trailing query string and a
 * .html/.htm/.php/.asp extension, and passes the result through
 * sanitize_title().
 *
 * @param string|null|false $path URL path; null/false (a URL with no path, or
 *                                one that could not be parsed) is accepted
 * @return string Sanitized slug, or '' when there is no usable path
 */
function nandann_smart_ai_404_extract_slug_from_path($path) {
    // Passing null to trim() is deprecated as of PHP 8.1, and a non-string
    // path has no slug anyway.
    if (!is_string($path) || $path === '') {
        return '';
    }

    $segments = explode('/', trim($path, '/'));

    // Get the last meaningful part of the URL
    $slug = end($segments);

    // Remove query parameters and file extensions
    $slug = preg_replace('/\?.*$/', '', $slug);
    $slug = preg_replace('/\.(html|htm|php|asp)$/i', '', $slug);

    return sanitize_title($slug);
}

/**
 * Extract keywords from slug.
 *
 * Splits the slug on '-', '_', '.' and whitespace, drops standalone numbers,
 * stop words and words of two characters or fewer, lower-cases what is left
 * and removes duplicates.
 *
 * @param string $slug Slug to break into keywords
 * @return array Lower-cased unique keywords in order of first appearance,
 *               indexed sequentially from 0
 */
function nandann_smart_ai_404_extract_keywords($slug) {
    // Replace common separators with spaces
    $text = str_replace(array('-', '_', '.'), ' ', strtolower((string) $slug));

    // Remove numbers if they're standalone
    $text = (string) preg_replace('/\b\d+\b/', '', $text);

    // Filter out common stop words and short words
    $stop_words = array('the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by');
    $keywords = array();

    // Splitting on whitespace runs with PREG_SPLIT_NO_EMPTY avoids the empty
    // strings that consecutive separators would otherwise produce.
    foreach (preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) as $word) {
        if (strlen($word) > 2 && !in_array($word, $stop_words, true)) {
            $keywords[] = $word;
        }
    }

    // array_unique() keeps the original keys, which leaves gaps wherever a
    // duplicate was dropped; reindex so callers always get a plain list.
    return array_values(array_unique($keywords));
}

/**
 * Calculate match score between keywords and post.
 *
 * Every keyword earns points for each place it appears in: title (25),
 * slug (20), excerpt (15), content with tags stripped (10), each category
 * name (15) and each tag name (12). The total is expressed as a percentage of
 * 25 points per keyword and capped at 100, so a keyword found in the title
 * alone already counts as fully matched.
 *
 * @param int $post_id ID of the post to score
 * @param array $keywords Keywords extracted from the requested slug
 * @return int|float Rounded score from 0 to 100; 0 if the post does not exist
 */
function nandann_smart_ai_404_calculate_match_score($post_id, $keywords) {
    $post = get_post($post_id);
    if (!$post) {
        return 0;
    }

    $score = 0;
    $max_score = count($keywords) * 25; // Maximum possible score

    // Get post data for comparison
    $post_title = strtolower($post->post_title);
    $post_slug = strtolower($post->post_name);
    $post_content = strtolower(wp_strip_all_tags($post->post_content));
    $post_excerpt = strtolower($post->post_excerpt);

    // Term names do not depend on the keyword, so look them up once instead
    // of once per keyword.
    $category_names = array();
    $categories = get_the_category($post_id);
    if (is_array($categories)) {
        foreach ($categories as $category) {
            $category_names[] = strtolower($category->name);
        }
    }

    // get_the_tags() returns false when there are no tags and may return a
    // WP_Error; only an array holds term objects.
    $tag_names = array();
    $tags = get_the_tags($post_id);
    if (is_array($tags)) {
        foreach ($tags as $tag) {
            $tag_names[] = strtolower($tag->name);
        }
    }

    foreach ($keywords as $keyword) {
        $keyword = strtolower($keyword);

        // Check title (highest weight)
        if (strpos($post_title, $keyword) !== false) {
            $score += 25;
        }

        // Check slug (high weight)
        if (strpos($post_slug, $keyword) !== false) {
            $score += 20;
        }

        // Check excerpt (medium weight)
        if (!empty($post_excerpt) && strpos($post_excerpt, $keyword) !== false) {
            $score += 15;
        }

        // Check content (lower weight due to noise)
        if (strpos($post_content, $keyword) !== false) {
            $score += 10;
        }

        // Check categories and tags
        foreach ($category_names as $category_name) {
            if (strpos($category_name, $keyword) !== false) {
                $score += 15;
            }
        }

        foreach ($tag_names as $tag_name) {
            if (strpos($tag_name, $keyword) !== false) {
                $score += 12;
            }
        }
    }

    // Normalize score to 0-100
    if ($max_score > 0) {
        $score = min(100, ($score / $max_score) * 100);
    }

    return round($score);
}

/**
 * Case-insensitive similarity of two strings, as a percentage.
 *
 * Both strings are lower-cased and compared with PHP's similar_text(); the
 * percentage it reports is returned unchanged.
 *
 * @param string $str1 First string
 * @param string $str2 Second string
 * @return float|int Similarity percentage as reported by similar_text()
 */
function nandann_smart_ai_404_get_similarity_percentage($str1, $str2) {
    $first = strtolower($str1);
    $second = strtolower($str2);

    // similar_text() writes the percentage into its third argument
    $percentage = 0;
    similar_text($first, $second, $percentage);

    return $percentage;
}

