<?php
/**
 * SEO Insights Class
 * Analyzes conversation data to provide SEO-actionable insights
 */

// Bail out when this file is requested directly rather than loaded by WordPress.
defined('ABSPATH') || exit;

class Chatly_Insights {

    /**
     * Common stop words to exclude from keyword analysis.
     *
     * All entries are lowercase because input text is lowercased before lookup.
     *
     * @var string[]
     */
    private $stop_words = array(
        'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
        'must', 'shall', 'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with',
        'at', 'by', 'from', 'as', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
        'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where',
        'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
        'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'i', 'me', 'my', 'myself',
        'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he',
        'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they',
        'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that',
        'these', 'those', 'am', 'having', 'doing',
        'about', 'hi', 'hello', 'hey', 'thanks', 'thank', 'please', 'yes', 'ok', 'okay',
        'get', 'got', 'like', 'want', 'know', 'think', 'see', 'look', 'make', 'go', 'come',
        'take', 'use', 'find', 'give', 'tell', 'ask', 'work', 'seem', 'feel', 'try', 'leave',
        'call', 'keep', 'let', 'begin', 'show', 'hear', 'play', 'run', 'move', 'live', 'believe',
        'hold', 'bring', 'happen', 'write', 'provide', 'sit', 'stand', 'lose', 'pay', 'meet',
        'include', 'continue', 'set', 'learn', 'change', 'lead', 'understand', 'watch', 'follow',
        'stop', 'create', 'speak', 'read', 'allow', 'add', 'spend', 'grow', 'open', 'walk',
        'win', 'offer', 'remember', 'love', 'consider', 'appear', 'buy', 'wait', 'serve', 'die',
        'send', 'expect', 'build', 'stay', 'fall', 'cut', 'reach', 'kill', 'remain', 'im', 'ive',
        'dont', 'cant', 'wont', 'didnt', 'doesnt', 'isnt', 'arent', 'wasnt', 'werent', 'youre'
    );

    /**
     * Stop words flipped into a word => index map so membership checks are a
     * hash lookup instead of a linear scan. Built lazily by is_stop_word().
     *
     * @var array|null
     */
    private $stop_word_lookup = null;

    /**
     * Leading interrogatives that mark a message as a question even when it
     * contains no question mark.
     *
     * @var string[]
     */
    private $question_prefixes = array('what', 'how', 'when', 'where', 'why');

    /**
     * Get the most frequently asked questions from user messages.
     *
     * A message counts as a question when it contains "?" or starts with one of
     * the configured interrogatives. Messages are grouped by their normalized
     * text; the first phrasing encountered (the most recent one, since rows are
     * read newest first) is kept for display.
     *
     * @param int $limit Maximum number of questions to return.
     * @param int $days  Look-back window in days.
     * @return array[] List of array('question' => string, 'count' => int), most frequent first.
     */
    public function get_top_questions($limit = 20, $days = 30) {
        global $wpdb;
        $messages_table = $wpdb->prefix . 'chatly_messages';

        $date_limit = $this->get_date_limit($days);

        // One LIKE pattern per question indicator: "?" anywhere, or a leading interrogative.
        $patterns = array('%' . $wpdb->esc_like('?') . '%');
        foreach ($this->question_prefixes as $prefix) {
            $patterns[] = $wpdb->esc_like($prefix . ' ') . '%';
        }
        $like_sql = implode(' OR ', array_fill(0, count($patterns), 'content LIKE %s'));

        $messages = $wpdb->get_col($wpdb->prepare(
            "SELECT content FROM $messages_table
             WHERE role = 'user'
             AND created_at >= %s
             AND ($like_sql)
             ORDER BY created_at DESC",
            array_merge(array($date_limit), $patterns)
        ));

        if (empty($messages)) {
            return array();
        }

        return $this->rank_questions($messages, $limit);
    }

    /**
     * Normalize a question for comparison.
     *
     * Lowercases the text, removes punctuation and collapses whitespace.
     * Trimming happens last so that punctuation separated from the sentence by
     * a space ("refund policy ?") does not leave a stray leading/trailing space
     * that would make otherwise identical questions compare as different.
     *
     * @param string $question Raw question text.
     * @return string Normalized text.
     */
    private function normalize_question($question) {
        $normalized = $this->strip_punctuation($this->to_lower($question), '');

        $collapsed = preg_replace('/\s+/u', ' ', $normalized);
        if (null === $collapsed) {
            // Invalid UTF-8: fall back to byte-wise matching.
            $collapsed = preg_replace('/\s+/', ' ', $normalized);
        }

        return trim((string) $collapsed);
    }

    /**
     * Count how often each keyword appears in user messages.
     *
     * @param int $limit Maximum number of keywords to return.
     * @param int $days  Look-back window in days.
     * @return array Map of keyword => occurrence count, most frequent first.
     */
    public function get_keyword_frequency($limit = 50, $days = 30) {
        $messages = $this->get_user_messages($days);

        if (empty($messages)) {
            return array();
        }

        $word_counts = $this->count_terms($messages, 'extract_keywords');

        // Sort by count
        arsort($word_counts);

        // Return top keywords
        return array_slice($word_counts, 0, max(0, (int) $limit), true);
    }

    /**
     * Extract meaningful keywords from text.
     *
     * Keeps words of at least three characters that are neither stop words
     * nor purely numeric.
     *
     * @param string $text Raw message text.
     * @return string[] Keywords in order of appearance (duplicates preserved).
     */
    private function extract_keywords($text) {
        $keywords = array();
        foreach ($this->tokenize($text) as $word) {
            if ($this->text_length($word) >= 3 && !$this->is_stop_word($word) && !is_numeric($word)) {
                $keywords[] = $word;
            }
        }

        return $keywords;
    }

    /**
     * Count recurring keyword phrases (2-3 word combinations) in user messages.
     *
     * Only phrases that occur at least twice are returned.
     *
     * @param int $limit Maximum number of phrases to return.
     * @param int $days  Look-back window in days.
     * @return array Map of phrase => occurrence count, most frequent first.
     */
    public function get_keyword_phrases($limit = 30, $days = 30) {
        $messages = $this->get_user_messages($days);

        if (empty($messages)) {
            return array();
        }

        $phrase_counts = $this->count_terms($messages, 'extract_phrases');

        // Keep only phrases that appear more than once
        $phrase_counts = array_filter($phrase_counts, function($count) {
            return $count >= 2;
        });

        arsort($phrase_counts);

        return array_slice($phrase_counts, 0, max(0, (int) $limit), true);
    }

    /**
     * Extract 2-3 word phrases from text.
     *
     * @param string $text Raw message text.
     * @return string[] All 2-word phrases followed by all 3-word phrases.
     */
    private function extract_phrases($text) {
        $words = $this->tokenize($text);

        $phrases = array();
        $word_count = count($words);

        // 2-word phrases: at least one of the two words must not be a stop word
        for ($i = 0; $i < $word_count - 1; $i++) {
            if (!$this->is_stop_word($words[$i]) || !$this->is_stop_word($words[$i + 1])) {
                $phrase = $words[$i] . ' ' . $words[$i + 1];
                if ($this->text_length($phrase) >= 5) {
                    $phrases[] = $phrase;
                }
            }
        }

        // 3-word phrases: at least two of the three words must not be stop words
        for ($i = 0; $i < $word_count - 2; $i++) {
            $non_stop = 0;
            foreach (array($words[$i], $words[$i + 1], $words[$i + 2]) as $w) {
                if (!$this->is_stop_word($w)) {
                    $non_stop++;
                }
            }

            $phrase = $words[$i] . ' ' . $words[$i + 1] . ' ' . $words[$i + 2];
            if ($non_stop >= 2 && $this->text_length($phrase) >= 8) {
                $phrases[] = $phrase;
            }
        }

        return $phrases;
    }

    /**
     * Identify content gaps - questions that got poor responses.
     *
     * Finds conversations in which an assistant message contains one of the
     * "I don't know"-style indicator phrases, then counts the first user
     * message of each such conversation (usually the main topic).
     *
     * @param int $limit Maximum number of questions to return.
     * @param int $days  Look-back window in days (applied to the assistant messages).
     * @return array[] List of array('question' => string, 'count' => int), most frequent first.
     */
    public function get_content_gaps($limit = 20, $days = 30) {
        global $wpdb;
        $messages_table = $wpdb->prefix . 'chatly_messages';

        $date_limit = $this->get_date_limit($days);

        // Phrases that suggest the AI did not know or could not help
        $gap_indicators = array(
            "I don't have",
            "I'm not sure",
            "I don't know",
            "I cannot",
            "I can't",
            "not able to",
            "don't have information",
            "no information",
            "unable to",
            "beyond my",
            "outside my",
            "not familiar",
            "would need to check",
            "recommend contacting",
            "suggest reaching out",
            "best to contact"
        );

        $like_clauses = array();
        $like_values = array();
        foreach ($gap_indicators as $indicator) {
            $like_clauses[] = "content LIKE %s";
            $like_values[] = '%' . $wpdb->esc_like($indicator) . '%';
        }

        $where_likes = implode(' OR ', $like_clauses);

        // Find AI messages that indicate gaps
        $query = $wpdb->prepare(
            "SELECT conversation_id FROM $messages_table
             WHERE role = 'assistant'
             AND created_at >= %s
             AND ($where_likes)",
            array_merge(array($date_limit), $like_values)
        );

        // A conversation can contain several gap messages; de-duplicate so the
        // IN (...) list below holds each id only once.
        $gap_conv_ids = array_values(array_unique(array_map('intval', (array) $wpdb->get_col($query))));

        if (empty($gap_conv_ids)) {
            return array();
        }

        // Get the user questions from these conversations
        $ids_placeholder = implode(',', array_fill(0, count($gap_conv_ids), '%d'));

        $questions = $wpdb->get_results($wpdb->prepare(
            "SELECT content, conversation_id FROM $messages_table
             WHERE role = 'user'
             AND conversation_id IN ($ids_placeholder)
             ORDER BY created_at ASC",
            $gap_conv_ids
        ));

        if (empty($questions)) {
            return array();
        }

        // Rows are oldest first, so the first row seen per conversation is its opening question
        $first_questions = array();
        foreach ($questions as $q) {
            if (!isset($first_questions[$q->conversation_id])) {
                $first_questions[$q->conversation_id] = $q->content;
            }
        }

        return $this->rank_questions($first_questions, $limit);
    }

    /**
     * Generate FAQ Schema JSON-LD from the Knowledge Base.
     *
     * Entries whose question or answer is empty after tag stripping are
     * skipped, since a Question needs both a name and an answer text.
     *
     * @return string|null|false Pretty-printed JSON, null when there is nothing
     *                           to output, or false if JSON encoding fails.
     */
    public function generate_faq_schema() {
        global $wpdb;
        $kb_table = $wpdb->prefix . 'chatly_knowledge_base';

        $entries = $wpdb->get_results(
            "SELECT question, answer FROM $kb_table WHERE enabled = 1 ORDER BY priority DESC, id ASC"
        );

        if (empty($entries)) {
            return null;
        }

        $faq_items = array();
        foreach ($entries as $entry) {
            $question = wp_strip_all_tags($entry->question);
            $answer = wp_strip_all_tags($entry->answer);

            if ('' === $question || '' === $answer) {
                continue;
            }

            $faq_items[] = array(
                '@type' => 'Question',
                'name' => $question,
                'acceptedAnswer' => array(
                    '@type' => 'Answer',
                    'text' => $answer
                )
            );
        }

        if (empty($faq_items)) {
            return null;
        }

        $schema = array(
            '@context' => 'https://schema.org',
            '@type' => 'FAQPage',
            'mainEntity' => $faq_items
        );

        return wp_json_encode($schema, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
    }

    /**
     * Generate an FAQ HTML block from the Knowledge Base.
     *
     * Entries are grouped under a heading per category. The category wrapper
     * is closed whenever the category changes (including a change back to
     * "no category"), so the markup stays balanced regardless of row order.
     *
     * @return string HTML markup, or an empty string when there are no enabled entries.
     */
    public function generate_faq_html() {
        global $wpdb;
        $kb_table = $wpdb->prefix . 'chatly_knowledge_base';

        $entries = $wpdb->get_results(
            "SELECT question, answer, category FROM $kb_table WHERE enabled = 1 ORDER BY category ASC, priority DESC, id ASC"
        );

        if (empty($entries)) {
            return '';
        }

        $html = '<div class="chatly-faq">' . "\n";

        $current_category = '';
        foreach ($entries as $entry) {
            // NULL and '' both mean "uncategorised"
            $category = isset($entry->category) ? (string) $entry->category : '';

            if ($category !== $current_category) {
                if ('' !== $current_category) {
                    $html .= "</div>\n";
                }
                if ('' !== $category) {
                    $html .= '<div class="faq-category">' . "\n";
                    $html .= '<h3>' . esc_html($category) . '</h3>' . "\n";
                }
                $current_category = $category;
            }

            $html .= '<div class="faq-item">' . "\n";
            $html .= '<h4 class="faq-question">' . esc_html($entry->question) . '</h4>' . "\n";
            $html .= '<div class="faq-answer">' . wp_kses_post($entry->answer) . '</div>' . "\n";
            $html .= '</div>' . "\n";
        }

        if ('' !== $current_category) {
            $html .= "</div>\n";
        }

        $html .= '</div>';

        return $html;
    }

    /**
     * Check if Yoast SEO is active.
     *
     * @return bool True when the WPSEO_VERSION constant or the WPSEO_Options class exists.
     */
    public function is_yoast_active() {
        return defined('WPSEO_VERSION') || class_exists('WPSEO_Options');
    }

    /**
     * Get Yoast focus keywords for comparison.
     *
     * @return string[] Up to 100 distinct non-empty focus keywords read from post meta;
     *                  empty when Yoast is not active.
     */
    public function get_yoast_focus_keywords() {
        if (!$this->is_yoast_active()) {
            return array();
        }

        global $wpdb;

        // Get focus keywords from post meta
        $keywords = $wpdb->get_col(
            "SELECT DISTINCT meta_value FROM {$wpdb->postmeta}
             WHERE meta_key = '_yoast_wpseo_focuskw'
             AND meta_value != ''
             LIMIT 100"
        );

        return array_filter((array) $keywords);
    }

    /**
     * Compare chat keywords with Yoast focus keywords.
     *
     * A chat keyword "matches" a Yoast focus keyword when either string
     * contains the other (case-insensitive).
     *
     * @param int $days Look-back window in days for the chat keywords.
     * @return array {
     *     'matching'      => chat keyword => count, for keywords covered by a Yoast focus keyword,
     *     'opportunities' => chat keyword => count, for up to 20 uncovered keywords seen at least 3 times,
     *     'yoast_only'    => Yoast focus keywords that no chat keyword matched (original keys kept)
     * }
     */
    public function get_keyword_opportunities($days = 30) {
        $chat_keywords = $this->get_keyword_frequency(100, $days);
        $yoast_keywords = $this->get_yoast_focus_keywords();

        if (empty($chat_keywords)) {
            return array(
                'matching' => array(),
                'opportunities' => array(),
                'yoast_only' => $yoast_keywords
            );
        }

        // array_map() on a single array keeps its keys, so indexes line up with $yoast_keywords
        $yoast_lower = array_map(array($this, 'to_lower'), $yoast_keywords);

        $matching = array();
        $opportunities = array();
        $matched_yoast = array();

        foreach ($chat_keywords as $keyword => $count) {
            // Array keys that look like integers come back as ints
            $keyword = (string) $keyword;
            $found = false;

            // No early exit: every Yoast keyword this chat keyword covers must be
            // marked, otherwise it would wrongly be reported as "yoast_only".
            foreach ($yoast_lower as $index => $yk) {
                if ('' === $yk) {
                    continue;
                }
                if (strpos($yk, $keyword) !== false || strpos($keyword, $yk) !== false) {
                    $matched_yoast[$index] = true;
                    $found = true;
                }
            }

            if ($found) {
                $matching[$keyword] = $count;
            } elseif ($count >= 3) {
                $opportunities[$keyword] = $count;
            }
        }

        return array(
            'matching' => $matching,
            'opportunities' => array_slice($opportunities, 0, 20, true),
            'yoast_only' => array_diff_key($yoast_keywords, $matched_yoast)
        );
    }

    /**
     * Get conversation statistics.
     *
     * @param int $days Look-back window in days.
     * @return array Keys: total_conversations, total_messages, user_messages,
     *               leads_captured (ints), conversion_rate (percent, one decimal)
     *               and avg_messages (one decimal).
     */
    public function get_conversation_stats($days = 30) {
        global $wpdb;
        $conv_table = $wpdb->prefix . 'chatly_conversations';
        $msg_table = $wpdb->prefix . 'chatly_messages';
        $leads_table = $wpdb->prefix . 'chatly_leads';

        $date_limit = $this->get_date_limit($days);

        $stats = array();

        // Total conversations
        $stats['total_conversations'] = (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM $conv_table WHERE started_at >= %s",
            $date_limit
        ));

        // Total messages
        $stats['total_messages'] = (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM $msg_table WHERE created_at >= %s",
            $date_limit
        ));

        // User messages only
        $stats['user_messages'] = (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM $msg_table WHERE role = 'user' AND created_at >= %s",
            $date_limit
        ));

        // Leads captured
        $stats['leads_captured'] = (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM $leads_table WHERE created_at >= %s",
            $date_limit
        ));

        // Both ratios are undefined without conversations; report 0 instead of dividing by zero
        $has_conversations = $stats['total_conversations'] > 0;

        // Conversion rate
        $stats['conversion_rate'] = $has_conversations
            ? round(($stats['leads_captured'] / $stats['total_conversations']) * 100, 1)
            : 0;

        // Avg messages per conversation
        $stats['avg_messages'] = $has_conversations
            ? round($stats['total_messages'] / $stats['total_conversations'], 1)
            : 0;

        return $stats;
    }

    /**
     * Suggest Knowledge Base entries based on common questions.
     *
     * Takes the top 50 questions and drops those that are more than 70%
     * similar (per similar_text()) to an existing Knowledge Base question.
     *
     * @param int $limit Maximum number of suggestions to return.
     * @param int $days  Look-back window in days for the questions considered.
     * @return array[] List of array('question' => string, 'count' => int).
     */
    public function suggest_kb_entries($limit = 10, $days = 30) {
        $limit = (int) $limit;
        if ($limit <= 0) {
            return array();
        }

        global $wpdb;
        $kb_table = $wpdb->prefix . 'chatly_knowledge_base';

        // Get existing KB questions (normalized)
        $existing = (array) $wpdb->get_col("SELECT LOWER(question) FROM $kb_table");
        $existing_normalized = array_map(array($this, 'normalize_question'), $existing);

        // Get top questions
        $top_questions = $this->get_top_questions(50, $days);

        // Filter out questions already in KB
        $suggestions = array();
        foreach ($top_questions as $q) {
            $normalized = $this->normalize_question($q['question']);

            $is_duplicate = false;
            foreach ($existing_normalized as $existing_q) {
                similar_text($normalized, $existing_q, $percent);
                if ($percent > 70) {
                    $is_duplicate = true;
                    break;
                }
            }

            if ($is_duplicate) {
                continue;
            }

            $suggestions[] = $q;
            if (count($suggestions) >= $limit) {
                break;
            }
        }

        return $suggestions;
    }

    /**
     * Build the "Y-m-d H:i:s" cutoff timestamp for a look-back window.
     *
     * WordPress forces PHP's default timezone to UTC, so gmdate() yields the
     * same value date() would there while not depending on that setting.
     *
     * @param int $days Look-back window in days; negative or non-numeric values are treated as 0.
     * @return string Cutoff timestamp.
     */
    private function get_date_limit($days) {
        $days = max(0, (int) $days);

        return gmdate('Y-m-d H:i:s', strtotime("-{$days} days"));
    }

    /**
     * Fetch the content of all user messages inside the look-back window.
     *
     * @param int $days Look-back window in days.
     * @return string[] Message contents (empty when nothing matched).
     */
    private function get_user_messages($days) {
        global $wpdb;
        $messages_table = $wpdb->prefix . 'chatly_messages';

        $messages = $wpdb->get_col($wpdb->prepare(
            "SELECT content FROM $messages_table
             WHERE role = 'user'
             AND created_at >= %s",
            $this->get_date_limit($days)
        ));

        return is_array($messages) ? $messages : array();
    }

    /**
     * Count the terms an extractor method yields across a set of messages.
     *
     * @param string[] $messages  Message contents.
     * @param string   $extractor Name of a method on this class returning a list of terms for one text.
     * @return array Map of term => occurrence count, in order of first appearance.
     */
    private function count_terms($messages, $extractor) {
        $counts = array();
        foreach ($messages as $message) {
            foreach ($this->$extractor($message) as $term) {
                $counts[$term] = isset($counts[$term]) ? $counts[$term] + 1 : 1;
            }
        }

        return $counts;
    }

    /**
     * Group question texts by their normalized form and rank them by frequency.
     *
     * Questions whose normalized text is 10 characters or shorter are ignored.
     * The first raw phrasing seen for each group is kept for display.
     *
     * @param string[] $contents Raw question texts.
     * @param int      $limit    Maximum number of groups to return.
     * @return array[] List of array('question' => string, 'count' => int), most frequent first.
     */
    private function rank_questions($contents, $limit) {
        $grouped = array();
        foreach ($contents as $content) {
            $key = $this->normalize_question($content);
            if ($this->text_length($key) <= 10) {
                continue; // Skip very short messages
            }
            if (!isset($grouped[$key])) {
                $grouped[$key] = array(
                    'question' => $content,
                    'count' => 0
                );
            }
            $grouped[$key]['count']++;
        }

        // Sort by count, highest first
        uasort($grouped, function($a, $b) {
            return $b['count'] - $a['count'];
        });

        return array_slice(array_values($grouped), 0, max(0, (int) $limit));
    }

    /**
     * Lowercase text, multibyte-aware when the mbstring extension is available.
     *
     * @param string $text Input text.
     * @return string Lowercased text.
     */
    private function to_lower($text) {
        $text = (string) $text;

        return function_exists('mb_strtolower') ? mb_strtolower($text, 'UTF-8') : strtolower($text);
    }

    /**
     * Length of a string in characters (bytes when mb_strlen() is unavailable).
     *
     * @param string $text Input text.
     * @return int Length.
     */
    private function text_length($text) {
        return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text);
    }

    /**
     * Replace every character that is not a letter, mark, digit, underscore or whitespace.
     *
     * Matching is Unicode-aware so accented and non-Latin words stay intact.
     * preg_replace() returns null for invalid UTF-8 under the "u" modifier; in
     * that case the byte-wise pattern is used instead.
     *
     * @param string $text        Input text.
     * @param string $replacement Replacement for each removed character.
     * @return string Cleaned text.
     */
    private function strip_punctuation($text, $replacement) {
        $clean = preg_replace('/[^\p{L}\p{M}\p{N}_\s]/u', $replacement, $text);
        if (null === $clean) {
            $clean = preg_replace('/[^\w\s]/', $replacement, $text);
        }

        return (string) $clean;
    }

    /**
     * Split text into lowercase words with punctuation removed.
     *
     * @param string $text Raw text.
     * @return string[] Words in order of appearance.
     */
    private function tokenize($text) {
        $text = $this->strip_punctuation($this->to_lower($text), ' ');

        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if (false === $words) {
            // Invalid UTF-8: fall back to byte-wise splitting.
            $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        return is_array($words) ? $words : array();
    }

    /**
     * Whether a (lowercase) word is in the stop word list.
     *
     * @param string $word Word to check.
     * @return bool
     */
    private function is_stop_word($word) {
        if (null === $this->stop_word_lookup) {
            $this->stop_word_lookup = array_flip($this->stop_words);
        }

        return isset($this->stop_word_lookup[$word]);
    }
}
