<?php

/**
 * Data Scraper class for Bytesweavers AI Chat Master.
 *
 * This class scrapes website content from pages, posts, and WooCommerce products (if available),
 * then builds a raw text version and generates a summarized knowledge base using the AI API.
 *
 * @package Bytesweavers_AI_Chat_Master
 */

namespace AICW;

class Data_Scraper {

    /**
     * Scrape website content and generate a knowledge base summary.
     *
     * Collects pages, posts and (when WooCommerce is active) products, flattens
     * them into plain text, asks the AI API for a summary and stores that summary
     * in the `aicw_knowledge_base` and `aicw_knowledge_base_summary` options.
     * When summarization fails nothing is stored, so a previously saved
     * knowledge base is not replaced by an error message.
     *
     * @return array On success: 'success' => true and 'summaries' => the summary.
     *               On failure: 'success' => false and 'error' => the message.
     */
    public function scrape_website() {
        try {
            // 1) Gather raw content.
            $knowledge_base = $this->collect_entries();

            // 2) Build raw text representation from the scraped content.
            $raw_text = $this->build_raw_text($knowledge_base);

            // Generate the summary using the AI API (throws on API error).
            $summaries = $this->summarize_knowledge($raw_text);

            // 3) Save the summarized content; both options receive the same value.
            update_option('aicw_knowledge_base', $summaries);
            update_option('aicw_knowledge_base_summary', $summaries);

            return array(
                'success' => true,
                'summaries' => $summaries
            );
        } catch (\Exception $e) {
            return array(
                'success' => false,
                'error' => $e->getMessage()
            );
        }
    }

    /**
     * Collect pages, posts and products as an ordered list of entries.
     *
     * A list is used instead of a title-keyed map so that two items sharing a
     * title (e.g. a page and a product both called "Contact") are both kept.
     *
     * @return array List of array('title' => string, 'content' => string|array).
     *               Product entries carry an array with 'description', 'price'
     *               and 'sku' as their content.
     */
    private function collect_entries() {
        $entries = array();

        // Scrape pages. get_pages() may return false, so guard before iterating.
        $pages = get_pages();
        if (!is_array($pages)) {
            $pages = array();
        }
        foreach ($pages as $page) {
            $entries[] = array(
                'title'   => $page->post_title,
                'content' => wp_strip_all_tags($page->post_content)
            );
        }

        // Scrape posts.
        $posts = get_posts(array('posts_per_page' => -1));
        foreach ($posts as $post) {
            $entries[] = array(
                'title'   => $post->post_title,
                'content' => wp_strip_all_tags($post->post_content)
            );
        }

        // Scrape WooCommerce products if available.
        if (class_exists('WooCommerce') && function_exists('wc_get_products')) {
            // Only published products: drafts and private products must not
            // leak into the public-facing knowledge base.
            $products = wc_get_products(array('limit' => -1, 'status' => 'publish'));
            if (!is_array($products)) {
                $products = array();
            }
            foreach ($products as $product) {
                $entries[] = array(
                    'title'   => $product->get_name(),
                    'content' => array(
                        // Strip markup so products match the plain-text pages/posts.
                        'description' => wp_strip_all_tags((string) $product->get_description()),
                        'price'       => $product->get_price(),
                        'sku'         => $product->get_sku()
                    )
                );
            }
        }

        return $entries;
    }

    /**
     * Build a raw text representation from the knowledge base.
     *
     * @param array $knowledge_base List of entries as produced by collect_entries().
     * @return string The plain-text representation.
     */
    private function build_raw_text($knowledge_base) {
        $text = __("Website Content:", "bytesweavers-ai-chat-master") . "\n\n";
        foreach ($knowledge_base as $entry) {
            $title = $entry['title'];
            $content = $entry['content'];
            if (is_array($content)) {
                $text .= __("Product:", "bytesweavers-ai-chat-master") . " {$title}\n";
                $text .= __("Price:", "bytesweavers-ai-chat-master") . " " . $this->value_or_na($content['price']) . "\n";
                $text .= __("SKU:", "bytesweavers-ai-chat-master") . " " . $this->value_or_na($content['sku']) . "\n";
                $text .= __("Description:", "bytesweavers-ai-chat-master") . " " . $content['description'] . "\n\n";
            } else {
                $text .= $title . ":\n" . $content . "\n\n";
            }
        }
        return $text;
    }

    /**
     * Return the value, or the translated "N/A" label when it is missing.
     *
     * Only null, false and blank strings count as missing; a legitimate "0"
     * (free product, SKU "0") is kept instead of being treated as falsy.
     *
     * @param mixed $value Product field value.
     * @return mixed The original value or the "N/A" label.
     */
    private function value_or_na($value) {
        if ($value === null || $value === false || (is_string($value) && trim($value) === '')) {
            return __('N/A', "bytesweavers-ai-chat-master");
        }
        return $value;
    }

    /**
     * Summarize the raw text content using the API Handler.
     *
     * @param string $raw_text The raw website content.
     * @return string The summarized content.
     * @throws \RuntimeException When the API handler reports an error, so the
     *                           caller does not store the error text as knowledge.
     */
    private function summarize_knowledge($raw_text) {
        $api_handler = new API_Handler();
        $prompt = __("Please summarize the following website content:", "bytesweavers-ai-chat-master") . "\n\n" . $raw_text;
        $summary = $api_handler->process_message($prompt, '');
        if (is_array($summary) && isset($summary['error'])) {
            $detail = is_scalar($summary['error']) ? (string) $summary['error'] : '';
            throw new \RuntimeException(__("Could not summarize: ", "bytesweavers-ai-chat-master") . $detail);
        }
        return $summary;
    }

    /**
     * Retrieve the complete website summary including custom instructions, rendered as HTML.
     *
     * @return string The website knowledge base with custom instructions rendered from Markdown.
     */
    public function get_website_summary() {
        // Options can hold any type; only scalar values are meaningful as text here.
        $knowledge_base = get_option('aicw_knowledge_base', '');
        $knowledge_base = is_scalar($knowledge_base) ? (string) $knowledge_base : '';
        $custom_instructions = get_option('aicw_custom_instructions', '');
        $custom_instructions = is_scalar($custom_instructions) ? (string) $custom_instructions : '';

        $baseText = __("Website Knowledge Base:", "bytesweavers-ai-chat-master") . "\n\n" . $knowledge_base;
        if (!empty($custom_instructions)) {
            $baseText .= "\n" . __("Custom Instructions:", "bytesweavers-ai-chat-master") . "\n" . $custom_instructions;
        }

        return $this->parse_markdown($baseText);
    }

    /**
     * Parse a small Markdown subset and return HTML.
     *
     * Supported: fenced code blocks, inline code, #/##/### headers, unordered
     * lists ("* item"), **bold** and *italic*. All input is HTML-escaped first.
     *
     * @param string $text Markdown source.
     * @return string Rendered HTML.
     */
    private function parse_markdown($text) {
        // Escape HTML special characters. ENT_SUBSTITUTE keeps the text when it
        // contains invalid UTF-8 instead of returning an empty string.
        $text = htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        // Normalize line endings so the line-based patterns below behave the
        // same for "\r\n" input, and drop the control byte used for placeholders.
        $text = str_replace(array("\r\n", "\r", "\x1A"), array("\n", "\n", ''), $text);

        // Rendered code is parked behind placeholders so later rules (bold,
        // italic, headers, nl2br) cannot rewrite its contents.
        $stash = array();
        $keep = function ($html) use (&$stash) {
            $token = "\x1A" . count($stash) . "\x1A";
            $stash[$token] = $html;
            return $token;
        };

        // Code blocks with optional language indicator.
        $text = $this->safe_replace('/```(\w+)?\n([\s\S]+?)\n```/', function ($m) use ($keep) {
            // Omit the class attribute entirely when no language was given.
            $class = $m[1] !== '' ? ' class="language-' . $m[1] . '"' : '';
            return $keep('<pre><code' . $class . '>' . $m[2] . '</code></pre>');
        }, $text);

        // Inline code.
        $text = $this->safe_replace('/`([^`]+)`/', function ($m) use ($keep) {
            return $keep('<code>' . $m[1] . '</code>');
        }, $text);

        // Headers.
        $text = $this->safe_replace('/^### (.*)$/m', '<h3>$1</h3>', $text);
        $text = $this->safe_replace('/^## (.*)$/m', '<h2>$1</h2>', $text);
        $text = $this->safe_replace('/^# (.*)$/m', '<h1>$1</h1>', $text);

        // Unordered lists. Handled before italics so the leading "* " markers
        // are not mistaken for emphasis; each run of consecutive item lines
        // becomes its own <ul>.
        $text = $this->safe_replace('/(?:^\* .*(?:\n|$))+/m', function ($m) {
            $items = '';
            foreach (explode("\n", rtrim($m[0], "\n")) as $line) {
                $items .= '<li>' . substr($line, 2) . '</li>';
            }
            // Preserve the newline that separated the list from following text.
            $tail = substr($m[0], -1) === "\n" ? "\n" : '';
            return '<ul>' . $items . '</ul>' . $tail;
        }, $text);

        // Bold.
        $text = $this->safe_replace('/\*\*([^*]+)\*\*/', '<strong>$1</strong>', $text);
        // Italic (single line only, so stray asterisks on different lines do not pair up).
        $text = $this->safe_replace('/\*([^*\n]+)\*/', '<em>$1</em>', $text);

        // Convert newlines to <br>.
        $text = nl2br($text);

        // Put the protected code fragments back.
        return strtr($text, $stash);
    }

    /**
     * Run a regex replacement, keeping the input when PCRE reports an error.
     *
     * preg_replace()/preg_replace_callback() return null on failure (for
     * example when a backtrack limit is hit); without this guard the whole
     * text would be lost.
     *
     * @param string          $pattern     PCRE pattern.
     * @param string|callable $replacement Replacement string or callback.
     * @param string          $text        Subject text.
     * @return string The replaced text, or $text unchanged on PCRE failure.
     */
    private function safe_replace($pattern, $replacement, $text) {
        $result = is_string($replacement)
            ? preg_replace($pattern, $replacement, $text)
            : preg_replace_callback($pattern, $replacement, $text);
        return $result === null ? $text : $result;
    }
}