<?php
namespace Highpots\SpamProtection;

use DOMDocument;
use DOMElement;
use DOMNodeList;
use DOMXPath;

// Stop immediately when ABSPATH is not defined (the file was requested directly).
defined('ABSPATH') || exit;

/**
 * DOM-based form field injection processor.
 *
 * @deprecated 1.1.0 Field injection is now handled client-side via JavaScript.
 *                   This class is kept for backwards compatibility but is no longer used.
 *                   See HPSP_Rest_Api and assets/js/hpsp-frontend.js for the new approach.
 */
class HPSP_Dom_Processor {
    /** Opening part of the HTML comment that stands in for an extracted <script> element. */
    private const SCRIPT_PLACEHOLDER_PREFIX = '<!--HPSP_SCRIPT_PLACEHOLDER_';

    /** Closing part of the placeholder comment; it also keeps index 1 from matching index 10. */
    private const SCRIPT_PLACEHOLDER_SUFFIX = '-->';

    private HPSP_Token_Manager $hmac_token;
    private HPSP_Honeypot_Manager $honeypot;

    /** @var string[] Script elements removed by extract_scripts(), keyed by placeholder index. */
    private array $extracted_scripts = [];

    public function __construct() {
        $this->hmac_token = new HPSP_Token_Manager();
        $this->honeypot = new HPSP_Honeypot_Manager();
    }

    /**
     * Process content and add spam protection fields to every form found in it.
     *
     * Content that is blank, contains no "<form" substring, cannot be parsed, or
     * yields no form elements is returned unchanged.
     *
     * @param string $content HTML markup to process.
     * @return string The markup with the protection inputs appended to each form,
     *                or the original markup when nothing was processed.
     */
    public function process_content(string $content): string {
        if (trim($content) === '') {
            return $content;
        }

        // Cheap pre-check before parsing. Case-insensitive because HTML tag names
        // may be written as <FORM ...>, which the parser still exposes as "form".
        if (stripos($content, '<form') === false) {
            return $content;
        }

        // Extract script tags to prevent DOMDocument from corrupting them
        $without_scripts = $this->extract_scripts($content);

        $dom = $this->create_dom_document();

        if (!$this->load_html($dom, $without_scripts)) {
            return $content;
        }

        $forms = $this->find_forms($dom);

        if ($forms->length === 0) {
            return $content;
        }

        $this->process_forms($dom, $forms);

        return $this->get_clean_html($dom, $content);
    }

    /**
     * Create and configure DOMDocument
     */
    private function create_dom_document(): DOMDocument {
        $dom = new DOMDocument('1.0', 'UTF-8');
        $dom->preserveWhiteSpace = false;
        $dom->formatOutput = false;
        return $dom;
    }

    /**
     * Prepares an HTML string for a DOM parser by ensuring it is valid UTF-8.
     *
     * It does NOT convert to HTML entities.
     *
     * @param string $content The input HTML string, which might have an unknown encoding.
     * @return string The HTML string as valid UTF-8.
     */
    private function prepare_html_for_dom(string $content): string
    {
        // If it's already valid UTF-8, do nothing. This is the fastest case.
        if (mb_check_encoding($content, 'UTF-8')) {
            return $content;
        }

        // If not, attempt to convert from a common encoding. Avoid 'auto'.
        // ISO-8859-1 is a frequent source of encoding issues.
        $content = mb_convert_encoding($content, 'UTF-8', 'ISO-8859-1');

        // Finally, substitute any remaining invalid UTF-8 characters to be safe.
        return mb_convert_encoding($content, 'UTF-8', 'UTF-8');
    }

    /**
     * Loads an HTML string into a DOMDocument object, forcing UTF-8 parsing.
     *
     * The caller's libxml error-handling mode is restored before returning, and the
     * helper "<?xml encoding>" node used to force the encoding is removed again so
     * that it does not show up in the serialized output.
     *
     * NOTE(review): LIBXML_HTML_NOIMPLIED is known to treat the first top-level
     * element as the document root, which may nest later siblings inside it -
     * confirm against the markup callers actually pass in.
     *
     * @param DOMDocument $dom The DOMDocument object to load the HTML into.
     * @param string $content The raw HTML string.
     * @return bool Returns true on success or false on failure.
     */
    private function load_html(DOMDocument $dom, string $content): bool
    {
        // Collect warnings from malformed HTML internally instead of emitting them,
        // remembering the previous mode so this global setting does not leak.
        $previous_mode = libxml_use_internal_errors(true);

        $utf8_content = $this->prepare_html_for_dom($content);

        // Prepend an encoding declaration so libxml parses the input as UTF-8.
        $loaded = $dom->loadHTML(
            '<?xml encoding="UTF-8">' . $utf8_content,
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );

        libxml_clear_errors();
        libxml_use_internal_errors($previous_mode);

        if (!$loaded) {
            return false;
        }

        // Drop the processing instruction created by the encoding declaration.
        // Iterate over a snapshot because childNodes is a live list.
        foreach (iterator_to_array($dom->childNodes) as $node) {
            if ($node->nodeType === XML_PI_NODE) {
                $dom->removeChild($node);
            }
        }

        return true;
    }

    /**
     * Find all forms in the document
     */
    private function find_forms(DOMDocument $dom): DOMNodeList {
        $xpath = new DOMXPath($dom);
        return $xpath->query('//form');
    }

    /**
     * Process all forms and add spam protection fields
     */
    private function process_forms(DOMDocument $dom, DOMNodeList $forms): void {
        $xpath = new DOMXPath($dom);

        foreach ($forms as $form) {
            $form_id = $this->extract_form_id($xpath, $form);
            $this->add_spam_protection_fields($dom, $form, $form_id);
        }
    }

    /**
     * Determine an identifier for a form element.
     *
     * Sources are tried in this order, skipping any that are missing or empty:
     * a descendant input named "form_id", the form's "id" attribute, its "name"
     * attribute, an md5 of its "action" attribute (ignoring "#"), and finally a
     * generated value that differs on every call.
     *
     * @param DOMXPath $xpath XPath helper bound to the form's document.
     * @param DOMElement $form The form element to identify.
     * @return string The form identifier.
     */
    private function extract_form_id(DOMXPath $xpath, DOMElement $form): string {
        // Try to find existing form_id input
        $form_id_node = $xpath->query('.//input[@name="form_id"]', $form)->item(0);

        if ($form_id_node instanceof DOMElement) {
            $value = $form_id_node->getAttribute('value');
            // Compare against '' so that a value of "0" is still accepted.
            if ($value !== '') {
                return $value;
            }
        }

        // Try the id attribute, then the name attribute. getAttribute() returns ''
        // for a missing attribute, so one check covers both absent and empty.
        foreach (['id', 'name'] as $attribute) {
            $value = $form->getAttribute($attribute);
            if ($value !== '') {
                return $value;
            }
        }

        // Try to find action and create ID from it
        $action = $form->getAttribute('action');
        if ($action !== '' && $action !== '#') {
            return 'form_' . md5($action);
        }

        // Fallback: generate unique ID from the node path plus a unique suffix
        return 'form_' . md5($form->getNodePath() . uniqid('', true));
    }

    /**
     * Append the spam protection inputs to a form.
     *
     * Adds hidden inputs for the form id, the token data returned by the token
     * manager (read via its 'token', 'timestamp' and 'nonce' keys) and the render
     * time, plus one text input acting as the honeypot.
     *
     * @param DOMDocument $dom Document that owns the form.
     * @param DOMElement $form Form element to extend.
     * @param string $form_id Identifier used for the token and the honeypot name.
     */
    private function add_spam_protection_fields(DOMDocument $dom, DOMElement $form, string $form_id): void {
        // Generate tokens and data
        $hmac_data = $this->hmac_token->generate_token($form_id);
        $honeypot_field = $this->honeypot->generate_field_name($form_id);
        $rendered_at = time();

        // Define fields to add
        $fields = [
            ['name' => 'hpsp_form_id', 'value' => $form_id, 'type' => 'hidden'],
            ['name' => 'hpsp_hmac_token', 'value' => $hmac_data['token'], 'type' => 'hidden'],
            ['name' => 'hpsp_hmac_timestamp', 'value' => (string)$hmac_data['timestamp'], 'type' => 'hidden'],
            ['name' => 'hpsp_hmac_nonce', 'value' => $hmac_data['nonce'], 'type' => 'hidden'],
            ['name' => 'hpsp_rendered_at', 'value' => (string)$rendered_at, 'type' => 'hidden'],
            ['name' => $honeypot_field, 'value' => '', 'type' => 'text', 'honeypot' => true],
        ];

        // Add fields to form
        foreach ($fields as $field) {
            $input = $this->create_input_element($dom, $field);
            $form->appendChild($input);
        }
    }

    /**
     * Create an input element from a field definition.
     *
     * @param DOMDocument $dom Document used to create the element.
     * @param array $field Definition with 'type', 'name' and 'value' keys and an
     *                     optional truthy 'honeypot' flag.
     * @return DOMElement The new, not yet attached, input element.
     */
    private function create_input_element(DOMDocument $dom, array $field): DOMElement {
        $input = $dom->createElement('input');
        $input->setAttribute('type', $field['type']);
        $input->setAttribute('name', $field['name']);
        $input->setAttribute('value', $field['value']);

        // Special handling for honeypot field: moved off-screen, hidden from
        // assistive technology, removed from tab order, no autocomplete.
        if (!empty($field['honeypot'])) {
            $input->setAttribute('style', 'position:absolute; left:-9999px; width:1px; height:1px; opacity:0;');
            $input->setAttribute('aria-hidden', 'true');
            $input->setAttribute('tabindex', '-1');
            $input->setAttribute('autocomplete', 'off');
        }

        return $input;
    }

    /**
     * Extract script tags and replace them with comment placeholders to prevent
     * DOMDocument corruption.
     *
     * If the regular expression engine fails (preg_replace_callback() returns
     * null), the content is returned unchanged and no scripts are recorded, so
     * processing can continue on a best-effort basis.
     *
     * @param string $content HTML markup.
     * @return string The markup with each script element replaced by a placeholder.
     */
    private function extract_scripts(string $content): string {
        $this->extracted_scripts = [];
        $scripts = [];

        $result = preg_replace_callback(
            // \s* allows end tags written with trailing whitespace, e.g. "</script >".
            '/<script\b[^>]*>.*?<\/script\s*>/is',
            static function (array $matches) use (&$scripts): string {
                $placeholder = self::SCRIPT_PLACEHOLDER_PREFIX . count($scripts) . self::SCRIPT_PLACEHOLDER_SUFFIX;
                $scripts[] = $matches[0];
                return $placeholder;
            },
            $content
        );

        if ($result === null) {
            return $content;
        }

        $this->extracted_scripts = $scripts;

        return $result;
    }

    /**
     * Restore extracted script tags from placeholders.
     *
     * @param string $content Markup containing placeholders from extract_scripts().
     * @return string The markup with every known placeholder replaced by its script.
     */
    private function restore_scripts(string $content): string {
        if ($this->extracted_scripts === []) {
            return $content;
        }

        $replacements = [];
        foreach ($this->extracted_scripts as $index => $script) {
            $replacements[self::SCRIPT_PLACEHOLDER_PREFIX . $index . self::SCRIPT_PLACEHOLDER_SUFFIX] = $script;
        }

        // strtr() replaces in a single pass, so restored script bodies are not re-scanned.
        return strtr($content, $replacements);
    }

    /**
     * Serialize the document and put the extracted scripts back.
     *
     * @param DOMDocument $dom Document to serialize.
     * @param string $fallback Markup to return when serialization fails.
     * @return string The serialized HTML with scripts restored, or $fallback.
     */
    private function get_clean_html(DOMDocument $dom, string $fallback = ''): string {
        $html = $dom->saveHTML();

        if ($html === false) {
            return $fallback;
        }

        return $this->restore_scripts($html);
    }
}