<?php
// Abort immediately when the file is requested directly instead of being
// loaded by WordPress (which defines ABSPATH during bootstrap).
defined('ABSPATH') || exit;

class AutopilotSEO_HTML_To_Gutenberg
{
    /**
     * Tags that are always converted on their own instead of being merged
     * into the surrounding inline text.
     *
     * @var string[]
     */
    private static $block_level_tags = [
        'address', 'article', 'aside', 'blockquote', 'div', 'figure', 'footer',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'img', 'main',
        'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul',
    ];

    /**
     * Tags whose content must never end up in the generated markup.
     *
     * @var string[]
     */
    private static $skipped_tags = ['script', 'style'];

    public function __construct()
    {
        // No configuration or dependencies are required at the moment.
    }

    /**
     * Convert raw HTML into Gutenberg block-based markup.
     *
     * - Parses the HTML fragment with DOMDocument.
     * - Walks the children of the (implied) <body>.
     * - Converts headings, paragraphs, images, lists, tables and
     *   "video-container" divs into the matching core blocks.
     * - Runs of text / inline elements between block-level elements become
     *   paragraph blocks.
     * - External http(s) images are always sideloaded into the Media Library;
     *   $post_id only decides which post the new attachment is attached to.
     *
     * @param string $html    Raw HTML
     * @param int    $post_id Post to attach sideloaded images to (0 = unattached)
     * @return string Gutenberg block markup ('' for empty or unparsable input)
     */
    public function convert($html, $post_id = 0)
    {
        $html = (string) $html;
        if (trim($html) === '') {
            return '';
        }

        // Silence parser warnings for this call only, then put the global
        // libxml setting back the way the caller had it.
        $previous_setting = libxml_use_internal_errors(true);

        $doc = new DOMDocument();
        // The XML declaration makes libxml read the input as UTF-8.
        // LIBXML_HTML_NOIMPLIED is deliberately NOT used: without an implied
        // <body>, libxml takes the first element as the document root and may
        // nest the following top-level siblings inside it.
        $loaded = $doc->loadHTML('<?xml encoding="utf-8" ?>' . $html, LIBXML_NODEFDTD);

        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);

        if (! $loaded) {
            return '';
        }

        $body = $doc->getElementsByTagName('body')->item(0);

        return $this->convert_children($body ?: $doc, (int) $post_id);
    }

    /**
     * Convert all children of a node into block markup.
     *
     * Text nodes and inline elements are accumulated (as serialized HTML, so
     * entities stay encoded and tags/attributes are kept) and flushed as one
     * paragraph block whenever a block-level element is reached.
     *
     * @param DOMNode $parent
     * @param int     $post_id
     * @return string
     */
    private function convert_children(DOMNode $parent, $post_id = 0)
    {
        $output        = '';
        $inline_buffer = '';

        foreach ($parent->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $inline_buffer .= $this->get_outer_html($child);
                continue;
            }
            if ($child->nodeType !== XML_ELEMENT_NODE) {
                // Comments, processing instructions, ...
                continue;
            }

            $tag = strtolower($child->nodeName);
            if (in_array($tag, self::$skipped_tags, true)) {
                continue;
            }

            if (in_array($tag, self::$block_level_tags, true)) {
                $output       .= $this->flush_inline_buffer($inline_buffer);
                $inline_buffer = '';
                $output       .= $this->map_node_to_block($child, $post_id);
                continue;
            }

            // Inline element (<strong>, <a>, <br>, ...): keep the element itself.
            $inline_buffer .= $this->get_outer_html($child);
        }

        return $output . $this->flush_inline_buffer($inline_buffer);
    }

    /**
     * Turn accumulated inline HTML into a paragraph block.
     *
     * @param string $buffer
     * @return string Paragraph block, or '' when the buffer is only whitespace
     */
    private function flush_inline_buffer($buffer)
    {
        $buffer = trim($buffer);

        // Strict comparison so that a literal "0" is not discarded.
        return $buffer === '' ? '' : $this->create_paragraph_block($buffer);
    }

    /**
     * Map a single DOMElement node to the appropriate Gutenberg block(s).
     *
     * @param DOMNode $node
     * @param int     $post_id
     * @return string Gutenberg blocks markup
     */
    private function map_node_to_block(DOMNode $node, $post_id = 0)
    {
        $tag = strtolower($node->nodeName);

        switch ($tag) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
                // "h2" => 2
                $level = (int) substr($tag, 1);
                $text  = trim($node->textContent);
                return $text === '' ? '' : $this->create_heading_block($text, $level);

            case 'div':
                if ($node instanceof DOMElement && $this->has_class($node, 'video-container')) {
                    return $this->create_video_block($node);
                }
                // Any other div is only a wrapper: convert what is inside it.
                return $this->convert_children($node, $post_id);

            case 'p':
                return $this->parse_paragraph_with_children($node, $post_id);

            case 'img':
                if ($node instanceof DOMElement) {
                    return $this->create_image_block(
                        $node->getAttribute('src'),
                        $node->getAttribute('alt'),
                        $post_id
                    );
                }
                return '';

            case 'ul':
            case 'ol':
                return $this->create_list_block($node, $post_id);

            case 'table':
                return $this->create_table_block($node);

            default:
                // Unknown container (section, figure, blockquote, ...):
                // convert its content instead of dumping raw HTML in one paragraph.
                return $this->convert_children($node, $post_id);
        }
    }

    /**
     * Check whether an element carries a given CSS class (among possibly several).
     *
     * @param DOMElement $element
     * @param string     $class_name
     * @return bool
     */
    private function has_class(DOMElement $element, $class_name)
    {
        $classes = preg_split('/\s+/', trim($element->getAttribute('class')), -1, PREG_SPLIT_NO_EMPTY);

        return in_array($class_name, $classes, true);
    }

    /**
     * Parse a <p> element's children, creating multiple blocks if necessary.
     *
     * Text and inline elements are collected into paragraph blocks; an <img>
     * child closes the current paragraph and is emitted as an image block.
     *
     * @param DOMNode $p_node
     * @param int     $post_id
     * @return string
     */
    private function parse_paragraph_with_children(DOMNode $p_node, $post_id = 0)
    {
        return $this->convert_children($p_node, $post_id);
    }

    /**
     * Serialize a node itself (tag, attributes and content) as HTML.
     *
     * @param DOMNode $node
     * @return string
     */
    private function get_outer_html(DOMNode $node)
    {
        $doc = $node instanceof DOMDocument ? $node : $node->ownerDocument;
        if (! $doc) {
            return '';
        }

        return (string) $doc->saveHTML($node);
    }

    /**
     * Extract the inner HTML of a node: all of its child nodes (text nodes
     * included) serialized back to HTML.
     *
     * @param DOMNode $node
     * @return string
     */
    private function get_inner_html(DOMNode $node)
    {
        $inner = '';
        foreach ($node->childNodes as $child) {
            $inner .= $this->get_outer_html($child);
        }
        return $inner;
    }

    /**
     * Create a core/heading Gutenberg block.
     *
     * @param string $text  Plain heading text (already entity-decoded)
     * @param int    $level 1..6 (values outside the range are clamped)
     * @return string
     */
    private function create_heading_block($text, $level = 2)
    {
        $level = max(1, min(6, (int) $level));
        // $text comes from textContent, i.e. it is plain text, so it has to be
        // escaped rather than filtered as HTML.
        $escaped = esc_html($text);

        $block  = '<!-- wp:heading {"level":' . $level . '} -->' . "\n";
        $block .= "<h{$level} class=\"wp-block-heading\">{$escaped}</h{$level}>\n";
        $block .= '<!-- /wp:heading -->' . "\n";

        return $block;
    }

    /**
     * Create a core/paragraph Gutenberg block.
     *
     * @param string $html Inner HTML for the paragraph (sanitized with wp_kses_post)
     * @return string
     */
    private function create_paragraph_block($html)
    {
        $cleaned = trim(wp_kses_post($html));

        // Only skip the wrapper when the content already is a complete <p>
        // element (with or without attributes).
        if (! preg_match('/^<p[\s>].*<\/p>$/is', $cleaned)) {
            $cleaned = '<p>' . $cleaned . '</p>';
        }

        $block  = '<!-- wp:paragraph -->' . "\n";
        $block .= $cleaned . "\n";
        $block .= '<!-- /wp:paragraph -->' . "\n";

        return $block;
    }

    /**
     * Create a core/image Gutenberg block for an <img> src.
     *
     * External images are sideloaded into the Media Library; when that fails
     * the original URL is kept so the image is still displayed.
     *
     * @param string $src
     * @param string $alt
     * @param int    $post_id If > 0, attach sideloaded image to this post
     * @return string Block markup, or '' when $src is empty
     */
    private function create_image_block($src, $alt = '', $post_id = 0)
    {
        $src = trim((string) $src);
        if ($src === '') {
            return '';
        }

        $local_url     = $src;
        $attachment_id = 0;

        if ($this->should_sideload_image($src)) {
            $downloaded = $this->download_external_image_by_url($src, $post_id, $alt);
            if (! is_wp_error($downloaded)) {
                $attachment_id = (int) $downloaded['attachment_id'];
                $local_url     = $downloaded['url'];
            }
        }

        // core/image only carries an "id" for real attachments, and pairs it
        // with a wp-image-{id} class on the <img>.
        $attrs     = [];
        $img_class = '';
        if ($attachment_id > 0) {
            $attrs['id'] = $attachment_id;
            $img_class   = ' class="wp-image-' . $attachment_id . '"';
        }
        $attrs['sizeSlug']        = 'large';
        $attrs['linkDestination'] = 'none';

        $block  = '<!-- wp:image ' . json_encode($attrs) . ' -->' . "\n";
        $block .= '<figure class="wp-block-image size-large">';
        $block .= '<img src="' . esc_url($local_url) . '" alt="' . esc_attr($alt) . '"' . $img_class . '/>';
        $block .= '</figure>' . "\n";
        $block .= '<!-- /wp:image -->' . "\n";

        return $block;
    }

    /**
     * Create a core/list block for <ul> or <ol>, with one core/list-item
     * block per direct <li> child.
     *
     * @param DOMNode $list_node
     * @param int     $post_id
     * @return string
     */
    private function create_list_block(DOMNode $list_node, $post_id = 0)
    {
        $tag_name = strtolower($list_node->nodeName);
        // Without this attribute Gutenberg expects a <ul>.
        $attrs = $tag_name === 'ol' ? ' {"ordered":true}' : '';

        $output  = "<!-- wp:list{$attrs} -->\n";
        $output .= "<{$tag_name} class=\"wp-block-list\">";

        foreach ($list_node->childNodes as $child) {
            if ($child->nodeType !== XML_ELEMENT_NODE || strtolower($child->nodeName) !== 'li') {
                continue;
            }

            $output .= '<!-- wp:list-item -->';
            $output .= '<li>' . $this->render_list_item_content($child, $post_id) . '</li>';
            $output .= '<!-- /wp:list-item -->';
        }

        $output .= "</{$tag_name}>\n";
        $output .= "<!-- /wp:list -->\n";

        return $output;
    }

    /**
     * Build the content of one <li>: sanitized inline HTML followed by any
     * nested lists, which are emitted as inner core/list blocks.
     *
     * @param DOMNode $item_node The <li> element
     * @param int     $post_id
     * @return string
     */
    private function render_list_item_content(DOMNode $item_node, $post_id = 0)
    {
        $content = '';
        $nested  = '';

        foreach ($item_node->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                $tag = strtolower($child->nodeName);
                if ($tag === 'ul' || $tag === 'ol') {
                    $nested .= rtrim($this->create_list_block($child, $post_id));
                    continue;
                }
                $content .= $this->get_outer_html($child);
            } elseif ($child->nodeType === XML_TEXT_NODE) {
                $content .= $this->get_outer_html($child);
            }
        }

        return wp_kses_post(trim($content)) . $nested;
    }

    /**
     * Decide whether an image should be sideloaded.
     *
     * Only absolute http(s) URLs whose host differs from the site host are
     * treated as external. URLs without a host (relative paths, data: URIs)
     * are left untouched.
     *
     * @param string $url
     * @return bool
     */
    private function should_sideload_image($url)
    {
        $scheme = strtolower((string) wp_parse_url($url, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            return false;
        }

        $url_host = (string) wp_parse_url($url, PHP_URL_HOST);
        if ($url_host === '') {
            return false;
        }

        $site_host = (string) wp_parse_url(home_url(), PHP_URL_HOST);

        return strcasecmp($url_host, $site_host) !== 0;
    }

    /**
     * Find an attachment by its remote URL, if we've stored _remote_url.
     * So we don't re-upload the same image multiple times.
     *
     * @param string $url
     * @return int Attachment ID or 0 if not found
     */
    private function find_attachment_by_remote_url($url)
    {
        $query = new WP_Query([
            'post_type'              => 'attachment',
            'post_status'            => 'any',
            'posts_per_page'         => 1,
            'fields'                 => 'ids',
            // Only one ID is needed: skip the row count and cache priming.
            'no_found_rows'          => true,
            'update_post_meta_cache' => false,
            'update_post_term_cache' => false,
            'meta_query'             => [
                [
                    'key'   => '_remote_url',
                    'value' => $url,
                ],
            ],
        ]);

        return empty($query->posts) ? 0 : (int) $query->posts[0];
    }

    /**
     * Load the wp-admin includes the sideloading functions live in.
     * They are not available on front-end / cron / REST requests by default.
     */
    private function load_media_dependencies()
    {
        require_once ABSPATH . 'wp-admin/includes/media.php';
        require_once ABSPATH . 'wp-admin/includes/file.php';
        require_once ABSPATH . 'wp-admin/includes/image.php';
    }

    /**
     * Download a remote image to the Media Library, avoiding duplicates by URL.
     *
     * Returns array:
     *  [
     *    'attachment_id' => (int),
     *    'url' => (string) the local URL
     *  ]
     * or WP_Error on failure. Temporary files are removed on every failure path.
     *
     * @param string $url
     * @param int    $post_id
     * @param string $desc  Title/desc for the attachment
     * @return array|WP_Error
     */
    public function download_external_image_by_url($url, $post_id = 0, $desc = '')
    {
        global $wp_filesystem;

        // Re-use a previous sideload of the same URL when it is still usable.
        $existing_id = $this->find_attachment_by_remote_url($url);
        if ($existing_id) {
            $existing_url = wp_get_attachment_url($existing_id);
            if ($existing_url) {
                return [
                    'attachment_id' => $existing_id,
                    'url'           => $existing_url,
                ];
            }
        }

        $this->load_media_dependencies();

        // The URL comes from arbitrary HTML, so use the "safe" variant, which
        // validates the URL and rejects unsafe hosts/redirects.
        $response = wp_safe_remote_get($url, ['timeout' => 20]);
        if (is_wp_error($response)) {
            return $response;
        }
        $status = wp_remote_retrieve_response_code($response);
        if (200 !== $status) {
            return new WP_Error('http_error', 'Failed to download image: HTTP status ' . $status);
        }
        $body = wp_remote_retrieve_body($response);
        if ('' === (string) $body) {
            return new WP_Error('empty_body', 'Remote image body is empty.');
        }

        // Guess the extension from the content type. The header may carry
        // parameters ("image/png; charset=binary") or be sent more than once.
        $ctype = wp_remote_retrieve_header($response, 'content-type');
        if (is_array($ctype)) {
            $ctype = reset($ctype);
        }
        $ctype_parts = explode(';', (string) $ctype);
        $ctype       = strtolower(trim($ctype_parts[0]));

        $extension_map = [
            'image/jpeg' => '.jpg',
            'image/jpg'  => '.jpg',
            'image/png'  => '.png',
            'image/gif'  => '.gif',
            'image/webp' => '.webp',
        ];
        $ext = isset($extension_map[$ctype]) ? $extension_map[$ctype] : '.jpg';

        $tmp = wp_tempnam($url);
        if (! $tmp) {
            return new WP_Error('temp_file_failed', 'Could not create a temp file.');
        }

        WP_Filesystem();
        if (! $wp_filesystem) {
            wp_delete_file($tmp);
            return new WP_Error('filesystem_error', 'Could not initialize WordPress filesystem.');
        }

        // Give the temp file its real extension, then write the image into it.
        $tmp_file = $tmp . $ext;
        if (! $wp_filesystem->move($tmp, $tmp_file, true)) {
            wp_delete_file($tmp);
            return new WP_Error('rename_failed', 'Could not rename temporary file.');
        }
        if (! $wp_filesystem->put_contents($tmp_file, $body)) {
            wp_delete_file($tmp_file);
            return new WP_Error('write_failed', 'Could not write the downloaded image to a temp file.');
        }

        // Move the file into the uploads directory.
        $file_array = [
            'name'     => basename($tmp_file),
            'tmp_name' => $tmp_file,
        ];
        $sideload = wp_handle_sideload($file_array, ['test_form' => false]);
        if (isset($sideload['error'])) {
            wp_delete_file($tmp_file);
            return new WP_Error('sideload_error', $sideload['error']);
        }

        $attachment = [
            'post_mime_type' => $sideload['type'],
            'post_title'     => $desc ?: basename($sideload['file']),
            'post_content'   => '',
            'post_status'    => 'inherit',
        ];
        // Fourth argument: return a WP_Error on failure (the default is 0).
        $attach_id = wp_insert_attachment($attachment, $sideload['file'], $post_id, true);
        if (is_wp_error($attach_id)) {
            wp_delete_file($sideload['file']);
            return $attach_id;
        }

        // Generate image metadata (thumbnails, etc.)
        $attach_data = wp_generate_attachment_metadata($attach_id, $sideload['file']);
        wp_update_attachment_metadata($attach_id, $attach_data);

        // Remember the source so the same URL is not uploaded again.
        update_post_meta($attach_id, '_remote_url', $url);

        return [
            'attachment_id' => $attach_id,
            'url'           => wp_get_attachment_url($attach_id),
        ];
    }

    /**
     * Create a core/embed block for a YouTube video.
     *
     * Looks at the first <iframe> inside the container and recognises
     * youtube.com/embed/, youtube-nocookie.com/embed/ and youtu.be/ URLs.
     *
     * @param DOMElement $container_node The video container div
     * @return string Gutenberg block markup, or '' when no YouTube video is found
     */
    private function create_video_block(DOMElement $container_node)
    {
        $iframe = $container_node->getElementsByTagName('iframe')->item(0);
        if (! $iframe) {
            return '';
        }

        $src = $iframe->getAttribute('src');

        // The ID is limited to URL-safe characters so it can be placed in the
        // block comment and the markup without further escaping concerns.
        $pattern = '/(?:youtube(?:-nocookie)?\.com\/embed\/|youtu\.be\/)([A-Za-z0-9_-]+)/';
        if (! preg_match($pattern, $src, $matches)) {
            return '';
        }

        $url = 'https://www.youtube.com/watch?v=' . $matches[1];

        $attrs = [
            'url'              => $url,
            'type'             => 'video',
            'providerNameSlug' => 'youtube',
            'responsive'       => true,
            'className'        => 'wp-embed-aspect-16-9 wp-has-aspect-ratio',
        ];

        $block  = '<!-- wp:embed ' . json_encode($attrs, JSON_UNESCAPED_SLASHES) . ' -->' . "\n";
        $block .= '<figure class="wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube ' .
            'wp-embed-aspect-16-9 wp-has-aspect-ratio">';
        $block .= '<div class="wp-block-embed__wrapper">' . "\n";
        $block .= esc_url($url) . "\n";
        $block .= '</div></figure>' . "\n";
        $block .= '<!-- /wp:embed -->' . "\n";

        return $block;
    }

    /**
     * Collect the rows that belong to a table itself (direct <tr> children
     * and those inside its <thead>/<tbody>/<tfoot>), ignoring rows of tables
     * nested inside cells.
     *
     * @param DOMElement $table
     * @return DOMNode[]
     */
    private function collect_table_rows(DOMElement $table)
    {
        $rows = [];

        foreach ($table->childNodes as $child) {
            if ($child->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $tag = strtolower($child->nodeName);
            if ($tag === 'tr') {
                $rows[] = $child;
            } elseif (in_array($tag, ['thead', 'tbody', 'tfoot'], true)) {
                foreach ($child->childNodes as $row) {
                    if ($row->nodeType === XML_ELEMENT_NODE && strtolower($row->nodeName) === 'tr') {
                        $rows[] = $row;
                    }
                }
            }
        }

        return $rows;
    }

    /**
     * Render the <th>/<td> cells of one row using the given output tag.
     *
     * @param DOMNode $row
     * @param string  $cell_tag 'th' or 'td'
     * @return string
     */
    private function render_table_cells(DOMNode $row, $cell_tag)
    {
        $cells = '';

        foreach ($row->childNodes as $cell) {
            if ($cell->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            $tag = strtolower($cell->nodeName);
            if ($tag !== 'td' && $tag !== 'th') {
                continue;
            }

            $content = wp_kses_post(trim($this->get_inner_html($cell)));
            $cells  .= "<{$cell_tag}>{$content}</{$cell_tag}>";
        }

        return $cells;
    }

    /**
     * Create a core/table Gutenberg block from a table element.
     *
     * The first row becomes the <thead> when it contains at least one <th>;
     * all remaining rows go into <tbody>.
     *
     * @param DOMNode $table_node The table element to convert
     * @return string Gutenberg block markup, or '' when the table has no rows
     */
    private function create_table_block(DOMNode $table_node)
    {
        if (! ($table_node instanceof DOMElement)) {
            return '';
        }

        $rows = $this->collect_table_rows($table_node);
        if (! $rows) {
            return '';
        }

        // Does the first row contain header cells?
        $has_header = false;
        foreach ($rows[0]->childNodes as $cell) {
            if ($cell->nodeType === XML_ELEMENT_NODE && strtolower($cell->nodeName) === 'th') {
                $has_header = true;
                break;
            }
        }

        $output  = "<!-- wp:table -->\n";
        $output .= '<figure class="wp-block-table"><table class="has-fixed-layout">';

        if ($has_header) {
            $output .= '<thead><tr>' . $this->render_table_cells(array_shift($rows), 'th') . '</tr></thead>';
        }

        $output .= '<tbody>';
        foreach ($rows as $row) {
            $output .= '<tr>' . $this->render_table_cells($row, 'td') . '</tr>';
        }
        $output .= '</tbody>';

        $output .= '</table></figure>';
        $output .= "\n<!-- /wp:table -->\n";

        return $output;
    }
}
