<?php
/**
 * RankJet AI XML Sitemap Generator
 * 
 * Generates dynamic, high-performance XML sitemaps with:
 * - Sitemap index linking to sub-sitemaps
 * - Smart filtering (noindex, private, draft, external canonical)
 * - Automatic pagination (50,000 URL limit per file)
 * - 24-hour transient caching with intelligent invalidation
 * 
 * @package RankJet_AI
 * @since 1.0.8
 */

if (!defined('ABSPATH')) {
    exit;
}

class Rankjet_Ai_Sitemap {

    /**
     * Maximum number of URLs written into a single sitemap file.
     *
     * 50,000 is the per-file cap of the sitemaps.org protocol (which Google follows).
     * The value doubles as the page size when post type and taxonomy sitemaps are paginated.
     */
    const MAX_URLS_PER_SITEMAP = 50000;

    /**
     * Lifetime of a cached sitemap transient, in seconds (24 hours).
     */
    const CACHE_EXPIRATION = DAY_IN_SECONDS;

    /**
     * Prefix of every sitemap transient key; the sitemap type and page number are appended to it.
     */
    const CACHE_PREFIX = 'rankjet_sitemap_';

    /**
     * Wire the sitemap feature into WordPress.
     *
     * No hook is registered when the sitemap is switched off (see is_enabled()).
     */
    public function __construct() {
        if (!$this->is_enabled()) {
            return;
        }

        // Routing: rewrite rules, the request handler and the query vars both rely on.
        add_action('init', [$this, 'add_rewrite_rules'], 1);
        add_action('template_redirect', [$this, 'handle_sitemap_request']);
        add_filter('query_vars', [$this, 'add_query_vars']);

        // Saving or deleting a post drops the cached sitemaps it appears in.
        foreach (['save_post', 'delete_post'] as $post_hook) {
            add_action($post_hook, [$this, 'invalidate_cache'], 10, 1);
        }

        // Creating, editing or deleting a term drops the cached taxonomy sitemaps.
        foreach (['create_term', 'edit_term', 'delete_term'] as $term_hook) {
            add_action($term_hook, [$this, 'invalidate_taxonomy_cache'], 10, 3);
        }
    }

    /**
     * Tell whether the sitemap feature is switched on.
     *
     * Reads the `rankjet_sitemap_enabled` option, which defaults to 'yes' when unset.
     *
     * @return bool True only when the option equals the string 'yes'.
     */
    public function is_enabled() {
        $setting = get_option('rankjet_sitemap_enabled', 'yes');

        return 'yes' === $setting;
    }

    /**
     * Register the query vars that carry the requested sitemap type and page.
     *
     * @param array $vars Query var names collected so far.
     * @return array The same list with the two sitemap vars appended.
     */
    public function add_query_vars($vars) {
        foreach (['rankjet_sitemap', 'rankjet_sitemap_page'] as $sitemap_var) {
            $vars[] = $sitemap_var;
        }

        return $vars;
    }

    /**
     * Register the rewrite rules that route sitemap URLs to this class.
     *
     * - `sitemap_index.xml` maps to the sitemap index.
     * - `{type}-sitemap{N}.xml` maps to a post type / taxonomy sitemap, with an optional page number N.
     *
     * The {type} capture accepts lowercase letters, digits, dashes and underscores, i.e. every
     * character WordPress allows in a post type or taxonomy key. The previous `[a-z_]+` capture
     * could not match keys such as `my-cpt` or `type2`, so their sitemap URLs were never routed here.
     *
     * Rewrite rules are persisted by WordPress, so a change to these patterns only takes effect
     * after the rules are flushed (see activate()).
     */
    public function add_rewrite_rules() {
        // Sitemap index.
        add_rewrite_rule(
            'sitemap_index\.xml$',
            'index.php?rankjet_sitemap=index',
            'top'
        );

        // Sub-sitemaps with optional pagination. The greedy type capture backtracks to the last
        // "-sitemap" that is followed only by digits and ".xml".
        add_rewrite_rule(
            '([a-z0-9_-]+)-sitemap([0-9]*)\.xml$',
            'index.php?rankjet_sitemap=$matches[1]&rankjet_sitemap_page=$matches[2]',
            'top'
        );
    }

    /**
     * Serve a sitemap when the current request targets one.
     *
     * Does nothing unless the `rankjet_sitemap` query var is set. Otherwise it sends XML headers,
     * prints the requested sitemap (or an XML error body with a 404 status) and ends the request.
     *
     * Custom taxonomies enabled in the settings are served as well; previously they were listed
     * in the sitemap index but fell through to the 404 branch here.
     *
     * NOTE(review): the built-in types (post, page, category, post_tag) are served even when
     * they are disabled in the settings and therefore absent from the index - confirm whether
     * that is intended.
     */
    public function handle_sitemap_request() {
        $sitemap_type = get_query_var('rankjet_sitemap');

        if (empty($sitemap_type)) {
            return;
        }

        // A URL without a page suffix yields an empty string, which casts to 0; clamp to page 1.
        $page = max(1, (int) get_query_var('rankjet_sitemap_page', 1));

        // Set XML headers
        header('Content-Type: application/xml; charset=UTF-8');
        header('X-Robots-Tag: noindex, follow');

        // Generate appropriate sitemap
        switch ($sitemap_type) {
            case 'index':
                echo $this->generate_sitemap_index();
                break;
            case 'post':
                echo $this->generate_post_type_sitemap('post', $page);
                break;
            case 'page':
                echo $this->generate_post_type_sitemap('page', $page);
                break;
            case 'category':
                echo $this->generate_taxonomy_sitemap('category', $page);
                break;
            case 'post_tag':
                echo $this->generate_taxonomy_sitemap('post_tag', $page);
                break;
            default:
                // Query vars can arrive as arrays (?rankjet_sitemap[]=x); only strings name a type.
                $is_named_type = is_string($sitemap_type);

                if ($is_named_type && post_type_exists($sitemap_type) && $this->is_post_type_included($sitemap_type)) {
                    // Enabled custom post type.
                    echo $this->generate_post_type_sitemap($sitemap_type, $page);
                } elseif ($is_named_type && in_array($sitemap_type, $this->get_enabled_taxonomies(), true)) {
                    // Enabled custom taxonomy.
                    echo $this->generate_taxonomy_sitemap($sitemap_type, $page);
                } else {
                    // 404 for unknown sitemap types
                    status_header(404);
                    echo '<?xml version="1.0" encoding="UTF-8"?><error>Sitemap not found</error>';
                }
        }
        exit;
    }

    /**
     * Build the sitemap index listing every sub-sitemap.
     *
     * Each enabled post type and taxonomy contributes one `<sitemap>` entry per page of
     * MAX_URLS_PER_SITEMAP items (at least one entry, even when the type is empty). The page
     * number is only part of the file name when a type spans more than one page.
     *
     * The result is served from / stored in the transient cache under type 'index', page 1.
     *
     * @return string Sitemap index XML.
     */
    public function generate_sitemap_index() {
        // Check cache
        $cached = $this->get_cached_sitemap('index', 1);
        if ($cached !== false) {
            return $cached;
        }

        // Get XSL stylesheet URL
        $xsl_url = RANKJET_AI_PLUGIN_URL . 'inc/sitemap/sitemap-index.xsl';

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<?xml-stylesheet type="text/xsl" href="' . esc_url($xsl_url) . '"?>' . "\n";
        $xml .= '<!-- Generated by RankJet AI - https://rankjet.ai -->' . "\n";
        $xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        foreach ($this->get_enabled_post_types() as $post_type) {
            $pages = max(1, (int) ceil($this->get_post_type_count($post_type) / self::MAX_URLS_PER_SITEMAP));

            // Same value for every page of the type, so query it once instead of once per page.
            $lastmod = $this->get_post_type_last_modified($post_type);

            for ($i = 1; $i <= $pages; $i++) {
                $page_suffix = $pages > 1 ? $i : '';
                $xml .= $this->build_sitemap_entry(
                    home_url("/{$post_type}-sitemap{$page_suffix}.xml"),
                    $lastmod
                );
            }
        }

        foreach ($this->get_enabled_taxonomies() as $taxonomy) {
            $pages = max(1, (int) ceil($this->get_taxonomy_count($taxonomy) / self::MAX_URLS_PER_SITEMAP));

            // Same value for every page of the taxonomy, so query it once instead of once per page.
            $lastmod = $this->get_taxonomy_last_modified($taxonomy);

            for ($i = 1; $i <= $pages; $i++) {
                $page_suffix = $pages > 1 ? $i : '';
                $xml .= $this->build_sitemap_entry(
                    home_url("/{$taxonomy}-sitemap{$page_suffix}.xml"),
                    $lastmod
                );
            }
        }

        $xml .= '</sitemapindex>';

        // Cache the result
        $this->set_cached_sitemap('index', 1, $xml);

        return $xml;
    }

    /**
     * Build one page of the sitemap for a post type.
     *
     * Published posts are read newest-modified first, MAX_URLS_PER_SITEMAP per page. Posts whose
     * `_rankjet_robots_noindex` meta equals '1' are left out by the query itself; the rest are
     * filtered through should_include_post(). The XML is served from / stored in the transient cache.
     *
     * @param string $post_type Post type to list.
     * @param int    $page      1-based page number.
     * @return string Sitemap XML.
     */
    public function generate_post_type_sitemap($post_type, $page = 1) {
        $from_cache = $this->get_cached_sitemap($post_type, $page);
        if (false !== $from_cache) {
            return $from_cache;
        }

        $stylesheet = RANKJET_AI_PLUGIN_URL . 'inc/sitemap/sitemap.xsl';

        $header_lines = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<?xml-stylesheet type="text/xsl" href="' . esc_url($stylesheet) . '"?>',
            '<!-- Generated by RankJet AI - https://rankjet.ai -->',
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        ];
        $document = implode("\n", $header_lines) . "\n";

        // Keep posts that either carry no noindex meta or carry a value other than '1'.
        $indexable_only = [
            'relation' => 'OR',
            [
                'key'     => '_rankjet_robots_noindex',
                'compare' => 'NOT EXISTS',
            ],
            [
                'key'     => '_rankjet_robots_noindex',
                'value'   => '1',
                'compare' => '!=',
            ],
        ];

        $candidates = get_posts([
            'post_type'      => $post_type,
            'post_status'    => 'publish',
            'posts_per_page' => self::MAX_URLS_PER_SITEMAP,
            'offset'         => ($page - 1) * self::MAX_URLS_PER_SITEMAP,
            'orderby'        => 'modified',
            'order'          => 'DESC',
            'no_found_rows'  => true,
            'meta_query'     => $indexable_only,
        ]);
        $frequency = $this->get_changefreq($post_type);

        foreach ($candidates as $candidate) {
            if ($this->should_include_post($candidate)) {
                $document .= $this->build_url_entry(
                    get_permalink($candidate->ID),
                    get_post_modified_time('c', true, $candidate->ID),
                    $frequency
                );
            }
        }

        $document .= '</urlset>';

        $this->set_cached_sitemap($post_type, $page, $document);

        return $document;
    }

    /**
     * Build one page of the sitemap for a taxonomy.
     *
     * Lists non-empty terms, MAX_URLS_PER_SITEMAP per page, skipping terms whose
     * `_rankjet_robots_noindex` term meta equals '1' and terms for which no link can be
     * resolved. The XML is served from / stored in the transient cache.
     *
     * NOTE(review): get_term_last_modified() issues one database query per term, so a cold
     * cache on a large taxonomy costs one query per listed term.
     *
     * @param string $taxonomy Taxonomy to list.
     * @param int    $page     1-based page number.
     * @return string Sitemap XML.
     */
    public function generate_taxonomy_sitemap($taxonomy, $page = 1) {
        // Check cache
        $cached = $this->get_cached_sitemap($taxonomy, $page);
        if ($cached !== false) {
            return $cached;
        }

        // Get XSL stylesheet URL
        $xsl_url = RANKJET_AI_PLUGIN_URL . 'inc/sitemap/sitemap.xsl';

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<?xml-stylesheet type="text/xsl" href="' . esc_url($xsl_url) . '"?>' . "\n";
        $xml .= '<!-- Generated by RankJet AI - https://rankjet.ai -->' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        // Query terms with pagination
        $offset = ($page - 1) * self::MAX_URLS_PER_SITEMAP;

        $terms = get_terms([
            'taxonomy'   => $taxonomy,
            'hide_empty' => true,
            'number'     => self::MAX_URLS_PER_SITEMAP,
            'offset'     => $offset,
        ]);

        // An unknown taxonomy yields a WP_Error; treat it as "no terms".
        if (is_wp_error($terms)) {
            $terms = [];
        }

        $changefreq = $this->get_changefreq($taxonomy);

        foreach ($terms as $term) {
            // Check if term is set to noindex
            $noindex = get_term_meta($term->term_id, '_rankjet_robots_noindex', true);
            if ($noindex === '1') {
                continue;
            }

            // get_term_link() returns a WP_Error when the link cannot be built; such a value
            // must never reach esc_url(), so the term is skipped instead.
            $term_url = get_term_link($term);
            if (is_wp_error($term_url)) {
                continue;
            }

            $xml .= $this->build_url_entry(
                $term_url,
                $this->get_term_last_modified($term),
                $changefreq
            );
        }

        $xml .= '</urlset>';

        // Cache the result
        $this->set_cached_sitemap($taxonomy, $page, $xml);

        return $xml;
    }

    /**
     * Render one `<sitemap>` element of the sitemap index.
     *
     * @param string      $loc     URL of the sub-sitemap.
     * @param string|null $lastmod Last-modified timestamp; the `<lastmod>` tag is omitted when falsy.
     * @return string XML fragment ending with a newline.
     */
    private function build_sitemap_entry($loc, $lastmod) {
        $lastmod_line = '';
        if ($lastmod) {
            $lastmod_line = "    <lastmod>" . esc_html($lastmod) . "</lastmod>\n";
        }

        return "  <sitemap>\n"
            . "    <loc>" . esc_url($loc) . "</loc>\n"
            . $lastmod_line
            . "  </sitemap>\n";
    }

    /**
     * Render one `<url>` element of a sitemap.
     *
     * @param string      $loc        Page URL.
     * @param string|null $lastmod    Last-modified timestamp; the `<lastmod>` tag is omitted when falsy.
     * @param string      $changefreq Value of the `<changefreq>` tag.
     * @return string XML fragment ending with a newline.
     */
    private function build_url_entry($loc, $lastmod, $changefreq = 'weekly') {
        $lines = [
            "  <url>",
            "    <loc>" . esc_url($loc) . "</loc>",
        ];

        if ($lastmod) {
            $lines[] = "    <lastmod>" . esc_html($lastmod) . "</lastmod>";
        }

        $lines[] = "    <changefreq>" . esc_html($changefreq) . "</changefreq>";
        $lines[] = "  </url>";

        return implode("\n", $lines) . "\n";
    }

    /**
     * Decide whether a post belongs in the sitemap.
     *
     * A post is rejected when it is password protected, when its `_rankjet_canonical_url`
     * meta points somewhere other than its own permalink, or when RankMath, Yoast or
     * All in One SEO meta marks it as noindex.
     *
     * @param WP_Post $post Post to inspect.
     * @return bool True when the post should be listed.
     */
    private function should_include_post($post) {
        // Exclude password-protected posts
        if (!empty($post->post_password)) {
            return false;
        }

        // Check for external canonical
        $canonical = get_post_meta($post->ID, '_rankjet_canonical_url', true);
        if (!empty($canonical)) {
            $post_url = get_permalink($post->ID);
            // If canonical points elsewhere, exclude
            if (trailingslashit($canonical) !== trailingslashit($post_url)) {
                return false;
            }
        }

        // Check noindex from other SEO plugins
        // RankMath - strict comparison so that only the literal string 'noindex' matches
        // (a loose in_array() can match unrelated values such as 0 or true).
        $rankmath_robots = get_post_meta($post->ID, 'rank_math_robots', true);
        if (is_array($rankmath_robots) && in_array('noindex', $rankmath_robots, true)) {
            return false;
        }

        // Yoast
        $yoast_noindex = get_post_meta($post->ID, '_yoast_wpseo_meta-robots-noindex', true);
        if ($yoast_noindex === '1') {
            return false;
        }

        // All in One SEO
        $aioseo_noindex = get_post_meta($post->ID, '_aioseo_noindex', true);
        if ($aioseo_noindex === '1') {
            return false;
        }

        return true;
    }

    /**
     * List the post types that get their own sitemap.
     *
     * 'post' and 'page' are included unless switched off in the settings (both default to on).
     * Public, non-built-in post types are added only when the custom post type option is 'yes'
     * (default off), minus those listed in `rankjet_sitemap_excluded_post_types`.
     *
     * @return string[] Post type names.
     */
    private function get_enabled_post_types() {
        $enabled = [];

        // Default post types
        if (get_option('rankjet_sitemap_include_posts', 'yes') === 'yes') {
            $enabled[] = 'post';
        }
        if (get_option('rankjet_sitemap_include_pages', 'yes') === 'yes') {
            $enabled[] = 'page';
        }

        // Custom post types (optional feature)
        if (get_option('rankjet_sitemap_include_custom_post_types', 'no') === 'yes') {
            $custom_types = get_post_types([
                'public'   => true,
                '_builtin' => false,
            ], 'names');

            $excluded_cpts = (array) get_option('rankjet_sitemap_excluded_post_types', []);

            foreach ($custom_types as $cpt) {
                // Strict match: a stray non-string entry in the option (e.g. true or 0) must not
                // loosely equal a post type name and exclude it.
                if (!in_array($cpt, $excluded_cpts, true)) {
                    $enabled[] = $cpt;
                }
            }
        }

        return $enabled;
    }

    /**
     * Tell whether a post type is among the enabled sitemap post types.
     *
     * The comparison is strict so that a request-derived value only matches an identical
     * post type name (loose comparison treats numeric strings such as '10' and '1e1' as equal).
     *
     * @param string $post_type Post type name.
     * @return bool True when get_enabled_post_types() lists the post type.
     */
    private function is_post_type_included($post_type) {
        return in_array($post_type, $this->get_enabled_post_types(), true);
    }

    /**
     * List the taxonomies that get their own sitemap.
     *
     * 'category' defaults to on and 'post_tag' to off. Public, non-built-in taxonomies are
     * added only when the custom taxonomy option is 'yes' (default off), minus those listed in
     * `rankjet_sitemap_excluded_taxonomies`.
     *
     * @return string[] Taxonomy names.
     */
    private function get_enabled_taxonomies() {
        $enabled = [];

        if (get_option('rankjet_sitemap_include_categories', 'yes') === 'yes') {
            $enabled[] = 'category';
        }

        if (get_option('rankjet_sitemap_include_tags', 'no') === 'yes') {
            $enabled[] = 'post_tag';
        }

        // Custom taxonomies (optional feature)
        if (get_option('rankjet_sitemap_include_custom_taxonomies', 'no') === 'yes') {
            $custom_taxonomies = get_taxonomies([
                'public'   => true,
                '_builtin' => false,
            ], 'names');

            $excluded_taxonomies = (array) get_option('rankjet_sitemap_excluded_taxonomies', []);

            foreach ($custom_taxonomies as $taxonomy) {
                // Strict match: a stray non-string entry in the option (e.g. true or 0) must not
                // loosely equal a taxonomy name and exclude it.
                if (!in_array($taxonomy, $excluded_taxonomies, true)) {
                    $enabled[] = $taxonomy;
                }
            }
        }

        return $enabled;
    }

    /**
     * Count the published posts of a post type; used to work out how many sitemap pages it needs.
     *
     * @param string $post_type Post type name.
     * @return int Number of published posts, 0 when no publish count is reported.
     */
    private function get_post_type_count($post_type) {
        $totals = wp_count_posts($post_type);

        if (!isset($totals->publish)) {
            return 0;
        }

        return (int) $totals->publish;
    }

    /**
     * Count the non-empty terms of a taxonomy; used to work out how many sitemap pages it needs.
     *
     * wp_count_terms() returns a WP_Error for an unknown taxonomy. Casting that object to int
     * raises a PHP warning and evaluates to 1, so the error is mapped to 0 explicitly.
     *
     * @param string $taxonomy Taxonomy name.
     * @return int Number of non-empty terms, 0 on error.
     */
    private function get_taxonomy_count($taxonomy) {
        $count = wp_count_terms(['taxonomy' => $taxonomy, 'hide_empty' => true]);

        if (is_wp_error($count)) {
            return 0;
        }

        return (int) $count;
    }

    /**
     * Get the most recent modification time among the published posts of a post type.
     *
     * The column read is `post_modified_gmt`, so the value is parsed and formatted as UTC.
     * (mysql2date() interprets its input in the site timezone, which stamped the GMT value with
     * the site's UTC offset and shifted the reported time on any site not running on UTC.)
     *
     * @param string $post_type Post type name.
     * @return string|null ISO 8601 timestamp in UTC, or null when there is no published post.
     */
    private function get_post_type_last_modified($post_type) {
        global $wpdb;

        $date = $wpdb->get_var($wpdb->prepare(
            "SELECT post_modified_gmt FROM {$wpdb->posts} 
             WHERE post_type = %s AND post_status = 'publish' 
             ORDER BY post_modified_gmt DESC LIMIT 1",
            $post_type
        ));

        if (!$date) {
            return null;
        }

        // Explicit "UTC" suffix so parsing does not depend on PHP's default timezone.
        $timestamp = strtotime($date . ' UTC');

        return $timestamp !== false ? gmdate('c', $timestamp) : null;
    }

    /**
     * Get the most recent modification time among published posts assigned to any term of a taxonomy.
     *
     * The column read is `post_modified_gmt`, so the value is parsed and formatted as UTC.
     * (mysql2date() interprets its input in the site timezone, which stamped the GMT value with
     * the site's UTC offset and shifted the reported time on any site not running on UTC.)
     *
     * @param string $taxonomy Taxonomy name.
     * @return string|null ISO 8601 timestamp in UTC, or null when no published post uses the taxonomy.
     */
    private function get_taxonomy_last_modified($taxonomy) {
        global $wpdb;

        // Get the most recently modified post in this taxonomy
        $date = $wpdb->get_var($wpdb->prepare(
            "SELECT p.post_modified_gmt 
             FROM {$wpdb->posts} p
             INNER JOIN {$wpdb->term_relationships} tr ON p.ID = tr.object_id
             INNER JOIN {$wpdb->term_taxonomy} tt ON tr.term_taxonomy_id = tt.term_taxonomy_id
             WHERE tt.taxonomy = %s AND p.post_status = 'publish'
             ORDER BY p.post_modified_gmt DESC LIMIT 1",
            $taxonomy
        ));

        if (!$date) {
            return null;
        }

        // Explicit "UTC" suffix so parsing does not depend on PHP's default timezone.
        $timestamp = strtotime($date . ' UTC');

        return $timestamp !== false ? gmdate('c', $timestamp) : null;
    }

    /**
     * Get the most recent modification time among published posts assigned to a term.
     *
     * The column read is `post_modified_gmt`, so the value is parsed and formatted as UTC.
     * (mysql2date() interprets its input in the site timezone, which stamped the GMT value with
     * the site's UTC offset and shifted the reported time on any site not running on UTC.)
     *
     * @param WP_Term $term Term whose `term_taxonomy_id` selects the posts.
     * @return string|null ISO 8601 timestamp in UTC, or null when no published post uses the term.
     */
    private function get_term_last_modified($term) {
        global $wpdb;

        $date = $wpdb->get_var($wpdb->prepare(
            "SELECT p.post_modified_gmt 
             FROM {$wpdb->posts} p
             INNER JOIN {$wpdb->term_relationships} tr ON p.ID = tr.object_id
             WHERE tr.term_taxonomy_id = %d AND p.post_status = 'publish'
             ORDER BY p.post_modified_gmt DESC LIMIT 1",
            $term->term_taxonomy_id
        ));

        if (!$date) {
            return null;
        }

        // Explicit "UTC" suffix so parsing does not depend on PHP's default timezone.
        $timestamp = strtotime($date . ' UTC');

        return $timestamp !== false ? gmdate('c', $timestamp) : null;
    }

    /**
     * Look up the `<changefreq>` value configured for a content type.
     *
     * Reads the `rankjet_sitemap_{type}_changefreq` option and falls back to 'weekly' when the
     * option is missing or empty.
     *
     * @param string $type Post type or taxonomy name.
     * @return mixed The stored option value, or 'weekly'.
     */
    private function get_changefreq($type) {
        $configured = get_option("rankjet_sitemap_{$type}_changefreq", '');

        if (empty($configured)) {
            return 'weekly';
        }

        return $configured;
    }

    /**
     * Look up the priority configured for a content type.
     *
     * Reads the `rankjet_sitemap_{type}_priority` option. When nothing is configured, the
     * built-in default for the type is used, or '0.5' for types without one.
     *
     * A stored priority of zero ('0', 0 or 0.0) is honoured: empty() alone would treat it as
     * "not configured" and silently replace it with the default.
     *
     * @param string $type Post type or taxonomy name.
     * @return mixed The stored option value, or the default priority string.
     */
    private function get_priority($type) {
        $defaults = [
            'post'     => '0.6',
            'page'     => '0.7',
            'category' => '0.4',
            'post_tag' => '0.3',
        ];

        $option = get_option("rankjet_sitemap_{$type}_priority", '');

        // is_numeric() lets numeric zero through; every other value keeps the empty() rule.
        if (is_numeric($option) || !empty($option)) {
            return $option;
        }

        return isset($defaults[$type]) ? $defaults[$type] : '0.5';
    }

    /**
     * Fetch a cached sitemap from the transient store.
     *
     * @param string $type Sitemap type ('index', a post type or a taxonomy).
     * @param int    $page Page number.
     * @return mixed Whatever get_transient() returns for the key `{CACHE_PREFIX}{type}_{page}`.
     */
    private function get_cached_sitemap($type, $page) {
        $transient_name = sprintf('%s%s_%s', self::CACHE_PREFIX, $type, $page);

        return get_transient($transient_name);
    }

    /**
     * Store a generated sitemap in the transient store for CACHE_EXPIRATION seconds.
     *
     * @param string $type    Sitemap type ('index', a post type or a taxonomy).
     * @param int    $page    Page number.
     * @param string $content Sitemap XML.
     */
    private function set_cached_sitemap($type, $page, $content) {
        $transient_name = sprintf('%s%s_%s', self::CACHE_PREFIX, $type, $page);

        set_transient($transient_name, $content, self::CACHE_EXPIRATION);
    }

    /**
     * Drop the cached sitemaps affected by a saved or deleted post.
     *
     * Revisions, autosaves and unknown post IDs are ignored. Otherwise the cache of the post's
     * own post type, of the index, and of every taxonomy registered for that post type is cleared.
     *
     * @param int $post_id ID of the post that changed.
     */
    public function invalidate_cache($post_id) {
        $is_revision_or_autosave = wp_is_post_revision($post_id) || wp_is_post_autosave($post_id);
        if ($is_revision_or_autosave) {
            return;
        }

        $changed_post = get_post($post_id);
        if (!$changed_post) {
            return;
        }

        // The post type's own sitemap first, then the index that links to it.
        foreach ([$changed_post->post_type, 'index'] as $sitemap_type) {
            $this->clear_sitemap_cache($sitemap_type);
        }

        // Every taxonomy attached to the post type may list a term whose lastmod changed.
        foreach (get_object_taxonomies($changed_post->post_type) as $attached_taxonomy) {
            $this->clear_sitemap_cache($attached_taxonomy);
        }
    }

    /**
     * Drop the cached sitemap of a taxonomy, and the index, after one of its terms changed.
     *
     * @param int    $term_id  Term ID (unused).
     * @param int    $tt_id    Term taxonomy ID (unused).
     * @param string $taxonomy Taxonomy the term belongs to.
     */
    public function invalidate_taxonomy_cache($term_id, $tt_id, $taxonomy) {
        foreach ([$taxonomy, 'index'] as $sitemap_type) {
            $this->clear_sitemap_cache($sitemap_type);
        }
    }

    /**
     * Delete every cached page of one sitemap type.
     *
     * Cache entries are transients keyed `{CACHE_PREFIX}{type}_{page}`. They are removed with
     * delete_transient() rather than a raw DELETE so that WordPress also drops its cached copy
     * of the option and so that the call works when a persistent object cache is in use.
     *
     * Only keys whose suffix is a page number are removed. The previous unescaped prefix LIKE
     * also deleted the caches of longer type names sharing the prefix (clearing 'post' wiped
     * 'post_tag'), and treated '_' as a single-character wildcard.
     *
     * With a persistent object cache the stored keys cannot be listed, so pages are removed
     * from 1 upwards until the first page that is not cached; pages cached after a gap stay
     * until they expire.
     *
     * @param string $type Sitemap type ('index', a post type or a taxonomy).
     */
    public function clear_sitemap_cache($type) {
        global $wpdb;

        $key_prefix = self::CACHE_PREFIX . $type . '_';

        if (wp_using_ext_object_cache()) {
            // Transients live in the object cache here, not in the options table.
            for ($page = 1; false !== get_transient($key_prefix . $page); $page++) {
                delete_transient($key_prefix . $page);
            }
            return;
        }

        $value_prefix   = '_transient_' . $key_prefix;
        $timeout_prefix = '_transient_timeout_' . $key_prefix;

        // esc_like() makes the underscores in the key literal instead of LIKE wildcards.
        $option_names = $wpdb->get_col($wpdb->prepare(
            "SELECT option_name FROM {$wpdb->options} WHERE option_name LIKE %s OR option_name LIKE %s",
            $wpdb->esc_like($value_prefix) . '%',
            $wpdb->esc_like($timeout_prefix) . '%'
        ));

        // Collect the page suffixes; value rows and timeout rows of one page collapse into one key.
        $pages = [];
        foreach ($option_names as $option_name) {
            if (strpos($option_name, $timeout_prefix) === 0) {
                $suffix = substr($option_name, strlen($timeout_prefix));
            } elseif (strpos($option_name, $value_prefix) === 0) {
                $suffix = substr($option_name, strlen($value_prefix));
            } else {
                continue;
            }

            // A non-numeric suffix means the row belongs to a longer type name (e.g. "tag_1"
            // when clearing "post"), so it is left alone.
            if (preg_match('/^[0-9]+$/', $suffix)) {
                $pages[$suffix] = true;
            }
        }

        foreach (array_keys($pages) as $page) {
            // delete_transient() removes both the value row and its timeout row.
            delete_transient($key_prefix . $page);
        }
    }

    /**
     * Delete every cached sitemap, whatever its type.
     *
     * Entries are removed with delete_transient() rather than a raw DELETE so that WordPress
     * also drops its cached copy of the option, and the LIKE pattern is escaped so that the
     * underscores in the prefix are matched literally.
     *
     * With a persistent object cache the stored keys cannot be listed. In that case the index
     * and every currently enabled post type and taxonomy are cleared page by page, from 1
     * upwards until the first page that is not cached; caches of types that have since been
     * disabled stay until they expire.
     */
    public function clear_all_cache() {
        global $wpdb;

        if (wp_using_ext_object_cache()) {
            $types = array_merge(
                ['index'],
                $this->get_enabled_post_types(),
                $this->get_enabled_taxonomies()
            );

            foreach ($types as $type) {
                $key_prefix = self::CACHE_PREFIX . $type . '_';
                for ($page = 1; false !== get_transient($key_prefix . $page); $page++) {
                    delete_transient($key_prefix . $page);
                }
            }
            return;
        }

        // Timeout prefix first: it is the longer of the two, so it is tested first below.
        $option_prefixes = [
            '_transient_timeout_' . self::CACHE_PREFIX,
            '_transient_' . self::CACHE_PREFIX,
        ];

        $option_names = $wpdb->get_col($wpdb->prepare(
            "SELECT option_name FROM {$wpdb->options} WHERE option_name LIKE %s OR option_name LIKE %s",
            $wpdb->esc_like($option_prefixes[0]) . '%',
            $wpdb->esc_like($option_prefixes[1]) . '%'
        ));

        // Map option names back to transient keys; value and timeout rows collapse into one key.
        $transient_keys = [];
        foreach ($option_names as $option_name) {
            foreach ($option_prefixes as $option_prefix) {
                if (strpos($option_name, $option_prefix) === 0) {
                    $transient_keys[self::CACHE_PREFIX . substr($option_name, strlen($option_prefix))] = true;
                    break;
                }
            }
        }

        foreach (array_keys($transient_keys) as $transient_key) {
            // delete_transient() removes both the value row and its timeout row.
            delete_transient((string) $transient_key);
        }
    }

    /**
     * Plugin activation callback: register the sitemap rewrite rules, then flush.
     *
     * The rules are added directly on a fresh instance so that the flush below includes them.
     */
    public static function activate() {
        (new self())->add_rewrite_rules();

        flush_rewrite_rules();
    }

    /**
     * Plugin deactivation callback: remove the sitemap rewrite rules and flush.
     *
     * If the sitemap rules are still registered on the global rewrite object when this runs
     * (the constructor hooks add_rewrite_rules() on `init`), a bare flush would simply persist
     * them again. Every extra rule whose target sets the `rankjet_sitemap` query var is
     * therefore dropped first.
     */
    public static function deactivate() {
        global $wp_rewrite;

        if (isset($wp_rewrite->extra_rules_top) && is_array($wp_rewrite->extra_rules_top)) {
            foreach ($wp_rewrite->extra_rules_top as $pattern => $target) {
                // Match on the rewrite target rather than the pattern, so the check keeps
                // working whatever regular expressions the rules are registered with.
                if (is_string($target) && strpos($target, 'rankjet_sitemap=') !== false) {
                    unset($wp_rewrite->extra_rules_top[$pattern]);
                }
            }
        }

        flush_rewrite_rules();
    }
}
