<?php
declare(strict_types=1);
namespace Mop_Ai_Indexer\Includes;

/**
 * Registers rewrite rules and query vars for MOP AI Indexer endpoints.
 *
 * This class owns the canonical endpoint and optional alias endpoints that serve
 * the generated plain-text index file through WordPress routing.
 *
 * Why routing is used (instead of placing a physical file in the site root):
 * - allows reliable header control (X-Robots-Tag, Content-Type, etc.)
 * - enables future agent policies (allow/deny by user-agent, logging)
 * - avoids server-specific .htaccess / filesystem permissions issues
 *
 * @since      1.0.0
 * @package    Mop_Ai_Indexer
 * @subpackage Mop_Ai_Indexer/includes
 */

// Stop immediately when ABSPATH is undefined, so this file does nothing if it is executed without that constant set.
if (! defined('ABSPATH')) {
	exit;
}

/**
 * Registers endpoint rewrite rules and query vars for Index files.
 *
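 * It also provides static helpers for validating endpoint file names, resolving
 * the configured canonical file name, building endpoint URLs, and detecting
 * endpoint requests from the raw request URI.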
 *
 * @since      1.0.0
 * @package    Mop_Ai_Indexer
 * @subpackage Mop_Ai_Indexer/includes
 * @author     Anjana Hemachandra
 */
class Mop_Ai_Indexer_Endpoint_Router {

	/**
	 * Query var name used to signal an MOP AI Indexer endpoint request.
	 *
	 * @since 1.0.0
	 * @var   string
	 */
	public const QUERY_VAR = 'mop_ai_indexer_endpoint';

	/**
	 * File name used whenever a requested or configured name is missing or unsupported.
	 *
	 * @var string
	 */
	private const DEFAULT_FILE_NAME = 'llms.txt';

	/**
	 * Register rewrite rules for the canonical endpoint and supported aliases.
	 *
	 * Important:
	 * We register rules for all supported filenames (llms.txt, llms-full.txt).
	 * If a request comes in for a non-canonical alias, the controller can 301 redirect
	 * to the configured canonical file name to avoid duplicate content URLs.
	 *
	 * @since  1.0.0
	 * @return void
	 */
	public function register_rewrite_rules(): void {

		foreach (self::get_supported_file_names() as $file_name) {

			/**
			 * Convert the file name into a safe, fully anchored rewrite regex.
			 *
			 * Example: llms-full.txt -> ^llms\-full\.txt$
			 */
			$pattern = '^' . preg_quote($file_name, '#') . '$';

			// The matched file name is passed on as the value of the endpoint query var.
			add_rewrite_rule(
				$pattern,
				'index.php?' . self::QUERY_VAR . '=' . rawurlencode($file_name),
				'top'
			);
		}
	}

	/**
	 * Register custom query vars.
	 *
	 * Appends the endpoint query var to the list unless it is already present.
	 *
	 * @since  1.0.0
	 * @param  array $vars Existing query vars.
	 * @return array Query vars including the endpoint query var.
	 */
	public function register_query_vars(array $vars): array {

		if (! in_array(self::QUERY_VAR, $vars, true)) {
			$vars[] = self::QUERY_VAR;
		}

		return $vars;
	}

	/**
	 * Return supported endpoint file names.
	 *
	 * @since  1.0.0
	 * @return string[]
	 */
	public static function get_supported_file_names(): array {

		return array(
			self::DEFAULT_FILE_NAME,
			'llms-full.txt',
		);
	}

	/**
	 * Sanitize an endpoint file name to the supported list.
	 *
	 * The value is cleaned, reduced to its base name (dropping any directory part)
	 * and then checked against the supported names; anything else falls back to
	 * the default file name.
	 *
	 * @since  1.0.0
	 * @param  string $file_name Raw file name.
	 * @return string Sanitized file name (or default).
	 */
	public static function sanitize_file_name(string $file_name): string {

		$file_name = wp_basename(sanitize_text_field($file_name));

		if (! in_array($file_name, self::get_supported_file_names(), true)) {
			return self::DEFAULT_FILE_NAME;
		}

		return $file_name;
	}

	/**
	 * Determine the configured canonical file name for the endpoint.
	 *
	 * This uses the plugin setting 'mop_ai_indexer_iset' (iset_file_name_format) but does
	 * not require any other plugin to be active.
	 *
	 * @since  1.0.0
	 * @return string Canonical file name.
	 */
	public static function get_canonical_file_name(): string {

		$iset = get_option('mop_ai_indexer_iset', array());

		// A missing, malformed or non-scalar setting (e.g. a nested array) resolves to the
		// default without attempting a string conversion that would raise a warning.
		if (
			! is_array($iset)
			|| ! isset($iset['iset_file_name_format'])
			|| ! is_scalar($iset['iset_file_name_format'])
		) {
			return self::DEFAULT_FILE_NAME;
		}

		return self::sanitize_file_name((string) $iset['iset_file_name_format']);
	}

	/**
	 * Get the public URL for an endpoint file name.
	 *
	 * @since  1.0.0
	 * @param  string $file_name File name (unsupported names resolve to the default).
	 * @return string URL.
	 */
	public static function get_endpoint_url(string $file_name): string {

		$file_name = self::sanitize_file_name($file_name);
		return home_url('/' . $file_name);
	}

	/**
	 * Determine if the current request URI looks like an MOP AI Indexer endpoint request.
	 *
	 * This is used for cache/minification plugin integration where we need to make
	 * decisions based on the URL path even when the rewrite query var may not be
	 * available (depending on plugin load order).
	 *
	 * The path component of the home URL is removed before comparing, so sites whose
	 * home URL contains a sub-directory (e.g. https://example.com/blog) are detected
	 * at the same URL that get_endpoint_url() produces.
	 *
	 * @since  1.0.0
	 * @return bool
	 */
	public static function is_endpoint_request_uri(): bool {

		if (! isset($_SERVER['REQUEST_URI'])) {
			return false;
		}

		$request_uri = sanitize_text_field(wp_unslash((string) $_SERVER['REQUEST_URI']));
		if ($request_uri === '') {
			return false;
		}

		$path = wp_parse_url($request_uri, PHP_URL_PATH);
		if (! is_string($path)) {
			return false;
		}

		// Drop surrounding whitespace first, then leading/trailing slashes.
		$path = trim(trim($path), '/');
		if ($path === '') {
			return false;
		}

		$path = self::strip_home_path($path);

		return in_array($path, self::get_supported_file_names(), true);
	}

	/**
	 * Remove the home URL path prefix from a slash-trimmed request path.
	 *
	 * Paths that do not start with the home path are returned unchanged, so a
	 * root-level request is still compared against the supported names as-is.
	 *
	 * @param  string $path Request path without leading/trailing slashes.
	 * @return string Path relative to the home URL, without leading slashes.
	 */
	private static function strip_home_path(string $path): string {

		$home_path = wp_parse_url(home_url(), PHP_URL_PATH);
		$home_path = is_string($home_path) ? trim($home_path, '/') : '';

		if ($home_path === '') {
			return $path;
		}

		// Require the separator so "blog" does not match a path such as "blogger/llms.txt".
		$prefix        = $home_path . '/';
		$prefix_length = strlen($prefix);

		if (strncasecmp($path, $prefix, $prefix_length) !== 0) {
			return $path;
		}

		return ltrim(substr($path, $prefix_length), '/');
	}
}
