<?php
declare(strict_types=1);
namespace Mop_Ai_Indexer\Includes\Logic;

/**
 * Serves MOP AI Indexer plain-text endpoints via WordPress routing.
 *
 * This controller:
 * - detects endpoint requests via rewrite query vars
 * - applies headers (Content-Type, X-Robots-Tag, nosniff)
 * - enforces alias -> canonical 301 redirects (duplication prevention)
 * - applies cache/minification guards for common optimization plugins
 * - provides extension points for agent policy allow/deny + logging
 *
 * @since      1.0.0
 * @package    Mop_Ai_Indexer
 * @subpackage Mop_Ai_Indexer/includes/logic
 */

/**
 * If this file is called directly, then exit.
 */
if (! defined('ABSPATH')) exit;

/**
 * Import classes from sub-namespaces.
 */
use Mop_Ai_Indexer\Includes\{Mop_Ai_Indexer_Endpoint_Router};

/**
 * Serves Index files via WordPress routing and applies endpoint runtime guards.
 *
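 * It handles trailing-slash and alias redirects, the agent policy check, plain-text
 * response headers and streaming of the generated file, and it exposes filter
 * callbacks for Autoptimize, LiteSpeed Cache and WP Rocket.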
 *
 * @since      1.0.0
 * @package    Mop_Ai_Indexer
 * @subpackage Mop_Ai_Indexer/includes/logic
 * @author     Anjana Hemachandra
 */
class Mop_Ai_Indexer_Endpoint_Controller {

	/**
	 * Serve the endpoint response when this request targets one of the configured endpoints.
	 *
	 * Hook this early on template_redirect so we exit before theme rendering and before
	 * canonical redirect logic runs.
	 *
	 * Flow:
	 * 1. Redirect "/<file>/" to "/<file>" (301) for supported file names.
	 * 2. Resolve the requested file name from the rewrite query var, falling back to the
	 *    request path when the query var is empty.
	 * 3. Redirect aliases to the canonical file name (301).
	 * 4. Send no-cache headers, run the agent policy check (403 on deny).
	 * 5. Answer 404 in plain text when the generated file is missing, otherwise stream it.
	 *
	 * Every branch that produces output terminates the request with exit.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Agent_Policy_Manager
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @return void
	 */
	public function maybe_serve_endpoint(): void {

		/**
		 * Normalize the request path early.
		 *
		 * This is used for:
		 * - trailing slash normalization (/llms.txt/ -> /llms.txt)
		 * - fallback endpoint detection when the rewrite query var is not populated
		 */
		$path = $this->get_normalized_request_path();

		/**
		 * Trailing slash normalization to avoid duplicate URLs.
		 *
		 * Example:
		 * - /llms.txt/ -> /llms.txt
		 */
		if ($path !== '' && substr($path, -1) === '/') {

			$file_name = $this->match_supported_file_name(rtrim($path, '/'));

			if ($file_name !== '') {
				wp_safe_redirect(home_url('/' . $file_name), 301);
				exit;
			}
		}

		$requested = get_query_var(Mop_Ai_Indexer_Endpoint_Router::QUERY_VAR);
		$requested = is_string($requested) ? $requested : '';

		/**
		 * Fallback endpoint detection.
		 *
		 * When the query var is empty (for example because rewrite rules are out-of-date
		 * until permalinks are flushed), treat an exact match between the request path and
		 * a supported file name as an endpoint request.
		 *
		 * NOTE(review): the path is compared as-is, so this (like the trailing slash check
		 * above) only matches when the endpoint lives at the site root - confirm whether
		 * sub-directory installs need the home path stripped first.
		 */
		if ($requested === '') {
			$requested = $this->match_supported_file_name($path);
		}

		if ($requested === '') return;

		$requested = Mop_Ai_Indexer_Endpoint_Router::sanitize_file_name((string)$requested);

		$canonical = Mop_Ai_Indexer_Endpoint_Router::get_canonical_file_name();

		/**
		 * Alias -> canonical redirect to avoid duplicate near-identical content URLs.
		 */
		if ($requested !== $canonical) {
			wp_safe_redirect(Mop_Ai_Indexer_Endpoint_Router::get_endpoint_url($canonical), 301);
			exit;
		}

		/**
		 * Apply endpoint guards: caching layers should not store these responses.
		 */
		$this->apply_endpoint_runtime_guards();

		/**
		 * Apply allow/deny policy.
		 *
		 * The decision is delegated entirely to the policy manager.
		 */
		$policy = new Mop_Ai_Indexer_Agent_Policy_Manager();
		$user_agent = $policy->get_user_agent();
		$is_allowed = $policy->is_allowed($user_agent, $canonical);

		if (! $is_allowed) {

			$this->send_plain_text_headers($canonical);

			status_header(403);
			echo esc_html__('Forbidden', 'mop-ai-indexer') . "\n";
			echo esc_html__('This endpoint is not available for this user-agent.', 'mop-ai-indexer') . "\n";
			exit;
		}

		/**
		 * Locate the generated file (stored in uploads directory).
		 */
		$file_path = $this->get_plugin_upload_file_path($canonical);

		if (! is_file($file_path)) {

			$this->send_plain_text_headers($canonical);

			status_header(404);
			echo esc_html__('Not found', 'mop-ai-indexer') . "\n";
			/* translators: %s: Endpoint file name (for example: llms.txt). */
			echo sprintf(esc_html__('%s is not generated yet.', 'mop-ai-indexer'), esc_html($canonical)) . "\n";
			echo esc_html__('Generate it in WordPress admin under MOP AI Indexer, Index Manager.', 'mop-ai-indexer') . "\n";
			exit;
		}

		/**
		 * Send headers and stream the file content.
		 *
		 * The status is set explicitly: when the request was only recognised through the
		 * path fallback above, WordPress may already have flagged it as a 404.
		 */
		$this->send_plain_text_headers($canonical);
		status_header(200);

		/**
		 * Optional hit logging.
		 *
		 * Only reached for allowed requests, so 'allowed' is always '1' here.
		 */
		$policy->maybe_log_hit(array(
			'file_name' => $canonical,
			'file_path' => $file_path,
			'user_agent' => $user_agent,
			'allowed' => $is_allowed ? '1' : '0',
		));

		/**
		 * Stream file contents.
		 */
		@readfile($file_path); // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_readfile -- Stream generated plain-text file efficiently without loading into memory; path is allowlisted and constrained to uploads.
		exit;
	}

	/**
	 * Read the current request path, without leading slashes.
	 *
	 * The value is taken from $_SERVER['REQUEST_URI'], unslashed and sanitized, and reduced
	 * to its path component (query string and fragment are dropped).
	 *
	 * @since  1.0.0
	 * @return string Request path without leading slash, or an empty string when unavailable.
	 */
	private function get_normalized_request_path(): string {

		$request_uri = isset($_SERVER['REQUEST_URI']) ? sanitize_text_field(wp_unslash((string)$_SERVER['REQUEST_URI'])) : '';

		if ($request_uri === '') return '';

		$path = wp_parse_url($request_uri, PHP_URL_PATH);

		// wp_parse_url() yields a non-string when the URI has no path or cannot be parsed.
		if (! is_string($path)) return '';

		return ltrim($path, '/');
	}

	/**
	 * Return the supported endpoint file name that exactly equals the given candidate.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @param  string $candidate Path segment to compare (no leading or trailing slash).
	 * @return string The matching supported file name, or an empty string when none matches.
	 */
	private function match_supported_file_name(string $candidate): string {

		if ($candidate === '') return '';

		foreach (Mop_Ai_Indexer_Endpoint_Router::get_supported_file_names() as $file_name) {
			if ($candidate === $file_name) return $candidate;
		}

		return '';
	}

	/**
	 * Apply runtime guards to reduce caching interference.
	 *
	 * Sends WordPress' standard no-cache headers and then overrides Cache-Control, Pragma
	 * and Expires with explicit "do not store" values.
	 *
	 * @since  1.0.0
	 * @return void
	 */
	private function apply_endpoint_runtime_guards(): void {

		/**
		 * Send no-cache headers to discourage caches from storing the output.
		 */
		if (function_exists('nocache_headers')) {
			nocache_headers();
		}

		// header() replaces same-named headers by default, so these values win over the ones above.
		header('Cache-Control: no-store, no-cache, must-revalidate, max-age=0');
		header('Pragma: no-cache');
		header('Expires: 0');
	}

	/**
	 * Send plain-text response headers.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Robots_Indexing_Guard
	 * @param  string $file_name The requested endpoint file name (currently not used by the method body).
	 * @return void
	 */
	private function send_plain_text_headers(string $file_name): void {

		/**
		 * Correct content type is essential: these endpoints must not be served as HTML.
		 */
		header('Content-Type: text/plain; charset=UTF-8');
		header('X-Content-Type-Options: nosniff');

		/**
		 * Noindex header for classic search engines.
		 *
		 * Whether the header is actually sent is decided by the guard class.
		 */
		$guard = new Mop_Ai_Indexer_Robots_Indexing_Guard();
		$guard->maybe_send_noindex_header();

		/**
		 * Strongly discourage framing.
		 */
		header('X-Frame-Options: SAMEORIGIN');
	}

	/**
	 * Resolve uploads path for the generated endpoint file.
	 *
	 * The file is expected at "<uploads basedir>/mop-ai-indexer/<file name>".
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @param  string $file_name Endpoint file name.
	 * @return string Absolute file path in plugin uploads directory, or an empty string
	 *                when the uploads base directory is unknown.
	 */
	private function get_plugin_upload_file_path(string $file_name): string {

		$file_name = Mop_Ai_Indexer_Endpoint_Router::sanitize_file_name($file_name);

		$upload_dir = wp_upload_dir();
		$basedir = isset($upload_dir['basedir']) ? (string)$upload_dir['basedir'] : '';
		$basedir = wp_normalize_path($basedir);

		if ($basedir === '') return '';

		$plugin_uploads_path = wp_normalize_path(trailingslashit($basedir) . 'mop-ai-indexer/');

		return wp_normalize_path(trailingslashit($plugin_uploads_path) . $file_name);
	}

	/**
	 * Autoptimize: disable optimization on endpoint requests.
	 *
	 * The parameter is intentionally untyped: filter values can be altered by other
	 * callbacks on the same hook, and a strict type hint would turn an unexpected value
	 * (for example null) into a TypeError instead of letting the cast below normalize it.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @param  bool $do_noptimize Existing value.
	 * @return bool True on endpoint requests, otherwise the existing value cast to bool.
	 */
	public function autoptimize_disable_optimization($do_noptimize): bool {

		if (Mop_Ai_Indexer_Endpoint_Router::is_endpoint_request_uri()) return true;
		return (bool)$do_noptimize;
	}

	/**
	 * LiteSpeed Cache: mark endpoint requests as not cacheable.
	 *
	 * The parameter is intentionally untyped so that unexpected filter values are
	 * normalized by the cast below rather than raising a TypeError.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @param  bool $is_cacheable Existing value.
	 * @return bool False on endpoint requests, otherwise the existing value cast to bool.
	 */
	public function litespeed_disable_cache($is_cacheable): bool {

		if (Mop_Ai_Indexer_Endpoint_Router::is_endpoint_request_uri()) return false;
		return (bool)$is_cacheable;
	}

	/**
	 * WP Rocket: reject caching for endpoint URIs.
	 *
	 * The parameter is intentionally untyped so that a non-array filter value is replaced
	 * by an empty list instead of raising a TypeError.
	 *
	 * @since  1.0.0
	 * @see    Mop_Ai_Indexer_Endpoint_Router
	 * @param  array $rejected Existing rejected URI patterns.
	 * @return array The existing patterns plus one regex-quoted pattern per supported file name.
	 */
	public function wp_rocket_reject_uri($rejected): array {

		$rejected = is_array($rejected) ? $rejected : array();

		/**
		 * Add endpoint URIs.
		 *
		 * Patterns are added without the domain, starting with a slash, and are only
		 * appended when not already present.
		 */
		foreach (Mop_Ai_Indexer_Endpoint_Router::get_supported_file_names() as $file_name) {
			$pattern = '/' . preg_quote($file_name, '/');
			if (! in_array($pattern, $rejected, true)) {
				$rejected[] = $pattern;
			}
		}

		return $rejected;
	}
}
