<?php

namespace Limb_Chatbot\Includes\Services\Knowledge\Source_Validators;

use Limb_Chatbot\Includes\Exceptions\Error_Codes;
use Limb_Chatbot\Includes\Exceptions\Exception;
use Limb_Chatbot\Includes\Interfaces\Source_Validator_Interface;

/**
 * URL Validator for dataset generation.
 *
 * Validates URL sources syntactically: each entry must be a string, must pass
 * PHP's URL filter and must use the http or https scheme. No network request
 * is made, so reachability of the URLs is not checked here.
 *
 * @since 1.0.0
 */
class URL_Validator implements Source_Validator_Interface {

	/**
	 * Validates the parameters for URL-based datasets.
	 *
	 * Checks, in order, that `$config['urls']` is present, is an array, is not
	 * empty, and that every entry is a well-formed http(s) URL. Validation stops
	 * at the first offending entry.
	 *
	 * @param  array $config Configuration array; the URLs are read from the 'urls' key.
	 *
	 * @throws Exception If URLs are missing, not an array, empty, or contain an invalid entry.
	 *
	 * @since 1.0.0
	 */
	public function validate( array $config ): void {
		$urls = $config['urls'] ?? null;

		// A missing key and an explicit null are both treated as "not provided".
		if ( null === $urls ) {
			throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE, __( 'URLs are missing.', 'limb-chatbot' ) );
		}

		// The type check precedes the emptiness check so that scalar values such
		// as '' or 0 are reported as a wrong type rather than as missing.
		if ( ! is_array( $urls ) ) {
			throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE, __( 'URLs must be an array.', 'limb-chatbot' ) );
		}

		if ( [] === $urls ) {
			throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE, __( 'At least one URL is required.', 'limb-chatbot' ) );
		}

		foreach ( $urls as $index => $url ) {
			if ( ! is_string( $url ) ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					sprintf(
						/* translators: %d: position of the offending URL in the list. */
						__( 'URL at index %d must be a string.', 'limb-chatbot' ),
						$index
					)
				);
			}

			// Syntactic validation only; this does not restrict the scheme.
			if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					sprintf(
						/* translators: 1: position of the offending URL in the list, 2: the URL itself. */
						__( 'Invalid URL format at index %d: %s', 'limb-chatbot' ),
						$index,
						$url
					)
				);
			}

			// parse_url() yields false for a malformed URL and null when the
			// scheme component is absent; both are rejected here.
			$scheme = parse_url( $url, PHP_URL_SCHEME );
			if ( ! is_string( $scheme ) || '' === $scheme ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					sprintf(
						/* translators: %d: position of the offending URL in the list. */
						__( 'URL at index %d must have a valid scheme (http or https).', 'limb-chatbot' ),
						$index
					)
				);
			}

			// URI schemes are case-insensitive, so "HTTP://..." must be accepted
			// the same way as "http://...".
			if ( ! in_array( strtolower( $scheme ), [ 'http', 'https' ], true ) ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					sprintf(
						/* translators: %d: position of the offending URL in the list. */
						__( 'URL at index %d must use http or https protocol.', 'limb-chatbot' ),
						$index
					)
				);
			}
		}
	}

	/**
	 * Gets the total number of URL knowledge items.
	 *
	 * Counts the entries under the 'urls' key without validating them.
	 *
	 * @param  array $config Configuration array; the URLs are read from the 'urls' key.
	 *
	 * @return int Number of entries in 'urls', or 0 when the key is absent or not an array.
	 *
	 * @since 1.0.9
	 */
	public function get_total( array $config ): int {
		$urls = $config['urls'] ?? [];

		return is_array( $urls ) ? count( $urls ) : 0;
	}
}

