<?php

namespace Limb_Chatbot\Includes\AI_Providers\Claude\Services;

use Limb_Chatbot\Includes\Data_Objects\Chatbot;
use Limb_Chatbot\Includes\Data_Objects\Message;
use Limb_Chatbot\Includes\Exceptions\Error_Codes;
use Limb_Chatbot\Includes\Exceptions\Exception;
use Limb_Chatbot\Includes\Interfaces\Token_Calculator_Interface;

/**
 * Class Token_Calculator
 *
 * Estimates the number of tokens in a given message for Claude AI.
 * Claude uses a tokenization similar to GPT models, so we use similar estimation logic.
 *
 * @package Limb_Chatbot\Includes\AI_Providers\Claude\Services
 * @since 1.0.9
 */
class Token_Calculator implements Token_Calculator_Interface {

	/**
	 * Pattern to match emojis and special characters.
	 *
	 * @var string
	 * @since 1.0.9
	 */
	const SPECIAL_CHARACTERS_PATTERN = '/[\x{1F600}-\x{1F64F}\x{1F300}-\x{1F5FF}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F800}-\x{1F8FF}\x{1F900}-\x{1F9FF}\x{1FA00}-\x{1FA6F}\x{1FA70}-\x{1FAFF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}]/u';

	/**
	 * Words strictly shorter than this many characters count as 1 token.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const SMALL_WORD_THRESHOLD = 4;

	/**
	 * Words up to and including this many characters count as 2 tokens.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const MEDIUM_WORD_THRESHOLD = 8;

	/**
	 * Average characters per token, used for words longer than the medium threshold.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const CHAR_PER_TOKEN = 4;

	/**
	 * Additional token count for a word containing an emoji/symbol.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const ADDITIONAL_TOKEN_FOR_SPECIAL_CHARS = 1;

	/**
	 * Maximum length (in pixels) of the longest image side after scaling.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const MAX_DIMENSION = 2048;

	/**
	 * Length (in pixels) the shortest image side is scaled down to when it is larger.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const MIN_DIMENSION = 768;

	/**
	 * Tile size (in pixels) for image token calculation.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const TILE_SIZE = 512;

	/**
	 * Tokens per image tile.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const TILE_TOKENS = 170;

	/**
	 * Base tokens added once per image.
	 *
	 * @var int
	 * @since 1.0.9
	 */
	const BASE_TOKENS = 85;

	/**
	 * The message instance for which token calculation is performed.
	 *
	 * @var Message
	 * @since 1.0.9
	 */
	protected Message $message;

	/**
	 * Optional chatbot context.
	 *
	 * @var Chatbot|null
	 * @since 1.0.9
	 */
	protected ?Chatbot $chatbot = null;

	/**
	 * Token_Calculator constructor.
	 *
	 * @param Message      $message Message object containing content to be tokenized.
	 * @param Chatbot|null $chatbot Optional chatbot context.
	 *
	 * @since 1.0.9
	 */
	public function __construct( Message $message, ?Chatbot $chatbot = null ) {
		$this->message = $message;
		$this->chatbot = $chatbot;
	}

	/**
	 * Calculates the estimated total token usage for the given message.
	 *
	 * Each content entry is dispatched to a `count_{type}_tokens()` method of this
	 * class; entries whose type has no matching method (or that carry no usable
	 * `type` key) are skipped and contribute no tokens.
	 *
	 * @return int Total token count.
	 *
	 * @throws Exception If attachment data is invalid.
	 * @since 1.0.9
	 */
	public function calculate(): int {
		$contents = $this->message->get_content();
		if ( ! is_iterable( $contents ) ) {
			return 0;
		}

		$tokens = 0;
		foreach ( $contents as $content ) {
			// Malformed entries are ignored instead of raising undefined-index warnings.
			if ( ! isset( $content['type'] ) || ! is_string( $content['type'] ) ) {
				continue;
			}

			$type   = $content['type'];
			$method = "count_{$type}_tokens";
			if ( ! method_exists( $this, $method ) ) {
				continue;
			}

			// A missing value is passed on as an empty string so each counter applies
			// its own rule (empty text => 0 tokens, empty attachment => exception).
			$tokens += $this->$method( $content[ $type ]['value'] ?? '' );
		}

		return $tokens;
	}

	/**
	 * Calculates token count for a plain text input.
	 *
	 * The text is split on non-word characters (each kept as its own piece) and
	 * every non-blank piece is scored:
	 * - fewer than SMALL_WORD_THRESHOLD characters: 1 token
	 * - up to MEDIUM_WORD_THRESHOLD characters: 2 tokens
	 * - longer: ceil( length / CHAR_PER_TOKEN ) tokens
	 * - piece matches SPECIAL_CHARACTERS_PATTERN: +ADDITIONAL_TOKEN_FOR_SPECIAL_CHARS
	 *
	 * @param string $text Plain text content.
	 *
	 * @return int Token estimate.
	 * @since 1.0.9
	 */
	private function count_text_tokens( $text ): int {
		if ( null === $text || is_array( $text ) ) {
			return 0;
		}
		$text = (string) $text;

		// preg_replace() returns null on a PCRE error; keep the raw text in that case.
		$collapsed = preg_replace( '/\s+/', ' ', $text );
		$text      = trim( null === $collapsed ? $text : $collapsed );
		if ( '' === $text ) {
			return 0;
		}

		$words = preg_split( '/(\W)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
		if ( false === $words ) {
			// The /u modifier makes preg_split() fail on malformed UTF-8. Fall back to
			// a byte-length estimate rather than iterating over `false`.
			return (int) ceil( strlen( $text ) / self::CHAR_PER_TOKEN );
		}

		$token_count = 0;
		foreach ( $words as $word ) {
			if ( '' === trim( $word ) ) {
				continue;
			}

			$length = mb_strlen( $word );
			if ( $length < self::SMALL_WORD_THRESHOLD ) {
				$token_count += 1;
			} elseif ( $length <= self::MEDIUM_WORD_THRESHOLD ) {
				$token_count += 2;
			} else {
				$token_count += (int) ceil( $length / self::CHAR_PER_TOKEN );
			}

			if ( 1 === preg_match( self::SPECIAL_CHARACTERS_PATTERN, $word ) ) {
				$token_count += self::ADDITIONAL_TOKEN_FOR_SPECIAL_CHARS;
			}
		}

		return $token_count;
	}

	/**
	 * Estimates token usage for a base64 image attachment using tile estimation.
	 *
	 * The image dimensions are scaled (see scale_image_dimensions()), the number of
	 * TILE_SIZE tiles needed to cover the result is counted, and the estimate is
	 * tiles * TILE_TOKENS + BASE_TOKENS.
	 *
	 * NOTE(review): this is a tile-based heuristic. Anthropic's vision documentation
	 * describes image cost as roughly (width * height) / 750 tokens, so the numbers
	 * produced here may diverge from real usage - confirm this is acceptable.
	 *
	 * @param string $image_base64 Base64-encoded image data.
	 *
	 * @return int Token estimate.
	 *
	 * @throws Exception If base64 or image format is invalid.
	 * @since 1.0.9
	 */
	private function count_attachment_tokens( $image_base64 ): int {
		$image_data = is_string( $image_base64 ) ? base64_decode( $image_base64, true ) : false;
		if ( false === $image_data || '' === $image_data ) {
			throw new Exception( Error_Codes::VALIDATION_BAD_BASE64, __( 'Invalid base64 image data', 'limb-chatbot' ) );
		}

		$info = getimagesizefromstring( $image_data );
		if ( ! $info ) {
			throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE, __( 'Invalid image format', 'limb-chatbot' ) );
		}

		$original_width  = (int) $info[0];
		$original_height = (int) $info[1];
		// A zero dimension would otherwise reach a division below.
		if ( $original_width <= 0 || $original_height <= 0 ) {
			throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE, __( 'Invalid image format', 'limb-chatbot' ) );
		}

		[ $scaled_width, $scaled_height ] = $this->scale_image_dimensions( $original_width, $original_height );

		$tiles_width  = (int) ceil( $scaled_width / self::TILE_SIZE );
		$tiles_height = (int) ceil( $scaled_height / self::TILE_SIZE );

		return ( $tiles_width * $tiles_height * self::TILE_TOKENS ) + self::BASE_TOKENS;
	}

	/**
	 * Applies the two-step downscale used for tile counting.
	 *
	 * 1. If the longest side exceeds MAX_DIMENSION, shrink to fit MAX_DIMENSION.
	 * 2. If the shortest side is then still larger than MIN_DIMENSION, shrink so
	 *    the shortest side equals MIN_DIMENSION.
	 *
	 * Images are never enlarged: the second step is skipped for images that are
	 * already small, which also keeps the longest side bounded by MAX_DIMENSION
	 * for extreme aspect ratios.
	 *
	 * @param int $width  Original width in pixels (must be > 0).
	 * @param int $height Original height in pixels (must be > 0).
	 *
	 * @return int[] Scaled [ width, height ], each at least 1.
	 */
	private function scale_image_dimensions( int $width, int $height ): array {
		$longest  = max( $width, $height );
		$shortest = min( $width, $height );
		$scale    = 1.0;

		if ( $longest > self::MAX_DIMENSION ) {
			$scale = self::MAX_DIMENSION / $longest;
		}
		if ( $shortest * $scale > self::MIN_DIMENSION ) {
			$scale = self::MIN_DIMENSION / $shortest;
		}

		// Truncate like the original implementation, but never drop a side to 0 pixels.
		return [
			max( 1, (int) ( $width * $scale ) ),
			max( 1, (int) ( $height * $scale ) ),
		];
	}
}

