<?php

namespace Limb_Chatbot\Includes\AI_Providers\Deep_Seek\Services;

use Limb_Chatbot\Includes\Data_Objects\Chatbot;
use Limb_Chatbot\Includes\Data_Objects\Message;
use Limb_Chatbot\Includes\Interfaces\Token_Calculator_Interface;

/**
 * Class Token_Calculator
 *
 * Calculates token counts for messages using different character weightings
 * depending on content type, such as English or Chinese characters.
 *
 * @package Limb_Chatbot\Includes\AI_Providers\Deep_Seek\Services
 * @implements Token_Calculator_Interface
 * @since 1.0.0
 */
class Token_Calculator implements Token_Calculator_Interface {

	/**
	 * Approximate token value per English (ASCII) character.
	 * @since 1.0.0
	 */
	const ENGLISH_CHAR_TOKENS = 0.3;

	/**
	 * Approximate token value per Chinese character.
	 * @since 1.0.0
	 */
	const CHINESE_CHAR_TOKENS = 0.6;

	/**
	 * The message object to calculate tokens for.
	 *
	 * @var Message
	 * @since 1.0.0
	 */
	protected Message $message;

	/**
	 * Token_Calculator constructor.
	 *
	 * @param  Message  $message  The message containing content to calculate tokens.
	 * @param  Chatbot|null  $chatbot  Optional chatbot context. Accepted but not stored or read by this class.
	 *
	 * @since 1.0.0
	 */
	public function __construct( Message $message, ?Chatbot $chatbot = null ) {
		$this->message = $message;
	}

	/**
	 * Calculate the total tokens count for the message content.
	 *
	 * Each content part is expected to look like
	 * `[ 'type' => 'text', 'text' => [ 'value' => '...' ] ]`. The part is handed to the
	 * `count_{type}_tokens()` method of this class when such a method exists; parts that
	 * are malformed, empty, or have no matching counter contribute nothing.
	 *
	 * @return int The calculated token count.
	 * @since 1.0.0
	 */
	public function calculate(): int {
		$contents = $this->message->get_content();

		// foreach only accepts arrays and objects; anything else (null, a bare string)
		// carries no content parts to count.
		if ( ! is_array( $contents ) && ! is_object( $contents ) ) {
			return 0;
		}

		$tokens = 0;
		foreach ( $contents as $content ) {
			// Skip parts that cannot be indexed instead of raising warnings on them.
			if ( ! is_array( $content ) && ! ( $content instanceof \ArrayAccess ) ) {
				continue;
			}

			// The type is interpolated into a method name, so it has to be a string.
			if ( ! isset( $content['type'] ) || ! is_string( $content['type'] ) ) {
				continue;
			}

			$type  = $content['type'];
			$value = $content[ $type ]['value'] ?? null;
			if ( empty( $value ) ) {
				continue;
			}

			$method = "count_{$type}_tokens";
			if ( method_exists( $this, $method ) ) {
				$tokens += $this->$method( $value );
			}
		}

		return $tokens;
	}


	/**
	 * Count tokens for text content by approximating from per-character weights.
	 *
	 * ASCII characters, whitespace and punctuation are charged at ENGLISH_CHAR_TOKENS and
	 * Han characters at CHINESE_CHAR_TOKENS. Every remaining character (other scripts,
	 * non-ASCII symbols, emoji) is charged at CHINESE_CHAR_TOKENS as well: this class
	 * defines no dedicated ratio for them, and counting them as free would badly
	 * underestimate non-English, non-Chinese text. The weighted sum is truncated to an integer.
	 *
	 * @param  string  $text  The text to count tokens for. Values that have no string
	 *                        representation (arrays, plain objects, null) count as 0.
	 *
	 * @return int The estimated token count.
	 * @since 1.0.0
	 */
	public function count_text_tokens( $text ) {
		$is_stringable = is_scalar( $text ) || ( is_object( $text ) && method_exists( $text, '__toString' ) );
		if ( ! $is_stringable ) {
			return 0;
		}

		$text = (string) $text;
		if ( '' === $text ) {
			return 0;
		}

		// The three classes below are mutually exclusive and together cover every character.
		$english_count = preg_match_all( '/[\x00-\x7F\s\p{P}]/u', $text );
		$chinese_count = preg_match_all( '/\p{Han}/u', $text );
		$other_count   = preg_match_all( '/[^\x00-\x7F\s\p{P}\p{Han}]/u', $text );

		// With the "u" modifier preg_match_all() returns false when the subject is not valid
		// UTF-8. Fall back to a byte-based estimate rather than reporting zero tokens.
		if ( false === $english_count || false === $chinese_count || false === $other_count ) {
			return (int) ( strlen( $text ) * self::ENGLISH_CHAR_TOKENS );
		}

		return (int) (
			( $english_count * self::ENGLISH_CHAR_TOKENS )
			+ ( $chinese_count * self::CHINESE_CHAR_TOKENS )
			+ ( $other_count * self::CHINESE_CHAR_TOKENS )
		);
	}
}