<?php

namespace Limb_Chatbot\Includes\Data_Objects;

use Limb_Chatbot\Includes\Factories\Entry_Stringifier_Factory;
use Limb_Chatbot\Includes\Services\Helper;

/**
 * Represents a dataset entry in the system.
 *
 * The entry payload is kept in {@see Dataset_Entry::$entry}. The methods of this class
 * read the first message (`messages[0]['input']` / `messages[0]['output']`) and the
 * optional `entry_metadata` map from that payload.
 *
 * @since 1.0.0
 */
class Dataset_Entry extends WPDB_Data_Object {

	/**
	 * The name of the database table associated with this model.
	 *
	 * @since 1.0.0
	 */
	const TABLE_NAME = 'lbaic_dataset_entries';

	/**
	 * The list of fillable fields for mass assignment.
	 *
	 * @since 1.0.0
	 */
	const FILLABLE = [ 'entry', 'dataset_id', 'created_at', 'updated_at' ];

	/**
	 * The ID of the dataset this entry belongs to.
	 *
	 * @var int|null
	 * @since 1.0.0
	 */
	public ?int $dataset_id = null;

	/**
	 * The actual entry data as an associative array.
	 *
	 * @var array|null
	 * @since 1.0.0
	 */
	public ?array $entry = null;

	/**
	 * The score associated with the dataset entry.
	 *
	 * Declared without a default value, so it stays uninitialized until something
	 * assigns it. Read it through {@see Dataset_Entry::get_score()}, which tolerates
	 * the uninitialized state.
	 *
	 * @var float|null
	 */
	public ?float $score;

	/**
	 * Last update timestamp.
	 *
	 * @var string
	 * @since 1.0.0
	 */
	public string $updated_at;

	/**
	 * Creation timestamp.
	 *
	 * @var string
	 * @since 1.0.0
	 */
	public string $created_at;

	/**
	 * Dataset_Entry constructor.
	 *
	 * When the raw data is an array whose non-empty `entry` value is reported as JSON,
	 * that value is decoded before the data is handed to the parent constructor.
	 *
	 * @param  mixed  $instance  Optional raw data to initialize the entry.
	 *
	 * @since 1.0.0
	 */
	public function __construct( $instance = null ) {
		// NOTE(review): isJson() is not defined in this class; presumably inherited from WPDB_Data_Object - confirm.
		if ( is_array( $instance ) && ! empty( $instance['entry'] ) && $this->isJson( $instance['entry'] ) ) {
			$instance['entry'] = Helper::maybe_json_decode( $instance['entry'] );
		}
		parent::__construct( $instance );
	}

	/**
	 * Get the entry data.
	 *
	 * @return array|null The entry data array, or null if none is set.
	 * @since 1.0.0
	 */
	public function get_entry(): ?array {
		return $this->entry;
	}

	/**
	 * Set the entry data.
	 *
	 * @param  array|null  $entry  The entry content.
	 *
	 * @return void
	 * @since 1.0.0
	 */
	public function set_entry( ?array $entry ): void {
		$this->entry = $entry;
	}

	/**
	 * Convert the dataset entry into its string representation for embedding.
	 *
	 * The entry type (taken from `entry_metadata.entry_type`, or detected heuristically)
	 * selects the stringifier; the stringifier's `stringify_for_embedding()` result is returned.
	 *
	 * @return string Stringified entry produced by the type-specific stringifier.
	 * @since 1.0.0
	 *
	 */
	public function stringify(): string {
		return $this->make_entry_stringifier()->stringify_for_embedding( $this->build_stringifier_payload() );
	}

	/**
	 * Detect entry type based on entry structure.
	 *
	 * An explicit `entry_metadata.entry_type` always wins. Otherwise the first message
	 * is inspected: an input ending with "?" or an output shorter than 200 bytes is
	 * classified as 'qa'; everything else as 'chunk'.
	 *
	 * @return string The explicit entry type when present, otherwise 'qa' or 'chunk'.
	 * @since 1.0.0
	 */
	private function detect_entry_type(): string {
		// Explicit metadata takes precedence over the heuristic.
		if ( isset( $this->entry['entry_metadata']['entry_type'] ) ) {
			return $this->entry['entry_metadata']['entry_type'];
		}

		$entry_array = $this->entry['messages'][0] ?? [];
		$input       = trim( $entry_array['input'] ?? '' );
		$output      = trim( $entry_array['output'] ?? '' );

		// Question-like input or a short output => QA. Note that strlen() counts bytes,
		// so the 200 threshold is a byte length, not a character count.
		if ( preg_match( '/\?$/', $input ) || strlen( $output ) < 200 ) {
			return 'qa';
		}

		// Long output with a non-question input => chunk.
		return 'chunk';
	}

	/**
	 * Create the stringifier matching this entry's type.
	 *
	 * @return mixed Whatever Entry_Stringifier_Factory::make() returns for the entry type.
	 * @since 1.0.0
	 */
	private function make_entry_stringifier() {
		$entry_metadata = $this->entry['entry_metadata'] ?? [];
		$entry_type     = $entry_metadata['entry_type'] ?? $this->detect_entry_type();

		$factory = new Entry_Stringifier_Factory();

		return $factory->make( $entry_type );
	}

	/**
	 * Build the data array handed to the stringifiers.
	 *
	 * The same trimmed input/output values are exposed under several aliases
	 * (input/question/title and output/answer/content). Shared by stringify() and
	 * stringify_for_inference() so both pass the identical payload.
	 *
	 * @return array Payload for the stringifier.
	 * @since 1.0.0
	 */
	private function build_stringifier_payload(): array {
		$entry_array    = $this->entry['messages'][0] ?? [];
		$entry_metadata = $this->entry['entry_metadata'] ?? [];

		$input  = trim( $entry_array['input'] ?? '' );
		$output = trim( $entry_array['output'] ?? '' );

		return [
			'input'         => $input,
			'output'        => $output,
			'question'      => $input,
			'answer'        => $output,
			'title'         => $input,
			// The metadata heading is used as-is; only the input fallback is trimmed.
			'heading'       => $entry_metadata['heading'] ?? $input,
			'content'       => $output,
			'heading_path'  => $entry_metadata['heading_path'] ?? '',
			'post_title'    => $entry_metadata['post_title'] ?? '',
			'source_url'    => $entry_metadata['source_url'] ?? '',
			'chunk_index'   => $entry_metadata['chunk_index'] ?? null,
			'overlap_start' => $entry_metadata['overlap_start'] ?? null,
			'overlap_end'   => $entry_metadata['overlap_end'] ?? null,
			'metadata'      => $entry_metadata,
		];
	}

	/**
	 * Extract the input text from the dataset entry.
	 *
	 * @return string|null The input of the first message, or null if not available.
	 * @since 1.0.0
	 *
	 */
	public function extract_input(): ?string {
		return $this->entry['messages'][0]['input'] ?? null;
	}

	/**
	 * Extract the output text from the dataset entry.
	 *
	 * @return string|null The output of the first message, or null if not available.
	 * @since 1.0.0
	 *
	 */
	public function extract_output(): ?string {
		return $this->entry['messages'][0]['output'] ?? null;
	}

	/**
	 * Get the associated vector for this dataset entry.
	 *
	 * Looks up the first Vector whose `dataset_entry_id` equals this entry's ID.
	 *
	 * @return Vector|null The related vector object, or null if none exists.
	 * @since 1.0.0
	 *
	 */
	public function vector(): ?Vector {
		return Vector::where( [ 'dataset_entry_id' => $this->get_id() ] )->first();
	}

	/**
	 * Get the score of this object.
	 *
	 * @return float|null The score value, or null if not set.
	 * @since 1.0.0
	 *
	 */
	public function get_score(): ?float {
		// $score has no default value; `??` reads an uninitialized typed property as
		// null instead of throwing "must not be accessed before initialization".
		return $this->score ?? null;
	}

	/**
	 * Set the score of this object.
	 *
	 * @param  float|null  $score  The score value to set.
	 *
	 * @return void
	 * @since 1.0.0
	 *
	 */
	public function set_score( ?float $score ): void {
		$this->score = $score;
	}

	/**
	 * Retrieve the dataset associated with this object.
	 *
	 * @return Dataset|null The dataset instance, or null if not found.
	 * @since 1.0.0
	 *
	 */
	public function dataset() {
		return Dataset::find( $this->get_dataset_id() );
	}

	/**
	 * Get the ID of the dataset this entry belongs to.
	 *
	 * @return int|null Dataset ID.
	 * @since 1.0.0
	 */
	public function get_dataset_id(): ?int {
		return $this->dataset_id;
	}

	/**
	 * Set the ID of the dataset this entry belongs to.
	 *
	 * @param  int|null  $dataset_id  Dataset ID.
	 *
	 * @return void
	 * @since 1.0.0
	 */
	public function set_dataset_id( ?int $dataset_id ): void {
		$this->dataset_id = $dataset_id;
	}

	/**
	 * Stringify entry for inference (chat usage).
	 *
	 * Builds the same payload as stringify() and returns the type-specific
	 * stringifier's `stringify_for_inference()` result.
	 *
	 * @param  int|null  $max_chunks  Currently not used by this method; kept so existing callers keep working.
	 *
	 * @return string
	 * @since 1.0.0
	 */
	public function stringify_for_inference( ?int $max_chunks = null ): string {
		return $this->make_entry_stringifier()->stringify_for_inference( $this->build_stringifier_payload() );
	}


}