<?php

namespace Limb_Chatbot\Includes\Services\Jobs\Handlers;

use Limb_Chatbot\Includes\Data_Objects\AI_Model;
use Limb_Chatbot\Includes\Data_Objects\Chatbot;
use Limb_Chatbot\Includes\Data_Objects\Config;
use Limb_Chatbot\Includes\Data_Objects\Dataset;
use Limb_Chatbot\Includes\Data_Objects\Dataset_Entry;
use Limb_Chatbot\Includes\Data_Objects\Job;
use Limb_Chatbot\Includes\Data_Objects\Task;
use Limb_Chatbot\Includes\Data_Objects\Vector_Index;
use Limb_Chatbot\Includes\Data_Objects\Vector_Index_Meta;
use Limb_Chatbot\Includes\Exceptions\Error_Codes;
use Limb_Chatbot\Includes\Exceptions\Exception;
use Limb_Chatbot\Includes\Factories\Dataset_Builder_Factory;
use Limb_Chatbot\Includes\Factories\Source_Fetcher_Factory;
use Limb_Chatbot\Includes\Factories\Source_Validator_Factory;
use Limb_Chatbot\Includes\Interfaces\Multitask_Handler_Interface;
use Limb_Chatbot\Includes\Repositories\Vector_Repository;
use Limb_Chatbot\Includes\Services\Dataset_Service;
use Limb_Chatbot\Includes\Services\Knowledge\Indexing_Service;
use Limb_Chatbot\Includes\Services\Knowledge\Knowledge_Generator;
use Limb_Chatbot\Includes\Services\Job\Abstract_Job_Handler;
use Limb_Chatbot\Includes\Vector_Dbs\Local\Local;
use Limb_Chatbot\Includes\Vector_Dbs\Pinecone\Pinecone;

/**
 * Dataset Generating Job Handler
 *
 * Handles dataset generation jobs. Extracts logic from the old background process system.
 *
 * @since 1.1.0
 */
class Dataset_Generating extends Abstract_Job_Handler implements Multitask_Handler_Interface {

	/**
	 * Batch size for generating child tasks.
	 *
	 * Not read inside this class; presumably consumed by the multitask
	 * processor that drives this handler (TODO confirm against the caller).
	 *
	 * @var int
	 * @since 1.1.0
	 */
	public int $child_task_batch_size = 100;
	/**
	 * Number of source objects requested per page from the source fetcher.
	 *
	 * @var int
	 * @since 1.1.0
	 */
	private int $chunk_size = 5;
	/**
	 * Dataset service instance, used to clear existing dataset entries.
	 *
	 * @var Dataset_Service
	 * @since 1.1.0
	 */
	private Dataset_Service $dataset_service;

	/**
	 * Constructor.
	 *
	 * @since 1.1.0
	 */
	public function __construct() {
		parent::__construct();
		$this->dataset_service = new Dataset_Service();
	}

	/**
	 * Get the job type this handler manages.
	 *
	 * @return string
	 * @since 1.1.0
	 */
	public function get_job_type(): string {
		return Job::TYPE_DATASET_GENERATING;
	}

	/**
	 * Validate job configuration.
	 *
	 * Checks, in order: the chatbot (when a UUID other than the default marker
	 * is given), the presence of a source, the source-specific rules of the
	 * matching source validator, and finally the indexing parameters.
	 *
	 * Extracted from Handler::validate_params()
	 *
	 * @param  array  $config  Job configuration.
	 * @param  string|null  $chatbot_uuid  Chatbot UUID; null or Job::CHATBOT_DEFAULT means "no specific chatbot".
	 *
	 * @return bool True if valid.
	 * @throws Exception If validation fails.
	 * @since 1.1.0
	 */
	public function validate( array $config, ?string $chatbot_uuid = null ): bool {
		// The default marker is treated the same as "no chatbot given".
		if ( $chatbot_uuid === Job::CHATBOT_DEFAULT ) {
			$chatbot_uuid = null;
		}

		// Validate chatbot if provided
		if ( empty( $chatbot_uuid ) ) {
			$chatbot = Chatbot::make();
		} else {
			$chatbot = Chatbot::find_by_uuid( $chatbot_uuid );
			if ( empty( $chatbot ) ) {
				throw new Exception( Error_Codes::VALIDATION_INVALID_VALUE,
					__( 'The specified chatbot does not exist.', 'limb-chatbot' ) );
			}
		}

		// Validate source
		$source = $this->require_source( $config );

		$validator = ( new Source_Validator_Factory() )->make( $source );
		$validator->validate( $config );

		$this->validate_indexing_params( $config, $chatbot );

		return true;
	}

	/**
	 * Validate the indexing-related keys of the job configuration.
	 *
	 * Reads `indexing_dimension`, `indexing_vector_index_type`,
	 * `indexing_vector_index_config_id`, `indexing_ai_model_id` and
	 * `indexing_config_id` from the configuration.
	 *
	 * @param  array  $config  Job configuration.
	 * @param  Chatbot|null  $chatbot  Chatbot the job belongs to. Currently unused; kept for the existing call signature.
	 *
	 * @return bool True if valid.
	 * @throws Exception If any indexing parameter is missing or invalid.
	 * @since 1.1.0
	 */
	private function validate_indexing_params( array $config, ?Chatbot $chatbot = null ): bool {
		$dimension              = $config['indexing_dimension'] ?? null;
		$vector_index_type      = $config['indexing_vector_index_type'] ?? null;
		$vector_index_config_id = $config['indexing_vector_index_config_id'] ?? null;
		$ai_model_id            = $config['indexing_ai_model_id'] ?? null;
		$config_id              = $config['indexing_config_id'] ?? null;

		// Storage settings are only checked when both a type and a dimension are supplied.
		if ( ! empty( $vector_index_type ) && ! empty( $dimension ) ) {
			if ( ! in_array( $vector_index_type, [ Pinecone::$id, Local::$id ], true ) ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					__( 'Unsupported storage type.', 'limb-chatbot' )
				);
			}

			// Validate vector index config ID (required for non-local storage)
			if ( $vector_index_type !== Local::$id && empty( $vector_index_config_id ) ) {
				throw new Exception(
					Error_Codes::VALIDATION_INVALID_VALUE,
					__( 'Storage config ID is missing.', 'limb-chatbot' )
				);
			}
		}

		// An AI model is mandatory.
		if ( empty( $ai_model_id ) ) {
			throw $this->knowledge_settings_exception( __( 'No AI Model is saved in ', 'limb-chatbot' ) );
		}

		$ai_model = AI_Model::find( $ai_model_id );
		if ( empty( $ai_model ) ) {
			throw new Exception(
				Error_Codes::VALIDATION_INVALID_VALUE,
				__( 'Unknown AI model ID.', 'limb-chatbot' )
			);
		}

		// Ensure the AI model supports the requested dimension
		if ( ! $ai_model->is_supported_dimension( $dimension ) ) {
			throw new Exception(
				Error_Codes::VALIDATION_INVALID_VALUE,
				sprintf(
					__( 'Model %s does not support dimension %s.', 'limb-chatbot' ),
					'<strong>' . $ai_model->get_name() . '</strong>',
					'<strong>' . $dimension . '</strong>'
				)
			);
		}

		// An API key config is mandatory ...
		if ( empty( $config_id ) ) {
			throw $this->knowledge_settings_exception( __( 'No API Key is saved in ', 'limb-chatbot' ) );
		}

		// ... and must reference an existing Config record.
		if ( ! Config::count( [ 'id' => $config_id ] ) ) {
			$exception = new Exception(
				Error_Codes::VALIDATION_INVALID_VALUE,
				__( 'API Key missing. Add it in ', 'limb-chatbot' )
			);
			$exception->attach_link(
				admin_url( 'admin.php?page=lbaic-dashboard&menu=chatbot&tab=ai-settings' ),
				__( 'Chatbot -> AI Settings', 'limb-chatbot' )
			);
			throw $exception;
		}

		return true;
	}

	/**
	 * Build a validation exception that links to the Knowledge Settings screen.
	 *
	 * The link is built with admin_url() and a translated label so it matches
	 * the "Chatbot -> AI Settings" link produced by validate_indexing_params().
	 *
	 * @param  string  $message  Already-translated message that precedes the link.
	 *
	 * @return Exception Exception ready to be thrown.
	 * @since 1.1.0
	 */
	private function knowledge_settings_exception( string $message ): Exception {
		$exception = new Exception( Error_Codes::VALIDATION_INVALID_VALUE, $message );
		$exception->attach_link(
			admin_url( 'admin.php?page=lbaic-dashboard&menu=knowledge-settings' ),
			__( 'Knowledge Settings', 'limb-chatbot' )
		);

		return $exception;
	}

	/**
	 * Read the `source` key from the job configuration or fail.
	 *
	 * @param  array  $config  Job configuration.
	 *
	 * @return mixed The non-empty source value.
	 * @throws Exception If the source is missing or empty.
	 * @since 1.1.0
	 */
	private function require_source( array $config ) {
		$source = $config['source'] ?? null;

		if ( empty( $source ) ) {
			throw new Exception( 'missing_source', __( 'Source is required for dataset generation.', 'limb-chatbot' ) );
		}

		return $source;
	}

	/**
	 * Get total number of tasks that will be generated.
	 *
	 * Delegates the count to the source validator, so no objects are fetched
	 * and no tasks are created here.
	 *
	 * @param  array  $config  Job configuration.
	 * @param  Job  $job  Job instance (not used by this handler).
	 *
	 * @return int Total task count, never negative.
	 * @throws Exception If the source is missing or calculation fails.
	 * @since 1.1.0
	 */
	public function get_total( array $config, Job $job ): int {
		$source = $this->require_source( $config );

		// Get total count from validator
		$validator = ( new Source_Validator_Factory() )->make( $source );
		$total     = (int) $validator->get_total( $config );

		return max( 0, $total );
	}

	/**
	 * Generate a batch of tasks (chunked generation).
	 *
	 * Translates the [offset, offset + limit) window into source pages of
	 * `$chunk_size` objects, fetches those pages one by one and creates one
	 * task per fetched object until `$limit` tasks were created or the last
	 * page of the window was processed.
	 *
	 * @param  Job  $job  Job instance.
	 * @param  array  $config  Job configuration.
	 * @param  int  $offset  Starting offset for this batch.
	 * @param  int  $limit  Maximum number of tasks to generate.
	 *
	 * @return int Number of tasks actually created.
	 * @throws Exception If the source is missing or task generation fails.
	 * @since 1.1.0
	 */
	public function generate_task_batch( Job $job, array $config, int $offset, int $limit ): int {
		$source = $this->require_source( $config );

		// Nothing requested, nothing to fetch.
		if ( $limit <= 0 ) {
			return 0;
		}

		$offset   = max( 0, $offset );
		$per_page = max( 1, $this->chunk_size );

		// Pure integer arithmetic: floor()/ceil() would yield floats and leak
		// float page numbers into the fetch parameters.
		$start_page    = intdiv( $offset, $per_page ) + 1;
		$end_page      = intdiv( $offset + $limit + $per_page - 1, $per_page );
		$items_to_skip = $offset % $per_page; // Leading items of the first page that lie before the offset.

		$fetcher    = ( new Source_Fetcher_Factory() )->make( $source );
		$task_count = 0;

		for ( $paged = $start_page; $paged <= $end_page && $task_count < $limit; $paged ++ ) {
			// Fetch objects for this page
			$objects = $fetcher->fetch(
				[
					'paged'    => $paged,
					'per_page' => $per_page,
				],
				$config
			);

			if ( ! is_iterable( $objects ) ) {
				continue;
			}

			// Count positions ourselves so the skip logic does not depend on
			// the fetcher returning a zero-indexed list.
			$position = 0;

			foreach ( $objects as $object ) {
				$current_position = $position;
				$position ++;

				// Skip items if we're starting mid-page
				if ( $paged === $start_page && $current_position < $items_to_skip ) {
					continue;
				}

				// Stop if we've created enough tasks
				if ( $task_count >= $limit ) {
					break 2;
				}

				// Only successfully created tasks are counted.
				if ( $this->create_task( $job->get_id(), $object ) ) {
					$task_count ++;
				}
			}
		}

		return $task_count;
	}

	/**
	 * Process a single task.
	 *
	 * A task with a parent task ID is a child task and is handed to the
	 * indexing service. A parent task needs no work here and succeeds
	 * immediately; its sub-tasks come from generate_sub_tasks().
	 *
	 * @param  Task  $task  Task to process.
	 *
	 * @return bool True on success.
	 * @throws Exception If processing fails.
	 * @since 1.1.0
	 */
	public function process_task( Task $task ): bool {
		if ( ! $task->get_parent_task_id() ) {
			// Parent task: nothing to do in this step.
			return true;
		}

		// Child task: index the dataset entry it points to.
		return ( new Indexing_Service() )->index_entry( $task );
	}

	/**
	 * Determine if an exception is critical.
	 *
	 * This handler adds no checks of its own and relies entirely on the
	 * parent implementation.
	 *
	 * Extracted from Process::should_pause_process() and Process::is_critical_error()
	 *
	 * @param  Exception  $exception  Exception that occurred.
	 *
	 * @return bool True if critical.
	 * @since 1.1.0
	 */
	public function is_critical_error( Exception $exception ): bool {
		return parent::is_critical_error( $exception );
	}

	/**
	 * Generate sub-tasks for a parent task.
	 *
	 * Builds a dataset from the task payload, resets its stored errors and
	 * existing entries, generates its knowledge entries and, when the dataset
	 * carries a complete indexing configuration, creates one child task per
	 * dataset entry.
	 *
	 * All entries are handled in a single pass, so this implementation always
	 * reports completion.
	 *
	 * @param  Task  $task  Parent task.
	 *
	 * @return bool Always true: all sub-tasks (if any) were generated.
	 * @throws Exception If generation fails.
	 * @since 1.1.0
	 */
	public function generate_sub_tasks( Task $task ): bool {
		$item = $task->get_payload();
		$job  = $task->job();

		// Build dataset from item
		$builder = ( new Dataset_Builder_Factory() )->make( $item );
		$dataset = $builder->build( $item, $job );

		if ( ! $dataset instanceof Dataset ) {
			// Not a valid dataset, but not an error - just skip
			return true;
		}

		// Clear any previous errors
		$dataset->update_meta( 'errors', wp_json_encode( [] ) );

		// Clear existing entries if any
		if ( $dataset->has_entries() ) {
			$this->dataset_service->clear( $dataset );
		}

		// Generate knowledge entries for the dataset.
		$dataset = ( new Knowledge_Generator() )->generate( $dataset );

		// Get indexing configuration from dataset
		$index_config_id   = $dataset->get_meta_value( 'index_config_id' );
		$vector_index_id   = $dataset->get_meta_value( 'vector_index_id' );
		$dimension         = $dataset->get_meta_value( 'dimension' );
		$index_ai_model_id = $dataset->get_meta_value( 'index_ai_model_id' );

		// Only create indexing tasks if indexing configuration exists
		if ( empty( $index_config_id ) || empty( $index_ai_model_id ) || empty( $vector_index_id ) || empty( $dimension ) ) {
			return true;
		}

		$dataset_entries = Dataset_Entry::where( [ 'dataset_id' => $dataset->get_id() ], - 1, - 1 );

		foreach ( $dataset_entries as $entry ) {
			if ( ! $entry instanceof Dataset_Entry ) {
				continue;
			}

			// Child task payload: the entry to index plus the indexing configuration.
			$this->create_sub_task(
				$job->get_id(),
				$task,
				[
					'dataset_entry_id' => $entry->get_id(),
					'config_id'        => $index_config_id,
					'ai_model_id'      => $index_ai_model_id,
					'vector_index_id'  => $vector_index_id,
					'dimension'        => $dimension,
				]
			);
		}

		return true;
	}
}

