<?php

namespace Limb_Chatbot\Includes\Services;

use Limb_Chatbot\Includes\Data_Objects\AI_Model;
use Limb_Chatbot\Includes\Data_Objects\Chatbot;
use Limb_Chatbot\Includes\Data_Objects\Dataset;
use Limb_Chatbot\Includes\Data_Objects\Dataset_Entry;
use Limb_Chatbot\Includes\Data_Objects\Dataset_Meta;
use Limb_Chatbot\Includes\Data_Objects\Job;
use Limb_Chatbot\Includes\Data_Objects\Setting;
use Limb_Chatbot\Includes\Data_Objects\Vector_Index;
use Limb_Chatbot\Includes\Data_Objects\WP_Post_Data_Object;
use Limb_Chatbot\Includes\Services\Knowledge\Dataset_Builders\Informational_Dataset_Builder;
use Limb_Chatbot\Includes\Services\Knowledge\Dataset_Builders\Actionable_Dataset_Builder;
use Limb_Chatbot\Includes\Services\Automations\Post_Creation_Handler;
use Limb_Chatbot\Includes\Services\Automations\Post_Update_Handler;
use Limb_Chatbot\Includes\Services\Automations\Post_Deletion_Handler;
use Limb_Chatbot\Includes\Exceptions\Error_Codes;
use Limb_Chatbot\Includes\Exceptions\Exception;
use Limb_Chatbot\Includes\Repositories\Dataset_Repository;
use Limb_Chatbot\Includes\Repositories\Vector_Repository;
use Limb_Chatbot\Includes\Services\Knowledge\Builders\Chunk_Entry_Builder;
use Limb_Chatbot\Includes\Services\Knowledge\Chunkers\Heading_Aware_Chunker;
use Limb_Chatbot\Includes\Services\Knowledge\Indexing_Service;
use Limb_Chatbot\Includes\Services\Knowledge\Knowledge_Generator;
use WP_Post;


/**
 * Service class for managing dataset-related operations such as creation, deletion, file uploading, and importing.
 *
 * @since 1.0.0
 */
class Dataset_Service {

	/**
	 * Dataset repository instance.
	 *
	 * Injected through the constructor (or created there by default) and used
	 * by delete() to remove a single dataset.
	 *
	 * @since 1.0.0
	 * @var Dataset_Repository
	 */
	protected Dataset_Repository $repository;

	/**
	 * Vector service instance.
	 *
	 * Created in the constructor on top of $vector_repository.
	 *
	 * @var Vector_Service
	 * @since 1.0.0
	 */
	protected Vector_Service $vector_service;

	/**
	 * Dataset entry service instance.
	 *
	 * Used by batch_delete() and clear() to remove the entries that belong to a dataset.
	 *
	 * @var Dataset_Entry_Service
	 * @since 1.0.0
	 */
	protected Dataset_Entry_Service $dataset_entry_service;

	/**
	 * Vector repository instance.
	 *
	 * Created in the constructor and handed to the Vector_Service.
	 *
	 * @var Vector_Repository
	 * @since 1.0.0
	 */
	protected Vector_Repository $vector_repository;

	/**
	 * Wires the service up with its collaborators.
	 *
	 * Only the dataset repository can be injected; the vector repository, the vector
	 * service and the dataset entry service are always created here.
	 *
	 * @param  Dataset_Repository|null  $repository  Repository to use. When null, a new Dataset_Repository is created.
	 *
	 * @since 1.0.0
	 */
	public function __construct( ?Dataset_Repository $repository = null ) {
		if ( null === $repository ) {
			$repository = new Dataset_Repository();
		}
		$vector_repository = new Vector_Repository();

		$this->repository            = $repository;
		$this->vector_repository     = $vector_repository;
		$this->vector_service        = new Vector_Service( $vector_repository );
		$this->dataset_entry_service = new Dataset_Entry_Service();
	}

	/**
	 * Handles a dataset file upload and appends its metadata to the files setting.
	 *
	 * The uploaded file is moved into the dataset files sub-directory under a
	 * name prefixed with 8 random hex characters, and a metadata record (uuid,
	 * path, names, size, creation time) is appended to the list stored under
	 * Dataset::FILES_SETTING_KEY.
	 *
	 * @since 1.0.0
	 *
	 * @param array $file Uploaded files array (as in $_FILES); the upload is read from its 'file' key.
	 * @return Setting|null Result of Setting::update() for the files setting.
	 * @throws Exception If the upload is missing/invalid, the target directory cannot be created,
	 *                   or the file cannot be moved.
	 */
	public function upload( array $file ): ?Setting {
		$file = ! empty( $file['file'] ) ? $file['file'] : null;
		if ( empty( $file['tmp_name'] ) || ! is_uploaded_file( $file['tmp_name'] ) ) {
			throw new Exception( Error_Codes::DATASET_FILE_UPLOAD_MISSING_FILE, __( 'Invalid or missing uploaded file.', 'limb-chatbot' ) );
		}

		// NOTE(review): no extension/MIME check happens in this method; presumably
		// Helper::wp_handle_limb_upload() validates the file type — confirm.
		$sub_dir        = Limb_Chatbot()->get_files_dir() . Dataset::FILES_SUB_DIR;
		$target_dir     = Helper::get_wp_uploaded_file_dir( $sub_dir );
		$sanitized_name = sanitize_file_name( $file['name'] ?? 'dataset.csv' );
		// Random prefix keeps files with the same original name from colliding.
		$filename       = substr( md5( uniqid( '', true ) ), 0, 8 ) . '-' . $sanitized_name;

		if ( ! wp_mkdir_p( $target_dir ) ) {
			throw new Exception( Error_Codes::FILE_UNABLE_TO_CREATE_UPLOAD_DIRECTORY, __( 'Failed to create upload directory.', 'limb-chatbot' ) );
		}
		if ( ! Helper::wp_handle_limb_upload( $file, $target_dir . $filename ) ) {
			throw new Exception( Error_Codes::FILE_FAILED_TO_MOVE_UPLOADED_FILE, __( 'Failed to move uploaded file.', 'limb-chatbot' ) );
		}

		// The setting may not exist yet (first upload), and its stored value may be
		// empty; in both cases start from an empty list instead of dereferencing null
		// or appending to a scalar.
		$file_setting = Setting::find( Dataset::FILES_SETTING_KEY );
		$values       = $file_setting instanceof Setting ? $file_setting->get_value() : null;
		if ( empty( $values ) ) {
			$values = [];
		}

		$values[] = [
			'uuid'          => Helper::get_uuid(),
			'file_path'     => $sub_dir . $filename,
			'file_name'     => $filename,
			'original_name' => $sanitized_name,
			'file_size'     => round( $file['size'] ?? 0 ),
			'created_at'    => current_time( 'mysql', true )
		];

		return Setting::update( [ 'key' => Dataset::FILES_SETTING_KEY ], [ 'value' => $values ] );
	}

	/**
	 * Removes every dataset matching the given criteria, together with its entries and metadata.
	 *
	 * For each matched dataset the entries are deleted one by one through the
	 * dataset entry service, then the dataset meta rows, then the dataset itself.
	 *
	 * @param  array  $data  Query arguments used to select the datasets to delete.
	 *
	 * @return void
	 * @throws Exception
	 * @since 1.0.0
	 */
	public function batch_delete( $data ) {
		$matched = Dataset::where( $data );
		if ( $matched->is_empty() ) {
			return;
		}

		$entry_service = $this->dataset_entry_service;
		$remove_entry  = function ( Dataset_Entry $entry ) use ( $entry_service ) {
			$entry_service->delete( $entry->get_id() );
		};

		$matched->each( function ( Dataset $dataset ) use ( $remove_entry ) {
			$dataset_id = $dataset->get_id();

			// Entries first, so nothing is left pointing at a removed dataset.
			$entries = Dataset_Entry::where( [ 'dataset_id' => $dataset_id ] );
			if ( ! $entries->is_empty() ) {
				$entries->each( $remove_entry );
			}

			Dataset_Meta::delete( [ 'dataset_id' => $dataset_id ] );
			Dataset::delete( [ 'id' => $dataset_id ] );
		} );
	}

	/**
	 * Deletes a single dataset through the repository.
	 *
	 * The dataset is looked up by ID first; when no dataset is found nothing is
	 * passed to the repository and false is returned.
	 *
	 * @param  int|string  $id  Dataset ID to delete.
	 *
	 * @return bool True if deletion succeeded, false otherwise (including when the dataset does not exist).
	 * @since 1.0.0
	 *
	 */
	public function delete( $id ): bool {
		$dataset = Dataset::find( $id );
		if ( ! $dataset instanceof Dataset ) {
			// Unknown ID: report failure instead of handing a non-dataset to the repository.
			return false;
		}

		return $this->repository->delete( $dataset );
	}

	/**
	 * Reacts to a post status transition and forwards first-time publishes to Post_Creation_Handler.
	 *
	 * Transitions that do not move the post into 'publish' from a different status are ignored.
	 *
	 * @param  string  $new_status  New post status.
	 * @param  string  $old_status  Old post status.
	 * @param  WP_Post  $post  The post object.
	 *
	 * @return void
	 * @since 1.0.11
	 */
	public function post_object_created( string $new_status, string $old_status, WP_Post $post ): void {
		$is_first_publish = ( 'publish' === $new_status ) && ( 'publish' !== $old_status );
		if ( ! $is_first_publish ) {
			return;
		}

		( new Post_Creation_Handler( $this ) )->handle( $post->ID, $post );
	}

	/**
	 * Forwards a post update to Post_Update_Handler.
	 *
	 * @param  int  $post_id  Post ID.
	 * @param  WP_Post  $after  The post object after the update.
	 * @param  WP_Post  $before  The post object before the update.
	 *
	 * @return void
	 * @since 1.0.11
	 */
	public function post_object_updated( int $post_id, WP_Post $after, WP_Post $before ): void {
		( new Post_Update_Handler( $this ) )->handle( $post_id, $after, $before );
	}

	/**
	 * Forwards a post deletion to Post_Deletion_Handler.
	 *
	 * Calls where $post is not a WP_Post instance are ignored.
	 *
	 * @param  int  $post_id  Post ID.
	 * @param  WP_Post  $post  The post object.
	 *
	 * @return void
	 * @since 1.0.11
	 */
	public function post_object_deleted( $post_id, $post ): void {
		if ( $post instanceof WP_Post ) {
			( new Post_Deletion_Handler( $this ) )->handle( $post_id, $post );
		}
	}

	/**
	 * Flags every dataset sourced from the given post as not synced.
	 *
	 * Datasets are matched on the post ID, the post type and a source type of
	 * either Dataset::SOURCE_POST or Dataset::SOURCE_CPT.
	 *
	 * @param  WP_Post  $post  The post object.
	 *
	 * @return Collection The matched datasets (possibly empty).
	 * @throws \Exception
	 * @since 1.0.0
	 *
	 */
	public function make_post_datasets_not_synced( WP_Post $post ): Collection {
		$criteria = [
			'source'          => $post->ID,
			'source_sub_type' => $post->post_type,
			'source_type'     => [ Dataset::SOURCE_POST, Dataset::SOURCE_CPT ]
		];

		$post_datasets = Dataset::where( $criteria );
		if ( $post_datasets->is_empty() ) {
			return $post_datasets;
		}

		$post_datasets->each( function ( Dataset $dataset ) {
			$dataset->mark_not_synced();
		} );

		return $post_datasets;
	}

	/**
	 * Regenerate dataset entries from updated post content.
	 *
	 * Clears the dataset's existing entries, runs the Knowledge_Generator on it
	 * and then indexes every resulting Dataset_Entry. This is best-effort: any
	 * exception is logged through Helper::log() and never propagated, and a
	 * failure to index one entry does not stop the remaining entries.
	 *
	 * @param  Dataset  $dataset  The dataset to regenerate.
	 *
	 * @return void
	 * @since 1.0.11
	 */
	public function regenerate_dataset_entries( Dataset $dataset ): void {
		try {
			// Clear existing entries
			$this->clear( $dataset );

			// Generate new entries
			$generator = new Knowledge_Generator();
			$generator->generate( $dataset );

			// Index all new entries. Accept any iterable (plain array or a
			// traversable collection) so the indexing step is not silently
			// skipped when the entries are not returned as a plain array.
			$all_entries = $dataset->dataset_entries();
			if ( ! is_iterable( $all_entries ) ) {
				return;
			}

			$indexing_service = null;
			foreach ( $all_entries as $entry ) {
				if ( ! $entry instanceof Dataset_Entry ) {
					continue;
				}
				// Created lazily so nothing is instantiated when there is nothing to index.
				if ( null === $indexing_service ) {
					$indexing_service = new Indexing_Service();
				}
				try {
					$indexing_service->index_entry_direct( $entry );
				} catch ( \Exception $e ) {
					// One failing entry must not prevent the others from being indexed.
					Helper::log( $e );
				}
			}
		} catch ( \Exception $e ) {
			Helper::log( $e );
		}
	}

	/**
	 * Removes all entries that belong to the given dataset.
	 *
	 * Delegates to Dataset_Entry_Service::batch_delete() filtered by the dataset ID.
	 * NOTE(review): presumably that call also removes the vectors linked to the
	 * entries — confirm against Dataset_Entry_Service.
	 *
	 * @param  Dataset  $dataset The dataset object to clear entries for.
	 *
	 * @return void
	 * @throws Exception
	 */
	public function clear( Dataset $dataset ) {
		$criteria = [ 'dataset_id' => $dataset->get_id() ];

		$this->dataset_entry_service->batch_delete( $criteria );
	}

	/**
	 * Regenerates dataset entries from source_content meta.
	 *
	 * Accepts dataset ID, a title and source_content (HTML), updates the dataset
	 * name and the source_content meta, clears existing entries, chunks the
	 * content, creates new entries, and indexes them synchronously in one request.
	 *
	 * Failures while building or indexing an individual entry are logged and
	 * skipped; the method only throws when the content is blank, the dataset is
	 * missing, the content produces no chunks, or no entry could be built at all.
	 *
	 * @param  int  $dataset_id  Dataset ID.
	 * @param  string  $title  New dataset name. When the dataset name ends up empty, a default title is used for chunking.
	 * @param  string  $source_content  HTML content to regenerate from.
	 *
	 * @return array Response data with dataset, entries count, indexed count, and skipped count.
	 * @throws Exception If regeneration fails.
	 * @since 1.0.0
	 */
	public function regenerate_from_content( int $dataset_id, $title, string $source_content ): array {
		// Strict comparison: empty() would also reject the valid content "0".
		if ( '' === trim( $source_content ) ) {
			throw new Exception(
				Error_Codes::VALIDATION_INVALID_VALUE,
				__( 'source_content is required and cannot be empty.', 'limb-chatbot' )
			);
		}

		// Find the dataset
		$dataset = Dataset::find( $dataset_id );
		if ( ! $dataset instanceof Dataset ) {
			throw new Exception(
				Error_Codes::MISSING_VALUE,
				__( 'Dataset not found.', 'limb-chatbot' )
			);
		}

		// Update the dataset name
		$dataset->set_name( $title );
		$dataset->save();

		// Update source_content meta
		$dataset->update_meta( 'source_content', $source_content );

		// Clear existing entries
		// NOTE(review): entries are removed before the new content is chunked, so a
		// chunking failure below leaves the dataset without entries — confirm intended.
		if ( $dataset->has_entries() ) {
			$this->clear( $dataset );
		}

		// Clear any previous errors
		$dataset->update_meta( 'errors', wp_json_encode( [] ) );

		// Initialize chunker, entry builder and indexing service
		$chunker          = new Heading_Aware_Chunker();
		$entry_builder    = new Chunk_Entry_Builder();
		$indexing_service = new Indexing_Service();

		// Title for chunking is read back from the dataset so the default applies when its name is empty
		$title = $dataset->get_name() ?: __( 'Content', 'limb-chatbot' );

		// Chunk the content
		$chunks = $chunker->chunk( $source_content, $title );

		if ( empty( $chunks ) ) {
			throw new Exception(
				Error_Codes::EMPTY_VALUE,
				__( 'Failed to chunk content into segments', 'limb-chatbot' )
			);
		}

		// Process chunks into dataset entries
		$all_entries = new Collection();
		$source_url  = $dataset->source_url() ?? '';
		$source_type = $dataset->get_source_type();

		// Position counter advances for every chunk, including ones that fail to build
		$chunk_index = 0;
		foreach ( $chunks as $chunk ) {
			try {
				// Enrich chunk with source metadata
				$chunk['post_title']  = $title;
				$chunk['source_url']  = $source_url;
				$chunk['source_type'] = $source_type;
				$chunk['chunk_index'] = $chunk_index++;

				$entry = $entry_builder->build( $chunk );
				$entry->set_dataset_id( $dataset->get_id() );
				$entry->save();

				$all_entries->push_item( $entry );
			} catch ( \Exception $e ) {
				// Log individual entry build errors but don't fail the whole process
				Helper::log( [
					'error'      => 'Failed to build chunk entry',
					'dataset_id' => $dataset->get_id(),
					'chunk_data' => $chunk,
					'exception'  => $e->getMessage()
				], __METHOD__ );
			}
		}

		if ( $all_entries->is_empty() ) {
			throw new Exception(
				Error_Codes::EMPTY_VALUE,
				__( 'No valid entries were generated from given source', 'limb-chatbot' )
			);
		}

		// Set dataset status to generated
		$dataset->set_status( Dataset::STATUS_GENERATED );
		$dataset->save();

		// Index entries synchronously
		$indexed_count = 0;
		$skipped_count = 0;

		foreach ( $all_entries as $entry ) {
			if ( ! $entry instanceof Dataset_Entry ) {
				continue;
			}

			try {
				// A falsy result is counted as skipped rather than indexed
				if ( $indexing_service->index_entry_direct( $entry ) ) {
					$indexed_count++;
				} else {
					$skipped_count++;
				}
			} catch ( \Exception $e ) {
				// Log indexing errors but continue
				Helper::log( [
					'error'            => 'Failed to index entry',
					'dataset_id'       => $dataset->get_id(),
					'dataset_entry_id' => $entry->get_id(),
					'exception'        => $e->getMessage()
				], __METHOD__ );
				$skipped_count++;
			}
		}

		// Reload dataset to get updated status
		$dataset = Dataset::find( $dataset_id );

		// Return response data
		return [
			'dataset'       => $dataset,
			'entries_count' => $all_entries->count(),
			'indexed_count' => $indexed_count,
			'skipped_count' => $skipped_count,
		];
	}
}