<?php

namespace Limb_Chatbot\Includes\Repositories;

use Limb_Chatbot\Includes\Data_Objects\AI_Model;
use Limb_Chatbot\Includes\Data_Objects\Dataset;
use Limb_Chatbot\Includes\Database_Strategies\WPDB;
use Limb_Chatbot\Includes\Services\Collection;
use Limb_Chatbot\Includes\Data_Objects\Dataset_Entry;
use Limb_Chatbot\Includes\Data_Objects\Dataset_Meta;
use Limb_Chatbot\Includes\Services\Data_Object_Collection;


/**
 * Repository for managing Dataset records.
 *
 * Provides methods to list datasets (with search, meta-based filtering and
 * pagination), to fetch a single dataset, and to delete a dataset together
 * with its entries and metadata.
 *
 * @since 1.0.0
 */
class Dataset_Repository {

	/**
	 * Retrieves a list of datasets based on the given parameters.
	 *
	 * Supports advanced filtering including:
	 * - Search by the fields listed in `search_fields`
	 * - Filter by synced status
	 * - Filter by AI provider
	 * - Filter by storage (vector_index_type)
	 *
	 * The synced / AI provider / vector index filters are resolved through
	 * Dataset_Meta rows and intersected with the IDs of the datasets returned
	 * by Dataset::where(); pagination is then applied in PHP on the
	 * intersected ID list.
	 *
	 * @since 1.0.0
	 *
	 * @param array $params {
	 *     Optional. Parameters for filtering and pagination.
	 *
	 *     @type string   $search              Search keyword. Only applied when `search_fields` is also given.
	 *     @type string[] $search_fields       Field names to search in; each one adds a "{field}LIKE" condition.
	 *     @type string   $source_type         Filter by source type (post, term, file, url, text, cpt, q_a).
	 *     @type int|bool $synced              Filter by synced status (0/1 or false/true).
	 *     @type string   $ai_provider_id      Filter by AI provider ID.
	 *     @type string   $vector_index_type   Filter by vector index type (storage type).
	 *     @type int      $per_page            Number of results per page. Default 10.
	 *     @type int      $page                Current page number. Default 1. Values below 1 are treated as 1.
	 *     @type string   $orderby             Field to order by. Default 'id'.
	 *     @type string   $order               Sort order direction. Accepts 'ASC' or 'DESC'. Default 'DESC'.
	 * }
	 *
	 * @return Collection Collection of Dataset objects for the requested page, with the total
	 *                    (pre-pagination) match count set via set_total().
	 */
	public function get_items( $params ) {
		// Translate the generic search keyword into one LIKE condition per searchable field.
		if ( ! empty( $params['search'] ) && ! empty( $params['search_fields'] ) ) {
			foreach ( $params['search_fields'] as $field ) {
				$params["{$field}LIKE"] = "%{$params['search']}%";
			}
		}

		// Meta-based filters are not columns of the dataset table, so each one is resolved
		// to a list of dataset IDs and removed from $params before querying datasets.
		$id_filters = [];

		if ( isset( $params['synced'] ) ) {
			$id_filters[] = $this->get_dataset_ids_by_meta( 'synced', $params['synced'] );
			unset( $params['synced'] );
		}

		if ( isset( $params['ai_provider_id'] ) ) {
			$model_ids = AI_Model::where( [ 'ai_provider_id' => $params['ai_provider_id'] ] )->pluck( 'id' );
			// A provider without any models cannot own a dataset index; skip the meta query
			// rather than passing an empty value list to it.
			$id_filters[] = empty( $model_ids )
				? []
				: $this->get_dataset_ids_by_meta( 'index_ai_model_id', $model_ids );
			unset( $params['ai_provider_id'] );
		}

		if ( isset( $params['vector_index_type'] ) ) {
			$id_filters[] = $this->get_dataset_ids_by_meta( 'vector_index_type', $params['vector_index_type'] );
			unset( $params['vector_index_type'] );
		}

		// The -1 / -1 arguments are passed as in the original call; presumably they disable
		// limit/offset so that pagination can be applied after the meta filters — TODO confirm.
		$datasets = Dataset::where( $params, - 1, - 1, $params['orderby'] ?? 'id', $params['order'] ?? 'DESC' );

		// Keep only datasets that satisfy every requested meta filter, preserving query order.
		$matching_ids = $datasets->pluck( 'id' );
		foreach ( $id_filters as $allowed_ids ) {
			$matching_ids = array_intersect( $matching_ids, $allowed_ids );
		}

		$per_page = (int) ( $params['per_page'] ?? 10 );
		// Default to the first page and never go below it: a page < 1 would yield a negative
		// offset, which array_slice() interprets as "count from the end".
		$page     = max( 1, (int) ( $params['page'] ?? 1 ) );
		$count    = count( $matching_ids );
		$page_ids = array_slice( $matching_ids, ( $page - 1 ) * $per_page, $per_page );

		// Loose comparison is kept on purpose: IDs may come back as numeric strings from one
		// source and integers from another.
		$datasets = $datasets->filter( function ( Dataset $dataset ) use ( $page_ids ) {
			return in_array( $dataset->get_id(), $page_ids );
		} );
		$datasets->set_total( $count );

		return $datasets;
	}

	/**
	 * Deletes a dataset along with its entries and metadata.
	 *
	 * The dataset row is deleted first. Only when that deletion reports success
	 * are the dataset's entries and metadata deleted as well; otherwise they are
	 * left untouched.
	 *
	 * NOTE(review): this method does not cancel or inspect any running import
	 * processes for the dataset; callers that need that must handle it themselves.
	 *
	 * @param Dataset $dataset The dataset object to be deleted.
	 * @since 1.0.0
	 *
	 * @return mixed The value returned by Dataset::delete() (documented as bool);
	 *               truthy when the dataset row was deleted.
	 */
	public function delete( Dataset $dataset ) {
		$dataset_id = $dataset->get_id();
		$deleted    = Dataset::delete( [ 'id' => $dataset_id ] );

		if ( $deleted ) {
			// Remove dependent rows only after the parent row is confirmed gone.
			Dataset_Entry::delete( [ 'dataset_id' => $dataset_id ] );
			Dataset_Meta::delete( [ 'dataset_id' => $dataset_id ] );
		}

		return $deleted;
	}

	/**
	 * Retrieves the first dataset matching the given parameters.
	 *
	 * Optionally, related data can be loaded by specifying it in the 'include' key
	 * of the $params array; it is forwarded to the collection's with() method.
	 *
	 * @param array $params Associative array of conditions for querying datasets.
	 *                      Example: ['id' => 123, 'status' => 'active', 'include' => ['entries', 'meta']]
	 * @since 1.0.0
	 *
	 * @return Dataset|null The first dataset matching the criteria, or null if none found.
	 */
	public function get_item( array $params ) {
		$items = Dataset::where( $params );
		if ( ! empty( $params['include'] ) ) {
			$items = $items->with( $params['include'] );
		}

		return $items->first();
	}

	/**
	 * Returns the IDs of all datasets that have a meta row with the given key and value.
	 *
	 * @param string $meta_key   Meta key to match.
	 * @param mixed  $meta_value Meta value to match; an array is passed through to
	 *                           Dataset_Meta::where() unchanged.
	 *
	 * @return array List of dataset IDs taken from the matching meta rows.
	 */
	private function get_dataset_ids_by_meta( $meta_key, $meta_value ) {
		return Dataset_Meta::where( [
			'meta_key'   => $meta_key,
			'meta_value' => $meta_value,
		] )->pluck( 'dataset_id' );
	}
}