<?php

namespace Limb_Chatbot\Includes\Services\Knowledge\Dataset_Builders;

use Limb_Chatbot\Includes\Data_Objects\AI_Model;
use Limb_Chatbot\Includes\Data_Objects\Dataset;
use Limb_Chatbot\Includes\Data_Objects\Job;
use Limb_Chatbot\Includes\Data_Objects\Vector_Index;
use Limb_Chatbot\Includes\Factories\Knowledge_Mapper_Factory;
use Limb_Chatbot\Includes\Interfaces\Dataset_Builder_Interface;
use Limb_Chatbot\Includes\Repositories\Vector_Index_Repository;
use Limb_Chatbot\Includes\Services\Helper;
use Limb_Chatbot\Includes\Services\Vector_Index_Service;
use Limb_Chatbot\Includes\Vector_Dbs\Local\Local;

/**
 * Builds (or reuses) the Dataset record for an actionable-knowledge source
 * and stamps it with the indexing configuration taken from the Job.
 */
class Actionable_Dataset_Builder implements Dataset_Builder_Interface {

	/**
	 * Create or reuse a dataset for the given source and refresh its indexing metadata.
	 *
	 * @param array $data Dataset attributes. Reads 'source_type', 'source' and the optional
	 *                    'source_sub_type'; the whole array is passed to the Dataset constructor
	 *                    when a new dataset has to be created.
	 * @param Job   $job  Job providing the indexing configuration values and the chatbot UUID.
	 *
	 * @return Dataset|null The dataset marked as not synced, or null when an exception was
	 *                      caught (the exception is passed to Helper::log()).
	 */
	public function build( array $data, Job $job ): ?Dataset {
		try {
			$model_id               = $job->get_config_value( 'indexing_ai_model_id' );
			$config_id              = $job->get_config_value( 'indexing_config_id' );
			$vector_index_type      = $job->get_config_value( 'indexing_vector_index_type' );
			$vector_index_config_id = $job->get_config_value( 'indexing_vector_index_config_id' );
			$dimension              = $job->get_config_value( 'indexing_dimension' );
			$taxonomies             = $job->get_config_value( 'taxonomies' );
			// The default chatbot is stored as a null UUID.
			$chatbot_uuid = $job->get_chatbot_uuid() === Job::CHATBOT_DEFAULT ? null : $job->get_chatbot_uuid();
			// Read the optional key once so every use below agrees and no undefined-index warning is raised.
			$source_sub_type = $data['source_sub_type'] ?? null;

			$mapper = ( new Knowledge_Mapper_Factory() )->make( $data['source_type'] );
			$source = Helper::resolve_source_object( $data['source_type'], $data['source'] );

			$vector_index = $this->create_vector_index( $chatbot_uuid, $vector_index_type, $vector_index_config_id, $dimension, $source_sub_type );

			$dataset_name = $mapper->get_entry_input( $source );
			$dataset      = $this->check_dataset_existence( $data, $vector_index->get_id(), $model_id );
			if ( ! $dataset ) {
				$dataset = new Dataset( $data );
				$dataset->set_status( Dataset::STATUS_PENDING );
				$dataset->set_name( $dataset_name );
				$dataset->set_type( Dataset::TYPE_ACTIONABLE_KNOWLEDGE );
				$dataset->set_source_sub_type( $source_sub_type );
				$dataset->save();
			}

			// Reset sync status and metadata
			$dataset->mark_not_synced();
			$dataset->update_meta( 'index_ai_model_id', $model_id );
			$dataset->update_meta( 'index_config_id', $config_id );
			$dataset->update_meta( 'vector_index_type', $vector_index_type );
			$dataset->update_meta( 'vector_index_config_id', $vector_index_config_id );
			$dataset->update_meta( 'dimension', $dimension );
			$dataset->update_meta( 'taxonomies', $taxonomies );
			$dataset->update_meta( 'vector_index_id', $vector_index->get_id() );
			$dataset->update_meta( 'chatbot_uuid', $chatbot_uuid );

			return $dataset;
		} catch ( \Exception $e ) {
			Helper::log( $e );

			return null;
		}
	}

	/**
	 * Find the vector index for a source sub type, creating it when none matches.
	 *
	 * The lookup matches on the generated name plus the vector DB id and config id;
	 * both ids are null when the index type is the local vector DB.
	 *
	 * @param mixed $chatbot_uuid           Chatbot UUID stored as index meta on creation (null for the default chatbot).
	 * @param mixed $vector_index_type      Vector DB identifier; compared against Local::$id.
	 * @param mixed $vector_index_config_id Config id stored on non-local indexes.
	 * @param mixed $dimension              Dimension stored as index meta on creation.
	 * @param mixed $sub_type               Source sub type used to build the index name.
	 *                                      NOTE(review): may be null when the caller has no sub type; confirm
	 *                                      Helper::underscore_to_hyphen() accepts that.
	 *
	 * @return mixed The first matching index, or whatever Vector_Index_Service::create() returns.
	 */
	private function create_vector_index( $chatbot_uuid, $vector_index_type, $vector_index_config_id, $dimension, $sub_type ) {
		$is_local = $vector_index_type === Local::$id;
		$name     = 'lbaiccpt' . Helper::underscore_to_hyphen( $sub_type ) . 'index';
		$where    = [
			'name'         => $name,
			'vector_db_id' => $is_local ? null : $vector_index_type,
			'config_id'    => $is_local ? null : $vector_index_config_id,
		];

		$vector_index = Vector_Index::where( $where );
		if ( ! $vector_index->is_empty() ) {
			return $vector_index->first();
		}

		$data = array_merge( $where, array(
			'metas' => array(
				array(
					'meta_key'   => 'dimension',
					'meta_value' => $dimension,
				),
				array(
					'meta_key'   => 'purpose',
					'meta_value' => 'actionable_knowledge',
				),
				array(
					'meta_key'   => 'chatbot_uuid',
					'meta_value' => $chatbot_uuid,
				),
			),
		) );

		$vector_index_service = new Vector_Index_Service( new Vector_Index_Repository() );

		return $vector_index_service->create( $data );
	}

	/**
	 * Look for an existing actionable-knowledge dataset that can be reused for this source.
	 *
	 * QA and text sources are never reused. For other source types a dataset is reused only
	 * when its 'vector_index_id' meta equals the given vector index id and its AI provider
	 * equals the provider of the given AI model.
	 *
	 * @param array $data            Reads 'source_type', 'source' and the optional 'source_sub_type'.
	 * @param mixed $vector_index_id Id of the vector index the dataset must belong to.
	 * @param mixed $ai_model_id     Id of the AI model whose provider the dataset must share.
	 *
	 * @return Dataset|null The first matching dataset, or null when none qualifies.
	 */
	private function check_dataset_existence( $data, $vector_index_id, $ai_model_id ) {
		if ( in_array( $data['source_type'], [ Dataset::SOURCE_QA, Dataset::SOURCE_TEXT ] ) ) {
			return null;
		}

		$datasets = Dataset::where( [
			'source_type'     => $data['source_type'],
			'source_sub_type' => $data['source_sub_type'] ?? null,
			'source'          => $data['source'],
			'type'            => Dataset::TYPE_ACTIONABLE_KNOWLEDGE,
		] );
		if ( $datasets->is_empty() ) {
			return null;
		}

		// Loaded lazily, at most once: the model is the same for every candidate.
		$ai_model = null;

		foreach ( $datasets->get() as $item ) {
			if ( ! $item instanceof Dataset ) {
				continue;
			}

			// Loose comparison on purpose: the stored meta value and the id may differ in type.
			if ( $item->get_meta_value( 'vector_index_id' ) != $vector_index_id ) {
				continue;
			}

			if ( null === $ai_model ) {
				$ai_model = AI_Model::find( $ai_model_id );
				if ( ! $ai_model ) {
					// Without a model there is no provider to compare against, so nothing can match.
					// Calling a method on the missing model would raise an \Error that build()'s
					// catch ( \Exception ) would not intercept.
					return null;
				}
			}

			if ( $item->ai_provider_id() == $ai_model->get_ai_provider_id() ) {
				return $item;
			}
		}

		return null;
	}
}

