<?php

namespace Limb_Chatbot\Includes\Services\Knowledge\Stringifiers;

use Limb_Chatbot\Includes\Interfaces\Chunk_Stringifier_Interface;

/**
 * QA Stringifier
 *
 * Stringifies question-answer entries for different purposes.
 * Optimized for RAG retrieval of Q&A pairs.
 *
 * @since 1.2.1
 */
class QA_Stringifier implements Chunk_Stringifier_Interface {

	/**
	 * Stringify QA entry for vector embedding (RAG).
	 *
	 * Output layout (sections separated by a blank line):
	 *   Question: ...
	 *   Answer: ...
	 *   Context: <post title> > <heading path>
	 *
	 * The Q&A section is only emitted when both the question and the answer
	 * are present. The heading path is omitted from the context when it is
	 * identical to the post title.
	 *
	 * @param array $chunk QA entry data with structure:
	 *                     - input|question: string (question)
	 *                     - output|answer: string (answer)
	 *                     - heading_path: string (optional, also read from metadata)
	 *                     - post_title: string (optional, also read from metadata)
	 * @return string Stringified QA for embedding.
	 * @since 1.2.1
	 */
	public function stringify_for_embedding( array $chunk ): string {
		$parts = [];

		$question = $this->get_question( $chunk );
		$answer   = $this->get_answer( $chunk );

		// Primary content: Q&A pair (only when the pair is complete).
		if ( '' !== $question && '' !== $answer ) {
			$parts[] = "Question: {$question}";
			$parts[] = "Answer: {$answer}";
		}

		// Context metadata
		$post_title   = $this->get_post_title( $chunk );
		$heading_path = $this->get_heading_path( $chunk );

		$context_parts = [];
		if ( '' !== $post_title ) {
			$context_parts[] = $post_title;
		}
		// Skip the heading path when it would just repeat the post title.
		if ( '' !== $heading_path && $heading_path !== $post_title ) {
			$context_parts[] = $heading_path;
		}

		if ( ! empty( $context_parts ) ) {
			$parts[] = 'Context: ' . implode( ' > ', $context_parts );
		}

		return $this->normalize_text( implode( "\n\n", $parts ) );
	}

	/**
	 * Stringify QA entry for LLM inference (chat).
	 *
	 * Produces a Markdown-flavoured snippet: an optional "## title > heading"
	 * header followed by "**Q:**" and "**A:**" lines, one per line.
	 *
	 * @param array $chunk QA entry data.
	 * @return string Stringified QA for inference.
	 * @since 1.2.1
	 */
	public function stringify_for_inference( array $chunk ): string {
		$parts = [];

		// Context header
		$context_parts = [];
		foreach ( [ $this->get_post_title( $chunk ), $this->get_heading_path( $chunk ) ] as $context_part ) {
			// Explicit '' comparison so that a literal "0" is kept.
			if ( '' !== $context_part ) {
				$context_parts[] = $context_part;
			}
		}
		if ( ! empty( $context_parts ) ) {
			$parts[] = '## ' . implode( ' > ', $context_parts );
		}

		// Q&A pair
		$question = $this->get_question( $chunk );
		$answer   = $this->get_answer( $chunk );

		if ( '' !== $question ) {
			$parts[] = "**Q:** {$question}";
		}
		if ( '' !== $answer ) {
			$parts[] = "**A:** {$answer}";
		}

		return $this->normalize_text( implode( "\n", $parts ) );
	}

	/**
	 * Stringify QA entry for human display.
	 *
	 * Unlike the embedding/inference variants the result is NOT passed
	 * through normalize_text(); field values are emitted as stored.
	 *
	 * @param array $chunk QA entry data.
	 * @return string Stringified QA for display.
	 * @since 1.2.1
	 */
	public function stringify_for_display( array $chunk ): string {
		$parts = [];

		$parts[] = "=== Q&A Entry ===";

		// Context
		$post_title   = $this->get_post_title( $chunk );
		$heading_path = $this->get_heading_path( $chunk );

		if ( '' !== $post_title ) {
			$parts[] = "Post: {$post_title}";
		}
		if ( '' !== $heading_path ) {
			$parts[] = "Section: {$heading_path}";
		}

		// Q&A
		$question = $this->get_question( $chunk );
		$answer   = $this->get_answer( $chunk );

		if ( '' !== $question ) {
			// Leading newline visually separates the Q&A from the context lines.
			$parts[] = "\nQ: {$question}";
		}
		if ( '' !== $answer ) {
			$parts[] = "A: {$answer}";
		}

		return implode( "\n", $parts );
	}

	/**
	 * Read the question text, preferring the "input" key over "question".
	 *
	 * @param array $chunk QA entry data.
	 * @return string Question text, or '' when absent.
	 */
	private function get_question( array $chunk ): string {
		return $this->to_text( $chunk['input'] ?? $chunk['question'] ?? '' );
	}

	/**
	 * Read the answer text, preferring the "output" key over "answer".
	 *
	 * @param array $chunk QA entry data.
	 * @return string Answer text, or '' when absent.
	 */
	private function get_answer( array $chunk ): string {
		return $this->to_text( $chunk['output'] ?? $chunk['answer'] ?? '' );
	}

	/**
	 * Read the post title, preferring the metadata entry over the top-level key.
	 *
	 * @param array $chunk QA entry data.
	 * @return string Post title, or '' when absent.
	 */
	private function get_post_title( array $chunk ): string {
		return $this->to_text( $chunk['metadata']['post_title'] ?? $chunk['post_title'] ?? '' );
	}

	/**
	 * Read the heading path, preferring the metadata entry over the top-level key.
	 *
	 * @param array $chunk QA entry data.
	 * @return string Heading path, or '' when absent.
	 */
	private function get_heading_path( array $chunk ): string {
		return $this->to_text( $chunk['metadata']['heading_path'] ?? $chunk['heading_path'] ?? '' );
	}

	/**
	 * Coerce a raw chunk field to a string.
	 *
	 * Scalars and objects implementing __toString() are cast to string; any
	 * other value (array, null, plain object, resource) becomes '' so that it
	 * is treated as "missing" instead of being interpolated as "Array" or
	 * raising a conversion error.
	 *
	 * @param mixed $value Raw field value.
	 * @return string String representation, or '' when not representable.
	 */
	private function to_text( $value ): string {
		if ( is_scalar( $value ) ) {
			return (string) $value;
		}
		if ( is_object( $value ) && method_exists( $value, '__toString' ) ) {
			return (string) $value;
		}

		return '';
	}

	/**
	 * Normalize text for consistent formatting.
	 *
	 * Decodes HTML entities, maps typographic quotes/dashes/ellipsis to their
	 * ASCII equivalents, collapses runs of spaces/tabs to a single space and
	 * runs of three or more newlines to a blank line, then trims the result.
	 *
	 * @param string $text Text to normalize.
	 * @return string Normalized text.
	 * @since 1.2.1
	 */
	private function normalize_text( string $text ): string {
		// Decode HTML entities first so that e.g. &ldquo; is normalized below.
		$text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5, 'UTF-8' );

		// Applied in order. Code points are written as PCRE escapes so the
		// patterns do not depend on the encoding of this source file.
		$replacements = [
			// Curly double quotes -> "
			'/[\x{201C}\x{201D}]/u' => '"',
			// Curly single quotes -> '
			'/[\x{2018}\x{2019}]/u' => "'",
			// En dash / em dash -> -
			'/[\x{2013}\x{2014}]/u' => '-',
			// Single-character ellipsis -> three dots
			'/\x{2026}/u'           => '...',
			// Any run of two or more dots -> three dots
			'/\.{2,}/'              => '...',
			// Collapse horizontal whitespace
			'/[ \t]+/'              => ' ',
			// Collapse excessive blank lines
			'/\n{3,}/'              => "\n\n",
		];

		foreach ( $replacements as $pattern => $replacement ) {
			$normalized = preg_replace( $pattern, $replacement, $text );
			// preg_replace() returns null on failure (e.g. invalid UTF-8 with
			// the /u modifier); keep the text unchanged for that step rather
			// than discarding it.
			if ( null !== $normalized ) {
				$text = $normalized;
			}
		}

		return trim( $text );
	}
}

