<?php
/**
 * Collects site data from WordPress for use in file generation.
 *
 * @package AIDF
 * @since   1.0.0
 */

// Abort immediately when this file is requested directly instead of being loaded by WordPress.
defined( 'ABSPATH' ) || exit;

/**
 * Extracts data from WordPress core, settings, and user-provided options.
 *
 * All methods are static. collect() is the only public entry point; every
 * other method is a private helper that builds one section of its result.
 */
class AIDF_Data_Collector {

	/**
	 * Collect all available data for file generation.
	 *
	 * Reads the plugin settings once and passes them to the section builders.
	 * The raw settings are also returned unchanged under the 'settings' key.
	 *
	 * @return array<string, mixed>
	 */
	public static function collect() {
		$settings = AIDF_Plugin::get_settings();

		return array(
			'identity'    => self::get_identity( $settings ),
			'permissions' => self::get_permissions( $settings ),
			'content'     => self::get_content( $settings ),
			'technical'   => self::get_technical( $settings ),
			'site'        => self::get_site_data(),
			'pages'       => self::get_pages(),
			'settings'    => $settings,
		);
	}

	/**
	 * Read a single setting, falling back to a default when it is absent.
	 *
	 * Uses isset(), so a key that is missing or holds null yields the default
	 * instead of raising an "undefined index" warning.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @param  string               $key      Setting key.
	 * @param  mixed                $default  Value returned when the key is missing or null.
	 * @return mixed
	 */
	private static function setting_value( $settings, $key, $default = '' ) {
		return isset( $settings[ $key ] ) ? $settings[ $key ] : $default;
	}

	/**
	 * Read a URL setting and strip its trailing slash.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @param  string               $key      Setting key.
	 * @return string Empty string when the setting is missing or not scalar.
	 */
	private static function url_setting( $settings, $key ) {
		$url = self::setting_value( $settings, $key );

		return is_scalar( $url ) ? untrailingslashit( (string) $url ) : '';
	}

	/**
	 * Get identity information.
	 *
	 * Name, tagline and email fall back to the WordPress site values when the
	 * corresponding setting is empty. The VAT number is only exposed when the
	 * 'vat_registered' setting is truthy.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @return array<string, mixed>
	 */
	private static function get_identity( $settings ) {
		return array(
			'name'                 => ! empty( $settings['business_name'] ) ? $settings['business_name'] : get_bloginfo( 'name' ),
			'legal_name'           => self::setting_value( $settings, 'legal_name' ),
			'tagline'              => ! empty( $settings['tagline'] ) ? $settings['tagline'] : get_bloginfo( 'description' ),
			'email'                => ! empty( $settings['contact_email'] ) ? $settings['contact_email'] : get_option( 'admin_email' ),
			'phone'                => self::setting_value( $settings, 'contact_phone' ),
			'contact_url'          => self::url_setting( $settings, 'contact_url' ),
			'url'                  => untrailingslashit( home_url() ),
			'address'              => self::setting_value( $settings, 'address' ),
			'postcode'             => self::setting_value( $settings, 'postcode' ),
			'city'                 => self::setting_value( $settings, 'location_city' ),
			'region'               => self::setting_value( $settings, 'location_region' ),
			'country'              => self::setting_value( $settings, 'location_country' ),
			'services'             => self::normalise_services( self::setting_value( $settings, 'services' ) ),
			'not_services'         => self::parse_list( self::setting_value( $settings, 'not_services' ) ),
			'service_areas'        => self::parse_list( self::setting_value( $settings, 'service_areas' ) ),
			'not_service_areas'    => self::parse_list( self::setting_value( $settings, 'not_service_areas' ) ),
			'industry'             => self::setting_value( $settings, 'industry' ),
			'founded'              => self::setting_value( $settings, 'founded_year' ),
			'employees'            => self::setting_value( $settings, 'employee_count' ),
			'business_type'        => self::setting_value( $settings, 'business_type' ),
			'company_number'       => self::setting_value( $settings, 'company_number' ),
			'company_jurisdiction' => self::setting_value( $settings, 'company_jurisdiction' ),
			'vat_number'           => ! empty( $settings['vat_registered'] ) ? self::setting_value( $settings, 'vat_number' ) : '',
			'customer_type'        => self::setting_value( $settings, 'customer_type' ),
			'offering_type'        => self::setting_value( $settings, 'offering_type' ),
			'is_ecommerce'         => ! empty( $settings['is_ecommerce'] ),
			'operating_hours'      => self::resolve_operating_hours( $settings ),
			'social_links'         => self::parse_list( self::setting_value( $settings, 'social_links' ) ),
			'logo_url'             => self::setting_value( $settings, 'logo_url' ),
			'products_url'         => self::url_setting( $settings, 'products_url' ),
			'privacy_policy_url'   => self::url_setting( $settings, 'privacy_policy_url' ),
			'terms_url'            => self::url_setting( $settings, 'terms_url' ),
		);
	}

	/**
	 * Get permission settings.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @return array<string, string>
	 */
	private static function get_permissions( $settings ) {
		return array(
			'ai_usage'        => self::setting_value( $settings, 'ai_usage' ),
			'ai_training'     => self::setting_value( $settings, 'ai_training' ),
			'crawler_policy'  => self::setting_value( $settings, 'crawler_policy' ),
			'content_licence' => self::setting_value( $settings, 'content_licence' ),
			'citation_format' => self::setting_value( $settings, 'citation_format' ),
		);
	}

	/**
	 * Get content data (brand, FAQs).
	 *
	 * List-style brand settings are split into arrays; 'key_people' and 'faqs'
	 * are passed through only when they are already arrays.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @return array<string, mixed>
	 */
	private static function get_content( $settings ) {
		$key_people = self::setting_value( $settings, 'key_people', array() );
		$faqs       = self::setting_value( $settings, 'faqs', array() );

		return array(
			'brand_alternates'    => self::parse_list( self::setting_value( $settings, 'brand_alternates' ) ),
			'brand_never'         => self::parse_list( self::setting_value( $settings, 'brand_never' ) ),
			'brand_pronunciation' => self::setting_value( $settings, 'brand_pronunciation' ),
			'brand_misspellings'  => self::parse_list( self::setting_value( $settings, 'brand_misspellings' ) ),
			'brand_voice'         => self::setting_value( $settings, 'brand_voice' ),
			'brand_taglines'      => self::parse_list( self::setting_value( $settings, 'brand_taglines' ) ),
			'brand_boilerplate'   => self::setting_value( $settings, 'brand_boilerplate' ),
			'key_people'          => is_array( $key_people ) ? $key_people : array(),
			'faqs'                => is_array( $faqs ) ? $faqs : array(),
		);
	}

	/**
	 * Get technical data.
	 *
	 * Combines the user-provided technical notes with values reported by
	 * WordPress and PHP at runtime.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @return array<string, mixed>
	 */
	private static function get_technical( $settings ) {
		return array(
			'developer_notes' => self::setting_value( $settings, 'developer_notes' ),
			'api_info'        => self::setting_value( $settings, 'api_info' ),
			'tech_stack'      => self::setting_value( $settings, 'tech_stack' ),
			'wp_version'      => get_bloginfo( 'version' ),
			'php_version'     => phpversion(),
			'theme'           => wp_get_theme()->get( 'Name' ),
			'charset'         => get_bloginfo( 'charset' ),
			'language'        => get_bloginfo( 'language' ),
			'locale'          => get_locale(),
		);
	}

	/**
	 * Get core WordPress site data.
	 *
	 * The home and RSS URLs have their trailing slash stripped; the Atom and
	 * pingback URLs are returned exactly as WordPress reports them.
	 *
	 * @return array<string, mixed>
	 */
	private static function get_site_data() {
		return array(
			'name'        => get_bloginfo( 'name' ),
			'description' => get_bloginfo( 'description' ),
			'url'         => untrailingslashit( home_url() ),
			'admin_email' => get_option( 'admin_email' ),
			'language'    => get_bloginfo( 'language' ),
			'charset'     => get_bloginfo( 'charset' ),
			'rss_url'     => untrailingslashit( get_bloginfo( 'rss2_url' ) ),
			'atom_url'    => get_bloginfo( 'atom_url' ),
			'pingback'    => get_bloginfo( 'pingback_url' ),
			'is_public'   => (bool) get_option( 'blog_public' ),
			'timezone'    => wp_timezone_string(),
			'date_format' => get_option( 'date_format' ),
		);
	}

	/**
	 * Get published pages as a simplified list.
	 *
	 * Requests at most 50 published pages, ordered by menu order and then
	 * title, and reduces each one to its title and permalink.
	 *
	 * @return array<int, array<string, string>>
	 */
	private static function get_pages() {
		$pages  = array();
		$result = get_pages(
			array(
				'sort_column' => 'menu_order,post_title',
				'post_status' => 'publish',
				'number'      => 50,
			)
		);

		// Anything other than an array means there is nothing to list.
		if ( ! is_array( $result ) ) {
			return $pages;
		}

		foreach ( $result as $page ) {
			// get_permalink() may return false; report an empty URL in that case.
			$permalink = get_permalink( $page );

			$pages[] = array(
				'title' => $page->post_title,
				'url'   => is_string( $permalink ) ? untrailingslashit( $permalink ) : '',
			);
		}

		return $pages;
	}

	/**
	 * Resolve operating hours from preset or custom value.
	 *
	 * The custom 'operating_hours' value is returned when the preset is
	 * 'custom', missing, not usable as an array key, or not among the presets
	 * known to the plugin. Otherwise the matching preset value is returned.
	 *
	 * @param  array<string, mixed> $settings Plugin settings.
	 * @return string
	 */
	private static function resolve_operating_hours( $settings ) {
		$custom = self::setting_value( $settings, 'operating_hours' );
		$preset = self::setting_value( $settings, 'operating_hours_preset', 'custom' );

		// Only strings and integers are valid array offsets for the preset lookup.
		if ( 'custom' === $preset || ! ( is_string( $preset ) || is_int( $preset ) ) ) {
			return $custom;
		}

		$presets = AIDF_Plugin::get_operating_hours_presets();

		return isset( $presets[ $preset ] ) ? $presets[ $preset ] : $custom;
	}

	/**
	 * Normalise services from either the new array format or legacy string format.
	 *
	 * New format: array of arrays with 'name' and optional 'url'.
	 * Legacy format: newline-separated string with optional pipe-delimited URLs,
	 * e.g. "Service Name | https://url".
	 *
	 * In both formats names and URLs are trimmed, entries without a name are
	 * dropped, and the 'url' key is only present when a URL was given.
	 *
	 * @param  array<int, array<string, string>>|string $value Services data.
	 * @return array<int, array<string, string>>
	 */
	private static function normalise_services( $value ) {
		$services = array();

		// New array format from repeater.
		if ( is_array( $value ) ) {
			foreach ( $value as $item ) {
				if ( ! is_array( $item ) ) {
					continue;
				}

				$entry = self::build_service_entry(
					isset( $item['name'] ) ? $item['name'] : '',
					isset( $item['url'] ) ? $item['url'] : ''
				);

				if ( null !== $entry ) {
					$services[] = $entry;
				}
			}

			return $services;
		}

		// Legacy string format: "Service Name | https://url".
		if ( ! is_string( $value ) || '' === $value ) {
			return $services;
		}

		$lines = preg_split( '/[\r\n]+/', $value );

		// preg_split() returns false on failure.
		if ( ! is_array( $lines ) ) {
			return $services;
		}

		foreach ( $lines as $line ) {
			// Split on the first pipe only, so the remainder is kept whole as the URL.
			$parts = explode( '|', $line, 2 );
			$entry = self::build_service_entry( $parts[0], isset( $parts[1] ) ? $parts[1] : '' );

			if ( null !== $entry ) {
				$services[] = $entry;
			}
		}

		return $services;
	}

	/**
	 * Build one normalised service entry from a raw name and URL.
	 *
	 * @param  mixed $name Raw service name.
	 * @param  mixed $url  Raw service URL, may be empty.
	 * @return array<string, string>|null Entry with 'name' and optional 'url', or null when the name is blank or not scalar.
	 */
	private static function build_service_entry( $name, $url = '' ) {
		$name = is_scalar( $name ) ? trim( (string) $name ) : '';

		if ( '' === $name ) {
			return null;
		}

		$entry = array( 'name' => $name );
		$url   = is_scalar( $url ) ? trim( (string) $url ) : '';

		if ( '' !== $url ) {
			$entry['url'] = untrailingslashit( $url );
		}

		return $entry;
	}

	/**
	 * Parse a comma- or newline-separated string into a trimmed array.
	 *
	 * An array is also accepted, in which case its scalar items are trimmed
	 * and filtered the same way. Blank items are dropped and the result is
	 * re-indexed from zero.
	 *
	 * @param  string|array<int, mixed> $value Raw input.
	 * @return array<int, string>
	 */
	private static function parse_list( $value ) {
		if ( is_array( $value ) ) {
			$items = $value;
		} elseif ( is_scalar( $value ) && '' !== (string) $value ) {
			$items = preg_split( '/[\r\n,]+/', (string) $value );
		} else {
			return array();
		}

		// preg_split() returns false on failure.
		if ( ! is_array( $items ) ) {
			return array();
		}

		$list = array();

		foreach ( $items as $item ) {
			if ( ! is_scalar( $item ) ) {
				continue;
			}

			$item = trim( (string) $item );

			// Compare against '' rather than using empty() so that "0" survives.
			if ( '' !== $item ) {
				$list[] = $item;
			}
		}

		return $list;
	}
}
