<?php
namespace Highpots\SpamProtection;

// Prevent direct access: stop immediately unless ABSPATH is defined
// (WordPress is expected to define it before loading plugin files).
if (!defined('ABSPATH')) {
    exit;
}

/**
 * Extracts user-submitted form fields from POST data.
 *
 * This class filters out spam protection fields, WordPress nonces,
 * and other system fields to extract only the actual user input
 * for content validation purposes.
 *
 * @package Highpots\SpamProtection
 */
class HPSP_Form_Field_Extractor {
    /** @var array<string> Field names that are never treated as user input */
    private array $excluded_fields = [
        'hpsp_hmac_token',
        'hpsp_hmac_timestamp',
        'hpsp_hmac_nonce',
        'hpsp_rendered_at',
        'hpsp_form_id',
        'action',
        '_wpnonce',
        '_wp_http_referer',
        'submit',
    ];

    /**
     * Extract user-submitted form fields from POST data.
     *
     * Walks the given data (or $_POST), drops excluded and `hpsp_`-prefixed
     * fields at every nesting level, and returns the remaining string values
     * as a flat array after passing each through sanitize_text_field().
     *
     * Note: an empty array is treated the same as null, i.e. the method falls
     * back to the (unslashed) $_POST superglobal.
     *
     * @param array|null $post_data Data to extract from; null or an empty array means "use $_POST"
     * @return array<int|string, string> Flattened, sanitized fields keyed by their leaf field name.
     *                                   When several leaves share a name, the last one is stored under
     *                                   that name and earlier, different values are kept under
     *                                   "<name>#<n>" keys.
     */
    public function extract(?array $post_data = null): array {
        if (!empty($post_data)) {
            $data = $post_data;
        } else {
            // Unslash $_POST data (sanitization happens in extract_recursive)
            // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Called from form plugins that handle their own nonce verification
            $data = wp_unslash($_POST);
        }

        $result = [];
        $this->extract_recursive($data, $result);

        return $result;
    }

    /**
     * Recursively collect string values from nested arrays/objects.
     *
     * Skipped keys prune their whole subtree. Non-string scalars are ignored.
     * The structure is flattened: values are stored under their leaf key
     * without any parent prefix.
     *
     * @param mixed $data Data to walk; anything other than an array or object is ignored
     * @param array &$result Flat result array that is populated in place
     * @return void
     */
    private function extract_recursive($data, array &$result): void {
        if (!is_array($data) && !is_object($data)) {
            return;
        }

        foreach ($data as $key => $value) {
            // List-style inputs (e.g. "choices[]") yield integer keys; cast
            // explicitly instead of relying on implicit scalar coercion.
            if ($this->should_skip_field((string) $key)) {
                continue;
            }

            if (is_string($value)) {
                $this->store_value($result, $key, sanitize_text_field($value));
            } elseif (is_array($value) || is_object($value)) {
                $this->extract_recursive($value, $result);
            }
        }
    }

    /**
     * Store a value in the flat result without losing a previously stored one.
     *
     * Flattening can map several inputs onto the same leaf key (for example
     * two list-style groups both produce keys 0, 1, ...). The newest value
     * always ends up under the plain key; a different value that was already
     * there is moved to the first unused "<key>#<n>" key so it still reaches
     * content validation.
     *
     * @param array &$result Flat result array that is modified in place
     * @param int|string $key Leaf field name
     * @param mixed $value Sanitized field value
     * @return void
     */
    private function store_value(array &$result, $key, $value): void {
        if (array_key_exists($key, $result) && $result[$key] !== $value) {
            $suffix = 1;
            do {
                $alias = $key . '#' . $suffix;
                $suffix++;
            } while (array_key_exists($alias, $result));

            $result[$alias] = $result[$key];
        }

        $result[$key] = $value;
    }

    /**
     * Check if a field should be skipped during extraction.
     *
     * A field is skipped when its name is in the exclusion list or starts
     * with 'hpsp_' (spam protection / honeypot fields).
     *
     * @param string $key Field name to check
     * @return bool True if the field should be skipped, false otherwise
     */
    private function should_skip_field(string $key): bool {
        // Strict comparison so numeric-looking names are never matched loosely.
        return in_array($key, $this->excluded_fields, true)
            || str_starts_with($key, 'hpsp_');
    }

    /**
     * Add a field name to the exclusion list.
     *
     * Names already present are not added a second time.
     *
     * @param string $field Field name to exclude
     * @return void
     */
    public function add_excluded_field(string $field): void {
        if (!in_array($field, $this->excluded_fields, true)) {
            $this->excluded_fields[] = $field;
        }
    }
}