<?php
namespace IntentDeep\VirtualFiles\Traits;

/**
 * Helper trait for content sanitization
 */
trait ContentSanitizationHelper {

    /**
     * Sanitize virtual file content according to its file extension.
     *
     * Extensions in the structured list (xml, json, yml, yaml, rss, csv, md,
     * markdown; compared case-insensitively) are passed through
     * sanitizeStructuredContent(), then have their line endings normalized to
     * "\n" and are trimmed. JSON that decodes to an object or array additionally
     * has every string value passed through sanitize_text_field() and is
     * re-encoded pretty-printed. Every other extension is passed to
     * wp_kses_post().
     *
     * @param string $content   Raw file content.
     * @param string $extension File extension, e.g. "xml" or "JSON".
     * @return string Sanitized content.
     */
    private function sanitizeVirtualFileContent(string $content, string $extension): string {
        $structured_extensions = ['xml', 'json', 'yml', 'yaml', 'rss', 'csv', 'md', 'markdown'];
        $normalized_extension = strtolower($extension);

        // Anything that is not a structured format is treated as HTML-aware text.
        if (!in_array($normalized_extension, $structured_extensions, true)) {
            return wp_kses_post($content);
        }

        $content = $this->sanitizeStructuredContent($content);

        if ($normalized_extension === 'json') {
            // Decode objects as stdClass (not associative arrays) so that an empty
            // JSON object "{}" is not re-encoded as an empty list "[]".
            $decoded = json_decode($content);
            $is_container = is_array($decoded) || $decoded instanceof \stdClass;

            if (json_last_error() === JSON_ERROR_NONE && $is_container) {
                $encoded = json_encode(
                    $this->sanitizeJsonValue($decoded),
                    JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE
                );

                // json_encode() returns false on failure; in that case fall through
                // to the plain-text normalization of the already filtered content.
                if (is_string($encoded)) {
                    return $encoded;
                }
            }
        }

        // Normalize line endings and trim for consistency
        $content = str_replace(["\r\n", "\r"], "\n", $content);
        return trim($content);
    }

    /**
     * Strip dangerous markup from structured content (XML, JSON, Markdown, ...).
     *
     * Removes closed "<?php ... ?>" blocks, <script> and <iframe> elements, a
     * fixed list of inline event-handler attribute prefixes, and the
     * "javascript:", "data:" and "vbscript:" scheme prefixes. The filters are
     * re-applied until a pass removes nothing, because deleting one match can
     * splice together a new one (e.g. "<data:script>" becomes "<script>").
     * Finally the result is capped at 1 MiB.
     *
     * NOTE(review): the scheme filter also removes a plain "data:" key at a word
     * boundary (for example a YAML mapping key named "data"). It is left
     * unchanged here because narrowing it would loosen the sanitizer.
     *
     * @param mixed $content Content to filter; cast to string.
     * @return string Filtered content, or an empty string when the regex engine
     *                fails or the content does not stabilize within the pass limit.
     */
    private function sanitizeStructuredContent($content) {
        $content = (string) $content;
        $max_bytes = 1048576; // 1 MiB
        $max_passes = 20;

        $patterns = [
            // Closed PHP blocks (case-insensitive, dot matches newlines)
            '/<\?php.*?\?>/si',
            // Script elements with attributes and content
            '/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/mi',
            // Iframe elements with attributes and content
            '/<iframe\b[^<]*(?:(?!<\/iframe>)<[^<]*)*<\/iframe>/mi',
        ];

        // Inline event handlers that might appear in XML/JSON attribute text
        $dangerous_events = ['onload', 'onerror', 'onclick', 'onmouseover', 'onfocus', 'onblur'];
        foreach ($dangerous_events as $event) {
            $patterns[] = '/\b' . preg_quote($event, '/') . '\s*=/i';
        }

        // Script-capable URL scheme prefixes
        $patterns[] = '/\b(javascript|data|vbscript):/i';

        $stable = false;
        for ($pass = 0; $pass < $max_passes; $pass++) {
            $replacements = 0;
            $content = preg_replace($patterns, '', $content, -1, $replacements);

            // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
            // Fail closed rather than hand back unfiltered content.
            if ($content === null) {
                return '';
            }

            if ($replacements === 0) {
                $stable = true;
                break;
            }
        }

        // Every pass with a match shortens the string, so the loop would end on
        // its own; the pass limit only bounds the work on deeply nested input.
        // Content that still matches after the limit is rejected outright.
        if (!$stable) {
            return '';
        }

        // Limit content size (1 MiB max) without splitting a UTF-8 character:
        // if the first dropped byte is a continuation byte (10xxxxxx), move the
        // cut back to the start of that character (at most 3 bytes).
        if (strlen($content) > $max_bytes) {
            $cut = $max_bytes;
            for ($step = 0; $step < 3 && $cut > 0 && (ord($content[$cut]) & 0xC0) === 0x80; $step++) {
                $cut--;
            }
            $content = substr($content, 0, $cut);
        }

        return $content;
    }

    /**
     * Recursively sanitize array values.
     *
     * Nested arrays are walked recursively, string values are passed through
     * sanitize_text_field(), and all other values are kept as they are. Keys
     * are not modified.
     *
     * @param array $array Array to sanitize.
     * @return array Array with the same keys and sanitized string values.
     */
    private function sanitizeArray(array $array): array {
        foreach ($array as $key => $value) {
            if (is_array($value)) {
                $array[$key] = $this->sanitizeArray($value);
            } else {
                $array[$key] = is_string($value) ? sanitize_text_field($value) : $value;
            }
        }
        return $array;
    }

    /**
     * Recursively sanitize a value produced by json_decode() in object mode.
     *
     * Strings are passed through sanitize_text_field(); arrays and stdClass
     * objects are walked recursively; numbers, booleans and null are returned
     * unchanged. Object property names and array keys are not modified.
     *
     * @param mixed $value Decoded JSON value.
     * @return mixed The sanitized value, keeping the object/array distinction.
     */
    private function sanitizeJsonValue($value) {
        if (is_string($value)) {
            return sanitize_text_field($value);
        }

        if (is_array($value)) {
            foreach ($value as $key => $item) {
                $value[$key] = $this->sanitizeJsonValue($item);
            }
            return $value;
        }

        if ($value instanceof \stdClass) {
            // Iterate over a snapshot so properties can be reassigned safely.
            foreach (get_object_vars($value) as $name => $item) {
                $value->{$name} = $this->sanitizeJsonValue($item);
            }
            return $value;
        }

        return $value;
    }
}
