<?php

namespace CocktailRecipes\Core\Helpers;

use CocktailRecipes\Plugin;

final class HTML
{
    // Tag brackets for use during markdown conversion
    private const ASCII_STX = "\x02";   // ASCII Start of Text for use as "<" placeholder
    private const ASCII_ETX = "\x03";   // ASCII End of Text for use as ">" placeholder

    // Attributes to open external links in new tab/window
    public const NEW_TAB = 'target="_blank" rel="noopener noreferrer"';

    // Whitelisted tags that may keep a single attribute (tag name => attribute name)
    private const ALLOWED_ATTRS = [
        'span' => 'class',
        'abbr' => 'title',
    ];

    // Characters escaped in text found outside whitelisted tags
    private const TEXT_ESCAPES = [
        '<' => '&lt;',
        '>' => '&gt;',
        '&' => '&amp;',
        '"' => '&quot;',
        "'" => '&#39;',
    ];

    /**
     * Create an HTML comment
     *
     * The message has ESC chars removed, is trimmed, and has comment terminators
     * neutralized so it cannot close the comment early.
     *
     * @param string $message   comment text
     * @param bool   $tags      true to include `<!--` and `-->` tags (default)
     */
    public static function comment(string $message, bool $tags = true): string
    {
        return ($tags ? '<!-- ' : ' ')
            . '[' . Plugin::slug() . '] '
            . self::commentText($message)
            . ($tags ? ' -->' : ' ');
    }

    /**
     * Create an HTML comment block
     *
     * Output is the plugin slug (plus optional heading) on the first line, followed by
     * one indented line per entry of $lines. Returns an empty string when $lines is empty.
     *
     * @param   string  $heading    optional heading shown after the plugin slug
     * @param   array   $lines      lines of text, one per output line
     * @param   bool    $tags       true to include `<!--` and `-->` tags (default)
     */
    public static function commentBlock(string $heading, array $lines, bool $tags = true): string
    {
        if (!$lines) return '';
        $lines   = array_map(static fn($line) => self::commentText($line), $lines);
        $heading = self::commentText($heading);
        $indent  = '    ';
        return ($tags ? "<!--\n" : "\n")
            . $indent . '[' . Plugin::slug() . ']'
            . ($heading === '' ? "\n" : " $heading:\n")
            . $indent . implode("\n$indent", $lines) . "\n"
            . ($tags ? "-->\n" : "\n");
    }

    /** Remove ESC chars and escape html special characters */
    public static function esc(string $text): string
    {
        return esc_html(Sanitizer::stripEsc($text));
    }

    /**
     * Convert or remove all but simple tags for safe HTML output
     *
     * Tags maintained, but with all attributes removed:
     *    strong, em, code, mark, b, i, u, q, small, sup, sub
     * Tags maintained, with some attributes:
     *     span (class), abbr (title)
     * Remaining "<" and ">" characters converted to entities
     *
     * Unless $rawOutput is set, STX/ETX control chars already present in $text are
     * removed first; they are used internally as tag placeholders and would otherwise
     * be turned into real "<" and ">" characters.
     *
     * @param string $text      text that may contain HTML tags
     * @param bool $stripTags   remove non-whitelisted tags
     * @param bool $rawOutput   keeps tag placeholders and extra '<' and '>' chars as-is
     */
    public static function escTags(
        string $text,
        bool $stripTags = false,
        bool $rawOutput = false
    ): string {
        $O = self::ASCII_STX;
        $C = self::ASCII_ETX;

        // stray placeholders in the input must never become real tag brackets;
        // in raw mode the caller is expected to pass placeholders through on purpose
        if (!$rawOutput) {
            $text = str_replace([$O, $C], '', $text);
        }

        // keep simple tags with all attributes stripped
        // (on a PCRE failure the text is left as-is, so its tags get encoded below)
        $text = preg_replace(
            '#<(/?(?:strong|small|code|mark|sub|sup|em|b|i|u|q))(?:\s.*?)?>#is',
            $O . '$1' . $C,
            $text
        ) ?? $text;

        // keep tags with an optional attribute (i.e. 'class' for span)
        $text = preg_replace_callback(
            '#<(span|abbr)(?:\s.*?)?>#is',
            static function ($tagMatch) use ($O, $C) {
                $tag  = $tagMatch[1];
                $attr = self::ALLOWED_ATTRS[strtolower($tag)] ?? null;
                // the lookbehind keeps e.g. 'data-class' from being picked up as 'class'
                return $attr && preg_match('#(?<![\w\-])' . $attr . '\s*=\s*"(.*?)"#is', $tagMatch[0], $attrMatch)
                    ? $O . $tag . ' ' . $attr . '="' . htmlspecialchars($attrMatch[1], ENT_QUOTES, 'UTF-8') . '"' . $C
                    : $O . $tag . $C;
            },
            $text
        ) ?? $text;
        $text = preg_replace('#</(span|abbr)(?:\s.*?)?>#is', $O . '/$1' . $C, $text) ?? $text;

        // optionally remove non-whitelisted tags, leaving inner content as-is
        if ($stripTags) {
            $text = wp_strip_all_tags($text, false);
        }

        if ($rawOutput) {
            return $text;
        }

        // encode all remaining raw '<' and '>' as html entities, then restore whitelisted tags
        return strtr($text, [
            '<' => '&lt;',
            '>' => '&gt;',
            $O  => '<',
            $C  => '>',
        ]);
    }

    /**
     * Convert simple markdown syntax to HTML, remove ESC chars and escape HTML special chars as needed
     *
     * Supported:
     *   **bold**, __bold__
     *   *italic*, _italic_
     *   `code`, ``code``, etc.
     *   <https://example.com>, <mailto:address>, <user@example.com>
     *   [text](https://example.com), [text](https://example.com "description")
     *
     * Syntax directly preceded by an ESC char (\x1B) is left unconverted.
     * STX/ETX control chars in $text are removed up front because they serve as
     * internal tag placeholders. If a conversion step fails (e.g. invalid UTF-8),
     * that step is skipped and the text is carried forward unchanged.
     *
     * @param string $text      text with markdown syntax
     * @param bool $escape      remove ASCII ESC, escape html special chars and non-whitelisted tags
     * @param bool $stripTags   remove non-whitelisted tags when $escape is true
     *
     * @see HTML::escTags()
     */
    public static function markdown(
        string $text,
        bool   $escape = true,
        bool   $stripTags = false
    ): string {
        // open/close tag delimiters; i.e. '<' and '>' placeholders
        $O = self::ASCII_STX;
        $C = self::ASCII_ETX;

        // placeholders supplied by the input would otherwise be emitted as real '<' and '>'
        $text = str_replace([$O, $C], '', $text);

        // NOTE(review): emphasis is converted before code spans and links, so '*' / '_'
        // pairs inside a code span or URL are converted as well

        // bold: **text** or __text__
        $text = preg_replace(
            '/(?<=^|[^\pL\d])(?<!\x1B)(\*\*|__)(?=\S)(.+?)(?<=\S)(?<!\x1B)\1(?=$|[^\pL\d])/u',
            "{$O}strong{$C}\$2{$O}/strong{$C}",
            $text
        ) ?? $text;

        // italic: *text* or _text_
        $text = preg_replace(
            '/(?<=^|[^\pL\d])(?<!\x1B)(\*|_)(?=\S)(.+?)(?<=\S)(?<!\x1B)\1(?=$|[^\pL\d])/u',
            "{$O}em{$C}\$2{$O}/em{$C}",
            $text
        ) ?? $text;

        // code: `text` or ``text`` etc.
        $text = preg_replace_callback(
            '/(?<!\x1B)(`+)(.*?)(?<!\x1B)\1/',
            static fn($m) => $O . 'code' . $C . $m[2] . $O . '/code' . $C,
            $text
        ) ?? $text;

        // auto-links: <https://example.com> or <mailto:address>
        $text = preg_replace_callback(
            '/(?<!\x1B)<(([a-z][a-z0-9+.-]*):([^ >]+))>/i',
            // $m[1] = all text within <...>
            // $m[2] = protocol, e.g. 'https', 'http', 'mailto', etc.
            // $m[3] = rest of URL after protocol or email address
            static fn($m) => strpos($m[1], Sanitizer::ESC) !== false
                ? $m[0]
                : (strtolower($m[2]) === 'mailto'
                    ? $O . 'a href="mailto:' . sanitize_email($m[3]) . '"' . $C . $m[3] . $O . '/a' . $C
                    : $O . 'a href="' . esc_url($m[1]) . '" ' . self::NEW_TAB . $C . $m[1] . $O . '/a' . $C
                ),
            $text
        ) ?? $text;

        // emails: <user@example.com>
        $text = preg_replace_callback(
            '/(?<!\x1B)<([A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,})>/',
            static fn($m) => strpos($m[1], Sanitizer::ESC) !== false
                ? $m[0]
                : $O . 'a href="mailto:' . sanitize_email($m[1]) . '"' . $C . $m[1] . $O . '/a' . $C,
            $text
        ) ?? $text;

        // links: [text](url) or  [text](url "description")
        $text = preg_replace_callback(
            '/(?<!\x1B)\[([^\]]*)\]\(([^\s)]*)(?:\s+"([^"]*)")?\)/u',
            // $m[1] = text
            // $m[2] = url
            // $m[3] = description (optional)
            static function ($m) use ($O, $C) {
                $url = $m[2];
                if (strpos($url, Sanitizer::ESC) !== false) return $m[0];
                $external = strpos($url, '://') !== false || substr($url, 0, 2) === '//';
                // a colon before any '/', '?' or '#' means the URL carries a scheme
                // (mailto:, tel:, javascript:, ...) and is not a relative path
                $hasScheme = preg_match('/^[^\/?#]*:/', $url) === 1;
                if ($external || $hasScheme || substr($url, 0, 1) === '/') {
                    // esc_url() applies its protocol whitelist to URLs with a scheme
                    $url = esc_url($url);
                } else {
                    // relative path: the temporary '/' prefix keeps esc_url() from prepending
                    // 'http://'; this also skips its protocol check, hence the scheme test above
                    $url = (string) substr(esc_url('/' . $url), 1);
                }
                $extra = $external ? ' ' . self::NEW_TAB : '';
                if (!empty($m[3])) $extra .= ' title="' . esc_attr($m[3]) . '"';
                return $O . 'a href="' . $url . '"' . $extra . $C . $m[1] . $O . '/a' . $C;
            },
            $text
        ) ?? $text;

        // bypass tag/entity escaping
        if (!$escape) return strtr($text, [$O => '<', $C => '>']);

        // remove ASCII ESC chars, remove or escape all but simple tags in text
        $text = self::escTags(Sanitizer::stripEsc($text), $stripTags, true);

        // escape html special characters outside whitelisted tags
        // (byte-wise match: STX/ETX are ASCII, so no 'u' modifier is needed and
        // invalid UTF-8 cannot make this step fail)
        $escaped = preg_replace_callback(
            '/(?:^[^\x02]+)|(?<=\x03)[^\x02]+/',
            // matches all text from start of string until first STX (or end of string)
            // or all text after an ETX until the next STX (or end of string)
            static fn($m) => strtr($m[0], self::TEXT_ESCAPES),
            $text
        );
        if ($escaped === null) {
            // fail closed: never return unescaped text; drop placeholders and escape everything
            return strtr(str_replace([$O, $C], '', $text), self::TEXT_ESCAPES);
        }
        return strtr($escaped, [
            $O => '<',
            $C => '>',
        ]);
    }

    /**
     * Prepare text for use inside an HTML comment: remove ESC chars, neutralize
     * the comment terminators `-->` and `--!>`, and trim surrounding whitespace
     */
    private static function commentText(string $text): string
    {
        $text = str_replace(
            ['-->', '--!>'],
            ['--&gt;', '--!&gt;'],
            Sanitizer::stripEsc($text)
        );
        return trim($text);
    }
}
