/**
 * GFMR Language Detection Module
 *
 * Handles language detection for syntax highlighting.
 * Extracted from gfmr-main.js for better modularity.
 *
 * @package MarkdownRendererForGitHub
 * @since 1.8.0
 */

(function () {
  "use strict";

  /**
   * Captures the language id from a `language-xxx` class token.
   *
   * The id may contain `+`, `#` and `-` after its first character so that
   * tokens such as `language-c++`, `language-c#` and `language-objective-c`
   * are returned whole instead of being truncated at the first non-word char.
   */
  const LANGUAGE_CLASS_PATTERN = /language-(\w[\w#+-]*)/;

  /**
   * Mermaid diagram keyword at the very start of whitespace-normalized text.
   * `state?Diagram` deliberately tolerates the "statDiagram" typo.
   */
  const MERMAID_BLOCK_START =
    /^(?:graph|flowchart|sequenceDiagram|state?Diagram|erDiagram|gitgraph|classDiagram|pie|gantt|journey)\b/i;

  /**
   * Mermaid diagram keyword at the start of any line (optionally indented).
   * Anchoring to the line start keeps ordinary code that merely mentions
   * words such as "graph" or "pie" from being classified as Mermaid.
   */
  const MERMAID_LINE_START =
    /^\s*(?:graph|flowchart|sequenceDiagram|classDiagram|stateDiagram|erDiagram|gitGraph|gantt|pie|journey)\b/im;

  /**
   * Read an element's class list as a plain string.
   *
   * @param {Element} node - Element to read
   * @return {string} Class string, or '' when none is available
   */
  const classNameOf = (node) => {
    if (typeof node.className === "string") {
      return node.className;
    }
    // SVG elements expose className as an SVGAnimatedString, so fall back to
    // the raw attribute instead of calling string methods on an object.
    return node.getAttribute?.("class") ?? "";
  };

  /**
   * Extract the language id from a node's `language-xxx` class, if present.
   *
   * @param {Element} node - Element to inspect
   * @return {string|null} Language id or null when no such class exists
   */
  const languageFromClass = (node) => {
    const match = classNameOf(node).match(LANGUAGE_CLASS_PATTERN);
    return match ? match[1] : null;
  };

  /**
   * Check for explicit diff markers: git diff header, unified diff file
   * headers (a/, b/ or /dev/null) at the start of the text, or a hunk header
   * on any line.
   *
   * @param {string} text - The text content to analyze
   * @return {boolean} True if a diff header is present
   */
  const hasDiffHeader = (text) => {
    const trimmed = text.trim();
    return (
      /^diff --git\s/m.test(text) || // git diff header
      /^---\s+[ab]\//.test(trimmed) || // unified diff file header (a/...)
      /^---\s+\/dev\/null/.test(trimmed) || // new file diff header
      /^\+\+\+\s+[ab]\//.test(trimmed) || // unified diff file header (b/...)
      /^\+\+\+\s+\/dev\/null/.test(trimmed) || // deleted file diff header
      /^@@\s+-\d+,?\d*\s+\+\d+,?\d*\s+@@/m.test(text) // hunk header (any line)
    );
  };

  /**
   * Language Detector - provides language detection utilities
   */
  const LanguageDetector = {
    /**
     * Detect language from element class, data attributes, or content.
     *
     * Checks, in order: the element's own `language-*` class, the classes of
     * its ancestors (stopping at `document.body`), a `data-language`
     * attribute on the element or its closest ancestor, and finally content
     * inference when the element already looks highlighted.
     *
     * @param {HTMLElement} element - The code element to analyze
     * @return {string} Detected language or 'plaintext'
     */
    detectLanguage(element) {
      console.log("[DEBUG] Starting language detection for element:", {
        tagName: element.tagName,
        className: element.className,
        classList: Array.from(element.classList || []),
        outerHTML: (element.outerHTML || "").substring(0, 200),
      });

      // 1. Standard language class detection
      const ownLanguage = languageFromClass(element);
      if (ownLanguage) {
        console.log("[DEBUG] Language detected from element:", ownLanguage);
        return ownLanguage;
      }

      // 2. Detection from parent elements
      for (
        let parent = element.parentElement;
        parent && parent !== document.body;
        parent = parent.parentElement
      ) {
        const parentLanguage = languageFromClass(parent);
        if (parentLanguage) {
          console.log(
            "[DEBUG] Language detected from parent:",
            parentLanguage,
            "parent:",
            parent.tagName
          );
          return parentLanguage;
        }
      }

      // 3. Data attribute check
      const dataLang =
        element.getAttribute("data-language") ||
        element.closest("[data-language]")?.getAttribute("data-language");
      if (dataLang) {
        console.log("[DEBUG] Language detected from data attribute:", dataLang);
        return dataLang;
      }

      // 4. Infer language from already Shiki-highlighted code
      if (this.isAlreadyHighlighted(element)) {
        const inferredLang = this.inferLanguageFromHighlightedCode(element);
        if (inferredLang !== "plaintext") {
          console.log(
            "[DEBUG] Language inferred from highlighted code:",
            inferredLang
          );
          return inferredLang;
        }
      }

      console.log("[DEBUG] No language detected, falling back to plaintext");
      return "plaintext";
    },

    /**
     * Check if element is already syntax highlighted.
     *
     * An element counts as highlighted when it contains a `span.line` or an
     * inline-colored span, or carries the `hljs` or `shiki` class itself.
     *
     * @param {HTMLElement} element - The code element to check
     * @return {boolean} True if already highlighted
     */
    isAlreadyHighlighted(element) {
      const hasShikiSpans = element.querySelector(
        'span.line, span[style*="color:"]'
      );
      const hasHighlightClass =
        element.classList.contains("hljs") ||
        element.classList.contains("shiki");
      return !!(hasShikiSpans || hasHighlightClass);
    },

    /**
     * Infer language from highlighted code content.
     *
     * Runs an ordered list of heuristics against the element's text content;
     * the first one that matches wins, so the order of the checks matters.
     *
     * @param {HTMLElement} element - The code element to analyze
     * @return {string} Inferred language or 'plaintext'
     */
    inferLanguageFromHighlightedCode(element) {
      const text = element.textContent || "";
      const trimmed = text.trim();
      console.log(
        "[DEBUG] Language inference target text:",
        text.substring(0, 100) + "..."
      );

      // 1. Language detection by comment syntax (highest priority)
      if (/^\/\/\s*(JavaScript|JS|TypeScript|TS)/i.test(trimmed)) {
        console.log("[DEBUG] Inferred as JavaScript from comment");
        return "javascript";
      }
      if (/^#\s*Python/i.test(trimmed)) {
        console.log("[DEBUG] Inferred as Python from comment");
        return "python";
      }
      if (/^#\s*(YAML|YML)/i.test(trimmed)) {
        console.log("[DEBUG] Inferred as YAML from comment");
        return "yaml";
      }
      if (
        /^\/\*\s*(CSS|Style)/i.test(trimmed) ||
        /^\/\/\s*(CSS|Style)/i.test(trimmed)
      ) {
        console.log("[DEBUG] Inferred as CSS from comment");
        return "css";
      }
      if (/^<!--\s*(HTML)/i.test(trimmed)) {
        console.log("[DEBUG] Inferred as HTML from comment");
        return "html";
      }

      // Diff/patch patterns (MUST be checked BEFORE bash due to 'sh' in path patterns)
      // Includes: git diff headers, unified diff headers, hunk headers, and content-only diffs
      if (hasDiffHeader(text) || this.isContentOnlyDiff(text)) {
        console.log("[DEBUG] Inferred as diff language");
        return "diff";
      }

      // 2. Bash/Shell patterns (high priority)
      // NOTE(review): `function name`, `export ` and `for x in` also occur in
      // JavaScript/Python, so such code is classified as Bash here. Left
      // unchanged; tightening this needs a deliberate redesign of the heuristic.
      if (
        /^#!/.test(trimmed) ||
        /\b(set -[euxo]|function\s+\w+|if\s*\[|for\s+\w+\s+in|case\s+.*\s+in|echo\s|export\s|source\s|bash|sh\b)/m.test(
          text
        )
      ) {
        console.log("[DEBUG] Inferred as Bash language");
        return "bash";
      }

      // Mermaid patterns (high priority): the diagram keyword must start a line
      if (MERMAID_LINE_START.test(text)) {
        console.log("[DEBUG] Inferred as Mermaid language");
        return "mermaid";
      }

      // YAML patterns (priority over JavaScript/TypeScript)
      if (this.isYamlContent(text)) {
        console.log("[DEBUG] Inferred as YAML language");
        return "yaml";
      }

      // JSX/TSX patterns (high priority)
      if (
        /<[A-Z]\w*[^>]*\/?>/.test(text) &&
        /\b(const|let|var|import|export|function|return)\b/.test(text)
      ) {
        console.log("[DEBUG] Inferred as JSX/TSX language");
        return "javascript";
      }

      // Rust patterns (check before Java/C++).
      // Tokens that start with a symbol (&, ., #) sit outside the \b group:
      // a word boundary in front of them would only match after a word char.
      if (
        /\b(?:fn\s+\w+|let\s+mut|impl\s+|pub\s+fn|trait\s+|match\s+\{|Option<|Result<|use\s+std::|use\s+\w+::\{|mod\s+\w+)|&mut\s|&self|\.unwrap\(\)|\.expect\(|#\[derive/i.test(
          text
        )
      ) {
        console.log("[DEBUG] Inferred as Rust language");
        return "rust";
      }

      // Java/C++ patterns. The decisive patterns are tested directly; the
      // former keyword pre-filter could not see `#include` at a line start.
      if (
        /\bSystem\.out|public\s+class|public\s+static\s+void\s+main/.test(text)
      ) {
        console.log("[DEBUG] Inferred as Java language");
        return "java";
      }
      if (
        /#include/.test(text) ||
        /std::(cout|cin|endl|string|vector|map|set)/.test(text)
      ) {
        console.log("[DEBUG] Inferred as C++ language");
        return "cpp";
      }

      // Python patterns (improved version)
      if (
        /\b(def |class |import |from |print\(|if __name__|elif |except:|finally:|with |as |lambda |yield )/m.test(
          text
        ) &&
        !/<[^>]*>/.test(text) && // No HTML tags
        !/\bfunction\b/.test(text) // No JavaScript functions
      ) {
        console.log("[DEBUG] Inferred as Python language");
        return "python";
      }

      // JavaScript/TypeScript patterns.
      // Shebang scripts and YAML already returned above, so no re-check is
      // needed here. `=>` and `obj.method(` are matched without a trailing \b
      // so arrow functions and calls with non-word arguments are recognized.
      if (
        /\b(?:function\s+\w+|const|let|var|class|import|export)\b|=>|\b\w+\.\w+\(/.test(
          text
        )
      ) {
        console.log("[DEBUG] Inferred as JavaScript language");
        return "javascript";
      }

      // HTML patterns (excluding JSX)
      if (
        /<[a-z][^>]*>/.test(text) && // Lowercase tags (JSX components start with uppercase)
        !/\b(?:const|let|var|function|import|export)\b|=>/.test(text) // No JS/TS keywords
      ) {
        console.log("[DEBUG] Inferred as HTML language");
        return "html";
      }

      // CSS patterns (strict validation)
      if (
        /^\s*[.#]?\w+\s*\{|@media|@import|@keyframes/.test(text) ||
        (/\b(margin|padding|display|position|color|background|font-family|font-size|width|height|border|text-align):\s*/.test(
          text
        ) &&
          !/\b(const|let|var|function|return|if|for|while)\b/.test(text))
      ) {
        console.log("[DEBUG] Inferred as CSS language");
        return "css";
      }

      // JSON patterns: only attempt a parse when the text is bracketed
      if (/^\s*[{[]/.test(trimmed) && /[}\]]\s*$/.test(trimmed)) {
        try {
          JSON.parse(text);
          console.log("[DEBUG] Inferred as JSON language");
          return "json";
        } catch {
          // Not valid JSON: deliberately fall through to plaintext
        }
      }

      console.log("[DEBUG] Inferred as plaintext language");
      return "plaintext";
    },

    /**
     * Detailed method for YAML content determination.
     *
     * Diff content is rejected first; after that any of the YAML heuristics
     * (comment hint, Docker Compose keys, key:value lines without JavaScript
     * notation, list syntax, indented key hierarchy) is sufficient.
     *
     * @param {string} text - The text content to analyze
     * @return {boolean} True if content appears to be YAML
     */
    isYamlContent(text) {
      console.log(
        "[DEBUG] Starting YAML validation:",
        text.substring(0, 150) + "..."
      );

      // Exclude diff patterns from YAML detection
      // Covers: unified diff headers, git diff, /dev/null, hunk headers
      if (hasDiffHeader(text) || this.isContentOnlyDiff(text)) {
        console.log("[DEBUG] Excluded diff content from YAML detection");
        return false;
      }

      // 1. YAML comment patterns
      if (/^#.*\.(ya?ml|yml)/i.test(text) || /# YAML/.test(text)) {
        console.log("[DEBUG] Matched YAML comment pattern");
        return true;
      }

      // 2. Docker Compose specific keywords
      if (
        /^\s*(version|services|volumes|networks|configs|secrets|deploy|build):\s*["']?[\w.-]+["']?\s*$/m.test(
          text
        )
      ) {
        console.log("[DEBUG] Matched Docker Compose specific keywords");
        return true;
      }

      // 3. Typical YAML key:value patterns (colon and space)
      const yamlKeyValuePattern =
        /^\s*[a-zA-Z_][a-zA-Z0-9_-]*:\s*(["'].*["']|\d+\.?\d*|true|false|null|\[|\{|\||>|-\s|$)/m;
      if (yamlKeyValuePattern.test(text)) {
        console.log("[DEBUG] Matched YAML key:value pattern");
        // Additional check to distinguish from JavaScript object notation.
        // Symbol tokens (=>, (), .prototype) are kept outside the \b group so
        // they match regardless of the characters around them.
        const hasJsPatterns =
          /\b(?:function|const|let|var|class)\b|=>|\(\)|\.prototype\b/.test(
            text
          );
        if (!hasJsPatterns) {
          console.log(
            "[DEBUG] No JavaScript notation found, determined as YAML"
          );
          return true;
        }
      }

      // 4. YAML list syntax (hyphen + space)
      if (/^\s*-\s+\w+/m.test(text) && !/^\s*-\s*\d+\s*[-+*/]/.test(text)) {
        console.log("[DEBUG] Matched YAML list syntax");
        return true;
      }

      // 5. Indent-based hierarchical structure: among the first 10 non-empty
      // lines, look for a key:value line indented differently from the first one
      const lines = text.split("\n").filter((line) => line.trim());
      let hasIndentedStructure = false;
      let prevIndent = -1;

      for (const line of lines.slice(0, 10)) {
        if (line.trim().startsWith("#")) continue; // Skip comment lines
        if (!/:\s/.test(line)) continue; // Skip lines without key:value format

        const indent = line.length - line.trimStart().length;
        if (prevIndent >= 0 && indent !== prevIndent && indent > 0) {
          hasIndentedStructure = true;
          break;
        }
        if (prevIndent < 0) prevIndent = indent;
      }

      if (hasIndentedStructure) {
        console.log("[DEBUG] Matched indent hierarchical structure");
        return true;
      }

      console.log("[DEBUG] YAML validation: No match");
      return false;
    },

    /**
     * Detect content-only diff (patch without headers).
     *
     * The text qualifies when it has at least two lines, contains at least
     * one added (`+`) and one removed (`-`) line, and every other non-empty
     * line is a context line (leading whitespace). Any diff header line makes
     * this return false so the header-based patterns handle that text.
     *
     * @param {string} text - The text content to analyze
     * @return {boolean} True if content appears to be a content-only diff
     */
    isContentOnlyDiff(text) {
      const lines = text.trim().split("\n");

      // Must have at least 2 lines
      if (lines.length < 2) return false;

      let hasAddLine = false;
      let hasRemoveLine = false;

      for (const line of lines) {
        // Skip empty lines
        if (!line.trim()) continue;

        // Headers are not content-only; let other patterns handle them
        if (
          /^(?:diff --git|---\s|\+\+\+\s|@@\s+-\d|index\s+[a-f0-9])/.test(line)
        ) {
          return false;
        }

        if (/^\+(?!\+)/.test(line)) {
          hasAddLine = true;
        } else if (/^-(?!-)/.test(line)) {
          hasRemoveLine = true;
        } else if (!/^\s/.test(line)) {
          // Anything that is not an add, remove or context line rules out a
          // diff; the result is false regardless of the remaining lines.
          return false;
        }
      }

      // Content-only diff needs both + and - lines
      if (hasAddLine && hasRemoveLine) {
        console.log("[DEBUG] Detected content-only diff pattern");
        return true;
      }

      return false;
    },

    /**
     * Extract plain text from existing Shiki styles.
     *
     * Works on a deep clone, so the passed element is not modified.
     *
     * @param {HTMLElement} element - The highlighted element
     * @return {string} Plain text content
     */
    extractPlainTextFromHighlighted(element) {
      // Flatten every span in the clone into a plain text node
      const clone = element.cloneNode(true);
      const spans = clone.querySelectorAll("span");
      spans.forEach((span) => {
        const textNode = document.createTextNode(span.textContent);
        span.parentNode.replaceChild(textNode, span);
      });

      let text = clone.textContent || clone.innerText || "";

      // Decode entity sequences that are still present literally in the text.
      // NOTE(review): textContent is already entity-decoded, so this also
      // rewrites code that intentionally shows e.g. "&lt;" - presumably meant
      // for double-escaped content; confirm against callers.
      text = text
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&amp;/g, "&")
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'");

      console.log(
        "[DEBUG] Extracted plain text:",
        text.substring(0, 200) + "..."
      );
      return text;
    },

    /**
     * Mermaid block detection.
     *
     * True when the class string contains "mermaid" (which also covers
     * `language-mermaid`) or the whitespace-normalized text starts with a
     * Mermaid diagram keyword.
     *
     * @param {HTMLElement} element - The element to check
     * @return {boolean} True if element is a Mermaid block
     */
    isMermaidBlock(element) {
      // CSS class-based detection
      if (classNameOf(element).includes("mermaid")) {
        return true;
      }

      // Content-based detection for all Mermaid diagram types. The keyword
      // must be the first word, so prose containing "journey" or words such
      // as "pieces" do not count.
      const textContent = (element.textContent || "")
        .trim()
        .replace(/\s+/g, " ");
      return MERMAID_BLOCK_START.test(textContent);
    },
  };

  // Export to global scope: the detector object is attached to `window` as
  // `wpGfmLanguageDetector`, which is the only reference that leaves this IIFE.
  window.wpGfmLanguageDetector = LanguageDetector;
})();
