<?php
// Stop immediately when ABSPATH is not defined, i.e. the file was not loaded
// through a WordPress bootstrap that defines that constant.
if (!defined('ABSPATH')) { exit; }

class ALBP_Generator {

    /**
     * Decide whether a requested post touches a topic the generator refuses to write about.
     *
     * Title, keywords and outline entries are joined into one lowercased string that is
     * scanned for every banned term with a plain substring search. A term therefore also
     * matches when it is embedded in a longer word (e.g. "weed" inside "tweed").
     *
     * @param string      $title    Post title.
     * @param string|null $keywords Keyword string; null is treated as an empty string.
     * @param array       $outline  Outline entries (cast to array before joining).
     * @return bool True when at least one banned term occurs in the combined text.
     */
    private function is_prohibited_topic($title, $keywords, $outline = []){
        $outline_text = implode(' ', (array)$outline);
        $haystack = strtolower(trim($title.' '.($keywords ?? '').' '.$outline_text));

        // Banned terms grouped by theme; the grouping is purely for readability.
        $groups = [
            ['adult','sex','xxx','porn','nsfw','escort'],
            ['gambling','casino','betting','poker','slots'],
            ['usury','riba','interest loan','payday loan','credit with interest'],
            ['drugs','cannabis','marijuana','weed','opioid','steroids','cocaine','heroin'],
            ['religion','religions','christian','christianity','islam','muslim','jewish','judaism','buddhism','hindu'],
            ['haram'],
        ];

        foreach ($groups as $terms) {
            foreach ($terms as $term) {
                if ($term === '') {
                    continue;
                }
                if (strpos($haystack, $term) !== false) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Send a chat-completions request to OpenAI and return the first choice's text.
     *
     * The API key is read from the `albp_openai_key` option. The request always uses
     * temperature 1 and a 60 second timeout.
     *
     * @param array  $messages Chat messages (role/content pairs) sent as-is.
     * @param string $model    Model identifier placed in the request body.
     * @return string|WP_Error Message content on success; WP_Error when the key is missing,
     *                         the HTTP request fails, the status is >= 400 or no content
     *                         is present in the decoded response.
     */
    private function openai_chat($messages, $model) {
        $api_key = get_option('albp_openai_key', '');
        if (!$api_key) {
            return new WP_Error('albp_no_key', 'Missing OpenAI key');
        }

        $payload = [
            'model'       => $model,
            'messages'    => $messages,
            'temperature' => 1,
        ];
        $request = [
            'timeout' => 60,
            'headers' => [
                'Authorization' => 'Bearer '.$api_key,
                'Content-Type'  => 'application/json',
            ],
            'body'    => wp_json_encode($payload),
        ];

        $response = wp_remote_post('https://api.openai.com/v1/chat/completions', $request);
        if (is_wp_error($response)) {
            return $response;
        }

        $status  = wp_remote_retrieve_response_code($response);
        $raw     = wp_remote_retrieve_body($response);
        $decoded = json_decode($raw, true);

        // Treat both HTTP-level failures and structurally empty answers as errors,
        // and surface the raw body so the caller can see what the API said.
        $failed = $status >= 400 || empty($decoded['choices'][0]['message']['content']);
        if ($failed) {
            return new WP_Error('albp_openai_err', 'OpenAI error: '.$raw);
        }

        return $decoded['choices'][0]['message']['content'];
    }

    /**
     * Look up a single photo on Pexels for the given search phrase.
     *
     * The API key is read from the `albp_pexels_key` option.
     *
     * @param string $query Search phrase (URL-encoded before it is sent).
     * @return array|null Array with keys `url`, `photographer` and `link`, or null when
     *                    the key is missing, the request fails, or the first photo has
     *                    no `large` source.
     */
    private function pexels_image($query) {
        $api_key = get_option('albp_pexels_key', '');
        if (!$api_key) {
            return null;
        }

        $endpoint = 'https://api.pexels.com/v1/search?per_page=1&query='.rawurlencode($query);
        $response = wp_remote_get($endpoint, [
            'timeout' => 30,
            'headers' => ['Authorization' => $api_key],
        ]);
        if (is_wp_error($response)) {
            return null;
        }

        $payload = json_decode(wp_remote_retrieve_body($response), true);
        if (empty($payload['photos'][0]['src']['large'])) {
            return null;
        }

        $photo = $payload['photos'][0];

        return [
            // Prefer the higher-resolution rendition when the API provides one.
            'url'          => $photo['src']['large2x'] ?? $photo['src']['large'],
            'photographer' => $photo['photographer'] ?? '',
            'link'         => $photo['url'] ?? '',
        ];
    }

    /**
     * Download a remote image and sideload it into the media library.
     *
     * @param array|null $image Image descriptor; only its `url` entry is used.
     * @return int|mixed Attachment ID returned by media_handle_sideload(), or 0 when no URL
     *                   was given, the download failed or the sideload failed.
     */
    private function download_to_media($image) {
        if (!$image || empty($image['url'])) {
            return 0;
        }

        // The sideload helpers live in admin-only include files.
        foreach (['file.php', 'media.php', 'image.php'] as $admin_file) {
            include_once ABSPATH . 'wp-admin/includes/' . $admin_file;
        }

        $remote_url = $image['url'];
        $temp_path  = download_url($remote_url);
        if (is_wp_error($temp_path)) {
            return 0;
        }

        // Name the upload after the last path segment of the URL (query string excluded).
        $sideload = [
            'name'     => basename(wp_parse_url($remote_url, PHP_URL_PATH)),
            'tmp_name' => $temp_path,
        ];

        $attachment_id = media_handle_sideload($sideload, 0);
        if (!is_wp_error($attachment_id)) {
            return $attachment_id;
        }

        // Sideload failed: remove the temporary download so it does not linger.
        wp_delete_file($temp_path);
        return 0;
    }

    /**
     * Ensure the article ends with exactly one normalized "References" list built from
     * the supplied source URLs.
     *
     * Sources are trimmed, passed through esc_url_raw() and de-duplicated
     * case-insensitively. The resulting list then replaces, in order of preference:
     *   1. an existing <section id="sources">…</section>,
     *   2. the content following an <h2>References</h2> heading,
     *   3. the content following a <p>References</p> paragraph,
     * and is appended at the end when none of those exist.
     *
     * Replacements are produced by a callback rather than a replacement string, so URLs
     * containing "$1", "\1" and similar sequences are inserted literally instead of being
     * interpreted as backreferences. If PCRE reports an error for a pattern the original
     * HTML is kept and the next strategy is tried.
     *
     * @param string $html    Article HTML.
     * @param array  $sources Source URLs (any value is cast to array).
     * @return string HTML with the references list in place; unchanged when no usable
     *                source URL remains after sanitizing.
     */
    private function append_references($html, $sources) {
        // 1) Dedupe + sanitize the provided sources.
        $seen  = [];
        $clean = [];
        foreach ((array)$sources as $u) {
            $u = trim((string)$u);
            if (!$u) continue;
            $href = esc_url_raw($u);
            if (!$href) continue;
            $k = strtolower($href);
            if (isset($seen[$k])) continue;
            $seen[$k] = true;
            $clean[]  = $href;
        }

        // 2) Build one canonical ordered list with clickable links.
        $items = '';
        foreach ($clean as $u) {
            $items .= '<li><a href="'.esc_url($u).'" rel="nofollow ugc" target="_blank">'.esc_html($u).'</a></li>';
        }
        if ($items === '') {
            return $html; // nothing to add
        }
        $list = '<ol>'.$items.'</ol>';

        $section       = '<section id="sources" class="article-sources"><h2>References</h2>'.$list.'</section>';
        $heading_block = '<h2>References</h2>'.$list;

        // 3) Replacement strategies, tried in order; the first one that matches wins.
        $strategies = [
            // Existing <section id="sources">: replace it entirely (no leftovers).
            ['pattern' => '#<section[^>]*\bid=["\']sources["\'][^>]*>.*?</section>#is', 'with' => $section],
            // <h2>References</h2>: replace the heading and everything under it.
            ['pattern' => '#(<h2[^>]*>\s*References\s*</h2>)(.*?)(?=<h2|<section\b|$)#is', 'with' => $heading_block],
            // Plain <p>References</p>: replace that block too.
            ['pattern' => '#(<p[^>]*>\s*References\s*</p>)(.*?)(?=<h2|<section\b|$)#is', 'with' => $section],
        ];

        foreach ($strategies as $strategy) {
            $replacement = $strategy['with'];
            $hits        = 0;
            $result      = preg_replace_callback(
                $strategy['pattern'],
                function () use ($replacement) {
                    // Returned verbatim: "$n" / "\n" inside URLs are not backreferences here.
                    return $replacement;
                },
                $html,
                1,
                $hits
            );
            if ($result === null) {
                continue; // PCRE error: keep the original HTML and try the next strategy
            }
            if ($hits) {
                return $result;
            }
        }

        // 4) Otherwise, append a normalized References section at the end.
        return $html . "\n" . $section;
    }

    /**
     * Add a schema.org Article JSON-LD script to the given HTML.
     *
     * The script is placed before every closing </body> tag when one is present
     * (case-insensitive); otherwise it is appended after a newline.
     *
     * @param string $html    Article HTML.
     * @param string $title   Headline; tags are stripped before it is emitted.
     * @param string $url     URL used as mainEntityOfPage.
     * @param array  $sources Citation URLs; empty entries are dropped.
     * @return string HTML including the JSON-LD script.
     */
    private function inject_jsonld($html, $title, $url, $sources) {
        $citations = array_values(array_filter($sources));

        $schema = [
            "@context"         => "https://schema.org",
            "@type"            => "Article",
            "headline"         => wp_strip_all_tags($title),
            "mainEntityOfPage" => esc_url_raw($url),
            "citation"         => $citations,
        ];
        $script = '<script type="application/ld+json">'.wp_json_encode($schema).'</script>';

        $has_body_close = stripos($html, '</body>') !== false;

        return $has_body_close
            ? str_ireplace('</body>', $script.'</body>', $html)
            : $html . "\n" . $script;
    }

    /**
     * Mark links that look like affiliate links with rel="sponsored nofollow ugc".
     *
     * A link counts as affiliate when its href contains one of a fixed set of markers
     * (amazon., impact, awin, shareasale, ref=, aff=, utm_campaign=affiliate).
     * When the anchor already has a rel attribute, only the missing tokens are appended,
     * so existing values such as "nofollow ugc" are not duplicated. Anchors without a rel
     * attribute receive the full rel value.
     *
     * @param string $html HTML to scan.
     * @return string HTML with affiliate anchors tagged; the input is returned unchanged
     *                if the regex engine reports an error.
     */
    private function tag_affiliate_links($html) {
        $tagged = preg_replace_callback('#<a\\s+([^>]*href\\s*=\\s*["\\\']([^"\\\']+)["\\\'][^>]*)>#i', function($m){
            $attr = $m[1];
            $href = $m[2];

            $isAff = preg_match('#(amazon\\.|impact|awin|shareasale|ref=|aff=|utm_campaign=affiliate)#i', $href);
            if (!$isAff) {
                return "<a {$attr}>";
            }

            if (preg_match('#rel\\s*=\\s*["\\\']([^"\\\']*)["\\\']#i', $attr, $rm)) {
                // Merge instead of blindly appending: keep existing tokens in order and
                // add only the required ones that are not present yet (case-insensitive).
                $tokens = preg_split('/\s+/', trim($rm[1]), -1, PREG_SPLIT_NO_EMPTY);
                $tokens = is_array($tokens) ? $tokens : [];
                $present = array_map('strtolower', $tokens);
                foreach (['sponsored', 'nofollow', 'ugc'] as $required) {
                    if (!in_array($required, $present, true)) {
                        $tokens[]  = $required;
                        $present[] = $required;
                    }
                }
                $attr = str_ireplace($rm[0], 'rel="'.implode(' ', $tokens).'"', $attr);
            } else {
                $attr .= ' rel="sponsored nofollow ugc"';
            }

            return "<a {$attr}>";
        }, $html);

        // preg_replace_callback() yields null on PCRE errors; never lose the document.
        return $tagged === null ? $html : $tagged;
    }

    /**
     * Count how many references an article lists.
     *
     * Three strategies are evaluated:
     *   1. <li> items of the first <ol>/<ul> found after a <section id="sources"> opening tag,
     *   2. <li> items of an <ol>/<ul> directly following <h2>References</h2>,
     *   3. only when both yield zero: the number of distinct [n] markers (1-3 digits)
     *      in the tag-stripped text.
     * The larger of the list counts is returned.
     *
     * @param string $html Article HTML.
     * @return int Number of citations detected.
     */
    private function count_citations($html) {
        $count = 0;

        // "[ou]l" matches both "ol" and "ul"; the backreference pairs the closing tag.
        $list_patterns = [
            // 1) <section id="sources"> … <ol>/<ul> … </ol>/</ul>
            '#<section[^>]*id=["\']sources["\'][^>]*>.*?<([ou]l)\b[^>]*>(.*?)</\1>#is',
            // 2) <h2>References</h2> followed by <ol>/<ul>
            '#<h2[^>]*>\s*References\s*</h2>\s*<([ou]l)\b[^>]*>(.*?)</\1>#is',
        ];

        foreach ($list_patterns as $pattern) {
            if (preg_match($pattern, $html, $m) && !empty($m[2])) {
                $items = preg_match_all('#<li\b#i', $m[2]);
                $count = max($count, (int) $items);
            }
        }

        // 3) Fallback: count unique [n] style markers in plain text.
        if ($count === 0) {
            $plain = wp_strip_all_tags($html);
            if (preg_match_all('/\[(\d{1,3})\]/', $plain, $mm)) {
                $count = max($count, count(array_unique($mm[1])));
            }
        }

        return $count;
    }

    /**
     * Measure an article against minimum word, heading and citation thresholds.
     *
     * Words are counted on the tag-stripped text with a Unicode-aware pattern (a letter
     * followed by letters, combining marks, apostrophes or hyphens), so accented and
     * non-Latin words count as single words. If the text is not valid UTF-8 and the
     * pattern fails, the byte-based str_word_count() is used instead.
     * Headings are the number of "<h2" / "<h3" occurrences in the HTML.
     *
     * @param string $html         Article HTML.
     * @param int    $minWords     Minimum word count.
     * @param int    $minHeadings  Minimum number of H2/H3 headings.
     * @param int    $minCitations Minimum number of citations.
     * @return array Keys: `pass` (bool), `words` (int), `headings` (int), `citations` (int).
     */
    private function quality_check($html, $minWords, $minHeadings, $minCitations) {
        $plain = wp_strip_all_tags($html);

        $words = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $plain);
        if ($words === false) {
            // /u rejects invalid UTF-8; fall back to the ASCII-oriented counter.
            $words = str_word_count($plain);
        }

        $headings  = (int) preg_match_all('#<h2|<h3#i', $html);
        $citations = $this->count_citations($html);

        $pass = ($words >= $minWords) && ($headings >= $minHeadings) && ($citations >= $minCitations);

        return compact('pass','words','headings','citations');
    }

    /**
     * Append a "Suggested Internal Links" section listing up to five published posts
     * found by searching for the given keywords.
     *
     * @param string      $content  Article HTML.
     * @param string|null $keywords Keywords separated by commas or semicolons.
     * @return string The content, with the section appended when keywords are present
     *                and the search returned posts; otherwise unchanged.
     */
    private function suggest_internal_links($content, $keywords) {
        $pieces = preg_split('/[;,]\\s*/', (string) $keywords);
        $tokens = array_filter(array_map('trim', is_array($pieces) ? $pieces : []));
        if (empty($tokens)) return $content;

        $q = new WP_Query([
            'post_type'      => 'post',
            'post_status'    => 'publish',
            'posts_per_page' => 5,
            's'              => implode(' ', $tokens),
            // Only the rows themselves are used; skip the total-row count query.
            'no_found_rows'  => true,
        ]);
        if (!$q->have_posts()) return $content;

        $list = '';
        foreach ($q->posts as $p) {
            // Escape the permalink for use inside an attribute, like the title text.
            $list .= '<li><a href="'.esc_url(get_permalink($p)).'" target="_blank">'.esc_html(get_the_title($p)).'</a></li>';
        }

        return $content . "\n" . '<section class="albp-internal-links"><h2>Suggested Internal Links</h2><ul>'.$list.'</ul></section>';
    }

    /**
     * Word-limit hook that currently passes the HTML through untouched.
     *
     * Truncation is disabled so the generated structure is preserved.
     *
     * @param string $html Article HTML.
     * @param int    $max  Maximum word count; not used at present.
     * @return string The HTML exactly as received.
     */
    private function clamp_words($html, $max) {
        return $html;
    }

    /**
     * Turn Markdown "##" / "###" heading lines into <h2> / <h3> elements.
     *
     * Matching is strictly per line: only spaces and tabs are allowed before the hashes
     * and between the hashes and the title, so blank lines around a heading are kept and
     * a bare "###" line cannot absorb the following line. Trailing spaces/tabs and a
     * trailing carriage return (CRLF input) are removed from the heading line.
     * Other heading levels ("#", "####", …) are left untouched.
     *
     * @param string $text Text that may contain Markdown headings.
     * @return string Text with level 2 and 3 headings converted.
     */
    private function convert_md_headings($text) {
        // Level 3 first, mirroring the order in which the levels are documented above;
        // the patterns cannot match each other's lines because "##" must be followed by
        // a space or tab.
        $rules = [
            '/^[ \t]*###[ \t]+(.*?)[ \t]*\r?$/m' => '<h3>$1</h3>',
            '/^[ \t]*##[ \t]+(.*?)[ \t]*\r?$/m'  => '<h2>$1</h2>',
        ];

        foreach ($rules as $pattern => $replacement) {
            $converted = preg_replace($pattern, $replacement, $text);
            if ($converted !== null) {
                $text = $converted; // on a PCRE error keep the text as it was
            }
        }

        return $text;
    }

    /**
     * Render lightweight Markdown to HTML: **bold**, *italic*, `code` and bullet lists.
     *
     * The text is processed line by line:
     *  - consecutive bullet lines ("-", "*" or "–" followed by whitespace) become one <ul>;
     *  - plain text buffered directly above a list is promoted to an <h3> when it is
     *    non-empty, contains no "<h2"/"<h3" and no tags at all; otherwise it is emitted
     *    as inline-converted text;
     *  - blank lines and lines starting with <h2>/<h3> flush the buffered text and are
     *    emitted as they are;
     *  - every other line is trimmed and added to the buffer.
     *
     * @param string $text Text with Markdown-style inline markup and bullets.
     * @return string Rendered text, one output entry per line joined with "\n".
     */
    private function render_inline_and_lists($text) {
        $bullet_re = '/^\\s*([-*–])\\s+(.+)/u';

        // Inline conversion. Code spans are swapped for placeholders first so that
        // asterisks inside them are not read as bold/italic markers.
        $inline = function($fragment){
            $stash = [];
            $fragment = preg_replace_callback('/`([^`]+)`/', function($hit) use (&$stash){
                $slot = count($stash);
                $stash[$slot] = '<code>'.esc_html($hit[1]).'</code>';
                return "@@CODE{$slot}@@";
            }, $fragment);
            $fragment = preg_replace('/\\*\\*([^\\*\\n]+)\\*\\*/', '<strong>$1</strong>', $fragment);
            $fragment = preg_replace('/(?<!\\*)\\*([^\\*\\n]+)\\*(?!\\*)/', '<em>$1</em>', $fragment);
            foreach ($stash as $slot => $code_html) {
                $fragment = str_replace("@@CODE{$slot}@@", $code_html, $fragment);
            }
            return $fragment;
        };

        $rows     = preg_split("/\\r?\\n/", $text);
        $total    = count($rows);
        $rendered = [];
        $buffer   = null; // plain text collected since the last flush, or null when empty

        // Emit the buffered plain text (inline-converted) and reset the buffer.
        $flush = function() use (&$rendered, &$buffer, $inline){
            if ($buffer === null) {
                return;
            }
            $rendered[] = $inline($buffer);
            $buffer = null;
        };

        for ($pos = 0; $pos < $total; ) {
            $row = rtrim($rows[$pos]);

            if (preg_match($bullet_re, $row)) {
                if ($buffer !== null) {
                    $lead = trim($buffer);
                    $is_plain = $lead !== ''
                        && stripos($lead, '<h2') === false
                        && stripos($lead, '<h3') === false
                        && wp_strip_all_tags($lead) === $lead;
                    $lead_html = $inline($lead);
                    $rendered[] = $is_plain ? '<h3>'.$lead_html.'</h3>' : $lead_html;
                    $buffer = null;
                }

                // Consume the whole run of bullet lines; $pos ends on the first non-bullet.
                $bullets = '';
                for (; $pos < $total; $pos++) {
                    if (!preg_match($bullet_re, rtrim($rows[$pos]), $cap)) {
                        break;
                    }
                    $bullets .= '<li>'.$inline(trim($cap[2])).'</li>';
                }
                $rendered[] = '<ul>'.$bullets.'</ul>';
                continue;
            }

            // Every remaining case consumes exactly this one line.
            $pos++;

            if (trim($row) === '') {
                $flush();
                $rendered[] = '';
                continue;
            }

            if (preg_match('/^\\s*<h[23]\\b/i', $row)) {
                $flush();
                $rendered[] = $row;
                continue;
            }

            $piece  = trim($row);
            $buffer = ($buffer === null) ? $piece : ($buffer . "\n" . $piece);
        }

        $flush();

        return implode("\n", $rendered);
    }

    /**
     * Fill Yoast SEO fields (meta description, SEO title, focus keyword, primary
     * category) for a generated post.
     *
     * The description is taken from the first <p> of the content after the References
     * section and JSON-LD script have been removed; if that is empty, from the first
     * sentence or first 155 characters of the whole text. It is limited to roughly
     * 155 characters. Values are written through WPSEO_Meta when that class exists,
     * with raw post meta as the fallback.
     *
     * @param int    $post_id        Post to update.
     * @param string $title          Post title (tags stripped, whitespace collapsed).
     * @param string $content        Post HTML.
     * @param string $keywords       Keywords separated by commas/semicolons; the first is the focus keyword.
     * @param int    $primary_cat_id Primary category ID, 0 to skip.
     * @return void
     */
    private function yoast_fill($post_id, $title, $content, $keywords, $primary_cat_id = 0) {
        // Drop the References block and the JSON-LD script so neither ends up in the snippet.
        $body = preg_replace('#<section[^>]*id=["\']sources["\'][^>]*>.*?</section>#is', '', $content);
        $body = preg_replace('#<script[^>]*type=["\']application/ld\+json["\'][^>]*>.*?</script>#is', '', $body);

        // Markup -> single-line plain text with entities decoded.
        $to_text = function($markup){
            $decoded = html_entity_decode( wp_strip_all_tags($markup), ENT_QUOTES, get_bloginfo('charset') );
            return trim(preg_replace('/\s+/u',' ', $decoded));
        };

        // Preferred source: the first paragraph.
        $desc = '';
        if (preg_match('#<p[^>]*>\s*(.*?)\s*</p>#is', $body, $para)) {
            $desc = $to_text($para[1]);
        }

        // Fallback: first sentence (80-160 chars) or simply the first 155 characters.
        if ($desc === '') {
            $all = $to_text($body);
            if (preg_match('/^(.{80,160}?\.)\s/u', $all, $sentence)) {
                $desc = $sentence[1];
            } else {
                $desc = mb_substr($all, 0, 155);
            }
        }

        // Limit to ~155 characters, dropping the trailing partial word and adding an ellipsis.
        if (mb_strlen($desc) > 155) {
            $head = preg_replace('/\s+\S*$/u','', mb_substr($desc, 0, 155));
            $desc = rtrim($head, ' .,;:!').'…';
        }

        $seo_title = trim(preg_replace('/\s+/u',' ', wp_strip_all_tags($title)));

        // Direct meta write, used when the Yoast API is missing or throws.
        $write_raw = function() use ($post_id, $desc, $seo_title){
            update_post_meta($post_id, '_yoast_wpseo_metadesc', $desc);
            update_post_meta($post_id, '_yoast_wpseo_title', $seo_title);
        };

        if (!class_exists('WPSEO_Meta')) {
            $write_raw();
        } else {
            try {
                \WPSEO_Meta::set_value('metadesc', $desc, $post_id);
                \WPSEO_Meta::set_value('title', $seo_title, $post_id);
                if (!empty($keywords)) {
                    $parts = preg_split('/[;,]\s*/', $keywords);
                    if (!empty($parts[0])) {
                        \WPSEO_Meta::set_value('focuskw', sanitize_text_field($parts[0]), $post_id);
                    }
                }
            } catch (\Throwable $e) {
                $write_raw();
            }
        }

        if ($primary_cat_id) {
            update_post_meta($post_id, '_yoast_wpseo_primary_category', (int) $primary_cat_id);
        }

        // Ask Yoast to refresh its indexable when a suitable helper method is available.
        // NOTE(review): this only runs if `helpers` is an actual method on the object
        // returned by YoastSEO(); confirm against the installed Yoast version.
        if (function_exists('YoastSEO')) {
            try {
                $has_helper = method_exists(YoastSEO(), 'helpers') && isset(YoastSEO()->helpers->indexable);
                if ($has_helper) {
                    foreach (['index_on_the_fly', 'get_indexable_by_id'] as $method) {
                        if (method_exists(YoastSEO()->helpers->indexable, $method)) {
                            YoastSEO()->helpers->indexable->{$method}($post_id, 'post');
                            break;
                        }
                    }
                }
            } catch (\Throwable $e) { /* ignore */ }
        }
    }

    /**
     * Replace every occurrence of a blacklisted term with an em dash.
     *
     * Terms come from the comma-separated `albp_blacklist` option and are matched as
     * literal, case-insensitive substrings. When both the text and the term are valid
     * UTF-8 the match is Unicode-aware, so case-insensitivity also applies to non-ASCII
     * letters; otherwise matching falls back to byte-wise, ASCII-only case folding.
     *
     * @param string $text Text to clean.
     * @return string Text with blacklisted terms replaced; unchanged when the option is empty.
     */
    private function apply_blacklist($text) {
        $black = get_option('albp_blacklist','');
        if (!$black) return $text;

        $terms = array_filter(array_map('trim', explode(',', $black)));

        // preg_match('//u', …) is a cheap UTF-8 validity probe that never raises a warning.
        $text_is_utf8 = preg_match('//u', (string) $text) === 1;

        foreach ($terms as $term) {
            if ($term === '') continue;

            $flags  = ($text_is_utf8 && preg_match('//u', $term) === 1) ? 'iu' : 'i';
            $result = preg_replace('/'.preg_quote($term, '/').'/'.$flags, '—', $text);

            // Keep the previous text if PCRE reports an error instead of propagating null.
            if ($result !== null) {
                $text = $result;
            }
        }

        return $text;
    }

    /**
     * Derive post tags from a keyword string.
     *
     * The string is split on commas/semicolons; the first ten non-empty tokens are
     * sanitized and kept when they are between 3 and 30 characters long.
     *
     * @param string $keywords Keywords separated by commas or semicolons.
     * @return array Unique tag names (duplicates removed, original keys preserved).
     */
    private function tags_from_keywords($keywords) {
        $pieces     = preg_split('/[;,]\\s*/', $keywords);
        $candidates = array_slice(array_filter(array_map('trim', $pieces)), 0, 10);

        $accepted = [];
        foreach ($candidates as $candidate) {
            $clean  = sanitize_text_field($candidate);
            $length = mb_strlen($clean);
            if ($length < 3 || $length > 30) {
                continue;
            }
            $accepted[] = $clean;
        }

        return array_unique($accepted);
    }

    /**
     * Generate a blog post for one queue item and insert it into WordPress.
     *
     * Steps: sanitize the item, refuse prohibited topics, ask OpenAI for the article,
     * post-process the text (blacklist, Markdown headings/lists, kses, references,
     * affiliate rel tagging, internal link suggestions), fetch a Pexels lead image,
     * run the quality check, insert the post, attach the image, inject JSON-LD, store
     * QC/compliance meta, fill Yoast fields and assign tags.
     *
     * The model is taken from the `albp_openai_model` option and falls back to
     * "o4-mini" when the option is empty. All post data handed to wp_insert_post() /
     * wp_update_post() is passed through wp_slash(), because those functions unslash
     * their input and would otherwise strip backslashes (e.g. JSON escapes in the
     * JSON-LD script).
     *
     * @param array $item Queue item with optional keys `title`, `keywords`, `sources`,
     *                    `outline`, `image_query` and `cats`.
     * @return array On success: `ok` => true, `post_id`, `status`, `qc`.
     *               On failure: `ok` => false and `msg`.
     */
    public function generate_and_insert($item) {
        $model = trim((string) get_option('albp_openai_model', 'o4-mini'));
        if ($model === '') { $model = 'o4-mini'; }
        $lang  = get_option('albp_language','en');
        $minW  = (int) get_option('albp_min_words',400);
        $maxW  = (int) get_option('albp_max_words',1200);

        // Sanitize the item; list-like fields are cast so a scalar cannot break array_map().
        $title   = sanitize_text_field($item['title'] ?? '');
        $keywords= sanitize_text_field($item['keywords'] ?? '');
        $sources = array_values(array_filter(array_map('esc_url_raw', (array) ($item['sources'] ?? []))));
        $outline = array_map('sanitize_text_field', (array) ($item['outline'] ?? []));
        $image_q = sanitize_text_field($item['image_query'] ?? '');
        if ($image_q === '') { $image_q = $title; } // empty query would search for nothing
        $cats    = array_map('intval', (array) ($item['cats'] ?? []));
        if (!$title) return ['ok'=>false, 'msg'=>'Missing title'];
        if ($this->is_prohibited_topic($title, $keywords, $outline)) { return ['ok'=>false, 'msg'=>'Prohibited topic (compliance).']; }

        $blacklist = trim(get_option('albp_blacklist',''));
        $blk_line = $blacklist ? "\nStrictly avoid these words/phrases anywhere in the content: ".$blacklist : '';

        $sys = "You are a careful, senior editor. Write a helpful, original blog post in {$lang} for WordPress.
- Use a brief-first structure based on the provided outline.
- Use proper HTML headings (<h2>/<h3>) for sections. Do NOT use markdown ##/###.
- Cite ONLY the provided sources (do not invent URLs). Add a 'References' section at the end linking to them.
- No clickbait. Use H2/H3 headings, bullets where helpful, and a short conclusion.
{$blk_line}
- Do NOT produce content related to adult topics, sexual content, gambling, credit with interest/usury, drugs, or religion (haram topics). If requested, respond with a short safe notice instead.
- If not enough sources are provided, warn gently in the draft header and suggest 3 missing angles.";
        $usr = "TITLE: {$title}
KEYWORDS: {$keywords}
OUTLINE: ".implode(' | ', $outline)."
SOURCES: ".implode(', ', $sources)."
Requirements:
- Include at least 3 citations to the listed sources in the body (e.g., [1], [2]) and ensure the 'References' section lists them with links.
- Word count target: {$minW} to {$maxW} words.
";

        $messages = [['role'=>'system','content'=>$sys],['role'=>'user','content'=>$usr]];
        $content = $this->openai_chat($messages, $model);
        if (is_wp_error($content)) { return ['ok'=>false, 'msg'=>$content->get_error_message()]; }

        // Text-level clean-up before the content is treated as HTML.
        $content = $this->apply_blacklist($content);
        $content = $this->convert_md_headings($content);
        $content = $this->render_inline_and_lists($content);

        // HTML-level post-processing.
        $html = wp_kses_post(wpautop($content));
        $html = $this->clamp_words($html, $maxW);
        $html = $this->append_references($html, $sources);
        $html = $this->tag_affiliate_links($html);
        $html = $this->suggest_internal_links($html, $keywords);

        $thumb_id = 0;
        $pex = $this->pexels_image($image_q);
        if ($pex) { $thumb_id = $this->download_to_media($pex); }

        // Publish only when configured to do so and the quality check passes.
        $status_mode = get_option('albp_default_status','draft');
        $minHead  = (int) get_option('albp_min_headings',3);
        $minCite  = (int) get_option('albp_min_citations',3);
        $qc = $this->quality_check($html, $minW, $minHead, $minCite);
        $post_status = ($status_mode === 'publish_if_qc' && $qc['pass']) ? 'publish' : 'draft';

        $post_id = wp_insert_post(wp_slash([
            'post_title'=>$title,
            'post_content'=>$html,
            'post_status'=>$post_status,
            'post_type'=>'post',
            'post_category'=>$cats,
        ]), true);
        if (is_wp_error($post_id)) {
            // Do not leave the freshly sideloaded image behind as an orphan attachment.
            if ($thumb_id) { wp_delete_attachment($thumb_id, true); }
            return ['ok'=>false, 'msg'=>$post_id->get_error_message()];
        }

        if ($thumb_id) {
            set_post_thumbnail($post_id, $thumb_id);
            wp_update_post(['ID'=>$thumb_id, 'post_parent'=>$post_id]);
            update_post_meta($thumb_id, '_wp_attachment_image_alt', sanitize_text_field($title));
            $content_with_img = get_post_field('post_content', $post_id);
            // Only add a lead figure when the article does not already contain an image.
            if (stripos($content_with_img, '<img') === false) {
                $img_tag = wp_get_attachment_image($thumb_id, 'large');
                $caption = '';
                if ($pex && (!empty($pex['photographer']) || !empty($pex['link']))) {
                    $caption = '<figcaption>Photo: '.esc_html($pex['photographer']).(!empty($pex['link'])?' (<a href="'.esc_url($pex['link']).'" target="_blank" rel="nofollow">Pexels</a>)':'').'</figcaption>';
                }
                $figure = '<figure class="albp-lead-image">'.$img_tag.$caption.'</figure>';
                wp_update_post(wp_slash(['ID'=>$post_id, 'post_content'=>$figure."\n".$content_with_img]));
            }
        }

        // NOTE(review): the JSON-LD <script> is stored inside post_content; confirm it
        // survives content filtering for the user/context this runs under.
        $permalink = get_permalink($post_id);
        $html2 = $this->inject_jsonld(get_post_field('post_content', $post_id), $title, $permalink, $sources);
        wp_update_post(wp_slash(['ID'=>$post_id, 'post_content'=>$html2]));
        update_post_meta($post_id, '_albp_qc', wp_json_encode($qc));
        update_post_meta($post_id, '_albp_generated_by', 'ALBP');
        update_post_meta($post_id, '_albp_noindex', (get_option('albp_compliance_mode',1) && !$qc['pass']) ? 1 : 0);

        $primary_cat = !empty($cats) ? $cats[0] : 0;
        $this->yoast_fill($post_id, $title, $html2, $keywords, $primary_cat);

        $tags = $this->tags_from_keywords($keywords);
        if (!empty($tags)) { wp_set_post_terms($post_id, $tags, 'post_tag', false); }

        return ['ok'=>true, 'post_id'=>$post_id, 'status'=>$post_status, 'qc'=>$qc];
    }
}
