#!/usr/bin/env python3
"""
Audit estructural sección-por-sección por demo.

Para cada vista base (welcome/about/contact), compara:
  - Las SECCIONES VISIBLES del blade frontend (delimitadas por comments {{-- SECTION X --}}
    o por elementos <section>/<div class="section">)
  - Los BLOQUES del admin form (delimitados por <h4>N. Título</h4>)

Reporta:
  ✅ Sección del blade tiene bloque admin correspondiente
  🔴 Sección del blade que NO tiene bloque admin (cliente no puede editarla)
  ⚠️  Sección del admin que no parece corresponder a sección del blade

Uso:
    python3 scripts/audit-section-parity.py <core-slug>
    python3 scripts/audit-section-parity.py --all
"""

import json
import re
import sys
from pathlib import Path

# Project root: two directory levels above this file's resolved location
# (the usage text runs the script as scripts/audit-section-parity.py).
# Every path handed to read() is joined onto this.
ROOT = Path(__file__).resolve().parent.parent


def read(path):
    """
    Return the text of a project file, or None when it is not a regular file.

    Args:
        path: location relative to ROOT (an absolute path is used as-is by
            the path join).

    Returns:
        The file content decoded as UTF-8, or None if the path does not
        point to an existing regular file.
    """
    full = ROOT / path
    # is_file() rather than exists(): a directory with that name would make
    # read_text() raise instead of being reported as "missing".
    if not full.is_file():
        return None
    # Decode explicitly as UTF-8: the default would follow the machine's
    # locale and garble or reject accented text on non-UTF-8 systems.
    return full.read_text(encoding='utf-8')


def extract_blade_sections(blade_content):
    """
    Find section markers in a frontend Blade template.

    Each line is tried against three single-line comment shapes; the first
    shape that matches wins for that line:
      1. '====='   : a title wrapped in rule characters (=, -, ─ or ━)
      2. 'header'  : an upper-case title, a dash, then lower-case text
      3. 'SECTION' : "SECTION N: TITLE" closed on the same line (any case)
    Only when no line matched any of those, a fallback looks for multi-line
    banners: a line that opens a Blade comment and contains a rule
    (═, ━ or ----), followed within the next four lines by "SECTION N: TITLE".

    Returns a list of dicts with the keys 'name', 'start_line' (1-based) and
    'pattern' (the label of the shape that matched).
    """
    rows = blade_content.split('\n')

    # (compiled rule, label) pairs, listed in priority order.
    single_line_rules = (
        (re.compile(r'\{\{--\s*[=\-\─━]+\s*([A-Z][A-Z0-9 ÁÉÍÓÚÑ&/\-]{3,60}?)(\s*[:\-—:].*)?\s*[=\-\─━]+\s*--\}\}'), '====='),
        (re.compile(r'\{\{--\s*([A-Z][A-Z0-9 ÁÉÍÓÚÑ&/\-]{3,60})\s*[\——\-]\s*[a-z]'), 'header'),
        (re.compile(r'\{\{--\s*SECTION\s+\d+[:\.\)]\s*([A-Z][A-Z0-9 ÁÉÍÓÚÑ&/\-]{3,60})\s*--\}\}', re.I), 'SECTION'),
    )

    found = []
    for number, text in enumerate(rows, 1):
        for rule, label in single_line_rules:
            hit = rule.search(text)
            if hit:
                found.append({'name': hit.group(1).strip(), 'start_line': number, 'pattern': label})
                break  # one section per line at most

    if found:
        return found

    # Fallback: banner comments spread over several lines.
    banner_title = re.compile(r'SECTION\s+\d+[:\.\)]\s*([A-Z][A-Z0-9 ÁÉÍÓÚÑ&/\-]{3,60})', re.I)
    rule_marks = ('═', '━', '----')
    for idx, text in enumerate(rows):
        if '{{--' not in text:
            continue
        if not any(mark in text for mark in rule_marks):
            continue
        # Inspect at most the four lines that follow the opening rule.
        for follower in rows[idx + 1:idx + 5]:
            hit = banner_title.search(follower)
            if hit:
                # The section is reported at the line of the opening rule.
                found.append({'name': hit.group(1).strip(), 'start_line': idx + 1, 'pattern': 'multiline'})
                break

    return found


def extract_admin_sections(admin_content):
    """
    Find the editable blocks declared in an admin form template.

    Each line is tried against two shapes; the first one that matches wins:
      1. 'h4'      : an <h4> heading, with an optional "N." or "N.M." prefix
                     that is left out of the captured name
      2. 'comment' : a numbered Blade comment with an upper-case title

    Returns a list of dicts with the keys 'name', 'start_line' (1-based) and
    'pattern' (the label of the shape that matched).
    """
    # (compiled rule, label) pairs, listed in priority order.
    rules = (
        (re.compile(r'<h4[^>]*>\s*(?:\d+(?:\.\d+)?\.\s*)?([A-Za-zÁÉÍÓÚÑáéíóúñ][^<]{3,80})</h4>'), 'h4'),
        (re.compile(r'\{\{--\s*\d+[\.\)]\s*([A-Z][A-Z0-9 ÁÉÍÓÚÑ&/\-]{3,60})\s*--\}\}'), 'comment'),
    )

    found = []
    for number, text in enumerate(admin_content.split('\n'), 1):
        for rule, label in rules:
            hit = rule.search(text)
            if hit:
                found.append({'name': hit.group(1).strip(), 'start_line': number, 'pattern': label})
                break  # one block per line at most
    return found


def normalize(text):
    """
    Reduce a section title to a canonical form for comparison.

    Steps: lower-case, replace á/é/í/ó/ú/ñ with their plain letters, turn
    every character that is neither a word character nor whitespace into a
    space, collapse whitespace, and drop short Spanish stopwords.

    Returns the remaining words joined by single spaces (possibly '').
    """
    deaccent = str.maketrans('áéíóúñ', 'aeioun')
    lowered = text.lower().translate(deaccent)
    without_punct = re.sub(r'[^\w\s]', ' ', lowered)
    collapsed = re.sub(r'\s+', ' ', without_punct).strip()
    # Short connective words carry no meaning when comparing titles.
    ignored = {'de', 'la', 'el', 'los', 'las', 'una', 'uno', 'y', 'o', 'a', 'en', 'del', 'al', 'para', 'con'}
    return ' '.join(token for token in collapsed.split() if token not in ignored)


def fuzzy_match(name1, name2):
    """
    Decide whether two section names appear to refer to the same thing.

    Both names are normalized first. They match when the normalized forms
    are equal, when they share at least one word of four or more characters,
    or when one normalized form is contained in the other. A name that
    normalizes to the empty string never matches.

    Returns True or False.
    """
    left = normalize(name1)
    right = normalize(name2)

    if not (left and right):
        return False
    if left == right:
        return True

    # Only words of 4+ characters count as significant overlap.
    significant_left = {word for word in left.split() if len(word) >= 4}
    shares_word = any(len(word) >= 4 and word in significant_left for word in right.split())

    return shares_word or left in right or right in left


def compare_sections(blade_sections, admin_sections):
    """
    Pair blade sections with admin blocks by fuzzy name matching.

    Blade sections are processed in order; each one takes the first admin
    block still unclaimed whose name fuzzy-matches, and that block is then
    no longer available to later sections.

    Returns a tuple (matched_pairs, blade_only, admin_only) where
    matched_pairs is a list of (blade_section, admin_section) tuples and the
    other two lists hold the leftovers in their original order.
    """
    pairs = []
    unmatched_blade = []
    remaining_admin = list(admin_sections)

    for blade in blade_sections:
        partner = next(
            (adm for adm in remaining_admin if fuzzy_match(blade['name'], adm['name'])),
            None,
        )
        if partner is None:
            unmatched_blade.append(blade)
        else:
            pairs.append((blade, partner))
            # A claimed admin block cannot be paired a second time.
            remaining_admin.remove(partner)

    return pairs, unmatched_blade, remaining_admin


def detect_hardcoded_english(blade_content):
    """
    Find literal English-looking text sitting between HTML tags.

    @php...@endphp blocks, Blade comments and {{ ... }} expressions are
    removed first. Then every run of one to six ASCII words between '>' and
    '<' whose first word is capitalized and at least three letters long is
    a candidate; a candidate is reported when any of its words is in a small
    list of common English words.

    Returns the distinct suspect strings in order of first appearance, so
    repeated runs over the same file give the same list (and the same
    truncated sample when a caller slices it).
    """
    # Remove everything that is not literal markup text.
    stripped = re.sub(r'@php.*?@endphp', '', blade_content, flags=re.DOTALL)
    stripped = re.sub(r'\{\{--.*?--\}\}', '', stripped, flags=re.DOTALL)
    stripped = re.sub(r'\{\{.*?\}\}', '', stripped, flags=re.DOTALL)

    # Candidates are ASCII letters only by construction, so text containing
    # accented characters can never be captured here.
    candidates = re.findall(r'>\s*([A-Z][a-zA-Z]{2,}(?:\s+[a-zA-Z]{2,}){0,5})\s*<', stripped)

    english_indicators = frozenset({
        'the', 'and', 'with', 'our', 'we', 'us', 'see', 'view', 'read', 'learn', 'discover',
        'years', 'projects', 'clients', 'experience', 'completed', 'happy',
        'latest', 'posts', 'gallery', 'services', 'about', 'contact', 'blog',
    })

    suspects = [
        candidate
        for candidate in candidates
        if not english_indicators.isdisjoint(candidate.lower().split())
    ]
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # return the strings in an order that varies between interpreter runs.
    return list(dict.fromkeys(suspects))


def audit(slug):
    """
    Audit one product and print a section-parity report for it.

    Loads database/seeders/products/core/<slug>.json, takes its 'demo'
    value, and for each of the welcome/about/contact views compares the
    sections found in the frontend blade with the blocks found in the admin
    form. It also lists hardcoded English strings found in the blade.

    Args:
        slug: file name (without .json) of the core product definition.

    Returns:
        True when nothing was flagged in red; False when the core JSON is
        missing, unparsable or has no 'demo' value, when a blade or admin
        file is missing or empty, when a blade section has no admin block,
        or when hardcoded English strings were found. Admin blocks without
        a blade section only produce a warning and do not affect the result.
    """
    print(f"\n\033[1m═══ {slug} ═══\033[0m")
    core_path = f'database/seeders/products/core/{slug}.json'
    core_raw = read(core_path)
    if not core_raw:
        print(f"  ❌ Core JSON no existe")
        return False

    # A malformed file must fail this product only, not abort the whole run
    # (with --all the global summary would otherwise never be printed).
    try:
        core = json.loads(core_raw)
    except json.JSONDecodeError as exc:
        print(f"  ❌ Core JSON inválido: {exc}")
        return False

    demo = core.get('demo', '') if isinstance(core, dict) else ''
    if not demo:
        # Without a demo name the view paths below cannot be built sensibly.
        print("  ❌ Core JSON sin clave 'demo'")
        return False

    all_ok = True
    for tab in ['welcome', 'about', 'contact']:
        blade_path = f'resources/views/modules/cd-base/frontend/demos/{demo}/{tab}.blade.php'
        admin_path = f'resources/views/admin/site-data/{tab}/{demo}.blade.php'
        blade_raw = read(blade_path)
        admin_raw = read(admin_path)
        if not blade_raw:
            print(f"\n  \033[91m🔴 {tab}: blade no existe ({blade_path})\033[0m")
            all_ok = False
            continue
        if not admin_raw:
            print(f"\n  \033[91m🔴 {tab}: admin form no existe ({admin_path})\033[0m")
            all_ok = False
            continue

        blade_sections = extract_blade_sections(blade_raw)
        admin_sections = extract_admin_sections(admin_raw)
        matched, blade_only, admin_only = compare_sections(blade_sections, admin_sections)

        # Hardcoded English text in the frontend view.
        hardcoded = detect_hardcoded_english(blade_raw)

        print(f"\n  \033[1m{tab.upper()}\033[0m: blade {len(blade_sections)} secciones | admin {len(admin_sections)} bloques | matched {len(matched)}")
        for b, a in matched:
            print(f"    ✅ \"{b['name'][:40]}\" ↔ \"{a['name'][:40]}\"")
        for b in blade_only:
            print(f"    \033[91m🔴 Sección del blade SIN bloque admin: \"{b['name'][:50]}\" (línea {b['start_line']})\033[0m")
            all_ok = False
        for a in admin_only:
            # Page-header / SEO blocks are skipped: they are meta blocks
            # with no visible blade section of their own.
            if a['name'].lower() not in ('encabezado de página', 'seo'):
                print(f"    \033[93m⚠️  Bloque admin sin sección clara en blade: \"{a['name'][:50]}\"\033[0m")
        if hardcoded:
            print(f"    \033[91m🔴 Strings hardcoded inglés detectados ({len(hardcoded)}):\033[0m")
            # Show only a sample of at most five strings.
            for h in hardcoded[:5]:
                print(f"        → \"{h[:60]}\"")
            all_ok = False

    return all_ok


def main():
    """
    Command-line entry point.

    With no argument, prints the module usage text and exits with status 1.
    With '--all', audits a fixed list of product slugs; otherwise audits the
    single slug given as the first argument. Ends by printing a one-line
    verdict per audited slug.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print(__doc__)
        sys.exit(1)

    target = cli_args[0]
    if target == '--all':
        slugs = ['law-firm-digital', 'construction', 'corporative', 'foundations-ong', 'personal-brand']
    else:
        slugs = [target]

    # Run every audit first; the verdicts are printed together afterwards.
    summary = {slug: audit(slug) for slug in slugs}

    # NOTE(review): the process exit status does not reflect the verdicts;
    # confirm whether callers (e.g. CI) expect a non-zero status on failure.
    print("\n\n\033[1m═══ RESUMEN GLOBAL ═══\033[0m")
    for slug, passed in summary.items():
        if passed:
            status = '\033[92m✅ PARIDAD ESTRUCTURAL OK\033[0m'
        else:
            status = '\033[91m🔴 BUGS DETECTADOS\033[0m'
        print(f"  {slug}: {status}")


# Run the audit only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
