#!/usr/bin/env python3
"""
Generate Obsidian wikilinks and MOC (Map of Content) files for the Engram vault.

Scans all entity files, extracts cross-references, generates:
1. Wikilinks between related files (based on shared tags/topics)
2. MOC index files by topic, source, and date
3. Inline #tags from YAML frontmatter topics

Usage:
    python3 generate_wikilinks.py              # Generate all
    python3 generate_wikilinks.py --dry-run     # Preview
    python3 generate_wikilinks.py --moc-only    # Only generate MOC files
"""

import argparse
import logging
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import yaml

# Console logging with a fixed "[wikilinks]" prefix so this tool's output
# is easy to distinguish from other vault-maintenance scripts.
logging.basicConfig(level=logging.INFO, format="[wikilinks] %(message)s")
logger = logging.getLogger(__name__)

# Default vault layout: this script lives one directory below the repo root;
# entity Markdown files are in ../entities and generated index files go in
# an "_MOC" subdirectory (overridable via --dir at runtime).
ENTITIES_DIR = Path(__file__).parent.parent / "entities"
MOC_DIR = ENTITIES_DIR / "_MOC"


def parse_frontmatter(filepath: Path) -> dict:
    """Extract YAML frontmatter from a Markdown file.

    Returns the frontmatter as a dict, or an empty dict when the file has
    no ``---``-fenced frontmatter, the YAML is invalid, or the file cannot
    be read.  Valid-but-non-mapping YAML (a bare list or scalar) is also
    treated as missing, so callers can always rely on ``.get()`` access.
    """
    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
        match = re.match(r"^---\n(.*?)\n---\n", content, re.DOTALL)
        if match:
            data = yaml.safe_load(match.group(1))
            # Guard: safe_load returns whatever the document is ("- a",
            # "42", None, ...); only a mapping is usable downstream.
            if isinstance(data, dict):
                return data
    except Exception as exc:
        # Best-effort by design: one unreadable/corrupt file must not
        # abort the whole vault scan — but leave a trace instead of
        # swallowing the error silently.
        logger.warning("Failed to parse frontmatter in %s: %s", filepath, exc)
    return {}


def _as_list(value) -> list:
    """Coerce a frontmatter value to a list.

    YAML authors sometimes write ``tags: foo`` (a scalar) instead of a
    list; treat a lone string as a single-item list rather than letting
    later code iterate over its characters.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    if isinstance(value, list):
        return value
    return [value]


def scan_vault(entities_dir: Path) -> list:
    """Scan all entity files and extract metadata.

    Skips non-files, non-Markdown files, and names starting with "_"
    (MOC/index files).  Iterates in sorted filename order so output is
    deterministic across platforms.  Returns one metadata dict per entity
    with keys: path, name, tags, topics, source, created_at.
    """
    entities = []
    for f in sorted(entities_dir.iterdir()):
        if not f.is_file() or f.suffix != ".md" or f.name.startswith("_"):
            continue
        fm = parse_frontmatter(f)
        entities.append(
            {
                "path": f,
                "name": f.stem,
                "tags": _as_list(fm.get("tags")),
                "topics": _as_list(fm.get("topics")),
                "source": fm.get("source", "unknown"),
                "created_at": fm.get("created_at", ""),
            }
        )
    return entities


def build_tag_index(entities: list) -> dict:
    """Group entities by tag and topic (one shared namespace).

    An entity that lists the same term in both ``tags`` and ``topics`` is
    indexed once, not twice — the previous double-append inflated the MOC
    file counts and duplicated list entries.
    """
    index = defaultdict(list)
    for e in entities:
        # dict.fromkeys() de-duplicates while preserving first-seen order.
        for term in dict.fromkeys(list(e["tags"]) + list(e["topics"])):
            index[term].append(e)
    return dict(index)


def build_source_index(entities: list) -> dict:
    """Group entities by their ``source`` field.

    Returns a plain dict mapping each source name to the list of entity
    dicts that came from it, in input order.
    """
    grouped: dict = {}
    for entity in entities:
        grouped.setdefault(entity["source"], []).append(entity)
    return grouped


def build_date_index(entities: list) -> dict:
    """Group entities by creation month.

    Buckets are "YYYY-MM" prefixes of ``created_at``; entities with a
    falsy or too-short timestamp are skipped.
    """
    grouped: dict = {}
    for entity in entities:
        raw = entity.get("created_at", "")
        stamp = str(raw)
        # Need at least "YYYY-MM" (7 chars) to extract a month bucket.
        if raw and len(stamp) >= 7:
            grouped.setdefault(stamp[:7], []).append(entity)
    return grouped


def _safe_name(name: str) -> str:
    """Sanitize a tag/source string into a filesystem- and link-safe name."""
    return re.sub(r"[^a-zA-Z0-9_-]", "_", name)


def _newest_first(items: list) -> list:
    """Sort entities newest-first; str() guards against mixed str/date keys."""
    return sorted(items, key=lambda x: str(x.get("created_at", "")), reverse=True)


def generate_moc_files(entities: list, moc_dir: Path, dry_run: bool = False):
    """Generate Map of Content index files.

    Writes a master index plus one MOC file per topic, per source, and per
    month into *moc_dir*.  Returns the number of files written (always 0
    when *dry_run* is True).
    """
    moc_dir.mkdir(parents=True, exist_ok=True)

    tag_index = build_tag_index(entities)
    source_index = build_source_index(entities)
    date_index = build_date_index(entities)

    files_written = 0

    # Master index.  Link targets must use the same sanitized names the
    # per-topic / per-source files are written under below — linking the
    # raw tag/source produced dead links for names containing spaces or
    # punctuation.
    master = "# Engram Vault — Index\n\n"
    master += f"**Total entities:** {len(entities)}\n"
    master += f"**Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
    master += "## By Topic\n\n"
    for tag in sorted(tag_index.keys()):
        count = len(tag_index[tag])
        master += f"- [[_MOC/{_safe_name(tag)}|{tag}]] ({count} files)\n"
    master += "\n## By Source\n\n"
    for source in sorted(source_index.keys()):
        count = len(source_index[source])
        master += f"- [[_MOC/source-{_safe_name(source)}|{source}]] ({count} files)\n"
    master += "\n## By Month\n\n"
    for month in sorted(date_index.keys(), reverse=True):
        count = len(date_index[month])
        # Month keys are already "YYYY-MM" and need no sanitizing.
        master += f"- [[_MOC/{month}|{month}]] ({count} files)\n"

    if not dry_run:
        # Explicit encoding: write_text() otherwise uses the locale
        # encoding, which can fail on the em dash / non-ASCII tags on some
        # platforms; these files are read back as UTF-8 elsewhere.
        (moc_dir / "_Index.md").write_text(master, encoding="utf-8")
        files_written += 1
    logger.info(f"Master index: {len(tag_index)} topics, {len(source_index)} sources, {len(date_index)} months")

    # Topic MOC files
    for tag, tag_entities in tag_index.items():
        content = f"# {tag}\n\n"
        content += f"**{len(tag_entities)} files**\n\n"
        for e in _newest_first(tag_entities):
            content += f"- [[{e['name']}]] ({e['source']}, {str(e.get('created_at', ''))[:10]})\n"
        if not dry_run:
            (moc_dir / f"{_safe_name(tag)}.md").write_text(content, encoding="utf-8")
            files_written += 1

    # Source MOC files
    for source, source_entities in source_index.items():
        content = f"# Source: {source}\n\n"
        content += f"**{len(source_entities)} files**\n\n"
        for e in _newest_first(source_entities):
            content += f"- [[{e['name']}]] ({str(e.get('created_at', ''))[:10]})\n"
        if not dry_run:
            (moc_dir / f"source-{_safe_name(source)}.md").write_text(content, encoding="utf-8")
            files_written += 1

    # Date MOC files
    for month, month_entities in date_index.items():
        content = f"# {month}\n\n"
        content += f"**{len(month_entities)} files**\n\n"
        for e in _newest_first(month_entities):
            tags_str = ", ".join(e.get("tags", [])[:3])
            content += f"- [[{e['name']}]] ({e['source']}) {tags_str}\n"
        if not dry_run:
            (moc_dir / f"{month}.md").write_text(content, encoding="utf-8")
            files_written += 1

    logger.info(f"Generated {files_written} MOC files")
    return files_written


def add_inline_tags(entities: list, dry_run: bool = False):
    """Add #hashtags to file bodies based on frontmatter topics/tags.

    Inserts one line of hashtags directly after the YAML frontmatter of
    each entity file.  Tags are emitted in sorted order so the line is
    byte-identical across runs — the previous set-based ordering varied
    between interpreter invocations (str hash randomization), which
    defeated the "already tagged" check below and appended a duplicate
    tag line on every run.

    Returns the number of files updated (or that would be, in dry-run).
    """
    updated = 0
    for e in entities:
        topics = e.get("topics", [])
        tags = e.get("tags", [])
        # sorted() → deterministic, reproducible tag order.
        all_tags = sorted(set(topics + tags))
        if not all_tags:
            continue

        filepath = e["path"]
        content = filepath.read_text(encoding="utf-8", errors="replace")

        # Idempotency: skip files that already carry this exact tag line.
        tag_line = " ".join(f"#{t.replace(' ', '-')}" for t in all_tags if t)
        if tag_line in content:
            continue

        # Insert the tag line immediately after the closing frontmatter
        # fence; files without frontmatter are left untouched.
        match = re.match(r"^(---\n.*?\n---\n)", content, re.DOTALL)
        if match:
            insert_point = match.end()
            new_content = content[:insert_point] + f"\n{tag_line}\n" + content[insert_point:]
            if not dry_run:
                filepath.write_text(new_content, encoding="utf-8")
            updated += 1

    logger.info(f"Added inline tags to {updated} files")
    return updated


def main():
    """Command-line entry point: scan the vault, then regenerate indexes."""
    arg_parser = argparse.ArgumentParser(description="Generate Obsidian wikilinks for Engram")
    arg_parser.add_argument("--dry-run", action="store_true")
    arg_parser.add_argument("--moc-only", action="store_true", help="Only generate MOC files")
    arg_parser.add_argument("--dir", default=str(ENTITIES_DIR))
    opts = arg_parser.parse_args()

    vault_dir = Path(opts.dir)
    logger.info(f"Scanning {vault_dir}...")
    found = scan_vault(vault_dir)
    logger.info(f"Found {len(found)} entity files")

    if opts.dry_run:
        logger.info("(DRY RUN)")

    # MOC indexes are always regenerated; inline tagging is skippable.
    generate_moc_files(found, vault_dir / "_MOC", opts.dry_run)
    if not opts.moc_only:
        add_inline_tags(found, opts.dry_run)

    logger.info("Done!")


if __name__ == "__main__":
    main()
