#!/usr/bin/env python3
import re
import sqlite3
import sys
from datetime import datetime
from pathlib import Path


def slugify(text):
    return re.sub(r"[^a-zA-Z0-9_-]", "_", str(text).strip().replace(" ", "_"))


_VALID_IDENTIFIER = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")


def _validate_identifier(name: str) -> str:
    """Validate that *name* is a safe SQL identifier (table or column name).
    Raises ValueError if the name contains anything unexpected."""
    if not _VALID_IDENTIFIER.match(name):
        raise ValueError(f"Invalid SQL identifier: {name!r}")
    return name


DB_PATH = Path.home() / ".local" / "share" / "goose" / "sessions" / "sessions.db"


def main():
    if not DB_PATH.exists():
        print(f"❌ DB not found: {DB_PATH}")
        sys.exit(1)

    print(f"✅ DB connected: {DB_PATH}")
    output_dir = Path.cwd() / "entities"
    output_dir.mkdir(exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    # Tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = [r[0] for r in cursor.fetchall()]
    print(f"Tables: {tables}")

    msg_table = next((t for t in ["messages", "chats", "conversations"] if t in tables), None)
    if not msg_table:
        print(f"❌ No messages table. Tables: {tables}")
        conn.close()
        sys.exit(1)

    # Columns
    _validate_identifier(msg_table)
    cursor.execute(f"PRAGMA table_info({msg_table})")
    cols = [row[1] for row in cursor.fetchall()]

    session_col = next(
        (c for c in ["session_id", "conversation_id", "chat_id", "id"] if c in cols),
        None,
    )
    role_col = next((c for c in ["role", "speaker"] if c in cols), None)
    content_col = next((c for c in ["content_json", "message", "text", "body"] if c in cols), None)
    time_col = next((c for c in ["created_at", "timestamp", "time"] if c in cols), None)

    if not session_col or not role_col or not content_col:
        print(f"❌ Missing cols. Need {session_col=}, {role_col=}, {content_col=}. Found: {cols}")
        conn.close()
        sys.exit(1)

    # Validate all identifiers before interpolation into SQL
    _validate_identifier(session_col)
    _validate_identifier(role_col)
    _validate_identifier(content_col)

    order = f", {_validate_identifier(time_col)}" if time_col else ""
    query = f"SELECT DISTINCT {session_col}, {role_col}, {content_col} FROM {msg_table} ORDER BY {session_col}{order}"

    cursor.execute(query)
    rows = cursor.fetchall()

    sessions = {}
    for row in rows:
        sid = row[0]
        role = row[1]
        content = row[2] or ""
        if sid not in sessions:
            sessions[sid] = []
        sessions[sid].append(f"**{role.upper()}**: {content}")

    count = 0
    for sid, msgs in sessions.items():
        f"Goose_DB_Session_{slugify(str(sid))}"
        path = output_dir / "{title}.md"
        content_str = "\n\n".join(msgs)
        datetime.now().isoformat()
        frontmatter_lines = [
            "---",
            'name: "{title}"',
            'scope: "global"',
            'source: "goose_db_auto"',
            'imported_at: "{now}"',
            'tags: ["auto-export", "goose_db"]',
            "---",
            "",
            "# {title}",
            "",
            content_str,
        ]
        md = "\n".join(frontmatter_lines)
        path.write_text(md, encoding="utf-8")
        count += 1

    print("🎉 Full sync: {count} Goose_DB_Session_*.md written/overwritten!")
    conn.close()


if __name__ == "__main__":
    main()
