"""End-to-end v48 → v49 migration test against a realistic synthetic v48 fixture.

Verifies that ``status='mandatory'`` items, scalar ``knowledge_items.domain``
values, and ``memory_domain`` grants pointing at slug strings all land
correctly post-migration without data loss or orphans — and that the new
``data_packages`` / ``user_stack_subscriptions`` tables exist with the right
shape on a freshly-migrated v48 DB.

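The fixture mirrors the shape a production v48 DB would have (corporate
memory + FTS + marketplace telemetry refactor already applied) so a regression
in any of the v49 steps is caught against realistic data, not just unit-level
seeds.

Note: the migration under test is imported as ``_v51_to_v52`` (the step was
renumbered after merges with main), so the test expects ``schema_version`` to
end at 52 even though the fixture seeds version 48.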
"""
import duckdb

from src.db import _v51_to_v52


def _seed_realistic_v48(conn):
    """Create a minimal but realistic v48-shaped DB."""
    conn.execute("CREATE TABLE schema_version (version INTEGER)")
    conn.execute("INSERT INTO schema_version VALUES (48)")
    conn.execute("CREATE TABLE user_groups (id VARCHAR PRIMARY KEY, name VARCHAR)")
    conn.execute(
        "INSERT INTO user_groups VALUES ('grp_sales', 'Sales'), ('grp_eng', 'Engineering')"
    )
    conn.execute(
        """
        CREATE TABLE resource_grants (
            id VARCHAR PRIMARY KEY,
            group_id VARCHAR,
            resource_type VARCHAR,
            resource_id VARCHAR
        )
        """
    )
    # Three memory_domain grants — two canonical (finance, engineering) plus
    # one non-canonical (sales-coaching) to exercise the defensive seed path.
    conn.execute(
        """
        INSERT INTO resource_grants(id, group_id, resource_type, resource_id) VALUES
            ('g1', 'grp_sales', 'memory_domain', 'finance'),
            ('g2', 'grp_eng',   'memory_domain', 'engineering'),
            ('g3', 'grp_sales', 'memory_domain', 'sales-coaching'),
            ('g4', 'grp_sales', 'data_package', 'pkg_pre_existing')
        """
    )
    conn.execute("CREATE TABLE table_registry (id VARCHAR PRIMARY KEY, name VARCHAR)")
    conn.execute(
        """
        CREATE TABLE knowledge_items (
            id VARCHAR PRIMARY KEY,
            title VARCHAR,
            content TEXT,
            status VARCHAR,
            domain VARCHAR
        )
        """
    )
    conn.execute(
        """
        INSERT INTO knowledge_items VALUES
            ('k1', 'GDPR rule',          'content', 'mandatory', 'finance'),
            ('k2', 'MEDDPICC',           'content', 'approved',  'sales-coaching'),
            ('k3', 'Code review SOP',    'content', 'mandatory', 'engineering'),
            ('k4', 'Onboarding script',  'content', 'pending',   NULL),
            ('k5', 'Personal pref',      'content', 'approved',  'product')
        """
    )
    # Marketplace telemetry tables present (v48 baseline includes them, and
    # we want to verify v49 doesn't disturb them).
    conn.execute(
        """
        CREATE TABLE usage_marketplace_item_daily (
            day            DATE,
            source         VARCHAR,
            type           VARCHAR,
            parent_plugin  VARCHAR,
            name           VARCHAR,
            count          INTEGER,
            distinct_users INTEGER,
            error_count    INTEGER
        )
        """
    )


def test_full_migration_fidelity():
    """Migrate the seeded in-memory fixture and check every expected outcome."""
    db = duckdb.connect(":memory:")
    _seed_realistic_v48(db)
    _v51_to_v52(db)

    def rows(sql):
        # Run a query and return all result rows as a list of tuples.
        return db.execute(sql).fetchall()

    # 1) 'mandatory' rows come back as status='approved' with
    # is_required=True; every other row keeps its status and is not required.
    items = {
        row[0]: row
        for row in rows("SELECT id, status, is_required FROM knowledge_items ORDER BY id")
    }
    expected_items = (
        ("k1", "approved", True),
        ("k2", "approved", False),
        ("k3", "approved", True),
        ("k4", "pending", False),
        ("k5", "approved", False),
    )
    for expected_row in expected_items:
        assert items[expected_row[0]] == expected_row

    # 2) memory_domains holds the six canonical slugs plus the non-canonical
    # 'sales-coaching' that appears in the seeded grants and items.
    domain_slugs = {row[0] for row in rows("SELECT slug FROM memory_domains")}
    canonical_slugs = {
        "finance", "engineering", "product", "data", "operations", "infrastructure",
    }
    assert canonical_slugs <= domain_slugs
    assert "sales-coaching" in domain_slugs

    # 3) The junction table has one row per item that had a domain; k4's
    # NULL domain yields no row at all.
    item_domain_pairs = rows(
        "SELECT kid.item_id, md.slug "
        "  FROM knowledge_item_domains kid "
        "  JOIN memory_domains md ON md.id = kid.domain_id "
        " ORDER BY kid.item_id"
    )
    for pair in (
        ("k1", "finance"),
        ("k2", "sales-coaching"),
        ("k3", "engineering"),
        ("k5", "product"),
    ):
        assert pair in item_domain_pairs
    assert all(item_id != "k4" for item_id, _slug in item_domain_pairs)
    assert len(item_domain_pairs) == 4

    # 4) memory_domain grants now reference memory_domains.id values
    # instead of bare slug strings.
    domain_grants = dict(
        rows("SELECT id, resource_id FROM resource_grants WHERE resource_type='memory_domain'")
    )
    assert domain_grants["g1"] == "md_finance"
    assert domain_grants["g2"] == "md_engineering"
    # The hyphen in the non-canonical slug is expected to become an
    # underscore in the derived id.
    assert domain_grants["g3"] == "md_sales_coaching"

    # 5) Every seeded grant carries requirement='available'; none is
    # expected to have been promoted to another value.
    requirements = {row[0] for row in rows("SELECT DISTINCT requirement FROM resource_grants")}
    assert requirements == {"available"}

    # 6) knowledge_items lost ``domain`` and gained ``is_required``.
    # Index 1 of each PRAGMA table_info row is taken as the column name.
    column_names = [row[1] for row in rows("PRAGMA table_info('knowledge_items')")]
    assert "domain" not in column_names
    assert "is_required" in column_names

    # 7) + 8) The newly introduced tables exist, and the marketplace
    # telemetry table created by the fixture is still present.
    table_names = {
        row[0]
        for row in rows(
            "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
        )
    }
    for table_name in (
        "data_packages",
        "data_package_tables",
        "memory_domains",
        "knowledge_item_domains",
        "user_stack_subscriptions",
        "usage_marketplace_item_daily",
    ):
        assert table_name in table_names

    # 9) schema_version ends at 52 (the step was renumbered v51 → v52 on
    # the second merge with main to make room for main's new v51).
    assert db.execute("SELECT version FROM schema_version").fetchone()[0] == 52