# agnes-the-ai-analyst/tests/test_admin_tables_ui_materialized.py

"""`/admin/tables` register modal exposes the BQ Type selector + Custom SQL.

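The backend supports `query_mode='materialized'` since v0.25.0. The Jinja
template at `app/web/templates/admin_tables.html` originally exposed it via an
operator-facing **Type** selector (Table / View / Custom SQL Query) that
mapped to query_mode in the payload (Table+View → remote, Query → materialized).
That selector has since been replaced by a two-question radio model
(Q1 access mode: live / synced; Q2 sync mode: whole / custom); the tests
below assert the radio model is rendered and that the Type-selector
remnants stay gone. The Keboola register/edit forms are covered as well.

Mostly structural tests (no headless browser): they load the template through
the running app and assert the expected element ids + attributes are present
in the rendered HTML, for both `data_source.type='bigquery'` and
`data_source.type='keboola'` deployments. One test additionally posts a
register payload to `/api/admin/register-table`.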
"""
import pytest


def _auth(token):
    return {"Authorization": f"Bearer {token}"}


@pytest.fixture
def bq_instance(monkeypatch):
    """Pin the instance config to a BigQuery data source for one test.

    Patches ``app.instance_config.load_instance_config`` with a stub that
    returns a canned config whose ``data_source.type`` is ``'bigquery'``, so
    /admin/tables renders the BQ branch of the register modal.
    ``reset_cache()`` is called once before the test body and once more on
    teardown.

    Yields:
        The fake config dict returned by the patched loader.
    """
    from app.instance_config import reset_cache

    bigquery_settings = {"project": "my-test-project", "location": "us"}
    fake_cfg = {
        "data_source": {"type": "bigquery", "bigquery": bigquery_settings},
    }

    def _load_fake_config():
        return fake_cfg

    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        _load_fake_config,
        raising=False,
    )
    reset_cache()
    yield fake_cfg
    reset_cache()


def test_admin_tables_renders_two_question_radio_form(seeded_app, bq_instance):
    """The BQ register form is a two-question radio model, not a flat dropdown.

    Q1 asks how analysts should access the data (live / synced); Q2, shown
    only when synced, asks what to sync (whole / custom).  This replaces the
    earlier flat 4-option dropdown that mixed source-kind and
    distribution-mode into one selector — both UX reviewers (info-arch +
    analyst persona) flagged that conflation as the core confusion.
    """
    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]

    response = client.get("/admin/tables", headers=_auth(admin_token))
    assert response.status_code == 200, response.text
    page = response.text

    must_appear = (
        # Q1 radio group.
        'name="bqAccessMode"',
        'value="live"',
        'value="synced"',
        "onBqAccessModeChange",
        # Q2 radio group (conditional on Q1).
        'name="bqSyncMode"',
        'value="whole"',
        'value="custom"',
        "onBqSyncModeChange",
        # Custom-SQL textarea + "Use table as base" prefill button.
        'id="bqSourceQuery"',
        "prefillFromTable",
        "bq-source-custom",
        # Table/dataset inputs reused across live + synced/whole.
        'id="bqDataset"',
        'id="bqSourceTable"',
        "bq-source-table",
        "bq-access-synced",
        # Discover + List tables buttons.
        "discoverBqDatasets",
        "discoverBqTables",
    )
    for fragment in must_appear:
        assert fragment in page

    must_be_absent = (
        # No leftover jargon labels from the prior Type-selector iterations.
        "Direct query",
        "Sync to parquet",
        # Vendor-agnostic — no internal issue refs in operator-facing UI text.
        "Milestone 2",
        "issue #108",
    )
    for fragment in must_be_absent:
        assert fragment not in page

    # Phase E: form fields are inside the BQ tab content area, i.e. between
    # the tab-content id and the first </section> that follows it.
    section_start = page.index('id="tab-content-bigquery"')
    section_end = page.index('</section>', section_start)
    bq_section = page[section_start:section_end]
    for fragment in ('name="bqAccessMode"', 'id="bqDataset"', 'id="bqSourceQuery"'):
        assert fragment in bq_section


def test_edit_modal_has_bq_parity_fields(seeded_app, bq_instance):
    """The Edit modal carries the same BQ surface as the Register modal.

    Edit mirrors Register's two-question radio model (Q1 access mode:
    live/synced; Q2 sync mode: whole/custom).  Before the fix, Edit had only
    sync_strategy + primary_key + description + folder and lacked every
    BQ-specific control.  The operator can now flip access mode, change
    dataset/table, rewrite SQL, and tweak the schedule without dropping and
    re-adding the row.
    """
    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]

    response = client.get("/admin/tables", headers=_auth(admin_token))
    assert response.status_code == 200, response.text
    page = response.text

    edit_surface = (
        # Edit Q1 + Q2 radios.
        'name="editBqAccessMode"',
        'name="editBqSyncMode"',
        "onEditBqAccessModeChange",
        "onEditBqSyncModeChange",
        # BQ-specific edit fields.
        'id="editBqDataset"',
        'id="editBqSourceTable"',
        'id="editBqSourceQuery"',
        'id="editBqSyncSchedule"',
        # Visibility classes for adaptive show/hide on access/sync mode switch.
        "bq-edit-access-synced",
        "bq-edit-source-table",
        "bq-edit-source-custom",
        # Mode-switch warning surface (filled by JS when operator flips
        # access mode mid-edit).
        'id="editBqModeWarning"',
        # Source-type badge so the JS branch knows whether to render BQ vs
        # Keboola fields without a second round-trip.
        'id="editSourceTypeBadge"',
    )
    for fragment in edit_surface:
        assert fragment in page

    # No leftover Type-selector remnants.
    for remnant in ('id="editBqEntityType"', "onEditBqTypeChange"):
        assert remnant not in page

    # Edit modal has the same Discover / List tables / Use-as-base buttons
    # as Register so the operator can re-pick the source from autocomplete
    # without dropping the row.
    picker_wiring = (
        "discoverBqDatasets('editBqDatasetList')",
        "discoverBqTables('editBqDataset', 'editBqTableList')",
        "prefillFromTable('editBqSourceQuery')",
        'id="editBqDatasetList"',
        'id="editBqTableList"',
        'list="editBqDatasetList"',
        'list="editBqTableList"',
    )
    for fragment in picker_wiring:
        assert fragment in page


def test_keboola_register_form_has_three_question_radio(seeded_app, monkeypatch):
    """Phase G (v26): Keboola tab Register form gains a third radio option
    'Direct extract (Storage API)' alongside the existing 'whole' and
    'custom' modes.

    - whole / custom → query_mode='materialized' (DuckDB Keboola extension)
    - direct → query_mode='local' + v26 sync_strategy panel
      (incremental / partitioned / full_refresh + where_filters)

    Phase F asserted `kbStrategy` was removed; v26 re-adds it inside the
    Direct-extract panel (visible only when 'direct' is selected).

    The instance config loader is patched to report a Keboola data source
    for the duration of the test; `reset_cache()` runs before the request
    and again in the `finally` clause.
    """
    fake_cfg = {"data_source": {"type": "keboola", "keboola": {}}}
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        lambda: fake_cfg, raising=False,
    )
    from app.instance_config import reset_cache
    reset_cache()
    try:
        c = seeded_app["client"]
        token = seeded_app["admin_token"]
        r = c.get("/admin/tables", headers=_auth(token))
        # Fail with the response body instead of an opaque ValueError from
        # str.index() below when the page did not render.
        assert r.status_code == 200, r.text
        html = r.text

        # Slice out the Keboola tab: from its id up to the first </section>.
        kb_start = html.index('id="tab-content-keboola"')
        kb_tab = html[kb_start:html.index('</section>', kb_start)]

        # All three radios present.
        assert 'name="kbSyncMode"' in kb_tab
        assert 'value="whole"' in kb_tab
        assert 'value="custom"' in kb_tab
        assert 'value="direct"' in kb_tab

        # Bucket + source-table inputs reused for whole + direct modes.
        assert 'id="kbBucket"' in kb_tab
        assert 'id="kbSourceTable"' in kb_tab
        # Custom-SQL textarea + Use-table-as-base prefill button (either
        # handler spelling is accepted, anywhere on the page).
        assert 'id="kbSourceQuery"' in kb_tab
        assert 'kbPrefillFromTable' in html or "prefillFromKeboolaTable('kbSourceQuery')" in html

        # Sync Schedule input.
        assert 'id="kbSyncSchedule"' in kb_tab

        # v26: Sync Strategy dropdown re-added (inside the Direct-extract panel).
        # The bare class-name check subsumes the stricter
        # 'class="form-group kb-direct-only"' match the test used to OR with.
        assert 'id="kbStrategy"' in kb_tab
        assert 'kb-direct-only' in kb_tab

        # Primary Key — under <details>Advanced.
        assert 'id="kbPrimaryKey"' in kb_tab
        assert "<details" in kb_tab
        assert ">Advanced" in kb_tab

        # Discover datasets / List tables buttons.
        assert 'kbDiscoverBuckets' in html or "discoverKeboolaBuckets(" in html
        assert 'kbListTables' in html or "discoverKeboolaTables(" in html
    finally:
        reset_cache()


def test_keboola_register_payload_maps_to_materialized(seeded_app, monkeypatch):
    """The form's whole-table mode posts query_mode='materialized' with
    a synthetic SELECT * SQL — same pattern as BQ Synced/Whole.

    This test replays that payload against `/api/admin/register-table` on a
    Keboola-typed instance and checks the endpoint answers 201.  It does not
    inspect what the backend stored.
    """
    fake_cfg = {"data_source": {"type": "keboola", "keboola": {}}}
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        lambda: fake_cfg, raising=False,
    )
    from app.instance_config import reset_cache
    reset_cache()
    try:
        c = seeded_app["client"]
        token = seeded_app["admin_token"]
        r = c.post(
            "/api/admin/register-table",
            # Shared helper keeps the bearer-header format in one place.
            headers=_auth(token),
            json={
                "name": "orders",
                "source_type": "keboola",
                "query_mode": "materialized",
                "source_query": 'SELECT * FROM kbc."in.c-sales"."orders"',
                "sync_schedule": "every 6h",
            },
        )
        assert r.status_code == 201, r.text
    finally:
        reset_cache()


def test_keboola_edit_modal_parity(seeded_app, monkeypatch):
    """Phase G (v26): Edit modal mirrors Register's three-question structure
    (whole | direct | custom) for Keboola rows.

    Phase F asserted `editKbStrategy` was removed; v26 re-adds it inside
    the Direct-extract panel for the same reason as the Register form.

    The instance config loader is patched to report a Keboola data source;
    `reset_cache()` runs before the request and again in the `finally`
    clause.
    """
    fake_cfg = {"data_source": {"type": "keboola", "keboola": {}}}
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        lambda: fake_cfg, raising=False,
    )
    from app.instance_config import reset_cache
    reset_cache()
    try:
        c = seeded_app["client"]
        token = seeded_app["admin_token"]
        r = c.get("/admin/tables", headers=_auth(token))
        # Surface the response body if the page did not render, rather than
        # a bare "fragment not in html" failure further down.
        assert r.status_code == 200, r.text
        html = r.text
        # Q2 radio in edit (now three modes).
        assert 'name="editKbSyncMode"' in html
        assert 'id="editKbBucket"' in html
        assert 'id="editKbSourceTable"' in html
        assert 'id="editKbSourceQuery"' in html
        assert 'id="editKbSyncSchedule"' in html
        # Discover/List/Use-as-base buttons mirror Register.
        assert "discoverKeboolaBuckets('editKbBucketList')" in html
        assert "discoverKeboolaTables('editKbBucket', 'editKbTableList')" in html
        assert "prefillFromKeboolaTable('editKbSourceQuery')" in html
        # v26: Strategy dropdown re-added inside Direct-extract panel
        assert 'id="editKbStrategy"' in html
        assert 'editkb-direct-only' in html
        assert 'id="editKbPrimaryKey"' in html
    finally:
        reset_cache()


def test_bq_edit_modal_inside_tab_content_bigquery(seeded_app, bq_instance):
    """C2: BQ Edit modal physically lives inside <section id='tab-content-bigquery'>
    so the modal+form share the tab's DOM scope. Mirror of Phase E's BQ Register
    modal placement.

    If a shared `#editModal` is still rendered, it must not contain the BQ
    dataset field.
    """
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.get("/admin/tables", headers=_auth(token))
    # Fail with the response body instead of an opaque ValueError from
    # str.index() below when the page did not render.
    assert r.status_code == 200, r.text
    html = r.text

    # The BQ tab spans from its id to the first </section> that follows it.
    bq_section_start = html.index('id="tab-content-bigquery"')
    bq_section_end = html.index('</section>', bq_section_start)
    bq_section = html[bq_section_start:bq_section_end]
    assert 'id="editBqModal"' in bq_section
    assert 'id="editBqDataset"' in bq_section
    assert 'id="editBqSourceQuery"' in bq_section

    # Old shared #editModal either gone or only carries non-BQ fields.
    if 'id="editModal"' in html:
        edit_modal_start = html.index('id="editModal"')
        # Rough lookahead: the modal is taken to end at the next
        # id="toast" marker, or at the end of the document when no such
        # marker follows it.
        toast_pos = html.find('id="toast"', edit_modal_start)
        edit_modal_end = toast_pos if toast_pos != -1 else len(html)
        edit_modal = html[edit_modal_start:edit_modal_end]
        assert 'id="editBqDataset"' not in edit_modal  # BQ fields aren't here anymore


def test_keboola_discover_buttons_hidden_on_bigquery_instance(seeded_app, monkeypatch):
    """C1: Discover/List/Use-as-base buttons in the Keboola tab are
    UI-hidden when the instance's data_source.type isn't keboola, because
    /api/admin/discover-tables routes by instance type and would return
    BQ data on a BQ instance.

    The instance config loader is patched to report a BigQuery data source;
    `reset_cache()` runs before the request and again in the `finally`
    clause.
    """
    fake_cfg = {"data_source": {"type": "bigquery", "bigquery": {"project": "p"}}}
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        lambda: fake_cfg, raising=False,
    )
    from app.instance_config import reset_cache
    reset_cache()
    try:
        c = seeded_app["client"]
        token = seeded_app["admin_token"]
        r = c.get("/admin/tables", headers=_auth(token))
        # The "not in html" checks below would also hold on an error page,
        # so confirm the page actually rendered first.
        assert r.status_code == 200, r.text
        html = r.text
        # Inputs stay (manual entry works).
        assert 'id="kbBucket"' in html
        assert 'id="kbSourceTable"' in html
        # Buttons hidden.
        assert "discoverKeboolaBuckets" not in html
        assert "discoverKeboolaTables" not in html
        assert "prefillFromKeboolaTable" not in html
    finally:
        reset_cache()


def test_keboola_discover_buttons_visible_on_keboola_instance(seeded_app, monkeypatch):
    """Inverse of the hidden-on-BigQuery case — the Discover / List tables /
    Use-as-base handlers render on a Keboola-typed instance.

    The instance config loader is patched to report a Keboola data source;
    `reset_cache()` runs before the request and again in the `finally`
    clause.
    """
    fake_cfg = {"data_source": {"type": "keboola", "keboola": {}}}
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        lambda: fake_cfg, raising=False,
    )
    from app.instance_config import reset_cache
    reset_cache()
    try:
        c = seeded_app["client"]
        token = seeded_app["admin_token"]
        r = c.get("/admin/tables", headers=_auth(token))
        # Show the response body on a non-200 instead of a bare
        # "fragment not in html" failure.
        assert r.status_code == 200, r.text
        html = r.text
        assert "discoverKeboolaBuckets" in html
        assert "discoverKeboolaTables" in html
        assert "prefillFromKeboolaTable" in html
    finally:
        reset_cache()


def test_admin_tables_keboola_branch_unchanged(seeded_app, monkeypatch):
    """On a Keboola instance the page still carries both source forms.

    Phase E: the BQ form is always rendered (inside #tab-content-bigquery)
    regardless of data_source.type.  On a Keboola instance the BQ tab is
    merely hidden by default and the operator can still click into it.  The
    legacy Type-selector remnant (#bqEntityType) must stay gone.
    """
    from app.instance_config import reset_cache

    keboola_cfg = {"data_source": {"type": "keboola", "keboola": {}}}

    def _load_keboola_config():
        return keboola_cfg

    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        _load_keboola_config,
        raising=False,
    )
    reset_cache()

    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]
    try:
        response = client.get("/admin/tables", headers=_auth(admin_token))
        assert response.status_code == 200, response.text
        page = response.text

        # Legacy Type-selector remnant must stay gone.
        assert 'id="bqEntityType"' not in page
        # BQ form now always rendered inside #tab-content-bigquery.
        assert 'id="bqSourceQuery"' in page
        # C3: legacy #registerModal removed; the Phase F Keboola modal
        # at #registerKeboolaModal owns the Keboola flow now.
        assert 'id="registerModal"' not in page
        for keboola_field in ('id="kbBucket"', 'id="kbViewName"'):
            assert keboola_field in page
    finally:
        reset_cache()