agnes-the-ai-analyst/tests/test_bigquery_extractor.py

"""Tests for BigQuery extractor (remote-only via DuckDB extension)."""

import re
from pathlib import Path
from unittest.mock import MagicMock

import duckdb
import pytest

from connectors.bigquery.extractor import _detect_table_type
from tests.helpers.contract import validate_extract_contract


@pytest.fixture
def output_dir(tmp_path):
    """Create ``extracts/bigquery`` under ``tmp_path`` and return its path as a string."""
    bigquery_dir = tmp_path.joinpath("extracts", "bigquery")
    bigquery_dir.mkdir(parents=True)
    return str(bigquery_dir)


@pytest.fixture
def sample_configs():
    """Return two remote-mode BigQuery table configs (``orders`` and ``sessions``)."""
    # (id, name, description) -- every other field is identical for both tables.
    rows = (
        ("project.analytics.orders", "orders", "Order data from BQ"),
        ("project.analytics.sessions", "sessions", "Session data"),
    )
    return [
        {
            "id": table_id,
            "name": table_name,
            "source_type": "bigquery",
            "bucket": "analytics",
            "source_table": table_name,
            "query_mode": "remote",
            "description": description,
        }
        for table_id, table_name, description in rows
    ]


class _DuckDBProxy:
    """Stand-in for a DuckDB connection that keeps BigQuery extension SQL offline.

    Extension install/load, secret creation, BigQuery ATTACH and ``DETACH bq``
    are answered with a ``MagicMock``. A ``CREATE ... VIEW`` selecting from
    ``bq.*`` is replaced by a one-column dummy table on the wrapped
    connection. Every other statement is forwarded unchanged.
    """

    def __init__(self, real_conn):
        self._real = real_conn

    def execute(self, sql, *args, **kwargs):
        """Execute ``sql`` on the wrapped connection unless it targets BigQuery."""
        normalized = sql.strip().upper()

        extension_setup = normalized.startswith(
            ("INSTALL BIGQUERY", "LOAD BIGQUERY", "CREATE SECRET")
        )
        attaches_bq = "ATTACH" in normalized and "BIGQUERY" in normalized
        if extension_setup or attaches_bq or normalized.startswith("DETACH BQ"):
            return MagicMock()

        # A view over bq.* cannot be created offline -> materialise a dummy
        # table under the same name so later lookups still find it.
        if "CREATE" in normalized and "FROM BQ." in normalized:
            found = re.search(r'VIEW\s+"?(\w+)"?', sql, re.IGNORECASE)
            if found is not None:
                placeholder = found.group(1)
                self._real.execute(
                    f'CREATE OR REPLACE TABLE "{placeholder}" (dummy INTEGER)'
                )
                return MagicMock()

        return self._real.execute(sql, *args, **kwargs)

    def close(self):
        """Close the wrapped connection."""
        return self._real.close()

    def __getattr__(self, name):
        # Anything not overridden here is served by the wrapped connection.
        return getattr(self._real, name)


class TestBigQueryExtractor:
    """Tests for ``init_extract`` output and for the ``_meta`` table layout."""

    def test_creates_extract_duckdb_with_meta(self, output_dir, sample_configs, monkeypatch):
        """Test that init_extract creates extract.duckdb with _meta and _remote_attach."""
        from unittest.mock import patch

        # Mock metadata-token auth + entity type detection so the test runs offline.
        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token",
            lambda: "test-token",
        )
        monkeypatch.setattr(
            "connectors.bigquery.extractor._detect_table_type",
            lambda *a, **kw: "BASE TABLE",
        )

        def proxy_connect(path=None, **kwargs):
            # Forward keyword arguments so the database is opened the way the
            # extractor asked for, as the other connect spies in this file do.
            real_conn = duckdb.connect(path, **kwargs)
            return _DuckDBProxy(real_conn)

        # Replace the extractor's view of the duckdb module so every connection
        # it opens is wrapped by the BigQuery-stubbing proxy.
        with patch("connectors.bigquery.extractor.duckdb") as mock_mod:
            mock_mod.connect = proxy_connect
            from connectors.bigquery.extractor import init_extract

            result = init_extract(output_dir, "my-project", sample_configs)

        assert result["tables_registered"] == 2
        assert len(result["errors"]) == 0

        # Verify extract.duckdb has _meta with correct data
        conn = duckdb.connect(str(Path(output_dir) / "extract.duckdb"))
        try:
            meta = conn.execute(
                "SELECT table_name, query_mode FROM _meta ORDER BY table_name"
            ).fetchall()
            assert len(meta) == 2
            assert meta[0][0] == "orders"
            assert meta[0][1] == "remote"
            assert meta[1][0] == "sessions"
            assert meta[1][1] == "remote"

            # Verify _remote_attach table for orchestrator re-ATTACH
            ra = conn.execute(
                "SELECT alias, extension, url, token_env FROM _remote_attach"
            ).fetchone()
            assert ra[0] == "bq"
            assert ra[1] == "bigquery"
            assert ra[2] == "project=my-project"
            assert ra[3] == ""  # BQ handles auth via env automatically
        finally:
            conn.close()

        validate_extract_contract(str(Path(output_dir) / "extract.duckdb"))

    def test_no_data_directory_created(self, output_dir, sample_configs):
        """BigQuery is remote-only -- no data/ directory should exist."""
        # NOTE(review): init_extract is never invoked here, so this only checks
        # the freshly created fixture directory.
        assert not (Path(output_dir) / "data").exists()

    def test_all_tables_are_remote(self, output_dir):
        """Verify all BigQuery tables get query_mode='remote' in _meta."""
        # NOTE(review): _meta is built by hand below; the extractor is not exercised.
        db_path = Path(output_dir) / "extract.duckdb"
        conn = duckdb.connect(str(db_path))
        try:
            conn.execute("""CREATE TABLE _meta (
            table_name VARCHAR, description VARCHAR, rows BIGINT,
            size_bytes BIGINT, extracted_at TIMESTAMP,
            query_mode VARCHAR DEFAULT 'remote'
        )""")
            conn.execute(
                "INSERT INTO _meta VALUES ('t1', '', 0, 0, current_timestamp, 'remote')"
            )

            result = conn.execute("SELECT query_mode FROM _meta").fetchone()
            assert result[0] == "remote"
        finally:
            # Close even when an assertion fails so the database file is released.
            conn.close()

    def test_handles_registration_failure(self, output_dir):
        """A failed table registration records error but does not stop others."""
        from datetime import datetime, timezone

        # NOTE(review): the failure is only simulated by not inserting a second
        # row; the extractor's own error handling is not exercised.
        db_path = Path(output_dir) / "extract.duckdb"
        conn = duckdb.connect(str(db_path))
        try:
            conn.execute("""CREATE TABLE _meta (
            table_name VARCHAR, description VARCHAR, rows BIGINT,
            size_bytes BIGINT, extracted_at TIMESTAMP,
            query_mode VARCHAR DEFAULT 'remote'
        )""")

            now = datetime.now(timezone.utc)
            # Simulate: first succeeds, second fails (not inserted)
            conn.execute(
                "INSERT INTO _meta VALUES ('good_table', '', 0, 0, ?, 'remote')", [now]
            )

            meta = conn.execute("SELECT count(*) FROM _meta").fetchone()
            assert meta[0] == 1  # Only good_table registered
        finally:
            conn.close()

    def test_meta_table_schema(self, output_dir):
        """Verify _meta table has all required columns per the extract.duckdb contract."""
        from connectors.bigquery.extractor import _create_meta_table

        db_path = Path(output_dir) / "contract_check.duckdb"
        conn = duckdb.connect(str(db_path))
        try:
            _create_meta_table(conn)

            columns = conn.execute(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_name = '_meta' ORDER BY ordinal_position"
            ).fetchall()
            col_names = [c[0] for c in columns]
            assert col_names == [
                "table_name",
                "description",
                "rows",
                "size_bytes",
                "extracted_at",
                "query_mode",
            ]
        finally:
            conn.close()


class TestDetectTableType:
    """Detect whether a BQ entity is a base table or a view."""

    @staticmethod
    def _conn_yielding(row):
        """Build a mock connection whose ``execute(...).fetchone()`` returns ``row``."""
        conn = MagicMock()
        conn.execute.return_value.fetchone.return_value = row
        return conn

    def test_base_table_returns_table(self):
        """A ('BASE TABLE',) row is reported as 'BASE TABLE'."""
        conn = self._conn_yielding(("BASE TABLE",))
        assert _detect_table_type(conn, "proj", "ds", "tbl") == "BASE TABLE"

    def test_view_returns_view(self):
        """A ('VIEW',) row is reported as 'VIEW'."""
        conn = self._conn_yielding(("VIEW",))
        assert _detect_table_type(conn, "proj", "ds", "tbl") == "VIEW"

    def test_missing_returns_none(self):
        """No row at all is reported as None."""
        conn = self._conn_yielding(None)
        assert _detect_table_type(conn, "proj", "ds", "tbl") is None

    def test_query_uses_bigquery_query_function(self):
        """Detection must use bigquery_query() table function (works on views via jobs API)."""
        conn = self._conn_yielding(("VIEW",))
        _detect_table_type(conn, "my-proj", "my_ds", "my_tbl")

        positional = conn.execute.call_args[0]

        # The outer statement goes through the bigquery_query() table function
        # rather than referencing the table directly.
        statement = positional[0]
        assert "bigquery_query" in statement.lower()

        # The project and the inner BQ SQL travel as bound parameters instead
        # of being interpolated into the outer statement.
        params = positional[1]
        assert "my-proj" in params, f"expected project in params, got: {params}"

        # Locate the parameter holding the inner BQ SQL by its INFORMATION_SCHEMA.TABLES reference.
        matching = [
            p for p in params
            if isinstance(p, str) and "INFORMATION_SCHEMA.TABLES" in p
        ]
        bq_sql_param = matching[0] if matching else None
        assert bq_sql_param is not None, f"inner BQ SQL not found in params: {params}"
        # The dataset is part of the identifier path inside the inner BQ SQL.
        assert "my_ds" in bq_sql_param
        # The table name is expected as its own bound parameter.
        assert "my_tbl" in params, f"table name should be a separate param, got: {params}"


class _CapturingProxy:
    """Record every SQL statement and keep BigQuery-bound ones away from DuckDB.

    ``DuckDBPyConnection.execute`` is a read-only C-level attribute, so it
    cannot be patched on the connection object itself; wrapping the
    connection in this proxy is the workaround.
    """

    def __init__(self, real_conn, captured: list):
        self._real = real_conn
        self._captured = captured

    def execute(self, sql, *args, **kwargs):
        """Log ``sql``, then either stub it (BigQuery traffic) or forward it."""
        self._captured.append(sql)
        head = sql.strip().upper()

        # Only statements that would reach BigQuery are stubbed. CREATE TABLE,
        # INSERT and friends run for real so _meta and _remote_attach persist.
        # The two FROM checks look at the raw, case-sensitive SQL text.
        would_reach_bq = (
            head.startswith(("INSTALL ", "LOAD ", "CREATE SECRET"))
            or (head.startswith("ATTACH ") and "BIGQUERY" in head)
            or head.startswith("DETACH ")
            or 'FROM bq.' in sql
            or 'FROM bigquery_query' in sql
        )
        if would_reach_bq:
            return MagicMock()
        return self._real.execute(sql, *args, **kwargs)

    def close(self):
        """Close the wrapped connection."""
        return self._real.close()

    def __getattr__(self, name):
        # Attributes not defined on the proxy come from the wrapped connection.
        return getattr(self._real, name)


class TestViewVsTableTemplates:
    """init_extract must pick the right view template based on entity type."""

    @staticmethod
    def _install_offline_stubs(monkeypatch, entity_type):
        """Stub auth, type detection and ``duckdb.connect``.

        Returns the list that collects every SQL statement the extractor runs.
        """
        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token",
            lambda: "test-token",
        )
        monkeypatch.setattr(
            "connectors.bigquery.extractor._detect_table_type",
            lambda *a, **kw: entity_type,
        )

        captured = []
        # Grab the genuine connect before it is replaced by the spy below.
        original_connect = duckdb.connect

        def spy_connect(*a, **kw):
            return _CapturingProxy(original_connect(*a, **kw), captured)

        monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", spy_connect)
        return captured

    @staticmethod
    def _view_statements(captured):
        """Return the captured statements that create a view."""
        markers = ("CREATE OR REPLACE VIEW", 'CREATE VIEW')
        return [
            stmt for stmt in captured
            if any(marker in stmt.upper() for marker in markers)
        ]

    def test_base_table_uses_direct_attach_ref(self, tmp_path, monkeypatch):
        """For BASE TABLE, generated DuckDB view references bq.dataset.table directly."""
        from connectors.bigquery.extractor import init_extract

        captured = self._install_offline_stubs(monkeypatch, "BASE TABLE")

        table = {"name": "orders", "bucket": "my_ds", "source_table": "orders", "description": ""}
        init_extract(str(tmp_path), "my-project", [table])

        view_sqls = self._view_statements(captured)
        direct_refs = [s for s in view_sqls if 'FROM bq."my_ds"."orders"' in s]
        assert direct_refs, \
            f"expected direct bq.dataset.table ref for BASE TABLE; got: {view_sqls}"
        assert all("bigquery_query(" not in s for s in view_sqls), \
            "BASE TABLE should not use bigquery_query() function"

    def test_view_uses_bigquery_query_function(self, tmp_path, monkeypatch):
        """For VIEW with legacy_wrap_views=True, generated DuckDB view wraps bigquery_query() (jobs API path)."""
        from connectors.bigquery.extractor import init_extract

        captured = self._install_offline_stubs(monkeypatch, "VIEW")

        # Turn the legacy toggle on so the old wrap-view path is the one under test.
        def fake_get_value(*args, default=None, **kw):
            if "legacy_wrap_views" in args:
                return True
            return default

        monkeypatch.setattr("connectors.bigquery.extractor.get_value", fake_get_value)

        table = {
            "name": "session_view",
            "bucket": "my_ds",
            "source_table": "session_view",
            "description": "",
        }
        init_extract(str(tmp_path), "my-project", [table])

        view_sqls = self._view_statements(captured)
        candidates = [s for s in view_sqls if '"session_view"' in s]
        view_create = candidates[0] if candidates else None
        assert view_create is not None, f"no CREATE VIEW for session_view; got: {view_sqls}"
        assert "bigquery_query(" in view_create
        assert "my-project" in view_create
        assert "`my-project.my_ds.session_view`" in view_create, \
            f"expected backtick-quoted full path; got: {view_create}"


class TestRemoteAttachForBQ:
    """For BQ source, _remote_attach must signal metadata-auth (empty token_env)."""

    def test_remote_attach_token_env_is_empty_for_bq(self, tmp_path, monkeypatch):
        """init_extract writes exactly one _remote_attach row with an empty token_env."""
        from connectors.bigquery.extractor import init_extract

        # Keep the run offline: fixed token and fixed entity type.
        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token",
            lambda: "test-token",
        )
        monkeypatch.setattr(
            "connectors.bigquery.extractor._detect_table_type",
            lambda *a, **kw: "BASE TABLE",
        )

        captured = []
        # Capture the genuine connect before it is replaced by the spy.
        real_connect = duckdb.connect

        def spy_connect(*a, **kw):
            real_conn = real_connect(*a, **kw)
            return _CapturingProxy(real_conn, captured)

        monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", spy_connect)

        init_extract(
            str(tmp_path),
            "my-project",
            [{"name": "t", "bucket": "ds", "source_table": "t", "description": ""}],
        )

        c = duckdb.connect(str(tmp_path / "extract.duckdb"), read_only=True)
        try:
            rows = c.execute(
                "SELECT alias, extension, url, token_env FROM _remote_attach"
            ).fetchall()
        finally:
            # Release the database file even if the query itself raises.
            c.close()

        assert len(rows) == 1
        alias, extension, url, token_env = rows[0]
        assert alias == "bq"
        assert extension == "bigquery"
        assert url == "project=my-project"
        assert token_env == "", \
            "BQ uses metadata auth — token_env must be empty so orchestrator triggers metadata path"


class TestInitExtractAuthFailure:
    """init_extract must abort cleanly if metadata token fetch fails."""

    def test_returns_error_when_metadata_unreachable(self, tmp_path, monkeypatch):
        """A failing token fetch yields an error result and no extract.duckdb."""
        from connectors.bigquery.extractor import init_extract
        from connectors.bigquery.auth import BQMetadataAuthError

        def raise_unreachable():
            raise BQMetadataAuthError("metadata server unreachable: simulated")

        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token", raise_unreachable
        )

        table = {"name": "t", "bucket": "ds", "source_table": "t", "description": ""}
        result = init_extract(str(tmp_path), "my-project", [table])

        # Auth failure must abort before any database file is written.
        extract_db = tmp_path / "extract.duckdb"
        assert not extract_db.exists(), \
            "extract.duckdb should not be created when auth fails"
        assert result["tables_registered"] == 0
        messages = (e.get("error", "").lower() for e in result["errors"])
        assert any("metadata" in message for message in messages)


class TestIdentifierValidation:
    """init_extract must reject unsafe identifiers before any SQL construction."""

    @staticmethod
    def _run_offline(tmp_path, monkeypatch, bucket, source_table):
        """Run init_extract for one table named ``t`` with all BQ access stubbed."""
        from connectors.bigquery.extractor import init_extract

        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token",
            lambda: "test-token",
        )
        monkeypatch.setattr(
            "connectors.bigquery.extractor._detect_table_type",
            lambda *a, **kw: "BASE TABLE",
        )

        # Route every connection through the capturing proxy so DuckDB
        # BQ-extension calls are stubbed and the test stays offline.
        statements = []
        original_connect = duckdb.connect

        def safe_connect(*a, **kw):
            return _CapturingProxy(original_connect(*a, **kw), statements)

        monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect)

        table = {
            "name": "t",
            "bucket": bucket,
            "source_table": source_table,
            "description": "",
        }
        return init_extract(str(tmp_path), "my-project", [table])

    def test_rejects_unsafe_dataset_name(self, tmp_path, monkeypatch):
        """A dataset name carrying SQL metacharacters is reported as a dataset error."""
        result = self._run_offline(
            tmp_path,
            monkeypatch,
            bucket='evil"; DROP TABLE foo; --',
            source_table="t",
        )
        assert result["tables_registered"] == 0
        messages = [e.get("error", "").lower() for e in result["errors"]]
        assert any("dataset" in message for message in messages)

    def test_rejects_unsafe_source_table_name(self, tmp_path, monkeypatch):
        """A source table name containing a backtick is reported as a source_table error."""
        result = self._run_offline(
            tmp_path,
            monkeypatch,
            bucket="ds",
            source_table="evil`name",
        )
        assert result["tables_registered"] == 0
        messages = [e.get("error", "").lower() for e in result["errors"]]
        assert any("source_table" in message for message in messages)


class TestExtractorMainModule:
    """Standalone `python -m connectors.bigquery.extractor` reads config correctly.

    Both tests execute the module's ``__main__`` block through
    ``runpy.run_module(..., run_name="__main__")`` with its collaborators
    replaced via ``monkeypatch``.
    """

    def test_main_reads_data_source_bigquery_project(self, tmp_path, monkeypatch):
        """__main__ must read project from data_source.bigquery.project (matches yaml example).

        Runs the production __main__ block via runpy and captures the project_id
        passed to init_extract. If __main__ ever regresses to reading
        config.get("bigquery", {}).get("project_id"), the captured value will
        be empty and the assertion will fail.
        """
        from unittest.mock import MagicMock

        # Filled in by fake_init_extract with whatever __main__ passes along.
        captured: dict = {}

        def fake_init_extract(out, project_id, tables):
            """Record the project and tables instead of doing any extraction."""
            captured["project"] = project_id
            captured["tables"] = tables
            return {"tables_registered": len(tables), "errors": []}

        # Patch every external dependency the __main__ block touches.
        # Targets are at the module path the __main__ block imports from,
        # because runpy re-executes the module under __name__ == "__main__".
        monkeypatch.setattr(
            "config.loader.load_instance_config",
            lambda: {
                "data_source": {
                    "type": "bigquery",
                    "bigquery": {"project": "my-test-project", "location": "US"},
                }
            },
        )
        # Registry stub: list_by_source yields a single table entry.
        fake_repo = MagicMock()
        fake_repo.list_by_source.return_value = [
            {"name": "t1", "bucket": "ds", "source_table": "t1", "description": ""},
        ]
        monkeypatch.setattr(
            "src.repositories.table_registry.TableRegistryRepository",
            lambda c: fake_repo,
        )
        # System DB stub whose close() is a no-op.
        monkeypatch.setattr(
            "src.db.get_system_db",
            lambda: MagicMock(close=lambda: None),
        )
        # __main__ looks up init_extract via the cached connectors.bigquery.extractor
        # module (sys.modules), so patching its attribute survives runpy's reimport.
        monkeypatch.setattr(
            "connectors.bigquery.extractor.init_extract",
            fake_init_extract,
        )
        # NOTE(review): presumably __main__ derives its output location from
        # DATA_DIR -- confirm against the extractor.
        monkeypatch.setenv("DATA_DIR", str(tmp_path))

        import runpy
        runpy.run_module("connectors.bigquery.extractor", run_name="__main__")

        assert captured.get("project") == "my-test-project", \
            f"expected __main__ to pass project='my-test-project' to init_extract; got {captured!r}"
        # The table list from the registry stub must reach init_extract unchanged.
        assert captured.get("tables", [{}])[0].get("name") == "t1"


    def test_main_exits_when_project_missing(self, tmp_path, monkeypatch):
        """__main__ must SystemExit(2) when data_source.bigquery.project is empty/missing."""
        # NOTE(review): the registry and system DB are not stubbed here;
        # presumably __main__ exits before touching them -- confirm.
        monkeypatch.setattr(
            "config.loader.load_instance_config",
            lambda: {"data_source": {"type": "bigquery"}},  # no .bigquery.project
        )
        monkeypatch.setenv("DATA_DIR", str(tmp_path))

        import runpy
        with pytest.raises(SystemExit) as exc_info:
            runpy.run_module("connectors.bigquery.extractor", run_name="__main__")
        # The exit status itself is part of the contract under test.
        assert exc_info.value.code == 2


class TestDropWrapViewForBQViews:
    """BQ VIEW entities get a DuckDB wrap view only when legacy_wrap_views is on."""

    @staticmethod
    def _run_for_view(tmp_path, monkeypatch, legacy_wrap_views):
        """Run init_extract for a VIEW entity ``myview``; return the captured SQL list."""
        from connectors.bigquery.extractor import init_extract

        monkeypatch.setattr("connectors.bigquery.extractor.get_metadata_token", lambda: "tok")
        monkeypatch.setattr(
            "connectors.bigquery.extractor._detect_table_type",
            lambda *a, **kw: "VIEW",
        )

        # Wrap every connection so BQ extension calls never reach real BigQuery.
        captured = []
        original_connect = duckdb.connect

        def safe_connect(*a, **kw):
            return _CapturingProxy(original_connect(*a, **kw), captured)

        monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect)

        def fake_get_value(*args, default=None, **kw):
            if "legacy_wrap_views" in args:
                return legacy_wrap_views
            return default

        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_value",
            fake_get_value,
            raising=False,
        )

        table = {"name": "myview", "bucket": "ds", "source_table": "myview", "description": ""}
        init_extract(str(tmp_path), "my-project", [table])
        return captured

    def test_view_entity_does_not_create_master_view_by_default(self, tmp_path, monkeypatch):
        """With the legacy toggle off, no wrap view exists but _meta still lists the view."""
        self._run_for_view(tmp_path, monkeypatch, legacy_wrap_views=False)

        # extract.duckdb must carry the _meta row yet contain no view named myview.
        c = duckdb.connect(str(tmp_path / "extract.duckdb"), read_only=True)
        try:
            views = c.execute(
                "SELECT view_name FROM duckdb_views() WHERE view_name='myview'"
            ).fetchall()
            assert views == [], f"expected no wrap view for VIEW entity by default; got {views}"
            registered = c.execute("SELECT table_name FROM _meta").fetchall()
            assert ("myview",) in registered, "_meta must still record the view"
        finally:
            c.close()

    def test_legacy_wrap_views_toggle_restores_old_behavior(self, tmp_path, monkeypatch):
        """With the legacy toggle on, a bigquery_query() wrap view is emitted for the view."""
        captured = self._run_for_view(tmp_path, monkeypatch, legacy_wrap_views=True)

        # Narrow the captured SQL down to the wrap-view statement for myview.
        myview_sqls = [
            s for s in captured
            if "CREATE OR REPLACE VIEW" in s.upper() and '"myview"' in s
        ]
        assert myview_sqls != [], \
            f"expected wrap view SQL for VIEW entity when legacy_wrap_views=True; captured={captured}"
        assert any("bigquery_query(" in s for s in myview_sqls), \
            f"legacy wrap view should use bigquery_query(); got: {myview_sqls}"


class TestInitExtractProjectIdValidation:
    """init_extract must reject unsafe project_id before any auth or DB work."""

    @staticmethod
    def _single_table():
        """Return a fresh one-table config list used by every test here."""
        return [{"name": "t", "bucket": "ds", "source_table": "t", "description": ""}]

    @staticmethod
    def _error_messages(result):
        """Return the lower-cased ``error`` text of every entry in ``result['errors']``."""
        return [e.get("error", "").lower() for e in result["errors"]]

    def test_rejects_unsafe_project_id_with_quote(self, tmp_path):
        """Project IDs containing SQL metacharacters must be rejected before
        any token fetch or DuckDB work."""
        from connectors.bigquery.extractor import init_extract

        result = init_extract(str(tmp_path), "evil'; DROP TABLE foo; --", self._single_table())

        assert result["tables_registered"] == 0
        assert any("project_id" in message for message in self._error_messages(result)), \
            f"expected error mentioning project_id; got: {result['errors']}"
        # Rejection must not leave a partial extract.duckdb behind.
        assert not (tmp_path / "extract.duckdb").exists()

    def test_rejects_uppercase_project_id(self, tmp_path):
        """GCP project IDs are lowercase-only."""
        from connectors.bigquery.extractor import init_extract

        result = init_extract(str(tmp_path), "MY-PROJECT", self._single_table())

        assert result["tables_registered"] == 0
        assert any("project_id" in message for message in self._error_messages(result))

    def test_valid_project_id_passes_validation(self, tmp_path, monkeypatch):
        """A well-formed project_id must pass validation. We stub the metadata
        fetch to fail right after, which produces a different error shape — that
        confirms validation didn't reject the project_id itself."""
        from connectors.bigquery.extractor import init_extract
        from connectors.bigquery.auth import BQMetadataAuthError

        def fail_metadata():
            raise BQMetadataAuthError("simulated — beyond validation")

        monkeypatch.setattr(
            "connectors.bigquery.extractor.get_metadata_token", fail_metadata
        )

        result = init_extract(str(tmp_path), "my-valid-project", self._single_table())

        assert result["tables_registered"] == 0
        errors = result["errors"]
        assert errors, "expected metadata-stub error"
        assert not any("project_id" in e.get("error", "").lower() for e in errors), \
            f"valid project_id should not trip the validator; got: {errors}"