Before this fix, the fixtures lived inside tests/test_api_admin_materialized.py. Upcoming test files in this branch need them too; conftest.py is the canonical home, so they resolve via pytest's automatic fixture discovery.
369 lines
14 KiB
Python
369 lines
14 KiB
Python
"""Shared test fixtures for E2E tests."""
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock
|
|
|
|
import duckdb
|
|
import pytest
|
|
|
|
# Pin the test-mode flag and the JWT secret at import time. Every pytest-xdist
# worker process must see the same values before any module-level code in
# app.auth.jwt caches the secret. setdefault leaves pre-set values untouched.
os.environ.setdefault("TESTING", "1")
os.environ.setdefault("JWT_SECRET_KEY", "test-secret-key-minimum-32-characters!!")

# Some modules read DATA_DIR at import time (e.g.
# services/telegram_bot/config.py builds NOTIFICATIONS_DIR eagerly), so the
# DATA_DIR-derived directories have to exist before those imports happen.
# The bot itself logs to stdout — there is no FileHandler anymore — but the
# JSON state files still need their directory.
import tempfile as _tf

if "DATA_DIR" not in os.environ:
    os.environ["DATA_DIR"] = os.path.join(_tf.gettempdir(), ".agnes-test-data")
for _subdir in ("notifications", "state"):
    os.makedirs(os.path.join(os.environ["DATA_DIR"], _subdir), exist_ok=True)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _disable_auth_rate_limit_in_tests():
    """Turn the slowapi auth rate limiter off around every test.

    Production limits (e.g. 10/minute on /auth/password/login) would
    otherwise leak into test files that call auth endpoints in tight loops.
    Those tests predate the limiter and should not need to know its bucket
    sizes. The dedicated rate-limit test in test_auth_rate_limit.py sets
    ``limiter.enabled = True`` and resets state inside its own scope.

    The limiter's previous ``enabled`` value is restored on teardown.
    """
    from app.auth.rate_limit import limiter as auth_limiter

    previous_state = auth_limiter.enabled
    auth_limiter.enabled = False
    try:
        auth_limiter.reset()
    except Exception:
        # Deliberately best-effort: the in-memory backend always resets
        # cleanly; this only guards third-party storage backends that
        # operators might wire in later.
        pass

    yield

    auth_limiter.enabled = previous_state
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_module_caches():
    """Clear module-level caches before each test.

    These caches survive across tests that run on the same pytest-xdist
    worker process. Without the reset, a test that populates
    `app.instance_config._instance_config` (e.g. via `runpy.run_module` in
    test_bigquery_extractor's __main__ tests, or via any path that calls
    `app.instance_config.get_value`) leaves stale config visible to the next
    test on that worker — including config that points at a different
    DATA_DIR than the one the next test's e2e_env set.

    Caches reset:
      - app.instance_config._instance_config — instance.yaml deep-merge cache
      - get_bq_access (functools.cache)      — BqAccess(BqProjects(...)) lru
      - app.api.v2_quota._quota_singleton    — per-user quota tracker

    Pre-existing flakiness; surfaced by issue #160 PR #168 shifting the test
    bucket distribution on xdist worker gw2.
    """
    try:
        import app.instance_config as instance_config_mod
    except ImportError:
        # Module unavailable: nothing to reset. The BigQuery cache reset
        # below is skipped too, matching the nesting of the two resets.
        pass
    else:
        instance_config_mod._instance_config = None
        try:
            from connectors.bigquery.access import get_bq_access as cached_bq_access

            cached_bq_access.cache_clear()
        except (ImportError, AttributeError):
            # Connector missing, or get_bq_access has no cache_clear.
            pass

    try:
        import app.api.v2_quota as quota_mod
    except ImportError:
        pass
    else:
        quota_mod._quota_singleton = None

    yield
|
|
|
|
|
|
@pytest.fixture
def e2e_env(tmp_path, monkeypatch):
    """Provide an isolated E2E environment rooted at ``tmp_path``.

    Points DATA_DIR at ``tmp_path``, sets the test JWT secret, and creates
    the ``extracts``, ``analytics`` and ``state`` sub-directories.

    Yields a dict with:
      - ``data_dir``: the ``tmp_path`` root
      - ``extracts_dir``: ``tmp_path / "extracts"``
      - ``analytics_db``: string path to ``analytics/server.duckdb``
        (only the path is computed; the file itself is not created here)
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    monkeypatch.setenv("JWT_SECRET_KEY", "test-secret-key-minimum-32-characters!!")

    extracts_dir = tmp_path / "extracts"
    analytics_dir = tmp_path / "analytics"
    state_dir = tmp_path / "state"
    for directory in (extracts_dir, analytics_dir, state_dir):
        directory.mkdir()

    yield {
        "data_dir": tmp_path,
        "extracts_dir": extracts_dir,
        "analytics_db": str(analytics_dir / "server.duckdb"),
    }
|
|
|
|
|
|
def create_mock_extract(extracts_dir: Path, source_name: str, tables: list[dict]):
    """Create a mock ``extract.duckdb`` with a ``_meta`` table and data tables.

    Args:
        extracts_dir: Existing directory that holds one sub-directory per source.
        source_name: Name of the source sub-directory to create or reuse.
        tables: Table specs, e.g.
            ``[{"name": "orders", "data": [{"id": "1", "total": "100"}],
            "query_mode": "local"}]``. Optional keys: ``data`` (default
            ``[]``), ``query_mode`` (default ``"local"``), ``description``
            (default ``""``).

    Returns:
        Path to the created ``extract.duckdb`` file.

    Behaviour per table:
      - non-empty ``data`` with ``query_mode == "local"``: the rows are
        written to ``data/<name>.parquet``, a view ``<name>`` is created
        over that file, and a ``_meta`` row records row count and file size.
        Every value is emitted as a SQL string literal.
      - otherwise (remote mode or no data): a placeholder table
        ``<name> (id VARCHAR)`` is created and ``_meta`` records zero
        rows and zero size with the given ``query_mode``.

    Existing ``_meta`` rows are deleted first, so the helper can be called
    repeatedly for the same source.
    """

    def _sql_literal(value) -> str:
        # Double embedded single quotes so values or paths containing an
        # apostrophe (e.g. "O'Brien") do not break the generated SQL.
        return "'" + f"{value}".replace("'", "''") + "'"

    def _sql_ident(identifier) -> str:
        # Quote identifiers so reserved words and unusual column or table
        # names are accepted; embedded double quotes are doubled.
        return '"' + f"{identifier}".replace('"', '""') + '"'

    source_dir = extracts_dir / source_name
    source_dir.mkdir(exist_ok=True)
    data_dir = source_dir / "data"
    data_dir.mkdir(exist_ok=True)

    db_path = source_dir / "extract.duckdb"
    conn = duckdb.connect(str(db_path))
    # try/finally releases the connection (and its file handle) even when
    # one of the statements below raises.
    try:
        conn.execute("""CREATE TABLE IF NOT EXISTS _meta (
            table_name VARCHAR, description VARCHAR, rows BIGINT,
            size_bytes BIGINT, extracted_at TIMESTAMP, query_mode VARCHAR DEFAULT 'local'
        )""")
        # Delete existing meta rows to allow re-calling
        conn.execute("DELETE FROM _meta")

        for t in tables:
            name = t["name"]
            rows_data = t.get("data", [])
            query_mode = t.get("query_mode", "local")

            if rows_data and query_mode == "local":
                # Write an actual parquet file built from the row dicts.
                pq_path = str(data_dir / f"{name}.parquet")
                selects = [
                    "SELECT "
                    + ", ".join(
                        f"{_sql_literal(v)} AS {_sql_ident(k)}" for k, v in row.items()
                    )
                    for row in rows_data
                ]
                union_sql = " UNION ALL ".join(selects)
                conn.execute(
                    f"COPY ({union_sql}) TO {_sql_literal(pq_path)} (FORMAT PARQUET)"
                )

                rows = len(rows_data)
                size = os.path.getsize(pq_path)
                conn.execute(
                    f"CREATE OR REPLACE VIEW {_sql_ident(name)} AS "
                    f"SELECT * FROM read_parquet({_sql_literal(pq_path)})"
                )
                conn.execute(
                    "INSERT INTO _meta VALUES (?, ?, ?, ?, current_timestamp, 'local')",
                    [name, t.get("description", ""), rows, size],
                )
            else:
                # Remote or empty table: placeholder table plus a zero-size meta row.
                conn.execute(
                    f"CREATE TABLE IF NOT EXISTS {_sql_ident(name)} (id VARCHAR)"
                )
                conn.execute(
                    "INSERT INTO _meta VALUES (?, ?, 0, 0, current_timestamp, ?)",
                    [name, t.get("description", ""), query_mode],
                )
    finally:
        conn.close()

    return db_path
|
|
|
|
|
|
def write_test_parquet(path: str, data: list[dict]):
    """Create a parquet file at ``path`` from a list of row dicts.

    Each dict becomes one row; keys are column names and every value is
    emitted as a SQL string literal. All rows are expected to share the
    same keys, because they are combined with ``UNION ALL``.

    Args:
        path: Destination parquet file path.
        data: Non-empty list of row dicts.

    Raises:
        ValueError: If ``data`` is empty. There are no columns to write, and
            the generated ``COPY`` statement would be malformed.
    """
    if not data:
        raise ValueError("write_test_parquet requires at least one row")

    def _sql_literal(value) -> str:
        # Double embedded single quotes so values or paths containing an
        # apostrophe do not break the generated SQL.
        return "'" + f"{value}".replace("'", "''") + "'"

    def _sql_ident(identifier) -> str:
        # Quote column names so reserved words are accepted.
        return '"' + f"{identifier}".replace('"', '""') + '"'

    selects = [
        "SELECT "
        + ", ".join(f"{_sql_literal(v)} AS {_sql_ident(k)}" for k, v in row.items())
        for row in data
    ]
    union_sql = " UNION ALL ".join(selects)

    conn = duckdb.connect()
    # try/finally closes the in-memory connection even if COPY fails.
    try:
        conn.execute(f"COPY ({union_sql}) TO {_sql_literal(path)} (FORMAT PARQUET)")
    finally:
        conn.close()
|
|
|
|
|
|
@pytest.fixture
def seeded_app(e2e_env):
    """FastAPI TestClient with seeded users + JWT tokens for all four legacy
    role tokens (admin, km_admin, analyst, viewer).

    v13: roles are no longer the auth source of truth. The admin user is
    placed in the Admin user_group; the others are Everyone-only members.
    Tokens for km_admin and viewer are kept so role-gating regression tests
    that still reference them keep passing — gate semantics still match
    where it matters (admin bypass, dataset_permissions checks).

    Returns a dict with keys ``client``, ``admin_token``, ``km_admin_token``,
    ``analyst_token``, ``viewer_token`` and ``env`` (the ``e2e_env`` dict).

    Raises:
        RuntimeError: If the system Admin group row is missing from
            ``user_groups``, so the admin user cannot be seeded.
    """
    from src.db import SYSTEM_ADMIN_GROUP, get_system_db
    from src.repositories.user_group_members import UserGroupMembersRepository
    from src.repositories.users import UserRepository
    from app.auth.jwt import create_access_token
    from app.main import create_app
    from fastapi.testclient import TestClient

    conn = get_system_db()
    # try/finally closes the connection even if seeding raises.
    try:
        repo = UserRepository(conn)
        repo.create(id="admin1", email="admin@test.com", name="Admin")
        repo.create(id="km_admin1", email="km@test.com", name="KM Admin")
        repo.create(id="analyst1", email="analyst@test.com", name="Analyst")
        repo.create(id="viewer1", email="viewer@test.com", name="Viewer")

        admin_row = conn.execute(
            "SELECT id FROM user_groups WHERE name = ?", [SYSTEM_ADMIN_GROUP]
        ).fetchone()
        if admin_row is None:
            # Fail with a clear message instead of "'NoneType' object is
            # not subscriptable" when the group row is missing.
            raise RuntimeError(
                f"system user_group {SYSTEM_ADMIN_GROUP!r} not found; cannot seed admin user"
            )
        admin_gid = admin_row[0]
        UserGroupMembersRepository(conn).add_member(
            "admin1",
            admin_gid,
            source="system_seed",
        )
    finally:
        conn.close()

    app = create_app()
    client = TestClient(app)
    admin_token = create_access_token("admin1", "admin@test.com")
    km_admin_token = create_access_token("km_admin1", "km@test.com")
    analyst_token = create_access_token("analyst1", "analyst@test.com")
    viewer_token = create_access_token("viewer1", "viewer@test.com")

    return {
        "client": client,
        "admin_token": admin_token,
        "km_admin_token": km_admin_token,
        "analyst_token": analyst_token,
        "viewer_token": viewer_token,
        "env": e2e_env,
    }
|
|
|
|
|
|
@pytest.fixture
def mock_extract_factory(e2e_env):
    """Factory fixture for creating mock extract.duckdb files.

    Returns a callable: factory(source_name, tables, remote_attach=None)
      - source_name: str — name of the connector source directory
      - tables: list[dict] — same format as create_mock_extract
      - remote_attach: list[dict] | None — rows for _remote_attach table,
        each dict with keys: alias, extension, url, token_env

    The callable returns the path to the created extract.duckdb. Extracts
    are written under ``e2e_env["extracts_dir"]``.
    """

    def _factory(source_name: str, tables: list[dict], remote_attach=None):
        db_path = create_mock_extract(e2e_env["extracts_dir"], source_name, tables)
        if remote_attach:
            conn = duckdb.connect(str(db_path))
            # try/finally releases the connection even if a row is missing
            # a key or an insert fails.
            try:
                conn.execute("""CREATE TABLE IF NOT EXISTS _remote_attach (
                    alias VARCHAR,
                    extension VARCHAR,
                    url VARCHAR,
                    token_env VARCHAR
                )""")
                for row in remote_attach:
                    conn.execute(
                        "INSERT INTO _remote_attach VALUES (?, ?, ?, ?)",
                        [row["alias"], row["extension"], row["url"], row["token_env"]],
                    )
            finally:
                conn.close()
        return db_path

    return _factory
|
|
|
|
|
|
@pytest.fixture
def analyst_user(seeded_app):
    """Return bearer-auth headers built from the seeded analyst token."""
    return {"Authorization": f"Bearer {seeded_app['analyst_token']}"}
|
|
|
|
|
|
@pytest.fixture
def admin_user(seeded_app):
    """Return bearer-auth headers built from the seeded admin token."""
    return {"Authorization": f"Bearer {seeded_app['admin_token']}"}
|
|
|
|
|
|
import contextlib as _contextlib
|
|
|
|
|
|
@pytest.fixture
def bq_access():
    """Yield a builder that creates a BqAccess and installs it as the
    FastAPI dependency override for ``get_bq_access``.

    Usage:
        def test_x(bq_access):
            mock_client = MagicMock()
            bq = bq_access(client=mock_client)
            # endpoint test code

    Builder keyword arguments:
      - client: object returned by the client factory; when None, no
        ``client_factory`` is passed (BqAccess receives ``None``).
      - duckdb_conn: connection handed out by the session factory; when
        None, no ``duckdb_session_factory`` is passed.
      - billing / data: project ids for ``BqProjects`` (defaults
        "test-billing" / "test-data").

    The override is removed on fixture teardown.

    NOTE: `contextlib.nullcontext(duckdb_conn)` does NOT close the conn on exit.
    The production path closes via _default_duckdb_session_factory. Tests that
    care about close behavior should use that factory directly (see
    tests/test_bq_access.py::TestDefaultDuckdbSessionFactory).
    """
    from connectors.bigquery.access import BqAccess, BqProjects, get_bq_access
    from app.main import app

    def _build(*, client=None, duckdb_conn=None,
               billing="test-billing", data="test-data"):
        # Only supply a factory when the caller provided the matching
        # object; otherwise pass None through to BqAccess.
        client_factory = None
        if client is not None:
            def client_factory(projects):
                return client

        session_factory = None
        if duckdb_conn is not None:
            def session_factory(projects):
                return _contextlib.nullcontext(duckdb_conn)

        bq = BqAccess(
            BqProjects(billing=billing, data=data),
            client_factory=client_factory,
            duckdb_session_factory=session_factory,
        )
        app.dependency_overrides[get_bq_access] = lambda: bq
        return bq

    yield _build

    # Teardown: drop the override (no-op if the builder was never called).
    from app.main import app as _app

    _app.dependency_overrides.pop(get_bq_access, None)
|
|
|
|
|
|
@pytest.fixture
def bq_instance(monkeypatch):
    """Make the instance config look like a BigQuery deployment for one test.

    Replaces ``app.instance_config.load_instance_config`` with a stub that
    returns a fake config (``data_source.type == "bigquery"``, project
    ``my-test-project``, location ``us``). The config cache is reset before
    and after the test. The on-disk instance.yaml is not touched. The intent
    is that /admin/server-config reads and
    ``get_value('data_source.bigquery.project')`` see the fake values.

    Tests that need BigQuery-specific admin API behaviour (project_id
    validation, materialized source_query checks, etc.) depend on this
    fixture. Yields the fake config dict so callers can inspect it.

    Note: several test files (test_admin_bq_register.py,
    test_admin_tables_ui_materialized.py, …) define their own local
    ``bq_instance`` fixture. Those local definitions shadow this one inside
    those files; this conftest copy serves every other test file.
    """
    bigquery_settings = {"project": "my-test-project", "location": "us"}
    fake_cfg = {
        "data_source": {
            "type": "bigquery",
            "bigquery": bigquery_settings,
        },
    }

    def _load_fake_config():
        return fake_cfg

    # raising=False: do not fail if the attribute is absent on the module.
    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        _load_fake_config,
        raising=False,
    )

    from app.instance_config import reset_cache

    reset_cache()
    yield fake_cfg
    reset_cache()
|
|
|
|
|
|
@pytest.fixture
def stub_bq_extractor(monkeypatch):
    """Keep the post-register rebuild path offline (mirrors
    tests/test_admin_bq_register.py).

    Required whenever a test seeds a remote-mode BQ row via the HTTP API.

    Patches:
      - ``connectors.bigquery.extractor.rebuild_from_registry`` — replaced
        by a MagicMock returning a minimal success dict, so the admin
        register endpoint's 200/201 path completes without touching a real
        BQ project.
      - ``src.orchestrator.SyncOrchestrator`` — replaced by a callable that
        returns a fresh MagicMock, so the post-register
        orchestrator.rebuild() call doesn't scan the (empty) extracts
        directory during tests.

    Returns the ``rebuild_from_registry`` MagicMock. Callers that only need
    the patching can ignore it; callers that want to assert on call args
    can inspect it.
    """
    canned_result = {
        "project_id": "my-test-project",
        "tables_registered": 1, "errors": [], "skipped": False,
    }
    rebuild_mock = MagicMock(return_value=canned_result)

    def _noop_orchestrator(*a, **kw):
        # Accept any constructor arguments; every call yields a new mock.
        return MagicMock()

    monkeypatch.setattr(
        "connectors.bigquery.extractor.rebuild_from_registry",
        rebuild_mock,
    )
    monkeypatch.setattr(
        "src.orchestrator.SyncOrchestrator",
        _noop_orchestrator,
    )
    return rebuild_mock
|