agnes-the-ai-analyst/tests/test_journey_sync_query.py
ZdenekSrotyr 7967279181 test: add E2E journey tests (J1-J8) covering full user flows
40 tests across 8 files covering bootstrap/auth, sync+query, hybrid
queries, RBAC+access-requests, Jira webhooks, corporate memory,
analyst uploads, and multi-source orchestration. Adds mock_extract_factory
and admin_user fixtures to conftest, and journey marker to pytest.ini.
2026-04-12 11:13:51 +02:00

146 lines
4.8 KiB
Python

"""J2 — Sync & Query journey tests.
Complete flow: register table → create mock extract → rebuild orchestrator →
query data via API → verify catalog listing.
"""
import pytest
from tests.conftest import create_mock_extract
def _auth(token: str) -> dict:
return {"Authorization": f"Bearer {token}"}
@pytest.mark.journey
class TestSyncAndQuery:
    """J2 journey: register a table, sync a mock extract, query it via the API.

    Every test reads ``client`` and ``admin_token`` from the ``seeded_app``
    fixture; tests that rebuild the orchestrator also read ``env`` and use
    ``env["analytics_db"]`` as the analytics database path.
    """

    def test_register_create_rebuild_query(self, seeded_app, mock_extract_factory):
        """Full flow: register → mock extract → rebuild → query rows."""
        c = seeded_app["client"]
        t = seeded_app["admin_token"]
        env = seeded_app["env"]

        # Step 1: register table
        resp = c.post(
            "/api/admin/register-table",
            json={
                "name": "orders",
                "source_type": "keboola",
                "bucket": "in.c-crm",
                "source_table": "orders",
                "query_mode": "local",
            },
            headers=_auth(t),
        )
        assert resp.status_code == 201

        # Step 2: create mock extract
        mock_extract_factory(
            "keboola",
            [
                {
                    "name": "orders",
                    "data": [
                        {"id": "1", "product": "Widget", "amount": "100"},
                        {"id": "2", "product": "Gadget", "amount": "200"},
                    ],
                }
            ],
        )

        # Step 3: rebuild orchestrator
        # NOTE(review): the import is kept function-local as in the original;
        # presumably so the module loads only after the fixtures have set up
        # the environment — confirm before hoisting it to module level.
        from src.orchestrator import SyncOrchestrator

        result = SyncOrchestrator(analytics_db_path=env["analytics_db"]).rebuild()
        assert "keboola" in result
        assert "orders" in result["keboola"]

        # Step 4: query data
        resp = c.post(
            "/api/query",
            json={"sql": "SELECT * FROM orders ORDER BY id"},
            headers=_auth(t),
        )
        assert resp.status_code == 200
        body = resp.json()
        assert body["row_count"] == 2
        assert "id" in body["columns"]

    def test_catalog_lists_registered_table(self, seeded_app):
        """After registration, table appears in /api/catalog/tables."""
        c = seeded_app["client"]
        t = seeded_app["admin_token"]

        resp = c.post(
            "/api/admin/register-table",
            json={
                "name": "customers",
                "source_type": "keboola",
                "query_mode": "local",
            },
            headers=_auth(t),
        )
        # Fail at the registration step itself rather than later at the
        # catalog lookup, where a rejected registration would be harder to
        # diagnose. 201 matches the status asserted for this endpoint in
        # test_register_create_rebuild_query.
        assert resp.status_code == 201

        resp = c.get("/api/catalog/tables", headers=_auth(t))
        assert resp.status_code == 200
        names = {tbl["name"] for tbl in resp.json()["tables"]}
        assert "customers" in names

    def test_query_blocked_keywords(self, seeded_app):
        """Each listed DDL/DML/file-reading statement is rejected with HTTP 400."""
        c = seeded_app["client"]
        t = seeded_app["admin_token"]

        for bad_sql in [
            "DROP TABLE orders",
            "INSERT INTO orders VALUES (1)",
            "SELECT * FROM read_parquet('/tmp/x.parquet')",
        ]:
            resp = c.post("/api/query", json={"sql": bad_sql}, headers=_auth(t))
            assert resp.status_code == 400, f"Expected 400 for: {bad_sql}"

    def test_manifest_reflects_synced_tables(self, seeded_app, mock_extract_factory):
        """After rebuild, manifest includes synced table with correct row count."""
        c = seeded_app["client"]
        t = seeded_app["admin_token"]
        env = seeded_app["env"]

        mock_extract_factory(
            "keboola",
            [
                {
                    "name": "products",
                    "data": [
                        {"id": "1", "name": "Alpha"},
                        {"id": "2", "name": "Beta"},
                        {"id": "3", "name": "Gamma"},
                    ],
                }
            ],
        )

        from src.orchestrator import SyncOrchestrator

        SyncOrchestrator(analytics_db_path=env["analytics_db"]).rebuild()

        resp = c.get("/api/sync/manifest", headers=_auth(t))
        assert resp.status_code == 200
        # The manifest's "tables" entry is indexed by table name; each entry
        # carries a "rows" count.
        tables = resp.json()["tables"]
        assert "products" in tables
        assert tables["products"]["rows"] == 3

    def test_query_empty_result(self, seeded_app, mock_extract_factory):
        """A query whose filter matches no rows returns an empty result set.

        The synced table holds one row; the WHERE clause excludes it, so the
        API must answer 200 with ``row_count == 0`` rather than an error.
        """
        c = seeded_app["client"]
        t = seeded_app["admin_token"]
        env = seeded_app["env"]

        mock_extract_factory(
            "keboola",
            [{"name": "empty_table", "data": [{"id": "1", "val": "x"}]}],
        )

        from src.orchestrator import SyncOrchestrator

        SyncOrchestrator(analytics_db_path=env["analytics_db"]).rebuild()

        resp = c.post(
            "/api/query",
            json={"sql": "SELECT * FROM empty_table WHERE id = 'nonexistent'"},
            headers=_auth(t),
        )
        assert resp.status_code == 200
        assert resp.json()["row_count"] == 0