"""Repository + freshness tests for the persistent BQ metadata cache.""" from datetime import datetime, timedelta, timezone import pytest from src.repositories.bq_metadata_cache import BqMetadataCacheRepository def test_upsert_success_inserts_then_updates(seeded_app): from src.db import get_system_db conn = get_system_db() try: repo = BqMetadataCacheRepository(conn) repo.upsert_success( "orders", rows=10, size_bytes=2048, partition_by="event_date", clustered_by=["country"], ) row = repo.get("orders") assert row is not None assert row["rows"] == 10 assert row["size_bytes"] == 2048 assert row["partition_by"] == "event_date" assert row["clustered_by"] == ["country"] assert row["refreshed_at"] is not None assert row["error_at"] is None # Update with new numbers; refreshed_at advances. first_refresh = row["refreshed_at"] repo.upsert_success( "orders", rows=20, size_bytes=4096, partition_by=None, clustered_by=[], ) row2 = repo.get("orders") assert row2["rows"] == 20 assert row2["partition_by"] is None assert row2["clustered_by"] == [] assert row2["refreshed_at"] >= first_refresh finally: conn.close() def test_mark_error_preserves_prior_success(seeded_app): """After a successful refresh, a subsequent failure must keep the rows/size_bytes columns untouched — analyst Claude keeps using the last-known-good numbers while the next scheduled retry attempts to recover.""" from src.db import get_system_db conn = get_system_db() try: repo = BqMetadataCacheRepository(conn) repo.upsert_success( "orders", rows=100, size_bytes=1000, partition_by=None, clustered_by=None, ) repo.mark_error("orders", "BQ timeout") row = repo.get("orders") assert row["rows"] == 100, "prior success must be preserved across error" assert row["size_bytes"] == 1000 assert row["error_at"] is not None assert row["error_msg"] == "BQ timeout" # Subsequent success clears the error. repo.upsert_success( "orders", rows=200, size_bytes=2000, partition_by=None, clustered_by=None, ) row2 = repo.get("orders") assert row2["rows"] == 200 assert row2["error_at"] is None assert row2["error_msg"] is None finally: conn.close() def test_mark_error_truncates_long_messages(seeded_app): from src.db import get_system_db conn = get_system_db() try: repo = BqMetadataCacheRepository(conn) repo.mark_error("orders", "x" * 2000) row = repo.get("orders") assert len(row["error_msg"]) == 512 finally: conn.close() def test_list_all_orders_by_table_id(seeded_app): from src.db import get_system_db conn = get_system_db() try: repo = BqMetadataCacheRepository(conn) repo.upsert_success( "zeta", rows=1, size_bytes=1, partition_by=None, clustered_by=None, ) repo.upsert_success( "alpha", rows=2, size_bytes=2, partition_by=None, clustered_by=None, ) rows = repo.list_all() ids = [r["table_id"] for r in rows] assert ids == sorted(ids) finally: conn.close() def test_delete_removes_row(seeded_app): from src.db import get_system_db conn = get_system_db() try: repo = BqMetadataCacheRepository(conn) repo.upsert_success( "orders", rows=1, size_bytes=1, partition_by=None, clustered_by=None, ) repo.delete("orders") assert repo.get("orders") is None finally: conn.close() # ─── compute_freshness ──────────────────────────────────────────────────── def test_freshness_never_fetched_for_missing_row(): from app.api.bq_metadata_refresh import compute_freshness assert compute_freshness(None) == "never_fetched" def test_freshness_never_fetched_for_no_refresh_no_error(): from app.api.bq_metadata_refresh import compute_freshness row = {"refreshed_at": None, "error_at": None} assert compute_freshness(row) == "never_fetched" def test_freshness_error_when_only_error_present(): from app.api.bq_metadata_refresh import compute_freshness row = { "refreshed_at": None, "error_at": datetime.now(timezone.utc), } assert compute_freshness(row) == "error" def test_freshness_fresh_within_threshold(): from app.api.bq_metadata_refresh import compute_freshness now = datetime.now(timezone.utc) row = { "refreshed_at": now - timedelta(seconds=60), "error_at": None, } # 1-minute-old row with a 1-hour threshold ⇒ fresh. assert compute_freshness(row, now=now, fresh_threshold=3600) == "fresh" def test_freshness_stale_beyond_threshold(): from app.api.bq_metadata_refresh import compute_freshness now = datetime.now(timezone.utc) row = { "refreshed_at": now - timedelta(hours=10), "error_at": None, } assert compute_freshness(row, now=now, fresh_threshold=3600) == "stale"