Devin's second review pass on commit 16938ae7 surfaced 2 more issues:
BUG_pr-review-job-58ae3148_0001 — non-BQ materialized via PUT bypasses source_query check
app/api/admin.py update_table only enforces 'query_mode=materialized
requires source_query' for source_type='bigquery' rows (via the
synthetic RegisterTableRequest at line 2129+). Non-BQ source types
(Keboola) skip the check — admin could PUT {query_mode: materialized}
on a Keboola local row without source_query, persist successfully,
then crash at the next sync tick when kb_materialize_query received
sql=None and DuckDB rejected COPY (None) TO '...'.
Fix: generic coherence guard before the BQ-specific block — for ALL
source types, query_mode='materialized' requires non-empty source_query
in the merged record. Returns 422 with a hint about reverting via
query_mode='local'/'remote'.
ANALYSIS_pr-review-job-642ff90f_0007 — diagnose returns 'ok' on BQ resolution failure
app/api/health.py:_check_bq_billing_project caught get_bq_access()
exceptions and returned status='ok' with a 'could not resolve' detail.
Automated alerting keyed on status != 'ok' would silently miss missing
google-cloud-bigquery, auth failures, or malformed config. Fix: return
status='unknown' on resolution failure — surfaces it on operator
dashboards without promoting the overall health to 'degraded' (which
'warning' does, intentionally for the billing==project case).
Tests:
- test_update_keboola_to_materialized_without_source_query_rejected:
PUT {query_mode: materialized} on a Keboola local row returns 422
with 'source_query' in the detail
- test_diagnose_returns_unknown_status_when_bq_resolution_fails:
when get_bq_access raises, the bq_config service entry surfaces
status='unknown' (not 'ok')
Full sweep: 2507 passed, 25 skipped, 0 failed (+2 from previous sweep
because of the 2 new regression tests; 8 pre-existing internal_roles
schema-migration failures still ignored per task brief).
156 lines
5.4 KiB
Python
156 lines
5.4 KiB
Python
"""Phase K — `da diagnose` warning when BQ billing_project == project.
|
|
|
|
Surfaces via /api/health/detailed (which `da diagnose` already consumes):
|
|
when data_source.type == 'bigquery' and the resolved BqProjects.billing equals
|
|
BqProjects.data, the response includes a `services.bq_config` entry with
|
|
status='warning' and a hint about the 403 USER_PROJECT_DENIED footgun.
|
|
"""
|
|
|
|
import pytest
|
|
|
|
|
|
def _auth(token: str) -> dict:
|
|
return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
def _patch_instance_config(monkeypatch, cfg: dict) -> None:
    """Make ``app.instance_config.load_instance_config`` return *cfg*.

    Also removes the ``DATA_SOURCE`` and ``BIGQUERY_PROJECT`` environment
    variables and then calls ``app.instance_config.reset_cache()``.

    NOTE(review): this helper is also meant to leave the ``@functools.cache``
    on ``connectors.bigquery.access.get_bq_access`` cleared so each test sees
    fresh BqProjects; presumably ``reset_cache()`` takes care of that —
    confirm.
    """
    def _fake_loader():
        return cfg

    monkeypatch.setattr(
        "app.instance_config.load_instance_config",
        _fake_loader,
        raising=False,
    )

    # A DATA_SOURCE value exported in the developer's shell would override
    # get_data_source_type, so drop it (and BIGQUERY_PROJECT) to keep the
    # tests deterministic.
    for env_name in ("DATA_SOURCE", "BIGQUERY_PROJECT"):
        monkeypatch.delenv(env_name, raising=False)

    from app.instance_config import reset_cache

    reset_cache()
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_after(monkeypatch):
    """Autouse teardown: reset the instance-config cache once each test ends."""
    yield
    # Best-effort cleanup so the next test (or an unrelated suite that runs
    # afterwards) starts from fresh config; any failure here is deliberately
    # ignored rather than turned into a teardown error.
    try:
        import app.instance_config as _instance_config

        _instance_config.reset_cache()
    except Exception:
        pass
|
|
|
|
|
|
def test_diagnose_warns_when_billing_equals_project(seeded_app, monkeypatch):
    """BigQuery config whose billing_project equals project → 'warning' status."""
    bigquery_section = {
        "project": "shared-data-prod",
        "billing_project": "shared-data-prod",
    }
    _patch_instance_config(
        monkeypatch,
        {"data_source": {"type": "bigquery", "bigquery": bigquery_section}},
    )

    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]
    response = client.get("/api/health/detailed", headers=_auth(admin_token))
    assert response.status_code == 200, response.text

    payload = response.json()
    bq_entry = payload.get("services", {}).get("bq_config")
    assert bq_entry is not None, payload
    assert bq_entry.get("status") == "warning", bq_entry

    # The detail/hint text has to name the YAML field so operators know
    # which setting to change.
    searchable = " ".join(
        str(bq_entry.get(field, "")) for field in ("detail", "hint")
    ).lower()
    assert "billing_project" in searchable, bq_entry
|
|
|
|
|
|
def test_diagnose_clean_when_billing_differs(seeded_app, monkeypatch):
    """A billing_project distinct from project must not produce a warning."""
    bigquery_section = {
        "project": "data-prod",
        "billing_project": "billing-dev",
    }
    _patch_instance_config(
        monkeypatch,
        {"data_source": {"type": "bigquery", "bigquery": bigquery_section}},
    )

    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]
    response = client.get("/api/health/detailed", headers=_auth(admin_token))
    assert response.status_code == 200, response.text

    payload = response.json()
    bq_entry = payload.get("services", {}).get("bq_config")
    # A missing entry is acceptable (nothing to warn about); when the entry
    # is present its status has to be 'ok'.
    assert bq_entry is None or bq_entry.get("status") == "ok", bq_entry
|
|
|
|
|
|
def test_diagnose_no_warning_on_keboola_instance(seeded_app, monkeypatch):
    """On a Keboola instance the BQ billing check must never report 'warning'."""
    keboola_cfg = {"data_source": {"type": "keboola"}}
    _patch_instance_config(monkeypatch, keboola_cfg)

    client = seeded_app["client"]
    admin_token = seeded_app["admin_token"]
    response = client.get("/api/health/detailed", headers=_auth(admin_token))
    assert response.status_code == 200, response.text

    payload = response.json()
    bq_entry = payload.get("services", {}).get("bq_config")
    # The entry may be absent or carry some non-warning status (e.g. 'ok'
    # for n/a); the only forbidden outcome is 'warning'.
    assert bq_entry is None or bq_entry.get("status") != "warning", bq_entry
|
|
|
|
|
|
def test_diagnose_returns_unknown_status_when_bq_resolution_fails(seeded_app, monkeypatch):
    """Devin finding 2026-05-01 (ANALYSIS_pr-review-job-642ff90f_0007):
    if get_bq_access() raises (missing google-cloud-bigquery, auth error,
    malformed config), the bq_config check must NOT report status='ok' —
    automated alerting keyed on `status != 'ok'` would silently miss the
    failure. Use 'unknown' so dashboards surface it without promoting the
    overall check to 'degraded' (which 'warning' would do)."""
    # Use the shared helper rather than patching load_instance_config by
    # hand: it also strips DATA_SOURCE / BIGQUERY_PROJECT from the
    # environment, so a value exported in the developer's shell cannot
    # switch the instance away from BigQuery and make this test flaky.
    _patch_instance_config(monkeypatch, {
        "data_source": {
            "type": "bigquery",
            "bigquery": {"project": "p"},
        },
    })

    # Force get_bq_access to raise.
    def _raise(*a, **kw):
        raise RuntimeError("simulated bq lib missing")

    import connectors.bigquery.access as bq_access_mod
    monkeypatch.setattr(bq_access_mod, "get_bq_access", _raise)

    # No explicit reset_cache() at the end: the autouse _reset_after
    # fixture in this module resets the config cache after every test.
    c = seeded_app["client"]
    token = seeded_app["admin_token"]
    r = c.get("/api/health/detailed", headers=_auth(token))
    assert r.status_code == 200, r.text

    body = r.json()
    bq_check = body.get("services", {}).get("bq_config")
    assert bq_check is not None, body
    # Must NOT be 'ok' — that would mask the failure from alerting.
    assert bq_check.get("status") == "unknown", bq_check
    # str() keeps a null/non-string detail from raising AttributeError
    # instead of producing a readable assertion failure.
    assert "could not resolve" in str(bq_check.get("detail", "")).lower(), bq_check
|