agnes-the-ai-analyst/tests/test_admin_server_config.py
ZdenekSrotyr 28aba4c1f9 fix(query): #168 review iter 3 — RBAC name-vs-id, placeholder dead code
Devin Review iter #3 found 3 new real bugs after iter #2's fixes landed.

🔴 RBAC check at app/api/query.py:362 used `row["name"]` against
`accessible_set`, but `accessible_set` is keyed by registry IDs
(`get_accessible_tables` returns `resource_grants.resource_id` —
table IDs, not display names). Confirmed by `_table_blocks` projection
at `app/resource_types.py:157-158`. When `id != name` (e.g.
`id="bq.finance.ue", name="ue"`), non-admin users with valid grants
got 403 `bq_path_access_denied`. Switch to `row["id"]`.

🚩 Bare-name pass at app/api/query.py:332 had the same name-vs-id
mismatch (different impact): legitimate accessible rows were skipped
from `dry_run_set`, so the cost guardrail under-counted scan bytes
for non-admin users. Could let an over-cap query through and
under-bill quota. Switch to `row_id` comparison.

🟡 `placeholder_from` for billing_project was dead code.
`_BQ_OPTIONAL_FIELD_DEFAULTS["billing_project"] = ""` seeded an empty
string into every GET payload via `_ensure_bq_optional_fields`. JS
`isUnset = (value === undefined)` evaluated False, so the
`(defaults to <project>)` placeholder NEVER rendered. Drop the seed —
field stays in `known_fields` (UI sees it) but routes through the
unset rendering path on GET, where placeholder_from fires.

Tests: test_get_surfaces_bq_fields_even_when_unset assertion flipped
from "billing_project IS present" to "billing_project NOT auto-seeded"
to lock in the new shape. 67 affected tests pass.
2026-05-04 13:51:36 +02:00

943 lines
42 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for the /admin/server-config UI page and its companion API.
Covers:
- Auth gate on both the HTML page and GET/POST /api/admin/server-config.
- The page-shell markers the JS hangs off (so we don't break the contract
by renaming an ID/data-attribute without updating the script).
- GET response shape: editable_sections + per-section dict + secrets
redacted.
- POST request shape: section patch validates, danger-zone gate enforced,
unknown sections rejected.
- Audit log entry: written on every save with sanitized diff (secret
fields masked, non-secret diff present).
- instance.yaml write happens at DATA_DIR/state/instance.yaml.
"""
import json
import uuid
from pathlib import Path
import pytest
import yaml
def _auth(token):
return {"Authorization": f"Bearer {token}"}
# --- HTML page ----------------------------------------------------------------
class TestServerConfigPageAuth:
def test_admin_can_load_page(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
# JWT in cookie so the auth-redirect middleware doesn't bounce us
# to /login (the same pattern the existing admin-UI tests use).
c.cookies.set("access_token", token)
try:
resp = c.get("/admin/server-config", headers={"Accept": "text/html"})
finally:
c.cookies.clear()
assert resp.status_code == 200, resp.text
body = resp.text
# Page-shell markers the JS targets.
assert 'data-page="server-config"' in body
assert "Server configuration" in body
# Form skeleton: loader + per-section container.
assert 'id="cfg-loading"' in body
assert 'id="cfg-sections"' in body
# Restart-required messaging on the page (issue #91 acceptance).
assert "restart" in body.lower()
# Danger-zone modal scaffolding.
assert 'id="danger-modal"' in body
assert 'id="danger-confirm-btn"' in body
# Endpoint constant — guards against URL drift between UI and API.
assert '/api/admin/server-config' in body
def test_non_admin_cannot_load_page(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["analyst_token"]
c.cookies.set("access_token", token)
try:
resp = c.get(
"/admin/server-config",
headers={"Accept": "text/html"},
follow_redirects=False,
)
finally:
c.cookies.clear()
# require_role(Role.ADMIN) → 403 for non-admins.
assert resp.status_code in (302, 401, 403), resp.text
def test_unauthenticated_redirects(self, seeded_app):
c = seeded_app["client"]
resp = c.get(
"/admin/server-config",
headers={"Accept": "text/html"},
follow_redirects=False,
)
assert resp.status_code in (302, 303, 401, 403), resp.text
class TestServerConfigPageRendersSections:
"""The page is shell-only — sections are rendered client-side from the
GET response. The static HTML still ships every section's title in
SECTION_META so the JS can label them; verify the labels are present."""
def test_page_includes_all_section_titles(self, seeded_app):
c = seeded_app["client"]
c.cookies.set("access_token", seeded_app["admin_token"])
try:
resp = c.get("/admin/server-config")
finally:
c.cookies.clear()
assert resp.status_code == 200
body = resp.text
# The eight editable sections appear in the JS SECTION_META map.
for section in (
"instance", "data_source", "email", "telegram",
"jira", "theme", "server", "auth",
):
assert f'{section}:' in body, f"section meta missing: {section}"
def test_page_marks_danger_sections_in_js(self, seeded_app):
c = seeded_app["client"]
c.cookies.set("access_token", seeded_app["admin_token"])
try:
resp = c.get("/admin/server-config")
finally:
c.cookies.clear()
body = resp.text
# auth + server are flagged danger client-side too.
assert 'DANGER_SECTIONS = new Set(["auth", "server"])' in body
# --- GET /api/admin/server-config --------------------------------------------
class TestGetServerConfigAPI:
def test_get_requires_auth(self, seeded_app):
c = seeded_app["client"]
resp = c.get("/api/admin/server-config")
assert resp.status_code == 401
def test_get_requires_admin(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["analyst_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 403
def test_get_returns_section_envelope(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 200
data = resp.json()
assert "sections" in data
assert "editable_sections" in data
assert "danger_sections" in data
assert isinstance(data["sections"], dict)
# Every editable section is surfaced (empty dict when unset) so the
# UI can render headers without hitting "section missing" branches.
for section in data["editable_sections"]:
assert section in data["sections"]
# Danger zone is the documented subset.
assert set(data["danger_sections"]) == {"auth", "server"}
def test_get_redacts_secret_fields(self, seeded_app, monkeypatch, tmp_path):
"""Pre-populate instance.yaml with a secret; GET must not leak it."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
(state / "instance.yaml").write_text(yaml.dump({
"instance": {"name": "Acme Analyst"},
"email": {"smtp_host": "smtp.example.com", "smtp_password": "supersecret"},
"auth": {"google_client_secret": "oauth-secret-value", "allowed_domain": "example.com"},
"telegram": {"bot_token": "telegram-token-value"},
}))
# Bust the in-process cache so the next GET re-reads the file.
import app.instance_config as ic
ic._instance_config = None
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 200
data = resp.json()
body = json.dumps(data)
# Cleartext values for any secret-looking key must NOT appear.
for leak in ("supersecret", "oauth-secret-value", "telegram-token-value"):
assert leak not in body, f"redaction failed: '{leak}' leaked in {body}"
# Non-secret fields pass through.
assert data["sections"]["instance"]["name"] == "Acme Analyst"
assert data["sections"]["email"]["smtp_host"] == "smtp.example.com"
assert data["sections"]["auth"]["allowed_domain"] == "example.com"
# --- POST /api/admin/server-config -------------------------------------------
class TestPostServerConfigAPI:
def test_post_requires_auth(self, seeded_app):
c = seeded_app["client"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "X"}}},
)
assert resp.status_code == 401
def test_post_requires_admin(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["analyst_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "X"}}},
headers=_auth(token),
)
assert resp.status_code == 403
def test_post_empty_sections_returns_422(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {}},
headers=_auth(token),
)
assert resp.status_code == 422
def test_post_unknown_section_rejected(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"thmee": {"primary": "#000"}}}, # typo
headers=_auth(token),
)
assert resp.status_code == 400
assert "thmee" in resp.json()["detail"]
def test_post_danger_section_without_confirmation_rejected(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"auth": {"allowed_domain": "evil.example.com"}}},
headers=_auth(token),
)
assert resp.status_code == 400
assert "confirm_danger" in resp.json()["detail"]
def test_post_danger_section_with_confirmation_accepted(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={
"sections": {"auth": {"allowed_domain": "example.com"}},
"confirm_danger": True,
},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
data = resp.json()
assert data["status"] == "ok"
assert data["restart_required"] is True
assert "auth" in data["sections_updated"]
def test_post_writes_instance_yaml(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "New Name", "subtitle": "Sub"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
written = tmp_path / "state" / "instance.yaml"
assert written.exists(), "instance.yaml should be written to DATA_DIR/state/"
loaded = yaml.safe_load(written.read_text())
assert loaded["instance"]["name"] == "New Name"
assert loaded["instance"]["subtitle"] == "Sub"
def test_post_deep_merges_existing_sections(self, seeded_app, tmp_path, monkeypatch):
"""Saving one section must not wipe other sections that already
exist in the file. This is the regression that made us pick
deep-merge over wholesale replace."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
(state / "instance.yaml").write_text(yaml.dump({
"instance": {"name": "Original", "subtitle": "Keep me"},
"email": {"smtp_host": "smtp.example.com", "smtp_port": 587},
}))
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Patch ONLY instance.name — subtitle and the entire email section
# must survive.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "Renamed"}}},
headers=_auth(token),
)
assert resp.status_code == 200
loaded = yaml.safe_load((state / "instance.yaml").read_text())
assert loaded["instance"]["name"] == "Renamed"
assert loaded["instance"]["subtitle"] == "Keep me"
assert loaded["email"]["smtp_host"] == "smtp.example.com"
assert loaded["email"]["smtp_port"] == 587
def test_post_section_must_be_object(self, seeded_app):
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Pydantic accepts the request but our handler also validates the
# inner type — sending a string for a section value is a 422.
resp = c.post(
"/api/admin/server-config",
# Use a list — Pydantic Dict[str, Dict[str, Any]] coerces some
# types but a non-mapping fails validation outright.
json={"sections": {"instance": "not a dict"}},
headers=_auth(token),
)
# Either 422 (Pydantic validation) or 422-equivalent — both valid.
assert resp.status_code in (400, 422)
def test_corrupt_overlay_refused_with_500_not_silently_overwritten(
self, seeded_app, tmp_path, monkeypatch
):
"""A malformed overlay used to be silently replaced — that masked
disk corruption / partial writes / hand-edits and dropped every
previously-saved section on the next save. The endpoint must now
refuse with 500 so the operator can investigate."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
# Plant a deliberately broken YAML — unclosed brace + tab indent —
# so PyYAML raises rather than yielding a dict.
overlay_path = state / "instance.yaml"
overlay_path.write_text("instance: {name: 'good'\nauth:\n\tallowed_domain: bad")
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "New Name"}}},
headers=_auth(token),
)
assert resp.status_code == 500, resp.text
assert "corrupt overlay" in resp.json()["detail"]
# Critical: the corrupt file is NOT replaced. Operator gets a chance
# to back it up.
assert overlay_path.read_text().startswith("instance: {name: 'good'")
def test_round_tripped_redacted_secret_does_not_corrupt_overlay(
self, seeded_app, tmp_path, monkeypatch
):
"""Regression: GET redacts secret-keyed children in nested objects to
``"***"`` so the form never displays cleartext. The data_source
section renders as a JSON textarea, so a no-op save would otherwise
send the masked JSON back verbatim. Without scrub, the deep-merge
would persist ``token_env: "***"`` on top of the real
``"KEBOOLA_STORAGE_TOKEN"``, silently breaking the next sync. The
server-side scrub must drop the sentinel so the overlay value
survives a round-trip."""
import yaml as _yaml
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
# Plant a real overlay with the actual env-var name in token_env.
(state / "instance.yaml").write_text(_yaml.dump({
"data_source": {
"type": "keboola",
"keboola": {
"stack_url": "https://connection.keboola.com",
"token_env": "KEBOOLA_STORAGE_TOKEN",
},
},
}))
# Bust the in-process cache so the next read sees the planted overlay.
import app.instance_config as ic
ic._instance_config = None
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Simulate a UI round-trip: send back the redacted payload verbatim.
# The sentinel matches the string `_mask` produces.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"data_source": {"keboola": {
"stack_url": "https://connection.keboola.com",
"token_env": "***", # redaction sentinel — must not persist
}}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Real env-var name must survive on disk.
overlay = _yaml.safe_load((state / "instance.yaml").read_text())
assert overlay["data_source"]["keboola"]["token_env"] == "KEBOOLA_STORAGE_TOKEN", \
f"overlay corrupted by round-tripped sentinel: {overlay}"
@pytest.mark.parametrize("private_url", [
"http://169.254.169.254/latest/meta-data/",
"http://127.0.0.1:8080/",
"http://10.0.0.1/",
"http://localhost/",
])
def test_post_rejects_private_url_in_keboola_stack_url(
self, seeded_app, tmp_path, monkeypatch, private_url
):
"""SSRF gate symmetric with /configure: data_source.keboola.stack_url
must not point to private/loopback/link-local networks. Without this
check, the editor would let an admin sneak the GCE/EC2 metadata
endpoint or an internal service into the overlay and trigger SSRF on
the next sync."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"data_source": {"keboola": {"stack_url": private_url}}}},
headers=_auth(token),
)
assert resp.status_code == 400, resp.text
assert "private or reserved network" in resp.json()["detail"] \
or "could not resolve hostname" in resp.json()["detail"]
# Overlay must not have been written.
assert not (tmp_path / "state" / "instance.yaml").exists()
class TestPostServerConfigAuditLog:
def test_save_writes_audit_entry_with_sanitized_diff(self, seeded_app, tmp_path, monkeypatch):
"""Each save = one audit_log row tagged instance_config.update with
a diff that mentions changed fields by path but masks any secret
values."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Apply a patch that changes both a non-secret field and a secret.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"email": {
"smtp_host": "smtp.example.com",
"smtp_password": "fresh-secret-value-1234",
}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Pull the audit row directly from the DB — bypasses the API
# surface so we're verifying the actual persisted shape.
from src.db import get_system_db
conn = get_system_db()
try:
rows = conn.execute(
"SELECT user_id, action, resource, params FROM audit_log "
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
).fetchall()
finally:
conn.close()
assert rows, "audit_log should contain at least one instance_config.update row"
user_id, action, resource, params_raw = rows[0]
assert action == "instance_config.update"
assert resource == "instance.yaml"
assert user_id == "admin1" # seeded admin
params = json.loads(params_raw) if isinstance(params_raw, str) else params_raw
assert params["sections"] == ["email"]
assert isinstance(params["diff"], list)
# Diff includes the non-secret change verbatim.
smtp_diff = next((d for d in params["diff"] if d["path"] == "email.smtp_host"), None)
assert smtp_diff is not None
assert smtp_diff["after"] == "smtp.example.com"
# Secret diff path is present but value is MASKED — cleartext must
# never land in audit_log.
all_text = json.dumps(params)
assert "fresh-secret-value-1234" not in all_text, \
f"secret leaked into audit log params: {all_text}"
pwd_diff = next((d for d in params["diff"] if d["path"] == "email.smtp_password"), None)
assert pwd_diff is not None
assert pwd_diff["after"] == "***"
def test_diff_records_dict_to_scalar_replacement(self):
"""Regression: when one side is a dict and the other is a non-None
scalar, _diff_dicts used to recurse with the missing side coerced
to {} — losing the scalar entirely. Now the shape change is
recorded as a single replacement at the parent path."""
from app.api.admin import _diff_dicts
# dict → scalar
diff = _diff_dicts(
{"keboola": {"stack_url": "https://x", "token_env": "Y"}},
{"keboola": "disabled"},
)
# The whole 'keboola' entry should appear as a single shape change,
# not as per-field removals.
keboola_rows = [d for d in diff if d["path"] == "keboola"]
assert len(keboola_rows) == 1, f"missing dict→scalar row in {diff}"
assert keboola_rows[0]["after"] == "disabled"
def test_diff_records_scalar_to_dict_replacement(self):
from app.api.admin import _diff_dicts
diff = _diff_dicts(
{"keboola": "disabled"},
{"keboola": {"stack_url": "https://x"}},
)
keboola_rows = [d for d in diff if d["path"] == "keboola"]
assert len(keboola_rows) == 1
assert keboola_rows[0]["before"] == "disabled"
assert keboola_rows[0]["after"] == {"stack_url": "https://x"}
def test_diff_shape_change_redacts_secret_keyed_children(self):
"""Regression: when a section's shape flips between dict and scalar,
the dict side may carry secret-keyed children (token_env →
env-resolved cleartext). The shape-change branch used to log the
raw dict verbatim because the parent key wasn't itself secret-named,
so a wholesale replacement of `data_source.keboola: {...}` ->
`data_source.keboola: "disabled"` would dump every cleartext token
/ password the dict contained into audit_log.params. The branch now
runs `_redact` over the dict side so secret-keyed children are
masked even when the parent isn't."""
from app.api.admin import _diff_dicts
# dict-with-secret-child → scalar
diff = _diff_dicts(
{"keboola": {"stack_url": "https://x", "token_env": "hunter2-cleartext"}},
{"keboola": "disabled"},
)
row = next(d for d in diff if d["path"] == "keboola")
assert row["before"]["stack_url"] == "https://x" # non-secret stays
assert row["before"]["token_env"] == "***" # secret masked
assert "hunter2-cleartext" not in str(row), \
f"cleartext leaked into audit row: {row}"
# scalar → dict-with-secret-child (symmetric)
diff = _diff_dicts(
{"email": "disabled"},
{"email": {"smtp_host": "smtp.example.com", "smtp_password": "supersecret"}},
)
row = next(d for d in diff if d["path"] == "email")
assert row["after"]["smtp_host"] == "smtp.example.com"
assert row["after"]["smtp_password"] == "***"
assert "supersecret" not in str(row)
def test_overlay_does_not_resolve_env_var_placeholders(self, seeded_app, tmp_path, monkeypatch):
"""Regression: prior fix wrote every editable section from the merged
(env-resolved) config to the overlay. Static `smtp_password: ${SMTP_PASSWORD}`
would get persisted as the actual cleartext password — turning a
config-leak into a disk-leak. Now we write only the sections the
user explicitly patched, deep-merged onto whatever was already in
the overlay; static env-var placeholders never touch the overlay
unless the admin types a literal value to replace them."""
import yaml as _yaml
static_dir = tmp_path / "static"
static_dir.mkdir()
(static_dir / "instance.yaml").write_text(_yaml.dump({
"instance": {"name": "Old"},
"auth": {"allowed_domain": "example.com", "webapp_secret_key": "x"},
"server": {"host": "1.2.3.4", "hostname": "example.com"},
"email": {
"smtp_host": "smtp.example.com",
"smtp_password": "${SMTP_PASSWORD}",
},
}))
monkeypatch.setenv("DATA_DIR", str(tmp_path))
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
monkeypatch.setenv("SMTP_PASSWORD", "hunter2-cleartext-secret")
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
from pathlib import Path as _Path
import config.loader as _loader_mod
monkeypatch.setattr(_loader_mod, "CONFIG_DIR", _Path(static_dir))
from app.instance_config import reset_cache
reset_cache()
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Admin patches `instance.name` only — does NOT touch `email`.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "New"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
overlay_text = (tmp_path / "state" / "instance.yaml").read_text()
# The cleartext secret value MUST NOT appear in the overlay because
# the user didn't touch `email`. Only the patched section lands.
assert "hunter2-cleartext-secret" not in overlay_text, \
f"env-resolved secret leaked into overlay:\n{overlay_text}"
# And the email section shouldn't be there at all (user didn't patch it).
overlay = _yaml.safe_load(overlay_text)
assert "email" not in overlay, \
f"untouched section persisted to overlay: {overlay}"
# And `instance` IS persisted with the user's value.
assert overlay["instance"]["name"] == "New"
def test_load_instance_config_still_returns_static_sections_after_editor_save(self, seeded_app, tmp_path, monkeypatch):
"""End-to-end regression for the load_instance_config × narrow-overlay
bug: pre-fix, load_instance_config() returned the overlay verbatim
when it existed (no merge). Combined with the editor's narrow-write
strategy, the first save deleted static-only sections (datasets,
corporate_memory, openmetadata) from every runtime get_value() /
get_datasets() call. This test exercises the actual call path the
rest of the app uses, not just the overlay's on-disk shape."""
import yaml as _yaml
static_dir = tmp_path / "static"
static_dir.mkdir()
# Required fields per config.loader's strict validation, plus the
# static-only sections we want to assert flow through after a save.
(static_dir / "instance.yaml").write_text(_yaml.dump({
"instance": {"name": "Old"},
"auth": {"allowed_domain": "example.com", "webapp_secret_key": "x"},
"server": {"host": "1.2.3.4", "hostname": "example.com"},
"datasets": {"sales": {"primary_key": ["id"]}},
"corporate_memory": {"enabled": True, "retention_days": 90},
}))
monkeypatch.setenv("DATA_DIR", str(tmp_path))
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
# config.loader caches CONFIG_DIR at module import time, so the
# env-var override above doesn't propagate. Patch the module
# attribute directly so the static base resolves to our fixture.
from pathlib import Path as _Path
import config.loader as _loader_mod
monkeypatch.setattr(_loader_mod, "CONFIG_DIR", _Path(static_dir))
from app.instance_config import reset_cache
reset_cache()
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "New"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Now use the SAME function the rest of the app uses.
from app.instance_config import load_instance_config, get_value
reset_cache() # make sure we're reading fresh post-save
cfg = load_instance_config()
assert cfg.get("instance", {}).get("name") == "New", "edit didn't land"
assert "corporate_memory" in cfg, \
f"static-only section vanished after save: keys={list(cfg.keys())}"
assert cfg["corporate_memory"]["enabled"] is True
assert cfg["corporate_memory"]["retention_days"] == 90
assert "datasets" in cfg
assert cfg["datasets"]["sales"]["primary_key"] == ["id"]
# And via get_value (the path admin.py / sync.py / catalog actually use):
assert get_value("corporate_memory", "enabled") is True
assert get_value("datasets", "sales", "primary_key") == ["id"]
def test_overlay_does_not_shadow_static_non_editable_sections(self, seeded_app, tmp_path, monkeypatch):
"""Regression: writing the full merged config to the overlay would
snapshot non-editable sections (e.g. `corporate_memory`, `datasets`)
and silently shadow later updates to those sections in the static
file. Overlay must persist ONLY the editable sections so the static
file stays the source of truth for everything else."""
import yaml as _yaml
# Static config has both editable + non-editable sections.
static_dir = tmp_path / "static"
static_dir.mkdir()
static_path = static_dir / "instance.yaml"
static_path.write_text(_yaml.dump({
"instance": {"name": "Old"},
"datasets": {"sales": {"primary_key": ["id"]}},
"corporate_memory": {"enabled": True, "retention_days": 90},
}))
monkeypatch.setenv("DATA_DIR", str(tmp_path))
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
# Edit only an editable section.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"instance": {"name": "New"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Overlay should contain ONLY editable sections, not corporate_memory
# or datasets — those stay in the static file.
overlay = _yaml.safe_load((tmp_path / "state" / "instance.yaml").read_text())
assert "instance" in overlay
assert overlay["instance"]["name"] == "New"
assert "corporate_memory" not in overlay, \
f"non-editable section leaked into overlay: {overlay}"
assert "datasets" not in overlay
def test_secret_rotation_is_visible_in_audit_diff(self, seeded_app, tmp_path, monkeypatch):
"""Regression: pre-masking inputs to _diff_dicts collapses a
secret-rotation (password A → password B) into 'no diff' because
both sides redact to '***'. Compute diff on RAW values, redact
per-row at emit time, so a rotation row IS recorded (with both
sides masked but the path present)."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
# First save: set initial secret.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"email": {"smtp_password": "OLD_secret_value"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Second save: rotate to a different secret.
resp = c.post(
"/api/admin/server-config",
json={"sections": {"email": {"smtp_password": "NEW_rotated_secret"}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
from src.db import get_system_db
conn = get_system_db()
try:
row = conn.execute(
"SELECT params FROM audit_log "
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
).fetchone()
finally:
conn.close()
assert row is not None
params = json.loads(row[0]) if isinstance(row[0], str) else row[0]
# The rotation MUST appear as a diff row even though both sides
# are secrets — pre-fix would have produced an empty diff.
pwd_diffs = [d for d in params["diff"] if d["path"] == "email.smtp_password"]
assert len(pwd_diffs) == 1, f"rotation lost from audit diff: {params['diff']}"
assert pwd_diffs[0]["before"] == "***"
assert pwd_diffs[0]["after"] == "***"
# Neither cleartext leaks.
all_text = json.dumps(params)
assert "OLD_secret_value" not in all_text
assert "NEW_rotated_secret" not in all_text
def test_danger_save_records_danger_sections(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={
"sections": {"server": {"hostname": "data.example.com"}},
"confirm_danger": True,
},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
from src.db import get_system_db
conn = get_system_db()
try:
row = conn.execute(
"SELECT params FROM audit_log "
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
).fetchone()
finally:
conn.close()
assert row is not None
params = json.loads(row[0]) if isinstance(row[0], str) else row[0]
assert "server" in params["danger_sections"]
class TestRedactionHelpers:
"""Unit-level tests on the masking helpers — these run without the API
so a regression in the redaction logic is easy to spot."""
def test_is_secret_key_matches_common_patterns(self):
from app.api.admin import _is_secret_key
assert _is_secret_key("smtp_password")
assert _is_secret_key("google_client_secret")
assert _is_secret_key("api_token")
assert _is_secret_key("WEBAPP_SECRET_KEY")
assert _is_secret_key("anthropic_api_key")
assert not _is_secret_key("smtp_host")
assert not _is_secret_key("name")
assert not _is_secret_key("allowed_domain")
def test_redact_masks_nested_secrets(self):
from app.api.admin import _redact
before = {
"email": {"smtp_host": "h", "smtp_password": "leak"},
"auth": {"google_client_secret": "leak2", "allowed_domain": "example.com"},
}
after = _redact(before)
assert after["email"]["smtp_host"] == "h"
assert after["email"]["smtp_password"] == "***"
assert after["auth"]["google_client_secret"] == "***"
assert after["auth"]["allowed_domain"] == "example.com"
def test_diff_dicts_flat_paths(self):
from app.api.admin import _diff_dicts
before = {"email": {"smtp_host": "old", "smtp_port": 587}}
after = {"email": {"smtp_host": "new", "smtp_port": 587}}
diff = _diff_dicts(before, after)
assert diff == [{"path": "email.smtp_host", "before": "old", "after": "new"}]
def test_deep_merge_preserves_other_sections(self):
from app.api.admin import _deep_merge
base = {"a": {"x": 1, "y": 2}, "b": {"z": 3}}
patch = {"a": {"y": 99}}
out = _deep_merge(base, patch)
assert out == {"a": {"x": 1, "y": 99}, "b": {"z": 3}}
# --- Phase J: BQ fields exposure in /admin/server-config ---------------------
class TestServerConfigBigQueryFields:
"""Phase J — billing_project and max_bytes_per_materialize are surfaced
in the UI/API so an operator can set them without SSH'ing to the VM.
They were previously only addressable via direct YAML edits.
legacy_wrap_views was removed in #160 — VIEW/MAT_VIEW are now always
wrapped via bigquery_query() (the previous opt-in path)."""
def test_get_surfaces_bq_fields_even_when_unset(self, seeded_app, tmp_path, monkeypatch):
"""GET response always includes the BQ optional fields under
data_source.bigquery so the UI's JSON-textarea rendering shows them
as editable keys even when YAML omits them. Without this, the
operator has no UI hint that the knobs exist."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
# Plant a minimal instance.yaml that has data_source.bigquery but
# none of the optional fields set.
(state / "instance.yaml").write_text(yaml.dump({
"data_source": {
"type": "bigquery",
"bigquery": {"project": "my-data-prj", "location": "US"},
},
}))
import app.instance_config as ic
ic._instance_config = None
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 200, resp.text
bq = resp.json()["sections"]["data_source"]["bigquery"]
# `billing_project` intentionally NOT auto-seeded into GET payload
# — that would inject empty string and break the placeholder_from
# render path. The field still appears in `known_fields` (so the
# UI knows about it) and renders via the unset path with the
# `(defaults to <project>)` placeholder. Devin Review iter #3.
assert "billing_project" not in bq, \
f"billing_project should not be auto-seeded in sections; got: {bq}"
assert "max_bytes_per_materialize" in bq, \
f"max_bytes_per_materialize missing from GET: {bq}"
# legacy_wrap_views was removed in #160; UI must NOT surface it any
# longer (the wrap behavior is now unconditional).
assert "legacy_wrap_views" not in bq, \
f"legacy_wrap_views was removed in #160 but still present in GET: {bq}"
def test_get_preserves_existing_bq_field_values(self, seeded_app, tmp_path, monkeypatch):
"""When the operator HAS set the fields, GET must surface their actual
values, not the unset defaults."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True)
(state / "instance.yaml").write_text(yaml.dump({
"data_source": {
"type": "bigquery",
"bigquery": {
"project": "my-data-prj",
"billing_project": "my-billing-prj",
"max_bytes_per_materialize": 5368709120, # 5 GiB
},
},
}))
import app.instance_config as ic
ic._instance_config = None
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["billing_project"] == "my-billing-prj"
assert bq["max_bytes_per_materialize"] == 5368709120
def test_post_persists_billing_project(self, seeded_app, tmp_path, monkeypatch):
"""POST through the existing section-patch flow persists
data_source.bigquery.billing_project to the overlay."""
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"data_source": {"bigquery": {
"billing_project": "my-billing-prj",
}}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
# Round-trip: GET should now reflect it.
resp = c.get("/api/admin/server-config", headers=_auth(token))
bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["billing_project"] == "my-billing-prj"
def test_post_persists_max_bytes_per_materialize(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"data_source": {"bigquery": {
"max_bytes_per_materialize": 21474836480, # 20 GiB
}}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
resp = c.get("/api/admin/server-config", headers=_auth(token))
bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["max_bytes_per_materialize"] == 21474836480
def test_template_documents_bq_optional_fields(self, seeded_app):
"""BQ optional fields are surfaced through the known-fields registry
(GET /api/admin/server-config carries them in
`known_fields.data_source.bigquery.fields`), not hardcoded into the
template text. The renderer reads the registry at runtime and creates
a structured form with hints for each leaf — so the test verifies
operator-discoverability through the API channel rather than via
static HTML inspection.
"""
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.get("/api/admin/server-config", headers=_auth(token))
assert resp.status_code == 200, resp.text
bq_fields = resp.json()["known_fields"]["data_source"]["bigquery"]["fields"]
assert "billing_project" in bq_fields, \
"registry must expose billing_project as a known field"
assert "max_bytes_per_materialize" in bq_fields, \
"registry must expose max_bytes_per_materialize as a known field"
# legacy_wrap_views was removed in #160 (VIEW/MAT_VIEW always wrap).
assert "legacy_wrap_views" not in bq_fields, \
"legacy_wrap_views was removed in #160 but still present in known_fields"
# Each surviving field must carry a hint so the renderer can show
# operator-facing help text — no anonymous knobs.
for k in ("billing_project", "max_bytes_per_materialize"):
assert "hint" in bq_fields[k] and bq_fields[k]["hint"], \
f"{k} must carry a non-empty hint"