Now that VIEW/MATERIALIZED_VIEW always wrap via bigquery_query() (the prior `legacy_wrap_views=True` branch behavior, made unconditional in the previous commit), the toggle has no semantic meaning and is removed across the codebase. Production code: - app/api/admin.py: drop the field from _OPTIONAL_FIELDS["data_source"] ["bigquery"]["fields"] and from _BQ_OPTIONAL_FIELD_DEFAULTS, plus the comment block above the defaults dict. - config/instance.yaml.example: drop the example snippet. - src/orchestrator.py: update the inner-objects skip-branch comment to reflect the new BQ behavior (the skip itself stays — keboola use_extension=False still inserts _meta rows without inner views). - app/web/templates/admin_tables.html: rewrite operator copy in the register and edit forms to reflect always-wrap. Tests: - tests/test_admin_server_config.py (TestServerConfigBigQueryFields): flip assertions from "field IS present" to "field NOT present" on legacy_wrap_views. Drop the test_post_persists_legacy_wrap_views test since the field no longer exists. - tests/test_admin_server_config_known_fields.py: same flip on the known-fields registry assertion. - tests/test_bigquery_extractor.py: drop the obsolete test_view_entity_does_not_create_master_view_by_default (asserted the bug we fixed) and test_legacy_wrap_views_toggle_restores_old_behavior (toggle no longer meaningful). Update remaining test docstrings. Operators with `legacy_wrap_views: true` set in their overlay get the new (equivalent) behavior automatically — the unrecognized key is silently ignored by the YAML loader. Operators with `false` get the issue-#160 fix as a behavior change, not a regression. Spec gate updated: production code grep gate grep -rn 'legacy_wrap_views' connectors app src config cli must return zero. tests/ excluded — historical "removed in #160" breadcrumbs and `assert "X" not in fields` regression guards retained as anti-regression signals.
937 lines
42 KiB
Python
937 lines
42 KiB
Python
"""Tests for the /admin/server-config UI page and its companion API.
|
||
|
||
Covers:
|
||
- Auth gate on both the HTML page and GET/POST /api/admin/server-config.
|
||
- The page-shell markers the JS hangs off (so we don't break the contract
|
||
by renaming an ID/data-attribute without updating the script).
|
||
- GET response shape: editable_sections + per-section dict + secrets
|
||
redacted.
|
||
- POST request shape: section patch validates, danger-zone gate enforced,
|
||
unknown sections rejected.
|
||
- Audit log entry: written on every save with sanitized diff (secret
|
||
fields masked, non-secret diff present).
|
||
- instance.yaml write happens at DATA_DIR/state/instance.yaml.
|
||
"""
|
||
|
||
import json
|
||
import uuid
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
import yaml
|
||
|
||
|
||
def _auth(token):
|
||
return {"Authorization": f"Bearer {token}"}
|
||
|
||
|
||
# --- HTML page ----------------------------------------------------------------
|
||
|
||
|
||
class TestServerConfigPageAuth:
|
||
def test_admin_can_load_page(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
# JWT in cookie so the auth-redirect middleware doesn't bounce us
|
||
# to /login (the same pattern the existing admin-UI tests use).
|
||
c.cookies.set("access_token", token)
|
||
try:
|
||
resp = c.get("/admin/server-config", headers={"Accept": "text/html"})
|
||
finally:
|
||
c.cookies.clear()
|
||
assert resp.status_code == 200, resp.text
|
||
body = resp.text
|
||
|
||
# Page-shell markers the JS targets.
|
||
assert 'data-page="server-config"' in body
|
||
assert "Server configuration" in body
|
||
# Form skeleton: loader + per-section container.
|
||
assert 'id="cfg-loading"' in body
|
||
assert 'id="cfg-sections"' in body
|
||
# Restart-required messaging on the page (issue #91 acceptance).
|
||
assert "restart" in body.lower()
|
||
# Danger-zone modal scaffolding.
|
||
assert 'id="danger-modal"' in body
|
||
assert 'id="danger-confirm-btn"' in body
|
||
# Endpoint constant — guards against URL drift between UI and API.
|
||
assert '/api/admin/server-config' in body
|
||
|
||
def test_non_admin_cannot_load_page(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["analyst_token"]
|
||
c.cookies.set("access_token", token)
|
||
try:
|
||
resp = c.get(
|
||
"/admin/server-config",
|
||
headers={"Accept": "text/html"},
|
||
follow_redirects=False,
|
||
)
|
||
finally:
|
||
c.cookies.clear()
|
||
# require_role(Role.ADMIN) → 403 for non-admins.
|
||
assert resp.status_code in (302, 401, 403), resp.text
|
||
|
||
def test_unauthenticated_redirects(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
resp = c.get(
|
||
"/admin/server-config",
|
||
headers={"Accept": "text/html"},
|
||
follow_redirects=False,
|
||
)
|
||
assert resp.status_code in (302, 303, 401, 403), resp.text
|
||
|
||
|
||
class TestServerConfigPageRendersSections:
|
||
"""The page is shell-only — sections are rendered client-side from the
|
||
GET response. The static HTML still ships every section's title in
|
||
SECTION_META so the JS can label them; verify the labels are present."""
|
||
|
||
def test_page_includes_all_section_titles(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
c.cookies.set("access_token", seeded_app["admin_token"])
|
||
try:
|
||
resp = c.get("/admin/server-config")
|
||
finally:
|
||
c.cookies.clear()
|
||
assert resp.status_code == 200
|
||
body = resp.text
|
||
# The eight editable sections appear in the JS SECTION_META map.
|
||
for section in (
|
||
"instance", "data_source", "email", "telegram",
|
||
"jira", "theme", "server", "auth",
|
||
):
|
||
assert f'{section}:' in body, f"section meta missing: {section}"
|
||
|
||
def test_page_marks_danger_sections_in_js(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
c.cookies.set("access_token", seeded_app["admin_token"])
|
||
try:
|
||
resp = c.get("/admin/server-config")
|
||
finally:
|
||
c.cookies.clear()
|
||
body = resp.text
|
||
# auth + server are flagged danger client-side too.
|
||
assert 'DANGER_SECTIONS = new Set(["auth", "server"])' in body
|
||
|
||
|
||
# --- GET /api/admin/server-config --------------------------------------------
|
||
|
||
|
||
class TestGetServerConfigAPI:
|
||
def test_get_requires_auth(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
resp = c.get("/api/admin/server-config")
|
||
assert resp.status_code == 401
|
||
|
||
def test_get_requires_admin(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["analyst_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
assert resp.status_code == 403
|
||
|
||
def test_get_returns_section_envelope(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
assert resp.status_code == 200
|
||
data = resp.json()
|
||
assert "sections" in data
|
||
assert "editable_sections" in data
|
||
assert "danger_sections" in data
|
||
assert isinstance(data["sections"], dict)
|
||
# Every editable section is surfaced (empty dict when unset) so the
|
||
# UI can render headers without hitting "section missing" branches.
|
||
for section in data["editable_sections"]:
|
||
assert section in data["sections"]
|
||
# Danger zone is the documented subset.
|
||
assert set(data["danger_sections"]) == {"auth", "server"}
|
||
|
||
def test_get_redacts_secret_fields(self, seeded_app, monkeypatch, tmp_path):
|
||
"""Pre-populate instance.yaml with a secret; GET must not leak it."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
(state / "instance.yaml").write_text(yaml.dump({
|
||
"instance": {"name": "Acme Analyst"},
|
||
"email": {"smtp_host": "smtp.example.com", "smtp_password": "supersecret"},
|
||
"auth": {"google_client_secret": "oauth-secret-value", "allowed_domain": "example.com"},
|
||
"telegram": {"bot_token": "telegram-token-value"},
|
||
}))
|
||
# Bust the in-process cache so the next GET re-reads the file.
|
||
import app.instance_config as ic
|
||
ic._instance_config = None
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
assert resp.status_code == 200
|
||
data = resp.json()
|
||
body = json.dumps(data)
|
||
# Cleartext values for any secret-looking key must NOT appear.
|
||
for leak in ("supersecret", "oauth-secret-value", "telegram-token-value"):
|
||
assert leak not in body, f"redaction failed: '{leak}' leaked in {body}"
|
||
# Non-secret fields pass through.
|
||
assert data["sections"]["instance"]["name"] == "Acme Analyst"
|
||
assert data["sections"]["email"]["smtp_host"] == "smtp.example.com"
|
||
assert data["sections"]["auth"]["allowed_domain"] == "example.com"
|
||
|
||
|
||
# --- POST /api/admin/server-config -------------------------------------------
|
||
|
||
|
||
class TestPostServerConfigAPI:
|
||
def test_post_requires_auth(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "X"}}},
|
||
)
|
||
assert resp.status_code == 401
|
||
|
||
def test_post_requires_admin(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["analyst_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "X"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 403
|
||
|
||
def test_post_empty_sections_returns_422(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 422
|
||
|
||
def test_post_unknown_section_rejected(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"thmee": {"primary": "#000"}}}, # typo
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 400
|
||
assert "thmee" in resp.json()["detail"]
|
||
|
||
def test_post_danger_section_without_confirmation_rejected(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"auth": {"allowed_domain": "evil.example.com"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 400
|
||
assert "confirm_danger" in resp.json()["detail"]
|
||
|
||
def test_post_danger_section_with_confirmation_accepted(self, seeded_app, tmp_path, monkeypatch):
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={
|
||
"sections": {"auth": {"allowed_domain": "example.com"}},
|
||
"confirm_danger": True,
|
||
},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
data = resp.json()
|
||
assert data["status"] == "ok"
|
||
assert data["restart_required"] is True
|
||
assert "auth" in data["sections_updated"]
|
||
|
||
def test_post_writes_instance_yaml(self, seeded_app, tmp_path, monkeypatch):
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "New Name", "subtitle": "Sub"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
written = tmp_path / "state" / "instance.yaml"
|
||
assert written.exists(), "instance.yaml should be written to DATA_DIR/state/"
|
||
loaded = yaml.safe_load(written.read_text())
|
||
assert loaded["instance"]["name"] == "New Name"
|
||
assert loaded["instance"]["subtitle"] == "Sub"
|
||
|
||
def test_post_deep_merges_existing_sections(self, seeded_app, tmp_path, monkeypatch):
|
||
"""Saving one section must not wipe other sections that already
|
||
exist in the file. This is the regression that made us pick
|
||
deep-merge over wholesale replace."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
(state / "instance.yaml").write_text(yaml.dump({
|
||
"instance": {"name": "Original", "subtitle": "Keep me"},
|
||
"email": {"smtp_host": "smtp.example.com", "smtp_port": 587},
|
||
}))
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
# Patch ONLY instance.name — subtitle and the entire email section
|
||
# must survive.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "Renamed"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200
|
||
|
||
loaded = yaml.safe_load((state / "instance.yaml").read_text())
|
||
assert loaded["instance"]["name"] == "Renamed"
|
||
assert loaded["instance"]["subtitle"] == "Keep me"
|
||
assert loaded["email"]["smtp_host"] == "smtp.example.com"
|
||
assert loaded["email"]["smtp_port"] == 587
|
||
|
||
def test_post_section_must_be_object(self, seeded_app):
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
# Pydantic accepts the request but our handler also validates the
|
||
# inner type — sending a string for a section value is a 422.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
# Use a list — Pydantic Dict[str, Dict[str, Any]] coerces some
|
||
# types but a non-mapping fails validation outright.
|
||
json={"sections": {"instance": "not a dict"}},
|
||
headers=_auth(token),
|
||
)
|
||
# Either 422 (Pydantic validation) or 422-equivalent — both valid.
|
||
assert resp.status_code in (400, 422)
|
||
|
||
def test_corrupt_overlay_refused_with_500_not_silently_overwritten(
|
||
self, seeded_app, tmp_path, monkeypatch
|
||
):
|
||
"""A malformed overlay used to be silently replaced — that masked
|
||
disk corruption / partial writes / hand-edits and dropped every
|
||
previously-saved section on the next save. The endpoint must now
|
||
refuse with 500 so the operator can investigate."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
# Plant a deliberately broken YAML — unclosed brace + tab indent —
|
||
# so PyYAML raises rather than yielding a dict.
|
||
overlay_path = state / "instance.yaml"
|
||
overlay_path.write_text("instance: {name: 'good'\nauth:\n\tallowed_domain: bad")
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "New Name"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 500, resp.text
|
||
assert "corrupt overlay" in resp.json()["detail"]
|
||
# Critical: the corrupt file is NOT replaced. Operator gets a chance
|
||
# to back it up.
|
||
assert overlay_path.read_text().startswith("instance: {name: 'good'")
|
||
|
||
def test_round_tripped_redacted_secret_does_not_corrupt_overlay(
|
||
self, seeded_app, tmp_path, monkeypatch
|
||
):
|
||
"""Regression: GET redacts secret-keyed children in nested objects to
|
||
``"***"`` so the form never displays cleartext. The data_source
|
||
section renders as a JSON textarea, so a no-op save would otherwise
|
||
send the masked JSON back verbatim. Without scrub, the deep-merge
|
||
would persist ``token_env: "***"`` on top of the real
|
||
``"KEBOOLA_STORAGE_TOKEN"``, silently breaking the next sync. The
|
||
server-side scrub must drop the sentinel so the overlay value
|
||
survives a round-trip."""
|
||
import yaml as _yaml
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
# Plant a real overlay with the actual env-var name in token_env.
|
||
(state / "instance.yaml").write_text(_yaml.dump({
|
||
"data_source": {
|
||
"type": "keboola",
|
||
"keboola": {
|
||
"stack_url": "https://connection.keboola.com",
|
||
"token_env": "KEBOOLA_STORAGE_TOKEN",
|
||
},
|
||
},
|
||
}))
|
||
# Bust the in-process cache so the next read sees the planted overlay.
|
||
import app.instance_config as ic
|
||
ic._instance_config = None
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
|
||
# Simulate a UI round-trip: send back the redacted payload verbatim.
|
||
# The sentinel matches the string `_mask` produces.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"data_source": {"keboola": {
|
||
"stack_url": "https://connection.keboola.com",
|
||
"token_env": "***", # redaction sentinel — must not persist
|
||
}}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
# Real env-var name must survive on disk.
|
||
overlay = _yaml.safe_load((state / "instance.yaml").read_text())
|
||
assert overlay["data_source"]["keboola"]["token_env"] == "KEBOOLA_STORAGE_TOKEN", \
|
||
f"overlay corrupted by round-tripped sentinel: {overlay}"
|
||
|
||
@pytest.mark.parametrize("private_url", [
|
||
"http://169.254.169.254/latest/meta-data/",
|
||
"http://127.0.0.1:8080/",
|
||
"http://10.0.0.1/",
|
||
"http://localhost/",
|
||
])
|
||
def test_post_rejects_private_url_in_keboola_stack_url(
|
||
self, seeded_app, tmp_path, monkeypatch, private_url
|
||
):
|
||
"""SSRF gate symmetric with /configure: data_source.keboola.stack_url
|
||
must not point to private/loopback/link-local networks. Without this
|
||
check, the editor would let an admin sneak the GCE/EC2 metadata
|
||
endpoint or an internal service into the overlay and trigger SSRF on
|
||
the next sync."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"data_source": {"keboola": {"stack_url": private_url}}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 400, resp.text
|
||
assert "private or reserved network" in resp.json()["detail"] \
|
||
or "could not resolve hostname" in resp.json()["detail"]
|
||
# Overlay must not have been written.
|
||
assert not (tmp_path / "state" / "instance.yaml").exists()
|
||
|
||
|
||
class TestPostServerConfigAuditLog:
|
||
def test_save_writes_audit_entry_with_sanitized_diff(self, seeded_app, tmp_path, monkeypatch):
|
||
"""Each save = one audit_log row tagged instance_config.update with
|
||
a diff that mentions changed fields by path but masks any secret
|
||
values."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
|
||
# Apply a patch that changes both a non-secret field and a secret.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"email": {
|
||
"smtp_host": "smtp.example.com",
|
||
"smtp_password": "fresh-secret-value-1234",
|
||
}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
# Pull the audit row directly from the DB — bypasses the API
|
||
# surface so we're verifying the actual persisted shape.
|
||
from src.db import get_system_db
|
||
conn = get_system_db()
|
||
try:
|
||
rows = conn.execute(
|
||
"SELECT user_id, action, resource, params FROM audit_log "
|
||
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
|
||
).fetchall()
|
||
finally:
|
||
conn.close()
|
||
|
||
assert rows, "audit_log should contain at least one instance_config.update row"
|
||
user_id, action, resource, params_raw = rows[0]
|
||
assert action == "instance_config.update"
|
||
assert resource == "instance.yaml"
|
||
assert user_id == "admin1" # seeded admin
|
||
params = json.loads(params_raw) if isinstance(params_raw, str) else params_raw
|
||
assert params["sections"] == ["email"]
|
||
assert isinstance(params["diff"], list)
|
||
# Diff includes the non-secret change verbatim.
|
||
smtp_diff = next((d for d in params["diff"] if d["path"] == "email.smtp_host"), None)
|
||
assert smtp_diff is not None
|
||
assert smtp_diff["after"] == "smtp.example.com"
|
||
# Secret diff path is present but value is MASKED — cleartext must
|
||
# never land in audit_log.
|
||
all_text = json.dumps(params)
|
||
assert "fresh-secret-value-1234" not in all_text, \
|
||
f"secret leaked into audit log params: {all_text}"
|
||
pwd_diff = next((d for d in params["diff"] if d["path"] == "email.smtp_password"), None)
|
||
assert pwd_diff is not None
|
||
assert pwd_diff["after"] == "***"
|
||
|
||
def test_diff_records_dict_to_scalar_replacement(self):
|
||
"""Regression: when one side is a dict and the other is a non-None
|
||
scalar, _diff_dicts used to recurse with the missing side coerced
|
||
to {} — losing the scalar entirely. Now the shape change is
|
||
recorded as a single replacement at the parent path."""
|
||
from app.api.admin import _diff_dicts
|
||
# dict → scalar
|
||
diff = _diff_dicts(
|
||
{"keboola": {"stack_url": "https://x", "token_env": "Y"}},
|
||
{"keboola": "disabled"},
|
||
)
|
||
# The whole 'keboola' entry should appear as a single shape change,
|
||
# not as per-field removals.
|
||
keboola_rows = [d for d in diff if d["path"] == "keboola"]
|
||
assert len(keboola_rows) == 1, f"missing dict→scalar row in {diff}"
|
||
assert keboola_rows[0]["after"] == "disabled"
|
||
|
||
def test_diff_records_scalar_to_dict_replacement(self):
|
||
from app.api.admin import _diff_dicts
|
||
diff = _diff_dicts(
|
||
{"keboola": "disabled"},
|
||
{"keboola": {"stack_url": "https://x"}},
|
||
)
|
||
keboola_rows = [d for d in diff if d["path"] == "keboola"]
|
||
assert len(keboola_rows) == 1
|
||
assert keboola_rows[0]["before"] == "disabled"
|
||
assert keboola_rows[0]["after"] == {"stack_url": "https://x"}
|
||
|
||
def test_diff_shape_change_redacts_secret_keyed_children(self):
|
||
"""Regression: when a section's shape flips between dict and scalar,
|
||
the dict side may carry secret-keyed children (token_env →
|
||
env-resolved cleartext). The shape-change branch used to log the
|
||
raw dict verbatim because the parent key wasn't itself secret-named,
|
||
so a wholesale replacement of `data_source.keboola: {...}` ->
|
||
`data_source.keboola: "disabled"` would dump every cleartext token
|
||
/ password the dict contained into audit_log.params. The branch now
|
||
runs `_redact` over the dict side so secret-keyed children are
|
||
masked even when the parent isn't."""
|
||
from app.api.admin import _diff_dicts
|
||
# dict-with-secret-child → scalar
|
||
diff = _diff_dicts(
|
||
{"keboola": {"stack_url": "https://x", "token_env": "hunter2-cleartext"}},
|
||
{"keboola": "disabled"},
|
||
)
|
||
row = next(d for d in diff if d["path"] == "keboola")
|
||
assert row["before"]["stack_url"] == "https://x" # non-secret stays
|
||
assert row["before"]["token_env"] == "***" # secret masked
|
||
assert "hunter2-cleartext" not in str(row), \
|
||
f"cleartext leaked into audit row: {row}"
|
||
|
||
# scalar → dict-with-secret-child (symmetric)
|
||
diff = _diff_dicts(
|
||
{"email": "disabled"},
|
||
{"email": {"smtp_host": "smtp.example.com", "smtp_password": "supersecret"}},
|
||
)
|
||
row = next(d for d in diff if d["path"] == "email")
|
||
assert row["after"]["smtp_host"] == "smtp.example.com"
|
||
assert row["after"]["smtp_password"] == "***"
|
||
assert "supersecret" not in str(row)
|
||
|
||
def test_overlay_does_not_resolve_env_var_placeholders(self, seeded_app, tmp_path, monkeypatch):
|
||
"""Regression: prior fix wrote every editable section from the merged
|
||
(env-resolved) config to the overlay. Static `smtp_password: ${SMTP_PASSWORD}`
|
||
would get persisted as the actual cleartext password — turning a
|
||
config-leak into a disk-leak. Now we write only the sections the
|
||
user explicitly patched, deep-merged onto whatever was already in
|
||
the overlay; static env-var placeholders never touch the overlay
|
||
unless the admin types a literal value to replace them."""
|
||
import yaml as _yaml
|
||
static_dir = tmp_path / "static"
|
||
static_dir.mkdir()
|
||
(static_dir / "instance.yaml").write_text(_yaml.dump({
|
||
"instance": {"name": "Old"},
|
||
"auth": {"allowed_domain": "example.com", "webapp_secret_key": "x"},
|
||
"server": {"host": "1.2.3.4", "hostname": "example.com"},
|
||
"email": {
|
||
"smtp_host": "smtp.example.com",
|
||
"smtp_password": "${SMTP_PASSWORD}",
|
||
},
|
||
}))
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
|
||
monkeypatch.setenv("SMTP_PASSWORD", "hunter2-cleartext-secret")
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
from pathlib import Path as _Path
|
||
import config.loader as _loader_mod
|
||
monkeypatch.setattr(_loader_mod, "CONFIG_DIR", _Path(static_dir))
|
||
from app.instance_config import reset_cache
|
||
reset_cache()
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
# Admin patches `instance.name` only — does NOT touch `email`.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "New"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
overlay_text = (tmp_path / "state" / "instance.yaml").read_text()
|
||
# The cleartext secret value MUST NOT appear in the overlay because
|
||
# the user didn't touch `email`. Only the patched section lands.
|
||
assert "hunter2-cleartext-secret" not in overlay_text, \
|
||
f"env-resolved secret leaked into overlay:\n{overlay_text}"
|
||
# And the email section shouldn't be there at all (user didn't patch it).
|
||
overlay = _yaml.safe_load(overlay_text)
|
||
assert "email" not in overlay, \
|
||
f"untouched section persisted to overlay: {overlay}"
|
||
# And `instance` IS persisted with the user's value.
|
||
assert overlay["instance"]["name"] == "New"
|
||
|
||
def test_load_instance_config_still_returns_static_sections_after_editor_save(self, seeded_app, tmp_path, monkeypatch):
|
||
"""End-to-end regression for the load_instance_config × narrow-overlay
|
||
bug: pre-fix, load_instance_config() returned the overlay verbatim
|
||
when it existed (no merge). Combined with the editor's narrow-write
|
||
strategy, the first save deleted static-only sections (datasets,
|
||
corporate_memory, openmetadata) from every runtime get_value() /
|
||
get_datasets() call. This test exercises the actual call path the
|
||
rest of the app uses, not just the overlay's on-disk shape."""
|
||
import yaml as _yaml
|
||
static_dir = tmp_path / "static"
|
||
static_dir.mkdir()
|
||
# Required fields per config.loader's strict validation, plus the
|
||
# static-only sections we want to assert flow through after a save.
|
||
(static_dir / "instance.yaml").write_text(_yaml.dump({
|
||
"instance": {"name": "Old"},
|
||
"auth": {"allowed_domain": "example.com", "webapp_secret_key": "x"},
|
||
"server": {"host": "1.2.3.4", "hostname": "example.com"},
|
||
"datasets": {"sales": {"primary_key": ["id"]}},
|
||
"corporate_memory": {"enabled": True, "retention_days": 90},
|
||
}))
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
|
||
# config.loader caches CONFIG_DIR at module import time, so the
|
||
# env-var override above doesn't propagate. Patch the module
|
||
# attribute directly so the static base resolves to our fixture.
|
||
from pathlib import Path as _Path
|
||
import config.loader as _loader_mod
|
||
monkeypatch.setattr(_loader_mod, "CONFIG_DIR", _Path(static_dir))
|
||
|
||
from app.instance_config import reset_cache
|
||
reset_cache()
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "New"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
# Now use the SAME function the rest of the app uses.
|
||
from app.instance_config import load_instance_config, get_value
|
||
reset_cache() # make sure we're reading fresh post-save
|
||
cfg = load_instance_config()
|
||
assert cfg.get("instance", {}).get("name") == "New", "edit didn't land"
|
||
assert "corporate_memory" in cfg, \
|
||
f"static-only section vanished after save: keys={list(cfg.keys())}"
|
||
assert cfg["corporate_memory"]["enabled"] is True
|
||
assert cfg["corporate_memory"]["retention_days"] == 90
|
||
assert "datasets" in cfg
|
||
assert cfg["datasets"]["sales"]["primary_key"] == ["id"]
|
||
# And via get_value (the path admin.py / sync.py / catalog actually use):
|
||
assert get_value("corporate_memory", "enabled") is True
|
||
assert get_value("datasets", "sales", "primary_key") == ["id"]
|
||
|
||
def test_overlay_does_not_shadow_static_non_editable_sections(self, seeded_app, tmp_path, monkeypatch):
|
||
"""Regression: writing the full merged config to the overlay would
|
||
snapshot non-editable sections (e.g. `corporate_memory`, `datasets`)
|
||
and silently shadow later updates to those sections in the static
|
||
file. Overlay must persist ONLY the editable sections so the static
|
||
file stays the source of truth for everything else."""
|
||
import yaml as _yaml
|
||
# Static config has both editable + non-editable sections.
|
||
static_dir = tmp_path / "static"
|
||
static_dir.mkdir()
|
||
static_path = static_dir / "instance.yaml"
|
||
static_path.write_text(_yaml.dump({
|
||
"instance": {"name": "Old"},
|
||
"datasets": {"sales": {"primary_key": ["id"]}},
|
||
"corporate_memory": {"enabled": True, "retention_days": 90},
|
||
}))
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
# Edit only an editable section.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"instance": {"name": "New"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
# Overlay should contain ONLY editable sections, not corporate_memory
|
||
# or datasets — those stay in the static file.
|
||
overlay = _yaml.safe_load((tmp_path / "state" / "instance.yaml").read_text())
|
||
assert "instance" in overlay
|
||
assert overlay["instance"]["name"] == "New"
|
||
assert "corporate_memory" not in overlay, \
|
||
f"non-editable section leaked into overlay: {overlay}"
|
||
assert "datasets" not in overlay
|
||
|
||
def test_secret_rotation_is_visible_in_audit_diff(self, seeded_app, tmp_path, monkeypatch):
|
||
"""Regression: pre-masking inputs to _diff_dicts collapses a
|
||
secret-rotation (password A → password B) into 'no diff' because
|
||
both sides redact to '***'. Compute diff on RAW values, redact
|
||
per-row at emit time, so a rotation row IS recorded (with both
|
||
sides masked but the path present)."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
|
||
# First save: set initial secret.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"email": {"smtp_password": "OLD_secret_value"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
# Second save: rotate to a different secret.
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"email": {"smtp_password": "NEW_rotated_secret"}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
from src.db import get_system_db
|
||
conn = get_system_db()
|
||
try:
|
||
row = conn.execute(
|
||
"SELECT params FROM audit_log "
|
||
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
|
||
).fetchone()
|
||
finally:
|
||
conn.close()
|
||
assert row is not None
|
||
params = json.loads(row[0]) if isinstance(row[0], str) else row[0]
|
||
# The rotation MUST appear as a diff row even though both sides
|
||
# are secrets — pre-fix would have produced an empty diff.
|
||
pwd_diffs = [d for d in params["diff"] if d["path"] == "email.smtp_password"]
|
||
assert len(pwd_diffs) == 1, f"rotation lost from audit diff: {params['diff']}"
|
||
assert pwd_diffs[0]["before"] == "***"
|
||
assert pwd_diffs[0]["after"] == "***"
|
||
# Neither cleartext leaks.
|
||
all_text = json.dumps(params)
|
||
assert "OLD_secret_value" not in all_text
|
||
assert "NEW_rotated_secret" not in all_text
|
||
|
||
def test_danger_save_records_danger_sections(self, seeded_app, tmp_path, monkeypatch):
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={
|
||
"sections": {"server": {"hostname": "data.example.com"}},
|
||
"confirm_danger": True,
|
||
},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
|
||
from src.db import get_system_db
|
||
conn = get_system_db()
|
||
try:
|
||
row = conn.execute(
|
||
"SELECT params FROM audit_log "
|
||
"WHERE action = 'instance_config.update' ORDER BY timestamp DESC LIMIT 1"
|
||
).fetchone()
|
||
finally:
|
||
conn.close()
|
||
assert row is not None
|
||
params = json.loads(row[0]) if isinstance(row[0], str) else row[0]
|
||
assert "server" in params["danger_sections"]
|
||
|
||
|
||
class TestRedactionHelpers:
|
||
"""Unit-level tests on the masking helpers — these run without the API
|
||
so a regression in the redaction logic is easy to spot."""
|
||
|
||
def test_is_secret_key_matches_common_patterns(self):
|
||
from app.api.admin import _is_secret_key
|
||
assert _is_secret_key("smtp_password")
|
||
assert _is_secret_key("google_client_secret")
|
||
assert _is_secret_key("api_token")
|
||
assert _is_secret_key("WEBAPP_SECRET_KEY")
|
||
assert _is_secret_key("anthropic_api_key")
|
||
assert not _is_secret_key("smtp_host")
|
||
assert not _is_secret_key("name")
|
||
assert not _is_secret_key("allowed_domain")
|
||
|
||
def test_redact_masks_nested_secrets(self):
|
||
from app.api.admin import _redact
|
||
before = {
|
||
"email": {"smtp_host": "h", "smtp_password": "leak"},
|
||
"auth": {"google_client_secret": "leak2", "allowed_domain": "example.com"},
|
||
}
|
||
after = _redact(before)
|
||
assert after["email"]["smtp_host"] == "h"
|
||
assert after["email"]["smtp_password"] == "***"
|
||
assert after["auth"]["google_client_secret"] == "***"
|
||
assert after["auth"]["allowed_domain"] == "example.com"
|
||
|
||
def test_diff_dicts_flat_paths(self):
|
||
from app.api.admin import _diff_dicts
|
||
before = {"email": {"smtp_host": "old", "smtp_port": 587}}
|
||
after = {"email": {"smtp_host": "new", "smtp_port": 587}}
|
||
diff = _diff_dicts(before, after)
|
||
assert diff == [{"path": "email.smtp_host", "before": "old", "after": "new"}]
|
||
|
||
def test_deep_merge_preserves_other_sections(self):
|
||
from app.api.admin import _deep_merge
|
||
base = {"a": {"x": 1, "y": 2}, "b": {"z": 3}}
|
||
patch = {"a": {"y": 99}}
|
||
out = _deep_merge(base, patch)
|
||
assert out == {"a": {"x": 1, "y": 99}, "b": {"z": 3}}
|
||
|
||
|
||
# --- Phase J: BQ fields exposure in /admin/server-config ---------------------
|
||
|
||
|
||
class TestServerConfigBigQueryFields:
|
||
"""Phase J — billing_project and max_bytes_per_materialize are surfaced
|
||
in the UI/API so an operator can set them without SSH'ing to the VM.
|
||
They were previously only addressable via direct YAML edits.
|
||
|
||
legacy_wrap_views was removed in #160 — VIEW/MAT_VIEW are now always
|
||
wrapped via bigquery_query() (the previous opt-in path)."""
|
||
|
||
def test_get_surfaces_bq_fields_even_when_unset(self, seeded_app, tmp_path, monkeypatch):
|
||
"""GET response always includes the BQ optional fields under
|
||
data_source.bigquery so the UI's JSON-textarea rendering shows them
|
||
as editable keys even when YAML omits them. Without this, the
|
||
operator has no UI hint that the knobs exist."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
# Plant a minimal instance.yaml that has data_source.bigquery but
|
||
# none of the optional fields set.
|
||
(state / "instance.yaml").write_text(yaml.dump({
|
||
"data_source": {
|
||
"type": "bigquery",
|
||
"bigquery": {"project": "my-data-prj", "location": "US"},
|
||
},
|
||
}))
|
||
import app.instance_config as ic
|
||
ic._instance_config = None
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
assert resp.status_code == 200, resp.text
|
||
bq = resp.json()["sections"]["data_source"]["bigquery"]
|
||
assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
|
||
assert "max_bytes_per_materialize" in bq, \
|
||
f"max_bytes_per_materialize missing from GET: {bq}"
|
||
# legacy_wrap_views was removed in #160; UI must NOT surface it any
|
||
# longer (the wrap behavior is now unconditional).
|
||
assert "legacy_wrap_views" not in bq, \
|
||
f"legacy_wrap_views was removed in #160 but still present in GET: {bq}"
|
||
|
||
def test_get_preserves_existing_bq_field_values(self, seeded_app, tmp_path, monkeypatch):
|
||
"""When the operator HAS set the fields, GET must surface their actual
|
||
values, not the unset defaults."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
state = tmp_path / "state"
|
||
state.mkdir(parents=True, exist_ok=True)
|
||
(state / "instance.yaml").write_text(yaml.dump({
|
||
"data_source": {
|
||
"type": "bigquery",
|
||
"bigquery": {
|
||
"project": "my-data-prj",
|
||
"billing_project": "my-billing-prj",
|
||
"max_bytes_per_materialize": 5368709120, # 5 GiB
|
||
},
|
||
},
|
||
}))
|
||
import app.instance_config as ic
|
||
ic._instance_config = None
|
||
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
bq = resp.json()["sections"]["data_source"]["bigquery"]
|
||
assert bq["billing_project"] == "my-billing-prj"
|
||
assert bq["max_bytes_per_materialize"] == 5368709120
|
||
|
||
def test_post_persists_billing_project(self, seeded_app, tmp_path, monkeypatch):
|
||
"""POST through the existing section-patch flow persists
|
||
data_source.bigquery.billing_project to the overlay."""
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"data_source": {"bigquery": {
|
||
"billing_project": "my-billing-prj",
|
||
}}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
# Round-trip: GET should now reflect it.
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
bq = resp.json()["sections"]["data_source"]["bigquery"]
|
||
assert bq["billing_project"] == "my-billing-prj"
|
||
|
||
def test_post_persists_max_bytes_per_materialize(self, seeded_app, tmp_path, monkeypatch):
|
||
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
||
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.post(
|
||
"/api/admin/server-config",
|
||
json={"sections": {"data_source": {"bigquery": {
|
||
"max_bytes_per_materialize": 21474836480, # 20 GiB
|
||
}}}},
|
||
headers=_auth(token),
|
||
)
|
||
assert resp.status_code == 200, resp.text
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
bq = resp.json()["sections"]["data_source"]["bigquery"]
|
||
assert bq["max_bytes_per_materialize"] == 21474836480
|
||
|
||
def test_template_documents_bq_optional_fields(self, seeded_app):
|
||
"""BQ optional fields are surfaced through the known-fields registry
|
||
(GET /api/admin/server-config carries them in
|
||
`known_fields.data_source.bigquery.fields`), not hardcoded into the
|
||
template text. The renderer reads the registry at runtime and creates
|
||
a structured form with hints for each leaf — so the test verifies
|
||
operator-discoverability through the API channel rather than via
|
||
static HTML inspection.
|
||
"""
|
||
c = seeded_app["client"]
|
||
token = seeded_app["admin_token"]
|
||
resp = c.get("/api/admin/server-config", headers=_auth(token))
|
||
assert resp.status_code == 200, resp.text
|
||
bq_fields = resp.json()["known_fields"]["data_source"]["bigquery"]["fields"]
|
||
assert "billing_project" in bq_fields, \
|
||
"registry must expose billing_project as a known field"
|
||
assert "max_bytes_per_materialize" in bq_fields, \
|
||
"registry must expose max_bytes_per_materialize as a known field"
|
||
# legacy_wrap_views was removed in #160 (VIEW/MAT_VIEW always wrap).
|
||
assert "legacy_wrap_views" not in bq_fields, \
|
||
"legacy_wrap_views was removed in #160 but still present in known_fields"
|
||
# Each surviving field must carry a hint so the renderer can show
|
||
# operator-facing help text — no anonymous knobs.
|
||
for k in ("billing_project", "max_bytes_per_materialize"):
|
||
assert "hint" in bq_fields[k] and bq_fields[k]["hint"], \
|
||
f"{k} must carry a non-empty hint"
|