Replaces the BigQuery wrap-view pattern with a discovery + scoped-fetch toolkit driven by the analyst's Claude session. Adds /api/v2/{catalog,schema,sample,scan,scan/estimate}, da catalog/schema/describe/fetch/snapshot/disk-info CLI commands, sqlglot-backed WHERE validator, process-local quota tracker, agent rails skill (cli/skills/agnes-data-querying.md). BREAKING: BQ wrap views off by default — set data_source.bigquery.legacy_wrap_views=true for one cycle. Backward-compat field_validator on primary_key. Catalog cache now matches documented 300s TTL with RBAC fresh per request. Cuts release v0.14.0.
504 lines
19 KiB
Python
504 lines
19 KiB
Python
"""Tests for admin configure and registry API endpoints."""
|
|
|
|
import ipaddress
|
|
import socket
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
|
|
def _auth(token):
|
|
return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
class TestAdminConfigure:
|
|
def test_configure_local_source(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert data["status"] == "ok"
|
|
assert data["data_source"] == "local"
|
|
|
|
def test_configure_invalid_source_type_returns_400(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "invalid_source"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
assert "data_source" in resp.json()["detail"].lower() or "must be" in resp.json()["detail"]
|
|
|
|
def test_configure_requires_admin(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["analyst_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 403
|
|
|
|
def test_configure_requires_auth(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local"},
|
|
)
|
|
assert resp.status_code == 401
|
|
|
|
def test_configure_bigquery_missing_project_returns_400(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "bigquery"}, # missing bigquery_project
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_bigquery_with_project(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "bigquery", "bigquery_project": "my-project"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert data["data_source"] == "bigquery"
|
|
|
|
def test_configure_missing_data_source_returns_422(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={}, # missing data_source entirely
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 422
|
|
|
|
def test_configure_overlay_does_not_resolve_env_var_placeholders(
|
|
self, seeded_app, tmp_path, monkeypatch
|
|
):
|
|
"""Regression: pre-fix `/api/admin/configure` seeded `existing` from
|
|
the static config when no overlay existed, then wrote the whole
|
|
thing back. Static `${SMTP_PASSWORD}` placeholders got resolved
|
|
by `config.loader` along the way, so the cleartext secret landed
|
|
in the writable overlay file even though the wizard only sets
|
|
`instance` / `auth` / `data_source`. The narrow-overlay rewrite
|
|
must read the overlay verbatim (or empty) and write only those
|
|
three sections — same contract as `/api/admin/server-config`.
|
|
"""
|
|
import yaml as _yaml
|
|
static_dir = tmp_path / "static"
|
|
static_dir.mkdir()
|
|
(static_dir / "instance.yaml").write_text(_yaml.dump({
|
|
"instance": {"name": "Old"},
|
|
"auth": {"allowed_domain": "example.com", "webapp_secret_key": "x"},
|
|
"server": {"host": "1.2.3.4", "hostname": "example.com"},
|
|
"email": {
|
|
"smtp_host": "smtp.example.com",
|
|
"smtp_password": "${SMTP_PASSWORD}",
|
|
},
|
|
}))
|
|
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
|
monkeypatch.setenv("CONFIG_DIR", str(static_dir))
|
|
monkeypatch.setenv("SMTP_PASSWORD", "hunter2-cleartext-secret")
|
|
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
|
|
from pathlib import Path as _Path
|
|
import config.loader as _loader_mod
|
|
monkeypatch.setattr(_loader_mod, "CONFIG_DIR", _Path(static_dir))
|
|
from app.instance_config import reset_cache
|
|
reset_cache()
|
|
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local", "instance_name": "New"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 200, resp.text
|
|
|
|
overlay_text = (tmp_path / "state" / "instance.yaml").read_text()
|
|
assert "hunter2-cleartext-secret" not in overlay_text, \
|
|
f"env-resolved secret leaked into overlay:\n{overlay_text}"
|
|
overlay = _yaml.safe_load(overlay_text)
|
|
# email/server/auth.webapp_secret_key are static-only here — wizard
|
|
# never touches them, so they must not appear in the overlay.
|
|
assert "email" not in overlay
|
|
assert "server" not in overlay
|
|
# The wizard's three sections DO land:
|
|
assert overlay["instance"]["name"] == "New"
|
|
assert overlay["data_source"]["type"] == "local"
|
|
|
|
def test_corrupt_overlay_refused_with_500_not_silently_overwritten(
|
|
self, seeded_app, tmp_path, monkeypatch
|
|
):
|
|
"""Symmetric to the server-config editor: /configure must refuse to
|
|
overwrite a corrupt overlay so the operator can investigate, instead
|
|
of silently dropping every previously-saved section."""
|
|
monkeypatch.setenv("DATA_DIR", str(tmp_path))
|
|
state = tmp_path / "state"
|
|
state.mkdir(parents=True, exist_ok=True)
|
|
overlay_path = state / "instance.yaml"
|
|
overlay_path.write_text("instance: {name: 'good'\nauth:\n\tallowed_domain: bad")
|
|
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local", "instance_name": "New"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 500, resp.text
|
|
assert "corrupt overlay" in resp.json()["detail"]
|
|
assert overlay_path.read_text().startswith("instance: {name: 'good'")
|
|
|
|
|
|
class TestAdminConfigureSSRF:
|
|
"""SSRF protection: keboola_url must not point to private/reserved networks.
|
|
|
|
Uses socket.getaddrinfo + ipaddress checks — tests mock DNS resolution
|
|
so they work regardless of the test runner's network/IPv6 config.
|
|
"""
|
|
|
|
@staticmethod
|
|
def _mock_getaddrinfo(host, port, **kwargs):
|
|
"""Predictable DNS resolution for tests — returns the IP literal as-is."""
|
|
try:
|
|
ip = ipaddress.ip_address(host)
|
|
family = socket.AF_INET6 if ip.version == 6 else socket.AF_INET
|
|
return [(family, socket.SOCK_STREAM, socket.IPPROTO_TCP, "", (str(ip), port))]
|
|
except ValueError:
|
|
# Not an IP literal — let real DNS resolve (for public URL test)
|
|
return socket.getaddrinfo(host, port, **kwargs)
|
|
|
|
def test_configure_rejects_localhost_url(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://localhost:8080"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
assert "private" in resp.json()["detail"].lower() or "reserved" in resp.json()["detail"].lower()
|
|
|
|
def test_configure_rejects_127_0_0_1_url(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "https://127.0.0.1"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_10_0_0_1_url(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "https://10.0.0.1"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_192_168_url(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "https://192.168.1.1"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_169_254_metadata_url(self, seeded_app):
|
|
"""169.254.x.x (link-local) must be rejected — cloud metadata endpoint."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://169.254.169.254"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_ipv6_loopback(self, seeded_app):
|
|
"""IPv6 loopback ::1 must be rejected."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://[::1]:8080"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_ipv6_link_local(self, seeded_app):
|
|
"""IPv6 link-local fe80::1 must be rejected."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://[fe80::1]:8080"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_ipv6_unique_local(self, seeded_app):
|
|
"""IPv6 unique-local fc00::1 must be rejected."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://[fc00::1]:8080"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_rejects_ipv6_multicast(self, seeded_app):
|
|
"""IPv6 multicast ff02::1 must be rejected."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
with patch("app.api.admin._socket.getaddrinfo", self._mock_getaddrinfo):
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "http://[ff02::1]:8080"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 400
|
|
|
|
def test_configure_accepts_public_url(self, seeded_app):
|
|
"""A public URL should pass SSRF validation (connection test may still fail)."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "keboola", "keboola_token": "tok", "keboola_url": "https://connection.keboola.com"},
|
|
headers=_auth(token),
|
|
)
|
|
# Should NOT be 400 with SSRF message — may be 400 from failed connection test, or 200
|
|
if resp.status_code == 400:
|
|
assert "private" not in resp.json()["detail"].lower()
|
|
|
|
|
|
class TestAdminRegistry:
|
|
def test_list_registry_empty(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.get("/api/admin/registry", headers=_auth(token))
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert "tables" in data
|
|
assert "count" in data
|
|
assert data["count"] == 0
|
|
|
|
def test_list_registry_requires_admin(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["analyst_token"]
|
|
resp = c.get("/api/admin/registry", headers=_auth(token))
|
|
assert resp.status_code == 403
|
|
|
|
def test_list_registry_requires_auth(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
resp = c.get("/api/admin/registry")
|
|
assert resp.status_code == 401
|
|
|
|
|
|
class TestRegisterTable:
|
|
def test_register_table_success(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "orders", "source_type": "keboola", "bucket": "in.c-crm",
|
|
"source_table": "orders", "query_mode": "local"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 201
|
|
data = resp.json()
|
|
assert data["id"] == "orders"
|
|
assert data["name"] == "orders"
|
|
assert data["status"] == "registered"
|
|
|
|
def test_register_table_appears_in_registry(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
|
|
c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "customers", "source_type": "keboola"},
|
|
headers=_auth(token),
|
|
)
|
|
|
|
resp = c.get("/api/admin/registry", headers=_auth(token))
|
|
assert resp.status_code == 200
|
|
names = [t["name"] for t in resp.json()["tables"]]
|
|
assert "customers" in names
|
|
|
|
def test_register_duplicate_returns_409(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
|
|
# Register once
|
|
c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "dup_table"},
|
|
headers=_auth(token),
|
|
)
|
|
|
|
# Register again
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "dup_table"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 409
|
|
|
|
def test_register_requires_admin(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["analyst_token"]
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "new_table"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 403
|
|
|
|
def test_register_requires_auth(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "new_table"},
|
|
)
|
|
assert resp.status_code == 401
|
|
|
|
def test_register_table_with_all_fields(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={
|
|
"name": "full_table",
|
|
"source_type": "keboola",
|
|
"bucket": "in.c-crm",
|
|
"source_table": "full_table",
|
|
"query_mode": "local",
|
|
"sync_schedule": "0 6 * * *",
|
|
"description": "Full configuration table",
|
|
"profile_after_sync": True,
|
|
},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 201
|
|
|
|
def test_register_table_accepts_string_primary_key_for_backcompat(self, seeded_app):
|
|
"""primary_key changed from Optional[str] to Optional[List[str]] in
|
|
0.14.0. Pydantic v2 doesn't coerce, so without a backward-compat
|
|
normalizer a CLI script posting `"primary_key": "session_id"` would
|
|
hit a 422. The field validator wraps a bare string in a one-element
|
|
list so old and new callers both work."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "single_pk", "primary_key": "session_id"},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 201, resp.text
|
|
|
|
resp = c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "composite_pk", "primary_key": ["session_id", "event_date"]},
|
|
headers=_auth(token),
|
|
)
|
|
assert resp.status_code == 201, resp.text
|
|
|
|
|
|
class TestDeleteRegistryTable:
|
|
def test_delete_registered_table(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
|
|
# Register
|
|
c.post(
|
|
"/api/admin/register-table",
|
|
json={"name": "to_delete"},
|
|
headers=_auth(token),
|
|
)
|
|
|
|
# Delete
|
|
resp = c.delete("/api/admin/registry/to_delete", headers=_auth(token))
|
|
assert resp.status_code == 204
|
|
|
|
# Verify gone from registry
|
|
list_resp = c.get("/api/admin/registry", headers=_auth(token))
|
|
names = [t["name"] for t in list_resp.json()["tables"]]
|
|
assert "to_delete" not in names
|
|
|
|
def test_delete_nonexistent_table_returns_404(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
resp = c.delete("/api/admin/registry/nonexistent_table", headers=_auth(token))
|
|
assert resp.status_code == 404
|
|
|
|
def test_delete_requires_admin(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["analyst_token"]
|
|
resp = c.delete("/api/admin/registry/some_table", headers=_auth(token))
|
|
assert resp.status_code == 403
|
|
|
|
def test_delete_requires_auth(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
resp = c.delete("/api/admin/registry/some_table")
|
|
assert resp.status_code == 401
|
|
|
|
|
|
class TestDiscoverAndRegister:
|
|
def test_discover_and_register_requires_admin(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
token = seeded_app["analyst_token"]
|
|
resp = c.post("/api/admin/discover-and-register", headers=_auth(token))
|
|
assert resp.status_code == 403
|
|
|
|
def test_discover_and_register_requires_auth(self, seeded_app):
|
|
c = seeded_app["client"]
|
|
resp = c.post("/api/admin/discover-and-register")
|
|
assert resp.status_code == 401
|
|
|
|
def test_discover_and_register_non_keboola_returns_zero(self, seeded_app):
|
|
"""With no keboola config, discover-and-register returns 0 registered tables."""
|
|
c = seeded_app["client"]
|
|
token = seeded_app["admin_token"]
|
|
|
|
# Configure as local (non-keboola)
|
|
c.post(
|
|
"/api/admin/configure",
|
|
json={"data_source": "local"},
|
|
headers=_auth(token),
|
|
)
|
|
|
|
resp = c.post("/api/admin/discover-and-register", headers=_auth(token))
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert data["registered"] == 0
|
|
assert data["source"] != "keboola"
|