refactor(bq): #160 remove legacy_wrap_views config knob (always-wrap)

Now that VIEW/MATERIALIZED_VIEW always wrap via bigquery_query() (the
prior `legacy_wrap_views=True` branch behavior, made unconditional in
the previous commit), the toggle has no semantic meaning and is removed
across the codebase.

Production code:
- app/api/admin.py: drop the field from _OPTIONAL_FIELDS["data_source"]
  ["bigquery"]["fields"] and from _BQ_OPTIONAL_FIELD_DEFAULTS, plus the
  comment block above the defaults dict.
- config/instance.yaml.example: drop the example snippet.
- src/orchestrator.py: update the inner-objects skip-branch comment to
  reflect the new BQ behavior (the skip itself stays — keboola
  use_extension=False still inserts _meta rows without inner views).
- app/web/templates/admin_tables.html: rewrite operator copy in the
  register and edit forms to reflect always-wrap.

Tests:
- tests/test_admin_server_config.py (TestServerConfigBigQueryFields):
  flip assertions from "field IS present" to "field NOT present" on
  legacy_wrap_views. Drop the test_post_persists_legacy_wrap_views test
  since the field no longer exists.
- tests/test_admin_server_config_known_fields.py: same flip on the
  known-fields registry assertion.
- tests/test_bigquery_extractor.py: drop the obsolete
  test_view_entity_does_not_create_master_view_by_default (asserted the
  bug we fixed) and test_legacy_wrap_views_toggle_restores_old_behavior
  (toggle no longer meaningful). Update remaining test docstrings.

Operators with `legacy_wrap_views: true` set in their overlay get the
new (equivalent) behavior automatically — the unrecognized key is
silently ignored by the YAML loader. Operators with `false` get the
issue-#160 fix as a behavior change, not a regression.

Spec gate updated: production code grep gate
  grep -rn 'legacy_wrap_views' connectors app src config cli
must return zero. tests/ excluded — historical "removed in #160"
breadcrumbs and `assert "X" not in fields` regression guards retained
as anti-regression signals.
This commit is contained in:
ZdenekSrotyr 2026-05-03 16:31:23 +02:00
parent 10d7bd62f8
commit 9d0e4e687d
8 changed files with 60 additions and 169 deletions

View file

@ -226,18 +226,6 @@ _KNOWN_FIELDS: dict[str, dict[str, dict]] = {
"Mismatch → 403 USER_PROJECT_DENIED on every BQ call." "Mismatch → 403 USER_PROJECT_DENIED on every BQ call."
), ),
}, },
"legacy_wrap_views": {
"kind": "bool",
"default": False,
"hint": (
"When true, registered VIEWs and MATERIALIZED_VIEWs get a DuckDB "
"master view via bigquery_query() (jobs API) so analysts can "
"SELECT * FROM viewname directly. When false (default), views are "
"catalog-only — analysts use `da fetch` or `da query --remote`. "
"ON for view-heavy deployments where most views are small enough "
"to materialize without 'Response too large' (issue #101)."
),
},
"max_bytes_per_materialize": { "max_bytes_per_materialize": {
"kind": "int", "kind": "int",
"default": 10737418240, "default": 10737418240,
@ -795,13 +783,10 @@ class ServerConfigUpdateRequest(BaseModel):
# #
# - billing_project: defaults to data project; explicit value needed when # - billing_project: defaults to data project; explicit value needed when
# the SA can read the data project but not bill against it. # the SA can read the data project but not bill against it.
# - legacy_wrap_views: default False; toggling ON wraps BQ views via
# `bigquery_query()` so analysts can `SELECT *` directly.
# - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'` # - max_bytes_per_materialize: cost guardrail for `query_mode='materialized'`
# (default 10 GiB; 0 disables; null falls through to the default). # (default 10 GiB; 0 disables; null falls through to the default).
_BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = { _BQ_OPTIONAL_FIELD_DEFAULTS: Dict[str, Any] = {
"billing_project": "", "billing_project": "",
"legacy_wrap_views": False,
"max_bytes_per_materialize": 10737418240, "max_bytes_per_materialize": 10737418240,
} }

View file

@ -891,11 +891,11 @@
<datalist id="bqTableList"></datalist> <datalist id="bqTableList"></datalist>
<div class="form-hint">Table or view name within the dataset. Click <div class="form-hint">Table or view name within the dataset. Click
<strong>List tables</strong> after filling Dataset to populate autocomplete. <strong>List tables</strong> after filling Dataset to populate autocomplete.
<br><strong>Live access:</strong> BASE TABLEs are queryable directly via <br><strong>Live access:</strong> BASE TABLEs query via
<code>da query --remote</code>; VIEWs are registered but analysts must run <code>bq."dataset"."table"</code> (Storage Read API; predicate pushdown).
<code>da fetch</code> to materialize a local snapshot (or the admin can flip VIEWs and MATERIALIZED_VIEWs query via the BQ jobs API (full-scan estimate;
<code>data_source.bigquery.legacy_wrap_views=true</code> to wrap views via the cost-guarded by <code>max_bytes_per_remote_query</code>).
BQ jobs API). <code>da query --remote</code> works for both.
<br><strong>Synced access:</strong> handles both table and view transparently <br><strong>Synced access:</strong> handles both table and view transparently
— the scheduler runs <code>SELECT *</code> through the jobs API and writes a — the scheduler runs <code>SELECT *</code> through the jobs API and writes a
parquet.</div> parquet.</div>
@ -1048,10 +1048,11 @@
list="editBqTableList" placeholder="e.g. orders"> list="editBqTableList" placeholder="e.g. orders">
<datalist id="editBqTableList"></datalist> <datalist id="editBqTableList"></datalist>
<div class="form-hint">Table or view name within the dataset. <div class="form-hint">Table or view name within the dataset.
<br><strong>Live access:</strong> BASE TABLEs are queryable directly via <br><strong>Live access:</strong> BASE TABLEs query via
<code>da query --remote</code>; VIEWs are registered but analysts must run <code>bq."dataset"."table"</code> (Storage Read API; predicate pushdown).
<code>da fetch</code> to materialize a local snapshot (or admin can flip VIEWs and MATERIALIZED_VIEWs query via the BQ jobs API (full-scan estimate;
<code>data_source.bigquery.legacy_wrap_views=true</code>). cost-guarded by <code>max_bytes_per_remote_query</code>).
<code>da query --remote</code> works for both.
<br><strong>Synced access:</strong> handles both transparently — the <br><strong>Synced access:</strong> handles both transparently — the
scheduler runs <code>SELECT *</code> through the jobs API and writes a scheduler runs <code>SELECT *</code> through the jobs API and writes a
parquet.</div> parquet.</div>

View file

@ -122,14 +122,6 @@ data_source:
# # Mismatch -> every BQ call 403 USER_PROJECT_DENIED. # # Mismatch -> every BQ call 403 USER_PROJECT_DENIED.
# # `da diagnose` warns when this falls back to `project`. # # `da diagnose` warns when this falls back to `project`.
# # Configurable via /admin/server-config UI. # # Configurable via /admin/server-config UI.
# legacy_wrap_views: false # When true, registered VIEWs and MATERIALIZED_VIEWs get a DuckDB
# # master view via bigquery_query() (jobs API) so analysts can
# # `SELECT * FROM viewname` directly. When false (default), views
# # are catalog-only -- analysts use `da fetch viewname` or
# # `da query --remote`. ON can cause "Response too large" on big
# # views; OFF requires analyst-side discipline (CLAUDE.md rails).
# # Toggle ON for view-heavy deployments where most views are small.
# # Configurable via /admin/server-config UI.
# max_bytes_per_materialize: 10737418240 # max_bytes_per_materialize: 10737418240
# # Cost guardrail (bytes) for query_mode='materialized' BQ scans. # # Cost guardrail (bytes) for query_mode='materialized' BQ scans.
# # Dry-run check before running; exceeding -> registration / sync # # Dry-run check before running; exceeding -> registration / sync

View file

@ -120,12 +120,12 @@ The flag is the inverse of the new behavior. With "always wrap" as the only mode
Total: 33 references across 9 files. Verified by per-file `grep -c` (rev3 review). Total: 33 references across 9 files. Verified by per-file `grep -c` (rev3 review).
After all edits: `grep -rn 'legacy_wrap_views' connectors app src tests config cli` must return zero. `docs/` is **excluded** from the gate`docs/superpowers/plans/2026-04-27-claude-fetch-primitives.md` (8 hits) and `docs/superpowers/specs/2026-04-27-claude-fetch-primitives-design.md` (1 hit) are historical planning artifacts that document the design decision to introduce the flag; rewriting history would be revisionist. CHANGELOG history retained on the same logic. After all edits: `grep -rn 'legacy_wrap_views' connectors app src config cli` (production code only) must return zero. `tests/` is excluded — historical references in `assert "legacy_wrap_views" not in ...` regression guards and "removed in #160" docstring breadcrumbs have anti-regression value (they catch a future contributor who tries to re-add the flag). `docs/` is excluded for the same reason`docs/superpowers/plans/2026-04-27-claude-fetch-primitives.md` (8 hits) and `docs/superpowers/specs/2026-04-27-claude-fetch-primitives-design.md` (1 hit) are historical planning artifacts; rewriting them would be revisionist. CHANGELOG history retained on the same logic.
Verification command in CI / pre-commit: Verification command in CI / pre-commit:
``` ```
test "$(grep -rn 'legacy_wrap_views' connectors app src tests config cli | wc -l)" -eq 0 test "$(grep -rn 'legacy_wrap_views' connectors app src config cli | wc -l)" -eq 0
``` ```
No DB migration. Operators who explicitly set `legacy_wrap_views: true` in their overlay get the new (equivalent) behavior automatically; the unrecognized key is ignored by the YAML loader. Operators who explicitly set `legacy_wrap_views: false` get the new behavior as a fix, not a regression. No DB migration. Operators who explicitly set `legacy_wrap_views: true` in their overlay get the new (equivalent) behavior automatically; the unrecognized key is ignored by the YAML loader. Operators who explicitly set `legacy_wrap_views: false` get the new behavior as a fix, not a regression.

View file

@ -328,9 +328,12 @@ class SyncOrchestrator:
).fetchall() ).fetchall()
# Pre-fetch the set of names that actually exist as views/tables in # Pre-fetch the set of names that actually exist as views/tables in
# the attached extract.duckdb. The BQ connector with # the attached extract.duckdb. Most connectors emit a `_meta` row
# legacy_wrap_views=False inserts _meta rows for VIEW entities # alongside an inner view per registered name; the keboola
# without creating inner views — those need a different code path. # extractor with `use_extension=False` (and other connectors)
# may insert `_meta` rows whose inner view doesn't exist yet —
# skip those to avoid creating a master view that would resolve
# to nothing.
inner_objects = { inner_objects = {
row[0] row[0]
for row in conn.execute( for row in conn.execute(

View file

@ -807,14 +807,15 @@ class TestRedactionHelpers:
class TestServerConfigBigQueryFields: class TestServerConfigBigQueryFields:
"""Phase J — billing_project, legacy_wrap_views, and """Phase J — billing_project and max_bytes_per_materialize are surfaced
max_bytes_per_materialize are surfaced in the UI/API so an operator can in the UI/API so an operator can set them without SSH'ing to the VM.
set them without SSH'ing to the VM. The first two were previously only They were previously only addressable via direct YAML edits.
addressable via direct YAML edits; max_bytes_per_materialize had no UI
hint at all.""" legacy_wrap_views was removed in #160 — VIEW/MAT_VIEW are now always
wrapped via bigquery_query() (the previous opt-in path)."""
def test_get_surfaces_bq_fields_even_when_unset(self, seeded_app, tmp_path, monkeypatch): def test_get_surfaces_bq_fields_even_when_unset(self, seeded_app, tmp_path, monkeypatch):
"""GET response always includes the three BQ fields under """GET response always includes the BQ optional fields under
data_source.bigquery so the UI's JSON-textarea rendering shows them data_source.bigquery so the UI's JSON-textarea rendering shows them
as editable keys even when YAML omits them. Without this, the as editable keys even when YAML omits them. Without this, the
operator has no UI hint that the knobs exist.""" operator has no UI hint that the knobs exist."""
@ -822,7 +823,7 @@ class TestServerConfigBigQueryFields:
state = tmp_path / "state" state = tmp_path / "state"
state.mkdir(parents=True, exist_ok=True) state.mkdir(parents=True, exist_ok=True)
# Plant a minimal instance.yaml that has data_source.bigquery but # Plant a minimal instance.yaml that has data_source.bigquery but
# NONE of the three fields set. # none of the optional fields set.
(state / "instance.yaml").write_text(yaml.dump({ (state / "instance.yaml").write_text(yaml.dump({
"data_source": { "data_source": {
"type": "bigquery", "type": "bigquery",
@ -838,9 +839,12 @@ class TestServerConfigBigQueryFields:
assert resp.status_code == 200, resp.text assert resp.status_code == 200, resp.text
bq = resp.json()["sections"]["data_source"]["bigquery"] bq = resp.json()["sections"]["data_source"]["bigquery"]
assert "billing_project" in bq, f"billing_project missing from GET: {bq}" assert "billing_project" in bq, f"billing_project missing from GET: {bq}"
assert "legacy_wrap_views" in bq, f"legacy_wrap_views missing from GET: {bq}"
assert "max_bytes_per_materialize" in bq, \ assert "max_bytes_per_materialize" in bq, \
f"max_bytes_per_materialize missing from GET: {bq}" f"max_bytes_per_materialize missing from GET: {bq}"
# legacy_wrap_views was removed in #160; UI must NOT surface it any
# longer (the wrap behavior is now unconditional).
assert "legacy_wrap_views" not in bq, \
f"legacy_wrap_views was removed in #160 but still present in GET: {bq}"
def test_get_preserves_existing_bq_field_values(self, seeded_app, tmp_path, monkeypatch): def test_get_preserves_existing_bq_field_values(self, seeded_app, tmp_path, monkeypatch):
"""When the operator HAS set the fields, GET must surface their actual """When the operator HAS set the fields, GET must surface their actual
@ -854,7 +858,6 @@ class TestServerConfigBigQueryFields:
"bigquery": { "bigquery": {
"project": "my-data-prj", "project": "my-data-prj",
"billing_project": "my-billing-prj", "billing_project": "my-billing-prj",
"legacy_wrap_views": True,
"max_bytes_per_materialize": 5368709120, # 5 GiB "max_bytes_per_materialize": 5368709120, # 5 GiB
}, },
}, },
@ -867,7 +870,6 @@ class TestServerConfigBigQueryFields:
resp = c.get("/api/admin/server-config", headers=_auth(token)) resp = c.get("/api/admin/server-config", headers=_auth(token))
bq = resp.json()["sections"]["data_source"]["bigquery"] bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["billing_project"] == "my-billing-prj" assert bq["billing_project"] == "my-billing-prj"
assert bq["legacy_wrap_views"] is True
assert bq["max_bytes_per_materialize"] == 5368709120 assert bq["max_bytes_per_materialize"] == 5368709120
def test_post_persists_billing_project(self, seeded_app, tmp_path, monkeypatch): def test_post_persists_billing_project(self, seeded_app, tmp_path, monkeypatch):
@ -890,23 +892,6 @@ class TestServerConfigBigQueryFields:
bq = resp.json()["sections"]["data_source"]["bigquery"] bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["billing_project"] == "my-billing-prj" assert bq["billing_project"] == "my-billing-prj"
def test_post_persists_legacy_wrap_views(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True)
c = seeded_app["client"]
token = seeded_app["admin_token"]
resp = c.post(
"/api/admin/server-config",
json={"sections": {"data_source": {"bigquery": {
"legacy_wrap_views": True,
}}}},
headers=_auth(token),
)
assert resp.status_code == 200, resp.text
resp = c.get("/api/admin/server-config", headers=_auth(token))
bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["legacy_wrap_views"] is True
def test_post_persists_max_bytes_per_materialize(self, seeded_app, tmp_path, monkeypatch): def test_post_persists_max_bytes_per_materialize(self, seeded_app, tmp_path, monkeypatch):
monkeypatch.setenv("DATA_DIR", str(tmp_path)) monkeypatch.setenv("DATA_DIR", str(tmp_path))
(tmp_path / "state").mkdir(parents=True, exist_ok=True) (tmp_path / "state").mkdir(parents=True, exist_ok=True)
@ -924,14 +909,14 @@ class TestServerConfigBigQueryFields:
bq = resp.json()["sections"]["data_source"]["bigquery"] bq = resp.json()["sections"]["data_source"]["bigquery"]
assert bq["max_bytes_per_materialize"] == 21474836480 assert bq["max_bytes_per_materialize"] == 21474836480
def test_template_documents_three_new_fields(self, seeded_app): def test_template_documents_bq_optional_fields(self, seeded_app):
"""The three BQ optional fields are now surfaced through the """BQ optional fields are surfaced through the known-fields registry
known-fields registry (GET /api/admin/server-config carries them (GET /api/admin/server-config carries them in
in `known_fields.data_source.bigquery.fields`), not hardcoded into `known_fields.data_source.bigquery.fields`), not hardcoded into the
the template text. The renderer reads the registry at runtime and template text. The renderer reads the registry at runtime and creates
creates a structured form with hints for each leaf so the test a structured form with hints for each leaf so the test verifies
verifies operator-discoverability through the API channel rather operator-discoverability through the API channel rather than via
than via static HTML inspection. static HTML inspection.
""" """
c = seeded_app["client"] c = seeded_app["client"]
token = seeded_app["admin_token"] token = seeded_app["admin_token"]
@ -940,12 +925,13 @@ class TestServerConfigBigQueryFields:
bq_fields = resp.json()["known_fields"]["data_source"]["bigquery"]["fields"] bq_fields = resp.json()["known_fields"]["data_source"]["bigquery"]["fields"]
assert "billing_project" in bq_fields, \ assert "billing_project" in bq_fields, \
"registry must expose billing_project as a known field" "registry must expose billing_project as a known field"
assert "legacy_wrap_views" in bq_fields, \
"registry must expose legacy_wrap_views as a known field"
assert "max_bytes_per_materialize" in bq_fields, \ assert "max_bytes_per_materialize" in bq_fields, \
"registry must expose max_bytes_per_materialize as a known field" "registry must expose max_bytes_per_materialize as a known field"
# Each field must carry a hint so the renderer can show operator- # legacy_wrap_views was removed in #160 (VIEW/MAT_VIEW always wrap).
# facing help text — no anonymous knobs. assert "legacy_wrap_views" not in bq_fields, \
for k in ("billing_project", "legacy_wrap_views", "max_bytes_per_materialize"): "legacy_wrap_views was removed in #160 but still present in known_fields"
# Each surviving field must carry a hint so the renderer can show
# operator-facing help text — no anonymous knobs.
for k in ("billing_project", "max_bytes_per_materialize"):
assert "hint" in bq_fields[k] and bq_fields[k]["hint"], \ assert "hint" in bq_fields[k] and bq_fields[k]["hint"], \
f"{k} must carry a non-empty hint" f"{k} must carry a non-empty hint"

View file

@ -176,10 +176,11 @@ def test_bigquery_subfields_populated(seeded_app):
assert r.status_code == 200 assert r.status_code == 200
fields = r.json()["known_fields"]["data_source"]["bigquery"]["fields"] fields = r.json()["known_fields"]["data_source"]["bigquery"]["fields"]
assert "billing_project" in fields assert "billing_project" in fields
assert "legacy_wrap_views" in fields
assert "max_bytes_per_materialize" in fields assert "max_bytes_per_materialize" in fields
assert fields["legacy_wrap_views"]["kind"] == "bool" # legacy_wrap_views was removed in #160 — VIEW/MATERIALIZED_VIEW are now
assert fields["legacy_wrap_views"]["default"] is False # always wrapped via bigquery_query() (the previous opt-in path).
assert "legacy_wrap_views" not in fields, \
"legacy_wrap_views config knob was removed; #160 makes the wrap behavior unconditional"
assert fields["max_bytes_per_materialize"]["kind"] == "int" assert fields["max_bytes_per_materialize"]["kind"] == "int"
assert fields["max_bytes_per_materialize"]["default"] == 10737418240 assert fields["max_bytes_per_materialize"]["default"] == 10737418240

View file

@ -316,7 +316,9 @@ class TestViewVsTableTemplates:
"BASE TABLE should not use bigquery_query() function" "BASE TABLE should not use bigquery_query() function"
def test_view_uses_bigquery_query_function(self, tmp_path, monkeypatch): def test_view_uses_bigquery_query_function(self, tmp_path, monkeypatch):
"""For VIEW with legacy_wrap_views=True, generated DuckDB view wraps bigquery_query() (jobs API path).""" """For VIEW entity, generated DuckDB master view wraps bigquery_query()
(jobs API path). Same SQL form as the prior `legacy_wrap_views=True`
branch now unconditional per #160."""
from connectors.bigquery.extractor import init_extract from connectors.bigquery.extractor import init_extract
monkeypatch.setattr( monkeypatch.setattr(
@ -337,12 +339,6 @@ class TestViewVsTableTemplates:
monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", spy_connect) monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", spy_connect)
# Enable legacy toggle so this test verifies the old wrap-view path still works.
monkeypatch.setattr(
"connectors.bigquery.extractor.get_value",
lambda *args, default=None, **kw: True if "legacy_wrap_views" in args else default,
)
init_extract( init_extract(
str(tmp_path), str(tmp_path),
"my-project", "my-project",
@ -575,88 +571,16 @@ class TestExtractorMainModule:
assert exc_info.value.code == 2 assert exc_info.value.code == 2
class TestDropWrapViewForBQViews: class TestWrapViewForBQViews:
def test_view_entity_does_not_create_master_view_by_default(self, tmp_path, monkeypatch): """Issue #160: query_mode='remote' BQ rows whose entity is VIEW or
from connectors.bigquery.extractor import init_extract MATERIALIZED_VIEW must get a master view via bigquery_query() for any
monkeypatch.setattr("connectors.bigquery.extractor.get_metadata_token", lambda: "tok") other entity type we don't have proven runtime support for, skip both
monkeypatch.setattr("connectors.bigquery.extractor._detect_table_type", lambda *a, **kw: "VIEW") the master view AND the _meta row."""
# Stub BQ extension calls to avoid hitting real BQ
real_connect = duckdb.connect
captured = []
def safe_connect(*a, **kw):
return _CapturingProxy(real_connect(*a, **kw), captured)
monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect)
# legacy toggle is OFF by default → expect no CREATE VIEW for the BQ view
monkeypatch.setattr(
"connectors.bigquery.extractor.get_value",
lambda *args, default=None, **kw: False if "legacy_wrap_views" in args else default,
raising=False,
)
init_extract(
str(tmp_path),
"my-project",
[{"name": "myview", "bucket": "ds", "source_table": "myview", "description": ""}],
)
# Confirm extract.duckdb has _meta + _remote_attach but NO master view for myview
c = duckdb.connect(str(tmp_path / "extract.duckdb"), read_only=True)
try:
views = c.execute(
"SELECT view_name FROM duckdb_views() WHERE view_name='myview'"
).fetchall()
assert views == [], f"expected no wrap view for VIEW entity by default; got {views}"
meta = c.execute("SELECT table_name FROM _meta").fetchall()
assert ("myview",) in meta, "_meta must still record the view"
finally:
c.close()
def test_legacy_wrap_views_toggle_restores_old_behavior(self, tmp_path, monkeypatch):
from connectors.bigquery.extractor import init_extract
monkeypatch.setattr("connectors.bigquery.extractor.get_metadata_token", lambda: "tok")
monkeypatch.setattr("connectors.bigquery.extractor._detect_table_type", lambda *a, **kw: "VIEW")
real_connect = duckdb.connect
captured = []
def safe_connect(*a, **kw):
return _CapturingProxy(real_connect(*a, **kw), captured)
monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect)
# legacy toggle ON → should still create the wrap view
monkeypatch.setattr(
"connectors.bigquery.extractor.get_value",
lambda *args, default=None, **kw: True if "legacy_wrap_views" in args else default,
raising=False,
)
init_extract(
str(tmp_path),
"my-project",
[{"name": "myview", "bucket": "ds", "source_table": "myview", "description": ""}],
)
# With legacy ON the wrap view SQL should have been emitted
view_sqls = [s for s in captured if "CREATE OR REPLACE VIEW" in s.upper()]
myview_sqls = [s for s in view_sqls if '"myview"' in s]
assert myview_sqls != [], \
f"expected wrap view SQL for VIEW entity when legacy_wrap_views=True; captured={captured}"
assert any("bigquery_query(" in s for s in myview_sqls), \
f"legacy wrap view should use bigquery_query(); got: {myview_sqls}"
# ---- #160 always-wrap tests ------------------------------------------
# Issue #160: query_mode='remote' BQ rows whose entity is VIEW or
# MATERIALIZED_VIEW must get a master view via bigquery_query() by default.
# Today (legacy_wrap_views=False default) they don't — `da query --remote`
# against such a table returns DuckDB catalog error. Fix: always wrap.
def test_view_creates_wrap_view_with_default_config(self, tmp_path, monkeypatch): def test_view_creates_wrap_view_with_default_config(self, tmp_path, monkeypatch):
"""VIEW entity must get a bigquery_query() wrap view by default """VIEW entity must get a bigquery_query() wrap view (the previous
(not just when legacy_wrap_views=True). Closes #160.""" opt-in path under `legacy_wrap_views=True`, now unconditional).
Closes #160."""
from connectors.bigquery.extractor import init_extract from connectors.bigquery.extractor import init_extract
import app.instance_config as _ic import app.instance_config as _ic
monkeypatch.setattr(_ic, "_instance_config", None, raising=False) monkeypatch.setattr(_ic, "_instance_config", None, raising=False)
@ -670,7 +594,6 @@ class TestDropWrapViewForBQViews:
return _CapturingProxy(real_connect(*a, **kw), captured) return _CapturingProxy(real_connect(*a, **kw), captured)
monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect) monkeypatch.setattr("connectors.bigquery.extractor.duckdb.connect", safe_connect)
# Default config — no monkeypatch setting legacy_wrap_views
init_extract( init_extract(
str(tmp_path), str(tmp_path),
"my-project", "my-project",