agnes-the-ai-analyst/tests/test_bigquery_auth.py
ZdenekSrotyr 2e1dfb7553
feat(v2): claude-driven fetch primitives + 0.14.0 (#102)
Replaces the BigQuery wrap-view pattern with a discovery + scoped-fetch toolkit driven by the analyst's Claude session. Adds /api/v2/{catalog,schema,sample,scan,scan/estimate}, da catalog/schema/describe/fetch/snapshot/disk-info CLI commands, sqlglot-backed WHERE validator, process-local quota tracker, agent rails skill (cli/skills/agnes-data-querying.md). BREAKING: BQ wrap views off by default — set data_source.bigquery.legacy_wrap_views=true for one cycle. Backward-compat field_validator on primary_key. Catalog cache now matches documented 300s TTL with RBAC fresh per request. Cuts release v0.14.0.
2026-04-29 01:07:19 +02:00

169 lines
7.4 KiB
Python

"""Tests for BQ metadata-token auth helper."""
from unittest.mock import patch, MagicMock
import json
import pytest
from connectors.bigquery.auth import (
get_metadata_token,
clear_token_cache,
BQMetadataAuthError,
)
from connectors.bigquery.auth import _METADATA_TOKEN_URL as _METADATA_TOKEN_URL_FOR_TEST
@pytest.fixture(autouse=True)
def _reset_token_cache():
"""Reset the module-level token cache between tests so cache state from one
test does not leak into another."""
clear_token_cache()
yield
clear_token_cache()
def _mock_urlopen(payload: dict, status: int = 200):
resp = MagicMock()
resp.read.return_value = json.dumps(payload).encode()
resp.__enter__ = lambda self: self
resp.__exit__ = lambda self, *a: None
return resp
class TestGetMetadataToken:
def test_returns_token_string(self):
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
m.return_value = _mock_urlopen({"access_token": "ya29.test", "expires_in": 3599})
token = get_metadata_token()
assert token == "ya29.test"
def test_passes_metadata_flavor_header(self):
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
m.return_value = _mock_urlopen({"access_token": "tok"})
get_metadata_token()
req = m.call_args[0][0]
assert req.get_header("Metadata-flavor") == "Google"
assert "metadata.google.internal" in req.full_url
def test_raises_on_unreachable_metadata(self):
"""Metadata unreachable + ADC also unavailable → raise with both reasons.
Issue #112 added an ADC fallback; the test mocks both paths to fail
so the original 'metadata-only failure' contract is exercised."""
from urllib.error import URLError
with patch("connectors.bigquery.auth.urllib.request.urlopen", side_effect=URLError("no route")), \
patch("connectors.bigquery.auth._fetch_adc_token",
side_effect=BQMetadataAuthError("no ADC creds")):
with pytest.raises(BQMetadataAuthError, match="metadata server unreachable"):
get_metadata_token()
def test_raises_on_missing_access_token_field(self):
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m, \
patch("connectors.bigquery.auth._fetch_adc_token",
side_effect=BQMetadataAuthError("no ADC creds")):
m.return_value = _mock_urlopen({"error": "bad"})
with pytest.raises(BQMetadataAuthError, match="no access_token in response"):
get_metadata_token()
def test_raises_on_http_error(self):
"""When metadata server returns 4xx/5xx (e.g. SA misconfiguration → 403)
AND ADC is also unavailable, raise BQMetadataAuthError. ADC fallback
is mocked to fail so this asserts the metadata-error branch."""
from urllib.error import HTTPError
err = HTTPError(
url=_METADATA_TOKEN_URL_FOR_TEST,
code=403,
msg="Forbidden",
hdrs=None,
fp=None,
)
with patch("connectors.bigquery.auth.urllib.request.urlopen", side_effect=err), \
patch("connectors.bigquery.auth._fetch_adc_token",
side_effect=BQMetadataAuthError("no ADC creds")):
with pytest.raises(BQMetadataAuthError):
get_metadata_token()
def test_falls_back_to_adc_when_metadata_unreachable(self):
"""Issue #112 — when GCE metadata is unreachable (laptop dev), fall
through to ADC. The combined wrapper must return a token from ADC."""
from urllib.error import URLError
with patch("connectors.bigquery.auth.urllib.request.urlopen",
side_effect=URLError("no route")), \
patch("connectors.bigquery.auth._fetch_adc_token",
return_value=("adc-token", 3600)):
assert get_metadata_token() == "adc-token"
class TestTokenCache:
"""get_metadata_token() must cache valid tokens until shortly before expiry."""
def test_second_call_returns_cached_value_without_new_urlopen(self):
"""Within the cache TTL, only one urlopen happens."""
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
m.return_value = _mock_urlopen(
{"access_token": "ya29.fresh", "expires_in": 3599}
)
t1 = get_metadata_token()
t2 = get_metadata_token()
assert t1 == "ya29.fresh"
assert t2 == "ya29.fresh"
assert m.call_count == 1, \
f"second call should hit cache, not metadata server; got {m.call_count} urlopen calls"
def test_cache_refetches_after_expiry(self):
"""Once the safety-buffered expiry passes, the next call re-fetches."""
# Populate cache with a token that expires almost immediately (small expires_in
# is below the safety buffer, so the populator code path SKIPS caching).
# Use a different mechanism: prime cache with a normal token, then advance
# monotonic time past expiry and verify a second urlopen happens.
import connectors.bigquery.auth as auth_mod
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
m.return_value = _mock_urlopen(
{"access_token": "ya29.first", "expires_in": 3600}
)
t1 = get_metadata_token()
assert t1 == "ya29.first"
assert m.call_count == 1
# Force the cache expiry into the past (jump monotonic forward by
# editing the cached tuple directly — public API doesn't expose this).
with auth_mod._cache_lock:
cached_token, _ = auth_mod._token_cache
auth_mod._token_cache = (cached_token, 0.0) # already expired
# Next call should re-fetch
m.return_value = _mock_urlopen(
{"access_token": "ya29.refreshed", "expires_in": 3600}
)
t2 = get_metadata_token()
assert t2 == "ya29.refreshed"
assert m.call_count == 2
def test_no_caching_when_expires_in_missing_or_too_small(self):
"""If the response lacks expires_in (or it's smaller than the safety buffer),
skip caching so the next call retries — protects against poison-cached
zero-TTL responses."""
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
# First response: no expires_in field at all
m.return_value = _mock_urlopen({"access_token": "ya29.no_ttl"})
t1 = get_metadata_token()
t2 = get_metadata_token()
assert t1 == "ya29.no_ttl"
assert t2 == "ya29.no_ttl"
# Both calls hit the network because the first response wasn't cached
assert m.call_count == 2
def test_clear_token_cache_forces_refetch(self):
"""Public clear_token_cache() invalidates so the next call re-fetches."""
with patch("connectors.bigquery.auth.urllib.request.urlopen") as m:
m.return_value = _mock_urlopen(
{"access_token": "ya29.cached", "expires_in": 3600}
)
get_metadata_token()
assert m.call_count == 1
clear_token_cache()
m.return_value = _mock_urlopen(
{"access_token": "ya29.after_clear", "expires_in": 3600}
)
t = get_metadata_token()
assert t == "ya29.after_clear"
assert m.call_count == 2