- Remove iCloud duplicate files (test_db 2.py, src/db 2.py) - Fix metrics expression fallback to top-level field in transformer + webapp - Fix sync_data.sh rsync exception pattern for $SSH_HOST variable - Fix deploy_guard cp regex to skip shell variable expansions - Update sudoers-deploy with missing root:data-ops rules - Update CRITICAL_DIRS ownership expectations to match deploy.sh reality 913 tests passing, 0 failures.
391 lines
16 KiB
Python
391 lines
16 KiB
Python
"""Tests for sync script reliability and live rsync diagnostics.
|
|
|
|
Static tests verify that sync_data.sh and sync_jira.sh follow reliability
|
|
patterns (retry wrapper, SSH keepalive, timeouts, partial transfers).
|
|
|
|
Live tests (marked @pytest.mark.live) run actual rsync transfers against
|
|
the data-analyst server and write diagnostic logs to data/sync_diagnostics/.
|
|
"""
|
|
|
|
import re
|
|
import subprocess
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constants
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Parent of the directory that contains this test module.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Shell scripts whose source is inspected by the static checks.
SCRIPTS_DIR = REPO_ROOT.joinpath("scripts")
SYNC_DATA_SH = SCRIPTS_DIR.joinpath("sync_data.sh")
SYNC_JIRA_SH = REPO_ROOT.joinpath("connectors", "jira", "scripts", "sync_jira.sh")
SYNC_SCRIPTS = [SYNC_DATA_SH, SYNC_JIRA_SH]

# Directory that receives the logs written by the live diagnostic tests.
DIAG_DIR = REPO_ROOT.joinpath("data", "sync_diagnostics")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _find_rsync_lines(script_path: Path) -> list[tuple[int, str]]:
    """Collect the lines of a script that begin with an rsync command.

    Each line is stripped of surrounding whitespace first.  Lines starting
    with ``#`` are ignored, and a line is kept only when it starts with
    ``rsync`` or ``rsync_reliable`` followed by whitespace.

    Returns:
        ``(line_number, stripped_line)`` pairs in file order, with line
        numbers starting at 1.
    """
    # Match lines that call rsync as a command (not in variable names)
    invocation = re.compile(r'^\s*(rsync_reliable|rsync)\s')
    numbered = enumerate(script_path.read_text().splitlines(), start=1)
    candidates = ((number, raw.strip()) for number, raw in numbered)
    return [
        (number, text)
        for number, text in candidates
        if not text.startswith("#") and invocation.match(text)
    ]
|
|
|
|
|
|
def _extract_function_body(script_content: str, func_name: str) -> str:
    """Extract the body of a bash function.

    Looks for ``func_name() {`` and returns everything between the opening
    brace and the first ``}`` that starts a line.  The returned text starts
    right after the ``{``, so it normally begins with a newline.

    Args:
        script_content: Full text of the shell script.
        func_name: Exact name of the function to look up.

    Returns:
        The function body, or ``""`` when no matching definition is found.
    """
    # re.escape keeps characters such as "." in the name literal, and the
    # lookbehind stops a longer name that merely ends with func_name
    # (e.g. "not_rsync_reliable") from being mistaken for the target.
    pattern = rf'(?<![\w-]){re.escape(func_name)}\s*\(\)\s*\{{(.*?)\n\}}'
    match = re.search(pattern, script_content, re.DOTALL)
    return match.group(1) if match else ""
|
|
|
|
|
|
def _write_diagnostic(filename: str, content: str) -> None:
    """Store *content* in ``DIAG_DIR / filename`` below a timestamp header.

    ``DIAG_DIR`` is created when missing.  An existing file with the same
    name is overwritten.  The header carries the current UTC time.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    report = f"# Diagnostic run at {stamp}\n\n{content}\n"
    DIAG_DIR.mkdir(parents=True, exist_ok=True)
    (DIAG_DIR / filename).write_text(report)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Live diagnostic tests — require server access
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSyncDiagnostics:
    """Live tests that run actual rsync transfers for diagnostics.

    Every test is marked ``live`` and talks to the ``data-analyst`` SSH host.
    Each command's outcome is written with ``_write_diagnostic`` so that a
    failure, including a run that exceeds its subprocess timeout, leaves a
    log behind.
    """

    @staticmethod
    def _describe(result: subprocess.CompletedProcess, sep: str = "\n") -> str:
        """Render a finished process as the text stored in a diagnostic log.

        *sep* is placed between the ``stdout:`` / ``stderr:`` labels and
        their captured text.
        """
        return (
            f"returncode: {result.returncode}\n"
            f"stdout:{sep}{result.stdout}\n"
            f"stderr:{sep}{result.stderr}"
        )

    @classmethod
    def _run_logged(cls, cmd, log_name, *, label, timeout, sep="\n"):
        """Run *cmd*, write its outcome to *log_name*, and return the result.

        Args:
            cmd: Argument list passed to ``subprocess.run``.
            log_name: File name handed to ``_write_diagnostic``.
            label: Human-readable command name used in timeout messages.
            timeout: Seconds before the subprocess is abandoned.
            sep: Separator forwarded to ``_describe``.

        Returns:
            The ``subprocess.CompletedProcess`` of the finished command.
            When the timeout expires, the timeout is logged and the test is
            failed via ``pytest.fail`` instead of returning.
        """
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            # Record the hang before failing so the log is never missing.
            _write_diagnostic(
                log_name,
                f"TIMEOUT: {label} exceeded {timeout}s limit",
            )
            pytest.fail(f"{label} timed out after {timeout}s")
        _write_diagnostic(log_name, cls._describe(result, sep))
        return result

    @pytest.mark.live
    def test_ssh_connectivity(self):
        """Verify basic SSH connectivity to data-analyst host."""
        result = self._run_logged(
            ["ssh", "-o", "ConnectTimeout=10", "data-analyst", "echo", "ok"],
            "ssh_connectivity.log",
            label="SSH connectivity check",
            timeout=15,
            sep=" ",
        )
        assert result.returncode == 0, (
            f"SSH connection failed (rc={result.returncode}): {result.stderr}"
        )

    @pytest.mark.live
    def test_rsync_small_directory(self):
        """Sync a small directory (docs) to verify basic rsync works."""
        dest = Path("/tmp/claude/sync_test_docs/")
        dest.mkdir(parents=True, exist_ok=True)
        result = self._run_logged(
            [
                "rsync", "-avz", "--timeout=60",
                "data-analyst:server/docs/", str(dest),
            ],
            "rsync_small_directory.log",
            label="rsync docs",
            timeout=180,
        )
        assert result.returncode == 0, (
            f"rsync docs failed (rc={result.returncode}): {result.stderr}"
        )

    @pytest.mark.live
    def test_rsync_with_keepalive(self):
        """Sync parquet directory with SSH keepalive options."""
        dest = Path("/tmp/claude/sync_test_parquet/")
        dest.mkdir(parents=True, exist_ok=True)
        result = self._run_logged(
            [
                "rsync",
                "-e", "ssh -o ServerAliveInterval=60 -o ServerAliveCountMax=3",
                "-av", "--timeout=300", "--delete",
                "data-analyst:server/parquet/", str(dest),
            ],
            "rsync_with_keepalive.log",
            label="rsync with keepalive",
            timeout=600,
        )
        assert result.returncode == 0, (
            f"rsync with keepalive failed (rc={result.returncode}): {result.stderr}"
        )

    @pytest.mark.live
    def test_rsync_with_timeout(self):
        """Sync parquet with a short timeout to test timeout behaviour."""
        dest = Path("/tmp/claude/sync_test_timeout/")
        dest.mkdir(parents=True, exist_ok=True)
        try:
            result = subprocess.run(
                [
                    "rsync", "-av", "--timeout=60",
                    "data-analyst:server/parquet/", str(dest),
                ],
                capture_output=True,
                text=True,
                timeout=180,
            )
        except subprocess.TimeoutExpired:
            _write_diagnostic(
                "rsync_with_timeout.log",
                "TIMEOUT: rsync with short timeout exceeded 180s limit",
            )
            # This is a diagnostic test — we log whether it timed out or
            # succeeded, so a timeout is reported as a skip, not a failure.
            pytest.skip("rsync timed out (diagnostic recorded)")
        _write_diagnostic("rsync_with_timeout.log", self._describe(result))

    @pytest.mark.live
    def test_rsync_per_subdirectory(self):
        """List parquet subdirectories and sync each one individually."""
        log_name = "rsync_per_subdirectory.log"

        # First, list remote subdirectories.  The listing outcome is logged
        # too; the per-subdirectory summary below overwrites it on success.
        ls_result = self._run_logged(
            ["ssh", "data-analyst", "ls server/parquet/"],
            log_name,
            label="remote parquet listing",
            timeout=30,
        )
        assert ls_result.returncode == 0, (
            f"Failed to list remote parquet dirs: {ls_result.stderr}"
        )

        subdirs = [d for d in ls_result.stdout.strip().splitlines() if d]
        results_log: list[str] = [f"Found {len(subdirs)} subdirectories\n"]

        # NOTE(review): individual transfer failures are only logged, never
        # asserted — presumably intentional for a diagnostic run; confirm.
        for subdir in subdirs:
            dest = Path(f"/tmp/claude/sync_test_subdir/{subdir}/")
            dest.mkdir(parents=True, exist_ok=True)
            try:
                result = subprocess.run(
                    [
                        "rsync", "-av", "--timeout=120",
                        f"data-analyst:server/parquet/{subdir}/", str(dest),
                    ],
                    capture_output=True,
                    text=True,
                    timeout=300,
                )
            except subprocess.TimeoutExpired:
                results_log.append(f"[{subdir}] TIMEOUT after 300s")
                continue
            results_log.append(
                f"[{subdir}] rc={result.returncode} "
                f"({'OK' if result.returncode == 0 else 'FAIL'})"
            )
            if result.returncode != 0:
                # Keep rsync's own error text so a FAIL line can be diagnosed.
                results_log.append(f"[{subdir}] stderr: {result.stderr.strip()}")

        _write_diagnostic(log_name, "\n".join(results_log))

    @pytest.mark.live
    def test_rsync_without_compression(self):
        """Sync parquet without -z flag (compression hurts for binary data)."""
        dest = Path("/tmp/claude/sync_test_nocompress/")
        dest.mkdir(parents=True, exist_ok=True)
        result = self._run_logged(
            [
                "rsync", "-av", "--timeout=300", "--delete",
                "data-analyst:server/parquet/", str(dest),
            ],
            "rsync_without_compression.log",
            label="rsync without compression",
            timeout=600,
        )
        assert result.returncode == 0, (
            f"rsync without compression failed (rc={result.returncode}): "
            f"{result.stderr}"
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Static regression tests — run in CI, no server needed
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSyncScriptReliability:
    """Verify sync scripts follow reliability patterns (retry, keepalive, etc.).

    These checks only read the script sources listed in ``SYNC_SCRIPTS``;
    nothing is executed.
    """

    # An rsync compression switch: a short-option cluster containing "z"
    # (-z, -az, -avz, -avzP, ...) or the long form --compress, standing as
    # its own (optionally quoted) argument.
    _COMPRESSION_FLAG = re.compile(
        r"""(?:^|[\s"'])(?:-[A-Za-z]*z[A-Za-z]*|--compress)(?=[\s"']|$)"""
    )

    @classmethod
    def _uses_compression(cls, line: str) -> bool:
        """Return True when *line* passes a compression flag to rsync."""
        return cls._COMPRESSION_FLAG.search(line) is not None

    @staticmethod
    def _reliable_body(script: Path) -> str:
        """Return the rsync_reliable() body of *script*, failing if absent."""
        body = _extract_function_body(script.read_text(), "rsync_reliable")
        assert body, f"{script.name}: could not extract rsync_reliable body"
        return body

    def test_sync_scripts_define_rsync_reliable(self):
        """Both sync scripts must define an rsync_reliable() function."""
        for script in SYNC_SCRIPTS:
            content = script.read_text()
            assert re.search(r'rsync_reliable\s*\(\)', content), (
                f"{script.name} does not define rsync_reliable() function"
            )

    def test_sync_scripts_define_reliability_constants(self):
        """Both scripts must define RSYNC_SSH_OPTS, RSYNC_TIMEOUT,
        RSYNC_MAX_RETRIES, and RSYNC_RETRY_DELAY."""
        required_constants = [
            "RSYNC_SSH_OPTS",
            "RSYNC_TIMEOUT",
            "RSYNC_MAX_RETRIES",
            "RSYNC_RETRY_DELAY",
        ]
        for script in SYNC_SCRIPTS:
            content = script.read_text()
            for const in required_constants:
                assert re.search(rf'{const}=', content), (
                    f"{script.name} does not define {const}"
                )

    def test_rsync_reliable_uses_ssh_keepalive(self):
        """rsync_reliable function must use SSH keepalive (via RSYNC_SSH_OPTS)."""
        for script in SYNC_SCRIPTS:
            body = self._reliable_body(script)
            # Function references RSYNC_SSH_OPTS which must contain keepalive
            assert "RSYNC_SSH_OPTS" in body, (
                f"{script.name}: rsync_reliable does not reference RSYNC_SSH_OPTS"
            )
            # The option is looked up anywhere in the script, not only in
            # the RSYNC_SSH_OPTS assignment.
            assert "ServerAliveInterval" in script.read_text(), (
                f"{script.name}: RSYNC_SSH_OPTS does not define ServerAliveInterval"
            )

    def test_rsync_reliable_uses_timeout(self):
        """rsync_reliable function must use --timeout."""
        for script in SYNC_SCRIPTS:
            assert "--timeout" in self._reliable_body(script), (
                f"{script.name}: rsync_reliable does not use --timeout"
            )

    def test_rsync_reliable_uses_partial_dir(self):
        """rsync_reliable function must use --partial-dir."""
        for script in SYNC_SCRIPTS:
            assert "--partial-dir" in self._reliable_body(script), (
                f"{script.name}: rsync_reliable does not use --partial-dir"
            )

    @pytest.mark.parametrize(
        "script_path",
        [
            pytest.param(SYNC_DATA_SH, id="sync_data.sh"),
            pytest.param(SYNC_JIRA_SH, id="sync_jira.sh"),
        ],
    )
    def test_all_rsync_calls_use_reliable_wrapper(self, script_path: Path):
        """All rsync invocations must go through rsync_reliable, with
        narrow exceptions:
        - lines inside the rsync_reliable() function definition itself
        - lines checking rsync availability (command -v rsync)
        - the self-update rsync in sync_data.sh that has an scp fallback
        """
        content = script_path.read_text()
        func_body = _extract_function_body(content, "rsync_reliable")

        # Line numbers that belong to rsync_reliable function body.  The
        # body text starts right after the opening brace, so the first
        # covered line is the one holding that brace.
        func_lines: set[int] = set()
        if func_body:
            start_idx = content.index(func_body)
            start_line = content[:start_idx].count("\n") + 1
            end_line = start_line + func_body.count("\n")
            func_lines = set(range(start_line, end_line + 1))

        violations: list[tuple[int, str]] = []
        for line_num, line in _find_rsync_lines(script_path):
            # Skip lines inside rsync_reliable() definition
            if line_num in func_lines:
                continue
            # Skip rsync availability checks
            if "command -v rsync" in line:
                continue
            # Skip self-update rsync with scp fallback (sync_data.sh only)
            if "server/scripts/" in line and script_path.name == "sync_data.sh":
                continue
            # Everything else must use rsync_reliable, not bare rsync
            if re.match(r'^rsync\s', line):
                violations.append((line_num, line))

        assert not violations, (
            f"{script_path.name} has bare rsync calls that should use "
            f"rsync_reliable:\n"
            + "\n".join(f"  L{n}: {l}" for n, l in violations)
        )

    def test_parquet_rsync_does_not_use_compression(self):
        """rsync calls targeting parquet paths must NOT use -z flag.
        Parquet is already compressed; -z adds CPU overhead for no gain."""
        for script in SYNC_SCRIPTS:
            for line_num, line in _find_rsync_lines(script):
                if "parquet" not in line:
                    continue
                # Catches -z in any short-option cluster and --compress,
                # not just the literal "-avz".
                assert not self._uses_compression(line), (
                    f"{script.name} L{line_num}: parquet rsync enables "
                    f"compression (drop -z / --compress): {line}"
                )

    def test_text_rsync_uses_compression(self):
        """rsync calls targeting text paths (docs, scripts, metadata,
        examples) should use -z compression."""
        text_indicators = ["docs", "scripts", "metadata", "examples"]
        binary_indicators = ["parquet", "attachments"]

        for script in SYNC_SCRIPTS:
            for line_num, line in _find_rsync_lines(script):
                # Skip lines targeting binary paths
                if any(b in line for b in binary_indicators):
                    continue
                # Only check lines targeting known text paths
                if not any(t in line for t in text_indicators):
                    continue
                # A real flag match: "-az" counts, a path such as
                # "my-zone/docs" does not.
                assert self._uses_compression(line), (
                    f"{script.name} L{line_num}: text rsync missing -z "
                    f"compression flag: {line}"
                )
|