diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f8ddcf..56fd2cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.38.3] — 2026-05-06 + +### Changed +- **Admin / Tables**: registry table now shows Source (bucket/table), Schedule, Folder, Registered by/at, and a sync-error warning icon per row. The page widens to ~1600px to accommodate. + +### Fixed +- **Admin / Tables**: long table descriptions no longer push the row's Edit / Manage access / Delete buttons off-screen. The Description column is now clamped to 2 lines with the full text available on hover and in the Edit modal. +- **Admin / Tables**: descriptions stored with shell-quoting backslash-escapes (`Don\'t`, `\n`) now render correctly. The same normalization also runs at register/update time so newly-saved descriptions are never corrupted. +- **Admin / Tables**: `scripts/fix_description_escapes.py` cleans up already-corrupted descriptions in `table_registry` (run with `--dry-run` first, then `--apply`). + ## [0.38.2] — 2026-05-06 ### Fixed diff --git a/app/api/admin.py b/app/api/admin.py index 4469299..a697197 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -88,6 +88,31 @@ def _validate_url_not_private(url: str, field_name: str = "url") -> None: ) +def _unescape_shell_quoting(s: str | None) -> str | None: + """Defensive normalization for descriptions arriving via shell-quoting tooling. + + Some operators register tables with bash/curl invocations whose quoting + injects literal backslash escapes into the payload (e.g. ``Don\\'t`` or + embedded ``\\n`` instead of real newlines). The backend would otherwise + persist those bytes verbatim and the UI would render them verbatim too. + Mirrored in JS as ``unescapeShellQuoting`` in + ``app/web/templates/admin_tables.html`` for already-stored rows. + """ + if not s: + return s + # Order matters: protect real backslashes first. + SENTINEL = "\x00" + return ( + s.replace("\\\\", SENTINEL) + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\'", "'") + .replace('\\"', '"') + .replace(SENTINEL, "\\") + ) + + def _normalize_primary_key(v): """Coerce a string primary_key to ``[v]`` for backward compatibility. @@ -1273,6 +1298,13 @@ class RegisterTableRequest(BaseModel): def _coerce_primary_key(cls, v): return _normalize_primary_key(v) + @field_validator("description", mode="before") + @classmethod + def _normalize_description(cls, v): + # Defensive normalization for descriptions arriving via shell-quoting + # tooling that injects literal backslash escapes (e.g. `Don\'t`, `\n`). + return _unescape_shell_quoting(v) + @field_validator("source_type", mode="before") @classmethod def _validate_source_type(cls, v): @@ -1676,6 +1708,13 @@ class UpdateTableRequest(BaseModel): def _coerce_primary_key(cls, v): return _normalize_primary_key(v) + @field_validator("description", mode="before") + @classmethod + def _normalize_description(cls, v): + # Defensive normalization for descriptions arriving via shell-quoting + # tooling that injects literal backslash escapes (e.g. `Don\'t`, `\n`). + return _unescape_shell_quoting(v) + # Duplicated from RegisterTableRequest — Pydantic v2 validators don't # inherit cleanly across unrelated BaseModel classes; a shared mixin # would be overkill for two fields. diff --git a/app/web/templates/admin_tables.html b/app/web/templates/admin_tables.html index 082c635..2870489 100644 --- a/app/web/templates/admin_tables.html +++ b/app/web/templates/admin_tables.html @@ -114,7 +114,7 @@ /* ── Page Title ── */ .page-title { - max-width: 1000px; + max-width: 1600px; margin: 0 auto; padding: 32px 24px 24px; } @@ -133,7 +133,7 @@ /* ── Content Layout ── */ .content { - max-width: 1000px; + max-width: 1600px; margin: 0 auto; padding: 0 24px 32px; display: flex; @@ -473,6 +473,17 @@ .registry-table { width: 100%; border-collapse: collapse; + table-layout: fixed; + } + + .registry-table .col-description { + overflow: hidden; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + overflow-wrap: anywhere; + line-height: 1.4; + color: var(--text-secondary); } .registry-table th { @@ -505,15 +516,105 @@ .registry-table .col-id { font-family: var(--font-mono); font-size: 12px; - max-width: 280px; + max-width: 220px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } .registry-table .col-actions { - width: 80px; - text-align: right; + width: 120px; + min-width: 120px; + white-space: nowrap; + vertical-align: top; + } + + /* ── Registry table — wide layout ── */ + .registry-table .col-mode { + width: 100px; + } + + .registry-table .col-source { + width: 200px; + font-family: var(--font-mono); + font-size: 12px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: var(--text-secondary); + } + + .registry-table .col-pk { + width: 100px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .registry-table .col-schedule { + width: 100px; + font-size: 12px; + color: var(--text-secondary); + } + + .registry-table .col-folder { + width: 120px; + } + + .registry-table .col-registered { + width: 160px; + font-size: 11px; + line-height: 1.4; + overflow: hidden; + } + + .registry-table .col-registered .registered-by { + color: var(--text-primary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .registry-table .col-registered .registered-at { + color: var(--text-secondary); + } + + .registry-table .col-status { + width: 40px; + text-align: center; + } + + .mode-badge { + display: inline-block; + padding: 2px 8px; + border-radius: 4px; + font-size: 11px; + font-weight: 600; + } + + .mode-badge.mode-local { + background: var(--success-light); + color: var(--success); + } + + .mode-badge.mode-remote { + background: var(--primary-light); + color: var(--primary); + } + + .mode-badge.mode-materialized { + background: rgba(139, 92, 246, 0.1); + color: #8B5CF6; + } + + .folder-badge { + display: inline-block; + padding: 1px 6px; + border-radius: 3px; + background: var(--background); + border: 1px solid var(--border); + font-size: 11px; + color: var(--text-secondary); } /* ── Modal overlay ── */ @@ -1544,6 +1645,24 @@ return div.innerHTML; } + // Defensive normalization for descriptions registered via shell-quoting + // tooling that injected literal backslash escapes (e.g. `Don\'t`, `\n`). + // Mirrors _unescape_shell_quoting in app/api/admin.py — applied at render + // time so already-stored corrupt rows still display readably. + function unescapeShellQuoting(s) { + if (!s) return s; + // Order matters: protect real backslashes via NUL sentinel first, + // unescape the well-known sequences, then restore real backslashes. + return s + .replace(/\\\\/g, '') + .replace(/\\n/g, '\n') + .replace(/\\r/g, '\r') + .replace(/\\t/g, '\t') + .replace(/\\'/g, "'") + .replace(/\\"/g, '"') + .replace(//g, '\\'); + } + // C3: removed dead Discovery panel JS. The global Discovery card + // its #discoverBtn / #discoveryResults DOM hooks were removed when // the per-tab UI landed; per-tab Discover/List datalist helpers live @@ -2587,6 +2706,12 @@ }); } + function renderModeBadge(mode) { + var m = (mode || 'local').toLowerCase(); + var cls = 'mode-badge mode-' + (['local','remote','materialized'].indexOf(m) >= 0 ? m : 'local'); + return '' + escapeHtml(m) + ''; + } + function renderRegistryListing(target, tables) { if (!target) return; if (tables.length === 0) { @@ -2595,19 +2720,69 @@ } var html = ''; html += ''; - html += ''; - html += ''; - html += ''; + html += ''; + html += ''; + html += ''; + html += ''; + html += ''; + html += ''; html += ''; + html += ''; + html += ''; html += ''; html += ''; tables.forEach(function(table) { html += ''; html += ''; - html += ''; - html += ''; - html += ''; - html += ''; + + // Source: bucket / source_table; em-dash when both empty (custom-SQL row). + var bucket = table.bucket || ''; + var srcTable = table.source_table || ''; + var sourceText = ''; + if (bucket && srcTable) { + sourceText = bucket + ' / ' + srcTable; + } else if (bucket || srcTable) { + sourceText = bucket || srcTable; + } + var sourceCell = sourceText ? escapeHtml(sourceText) : '—'; + html += ''; + + html += ''; + html += ''; + + // Folder badge — '—' when null/empty. + if (table.folder) { + html += ''; + } else { + html += ''; + } + + var desc = unescapeShellQuoting(table.description || ''); + html += ''; + + // Registered: stacked email + date (first 10 chars of ISO timestamp). + var regBy = table.registered_by || ''; + var regByDisplay = regBy; + if (regBy.length > 24 && regBy.indexOf('@') > 0) { + regByDisplay = regBy.split('@')[0]; + } + var regAt = table.registered_at ? String(table.registered_at).slice(0, 10) : ''; + html += ''; + + // Status: warning icon when the last sync errored. + html += ''; + + html += ''; + html += ''; }); html += '
Table IDModePrimary KeyTable IDModeSourcePrimary KeyScheduleFolderDescriptionRegisteredActions
' + escapeHtml(table.id) + '' + escapeHtml(table.query_mode || 'local') + '' + escapeHtml((table.primary_key || []).join(', ') || '-') + '' + escapeHtml(table.description || '-') + ''; + html += '' + renderModeBadge(table.query_mode) + '' + sourceCell + '' + escapeHtml((table.primary_key || []).join(', ') || '—') + '' + escapeHtml(table.sync_schedule || '—') + '' + escapeHtml(table.folder) + '' + escapeHtml(desc || '—') + ''; + html += '
' + (regByDisplay ? escapeHtml(regByDisplay) : '—') + '
'; + html += '
' + escapeHtml(regAt || '') + '
'; + html += '
'; + if (table.last_sync_error) { + html += ''; + html += ''; + html += ''; + } + html += '
'; html += ''; @@ -2618,7 +2793,7 @@ html += ''; - html += '
'; target.innerHTML = html; diff --git a/pyproject.toml b/pyproject.toml index def8c93..b6ccc9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.38.2" +version = "0.38.3" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" diff --git a/scripts/fix_description_escapes.py b/scripts/fix_description_escapes.py new file mode 100644 index 0000000..887fbaf --- /dev/null +++ b/scripts/fix_description_escapes.py @@ -0,0 +1,142 @@ +"""One-shot cleanup for ``table_registry.description`` rows corrupted by shell-quoting. + +Background +---------- +Some operators registered tables via shell/curl invocations whose quoting +injected literal backslash escapes into the JSON payload — e.g. ``Don\\'t +confuse...``, ``it\\'s...``, and embedded ``\\n`` instead of real newlines. +The backend stored those bytes verbatim and the admin UI rendered them +verbatim too. ``app/api/admin.py`` now applies ``_unescape_shell_quoting`` +on register/update so newly-saved descriptions are clean, but rows that +were registered before that fix landed still hold the corrupted text. + +This script rewrites every affected ``table_registry.description`` to its +unescaped form. Idempotent — once normalized, a second run is a no-op +because the helper has nothing left to substitute. + +Usage +----- + # 1) Preview the changes that would be made (default). + python scripts/fix_description_escapes.py + + # 2) Apply for real once the diff looks right. + python scripts/fix_description_escapes.py --apply +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +# Add project root to path so ``src`` is importable when invoked as a script. +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from app.logging_config import setup_logging # noqa: E402 +from src.db import get_system_db # noqa: E402 + +setup_logging(__name__) +logger = logging.getLogger(__name__) + + +def _unescape_shell_quoting(s: str | None) -> str | None: + """Mirror of ``app.api.admin._unescape_shell_quoting``. + + Kept inline (rather than imported) so this script stays runnable as a + standalone one-shot even if ``app.api.admin`` grows imports that an + operator's cleanup environment can't satisfy. + """ + if not s: + return s + SENTINEL = "\x00" + return ( + s.replace("\\\\", SENTINEL) + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\'", "'") + .replace('\\"', '"') + .replace(SENTINEL, "\\") + ) + + +def _preview(text: str, width: int = 80) -> str: + """Single-line preview of a possibly multi-line description.""" + flat = text.replace("\n", " \\n ").replace("\r", " ").replace("\t", " ") + if len(flat) > width: + flat = flat[: width - 1] + "…" + return flat + + +def main() -> int: + parser = argparse.ArgumentParser( + description=( + "Fix table_registry.description rows corrupted by shell-quoting " + "backslash-escapes. Defaults to dry-run; pass --apply to write." + ) + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + default=True, + help="Print the diff but do not write (default).", + ) + group.add_argument( + "--apply", + dest="dry_run", + action="store_false", + help="Apply the UPDATE statements.", + ) + args = parser.parse_args() + + conn = get_system_db() + try: + rows = conn.execute( + "SELECT id, name, description FROM table_registry " + "WHERE description IS NOT NULL" + ).fetchall() + finally: + # get_system_db returns a cursor over a shared connection; closing + # the cursor is safe and does not close the underlying handle. + try: + conn.close() + except Exception: + pass + + changed = 0 + for table_id, name, description in rows: + normalized = _unescape_shell_quoting(description) + if normalized == description: + continue + changed += 1 + print(f"{table_id} | {name} | {_preview(normalized or '')}") + + if not args.dry_run: + write_conn = get_system_db() + try: + write_conn.execute( + "UPDATE table_registry SET description = ? WHERE id = ?", + [normalized, table_id], + ) + finally: + try: + write_conn.close() + except Exception: + pass + + if changed == 0: + print("No rows need normalization.") + else: + action = "would update" if args.dry_run else "updated" + print(f"\n{action} {changed} row(s).") + if args.dry_run: + print("Re-run with --apply to write the changes.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_admin_unescape_shell_quoting.py b/tests/test_admin_unescape_shell_quoting.py new file mode 100644 index 0000000..a536672 --- /dev/null +++ b/tests/test_admin_unescape_shell_quoting.py @@ -0,0 +1,119 @@ +"""Unit tests for `app.api.admin._unescape_shell_quoting`. + +Pinning the contract: descriptions arriving with bash-style backslash +escapes (`Don\\'t`, `\\n`, `\\t`, `\\"`, etc.) are normalized at register / +update time so the row in `table_registry` carries the resolved text and +the UI doesn't have to render the literal escape bytes. + +The JS mirror (`unescapeShellQuoting` in +`app/web/templates/admin_tables.html`) uses the same NUL-byte sentinel +to protect real backslashes during the unescape pass — these tests +indirectly pin the symmetry by anchoring the Python end of it. +""" + +from __future__ import annotations + +import pytest + +from app.api.admin import _unescape_shell_quoting + + +class TestNoOpInputs: + @pytest.mark.parametrize("value", [None, ""]) + def test_passes_through(self, value): + assert _unescape_shell_quoting(value) == value + + def test_plain_text_unchanged(self): + assert _unescape_shell_quoting("hello world") == "hello world" + + def test_real_apostrophes_unchanged(self): + assert _unescape_shell_quoting("Don't worry") == "Don't worry" + + def test_real_newline_unchanged(self): + assert _unescape_shell_quoting("line1\nline2") == "line1\nline2" + + +class TestStandardEscapes: + def test_backslash_apostrophe(self): + assert _unescape_shell_quoting(r"Don\'t") == "Don't" + + def test_backslash_n_to_real_newline(self): + assert _unescape_shell_quoting(r"a\nb") == "a\nb" + + def test_backslash_t_to_real_tab(self): + assert _unescape_shell_quoting(r"a\tb") == "a\tb" + + def test_backslash_r_to_real_cr(self): + assert _unescape_shell_quoting(r"a\rb") == "a\rb" + + def test_backslash_double_quote(self): + assert _unescape_shell_quoting(r'say \"hi\"') == 'say "hi"' + + def test_multiple_in_one_string(self): + src = r"Don\'t do this:\nfoo\tbar" + assert _unescape_shell_quoting(src) == "Don't do this:\nfoo\tbar" + + +class TestNulSentinel: + """Real backslashes must survive the unescape pass — the helper uses a + NUL-byte sentinel to protect them. Tests target that path explicitly + so a refactor that breaks the sentinel order is caught immediately. + """ + + def test_double_backslash_becomes_single(self): + """`\\\\` (4 chars: `\\` `\\`) → `\\` (1 char).""" + assert _unescape_shell_quoting("\\\\") == "\\" + + def test_real_backslash_followed_by_n_is_preserved(self): + """A real backslash + literal `n` (`\\\\n`) must not collapse to a + newline — the sentinel pass protects the leading backslash.""" + assert _unescape_shell_quoting(r"\\n") == r"\n" + + def test_real_backslash_followed_by_apostrophe_is_preserved(self): + assert _unescape_shell_quoting(r"\\'") == r"\'" + + +class TestIdempotency: + """After one pass, the well-known escape *digraphs* (``\\n``, ``\\t``, + ``\\'``, ``\\"``) are gone. A second pass on **canonical** output must + be a no-op — that's what the migration script relies on so re-runs are + safe. + + Caveat: a raw single backslash followed by ``n`` / ``r`` / ``t`` / ``'`` + / ``"`` is ambiguous with the digraph form, so an input shaped like + ``\\not`` (single backslash + `not`) is NOT idempotent under repeated + application — the second pass would unescape ``\\n`` to a newline. + Documented as a known limitation; the migration script's `--dry-run` + surface lets operators preview before committing. + """ + + @pytest.mark.parametrize( + "src", + [ + r"Don\'t do this", + r"a\nb\tc", + r"mix \'ed \\ stuff \n here", + "plain text", + "real\nnewline", + "Don't worry — apostrophes are fine", + ], + ) + def test_second_pass_is_no_op_on_canonical(self, src): + once = _unescape_shell_quoting(src) + twice = _unescape_shell_quoting(once) + assert once == twice + + def test_documented_non_idempotent_case(self): + """Anchor the known-limitation behavior so a refactor can't silently + change it. ``\\\\not`` (4 chars: two backslashes + `not`) collapses + to ``\\not`` (4 chars: backslash + `not`) on the first pass; a + second pass would then read ``\\n`` as a newline escape. Operators + running the migration script with `--dry-run` see this preview + before committing.""" + first = _unescape_shell_quoting(r"\\not") + assert first == r"\not" + second = _unescape_shell_quoting(first) + # Second pass DOES change the value — because `\not` reads as + # `\n` + `ot` to the unescape logic. + assert second != first + assert second == "\not" # newline + 'ot'