Merge pull request #198 from keboola/zs/admin-tables-description-clamp

fix(admin/tables): keep row Actions reachable + sanitize description escapes
This commit is contained in:
ZdenekSrotyr 2026-05-06 11:50:13 +02:00 committed by GitHub
commit 9649f42b99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 499 additions and 14 deletions

View file

@ -10,6 +10,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
## [0.38.3] — 2026-05-06
### Changed
- **Admin / Tables**: registry table now shows Source (bucket/table), Schedule, Folder, Registered by/at, and a sync-error warning icon per row. The page widens to ~1600px to accommodate.
### Fixed
- **Admin / Tables**: long table descriptions no longer push the row's Edit / Manage access / Delete buttons off-screen. The Description column is now clamped to 2 lines with the full text available on hover and in the Edit modal.
- **Admin / Tables**: descriptions stored with shell-quoting backslash-escapes (`Don\'t`, `\n`) now render correctly. The same normalization also runs at register/update time so newly-saved descriptions are never corrupted.
- **Admin / Tables**: `scripts/fix_description_escapes.py` cleans up already-corrupted descriptions in `table_registry` (run with `--dry-run` first, then `--apply`).
## [0.38.2] — 2026-05-06
### Fixed

View file

@ -88,6 +88,31 @@ def _validate_url_not_private(url: str, field_name: str = "url") -> None:
)
def _unescape_shell_quoting(s: str | None) -> str | None:
"""Defensive normalization for descriptions arriving via shell-quoting tooling.
Some operators register tables with bash/curl invocations whose quoting
injects literal backslash escapes into the payload (e.g. ``Don\\'t`` or
embedded ``\\n`` instead of real newlines). The backend would otherwise
persist those bytes verbatim and the UI would render them verbatim too.
Mirrored in JS as ``unescapeShellQuoting`` in
``app/web/templates/admin_tables.html`` for already-stored rows.
"""
if not s:
return s
# Order matters: protect real backslashes first.
SENTINEL = "\x00"
return (
s.replace("\\\\", SENTINEL)
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace("\\t", "\t")
.replace("\\'", "'")
.replace('\\"', '"')
.replace(SENTINEL, "\\")
)
def _normalize_primary_key(v):
"""Coerce a string primary_key to ``[v]`` for backward compatibility.
@ -1273,6 +1298,13 @@ class RegisterTableRequest(BaseModel):
def _coerce_primary_key(cls, v):
return _normalize_primary_key(v)
@field_validator("description", mode="before")
@classmethod
def _normalize_description(cls, v):
# Defensive normalization for descriptions arriving via shell-quoting
# tooling that injects literal backslash escapes (e.g. `Don\'t`, `\n`).
return _unescape_shell_quoting(v)
@field_validator("source_type", mode="before")
@classmethod
def _validate_source_type(cls, v):
@ -1676,6 +1708,13 @@ class UpdateTableRequest(BaseModel):
def _coerce_primary_key(cls, v):
return _normalize_primary_key(v)
@field_validator("description", mode="before")
@classmethod
def _normalize_description(cls, v):
# Defensive normalization for descriptions arriving via shell-quoting
# tooling that injects literal backslash escapes (e.g. `Don\'t`, `\n`).
return _unescape_shell_quoting(v)
# Duplicated from RegisterTableRequest — Pydantic v2 validators don't
# inherit cleanly across unrelated BaseModel classes; a shared mixin
# would be overkill for two fields.

View file

@ -114,7 +114,7 @@
/* ── Page Title ── */
.page-title {
max-width: 1000px;
max-width: 1600px;
margin: 0 auto;
padding: 32px 24px 24px;
}
@ -133,7 +133,7 @@
/* ── Content Layout ── */
.content {
max-width: 1000px;
max-width: 1600px;
margin: 0 auto;
padding: 0 24px 32px;
display: flex;
@ -473,6 +473,17 @@
.registry-table {
width: 100%;
border-collapse: collapse;
table-layout: fixed;
}
.registry-table .col-description {
overflow: hidden;
display: -webkit-box;
-webkit-line-clamp: 2;
-webkit-box-orient: vertical;
overflow-wrap: anywhere;
line-height: 1.4;
color: var(--text-secondary);
}
.registry-table th {
@ -505,15 +516,105 @@
.registry-table .col-id {
font-family: var(--font-mono);
font-size: 12px;
max-width: 280px;
max-width: 220px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.registry-table .col-actions {
width: 80px;
text-align: right;
width: 120px;
min-width: 120px;
white-space: nowrap;
vertical-align: top;
}
/* ── Registry table — wide layout ── */
.registry-table .col-mode {
width: 100px;
}
.registry-table .col-source {
width: 200px;
font-family: var(--font-mono);
font-size: 12px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
color: var(--text-secondary);
}
.registry-table .col-pk {
width: 100px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.registry-table .col-schedule {
width: 100px;
font-size: 12px;
color: var(--text-secondary);
}
.registry-table .col-folder {
width: 120px;
}
.registry-table .col-registered {
width: 160px;
font-size: 11px;
line-height: 1.4;
overflow: hidden;
}
.registry-table .col-registered .registered-by {
color: var(--text-primary);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.registry-table .col-registered .registered-at {
color: var(--text-secondary);
}
.registry-table .col-status {
width: 40px;
text-align: center;
}
.mode-badge {
display: inline-block;
padding: 2px 8px;
border-radius: 4px;
font-size: 11px;
font-weight: 600;
}
.mode-badge.mode-local {
background: var(--success-light);
color: var(--success);
}
.mode-badge.mode-remote {
background: var(--primary-light);
color: var(--primary);
}
.mode-badge.mode-materialized {
background: rgba(139, 92, 246, 0.1);
color: #8B5CF6;
}
.folder-badge {
display: inline-block;
padding: 1px 6px;
border-radius: 3px;
background: var(--background);
border: 1px solid var(--border);
font-size: 11px;
color: var(--text-secondary);
}
/* ── Modal overlay ── */
@ -1544,6 +1645,24 @@
return div.innerHTML;
}
// Defensive normalization for descriptions registered via shell-quoting
// tooling that injected literal backslash escapes (e.g. `Don\'t`, `\n`).
// Mirrors _unescape_shell_quoting in app/api/admin.py — applied at render
// time so already-stored corrupt rows still display readably.
function unescapeShellQuoting(s) {
if (!s) return s;
// Order matters: protect real backslashes via NUL sentinel first,
// unescape the well-known sequences, then restore real backslashes.
return s
.replace(/\\\\/g, '')
.replace(/\\n/g, '\n')
.replace(/\\r/g, '\r')
.replace(/\\t/g, '\t')
.replace(/\\'/g, "'")
.replace(/\\"/g, '"')
.replace(//g, '\\');
}
// C3: removed dead Discovery panel JS. The global Discovery card +
// its #discoverBtn / #discoveryResults DOM hooks were removed when
// the per-tab UI landed; per-tab Discover/List datalist helpers live
@ -2587,6 +2706,12 @@
});
}
function renderModeBadge(mode) {
var m = (mode || 'local').toLowerCase();
var cls = 'mode-badge mode-' + (['local','remote','materialized'].indexOf(m) >= 0 ? m : 'local');
return '<span class="' + cls + '">' + escapeHtml(m) + '</span>';
}
function renderRegistryListing(target, tables) {
if (!target) return;
if (tables.length === 0) {
@ -2595,19 +2720,69 @@
}
var html = '<table class="registry-table">';
html += '<thead><tr>';
html += '<th>Table ID</th>';
html += '<th>Mode</th>';
html += '<th>Primary Key</th>';
html += '<th class="col-id">Table ID</th>';
html += '<th class="col-mode">Mode</th>';
html += '<th class="col-source">Source</th>';
html += '<th class="col-pk">Primary Key</th>';
html += '<th class="col-schedule">Schedule</th>';
html += '<th class="col-folder">Folder</th>';
html += '<th>Description</th>';
html += '<th class="col-registered">Registered</th>';
html += '<th class="col-status"></th>';
html += '<th class="col-actions">Actions</th>';
html += '</tr></thead><tbody>';
tables.forEach(function(table) {
html += '<tr>';
html += '<td class="col-id" title="' + escapeHtml(table.id) + '">' + escapeHtml(table.id) + '</td>';
html += '<td>' + escapeHtml(table.query_mode || 'local') + '</td>';
html += '<td>' + escapeHtml((table.primary_key || []).join(', ') || '-') + '</td>';
html += '<td>' + escapeHtml(table.description || '-') + '</td>';
html += '<td class="col-actions">';
html += '<td class="col-mode">' + renderModeBadge(table.query_mode) + '</td>';
// Source: bucket / source_table; em-dash when both empty (custom-SQL row).
var bucket = table.bucket || '';
var srcTable = table.source_table || '';
var sourceText = '';
if (bucket && srcTable) {
sourceText = bucket + ' / ' + srcTable;
} else if (bucket || srcTable) {
sourceText = bucket || srcTable;
}
var sourceCell = sourceText ? escapeHtml(sourceText) : '—';
html += '<td class="col-source" title="' + escapeHtml(sourceText) + '">' + sourceCell + '</td>';
html += '<td class="col-pk">' + escapeHtml((table.primary_key || []).join(', ') || '—') + '</td>';
html += '<td class="col-schedule">' + escapeHtml(table.sync_schedule || '—') + '</td>';
// Folder badge — '—' when null/empty.
if (table.folder) {
html += '<td class="col-folder"><span class="folder-badge">' + escapeHtml(table.folder) + '</span></td>';
} else {
html += '<td class="col-folder"></td>';
}
var desc = unescapeShellQuoting(table.description || '');
html += '<td class="col-description" title="' + escapeHtml(desc) + '">' + escapeHtml(desc || '—') + '</td>';
// Registered: stacked email + date (first 10 chars of ISO timestamp).
var regBy = table.registered_by || '';
var regByDisplay = regBy;
if (regBy.length > 24 && regBy.indexOf('@') > 0) {
regByDisplay = regBy.split('@')[0];
}
var regAt = table.registered_at ? String(table.registered_at).slice(0, 10) : '';
html += '<td class="col-registered">';
html += '<div class="registered-by" title="' + escapeHtml(regBy) + '">' + (regByDisplay ? escapeHtml(regByDisplay) : '—') + '</div>';
html += '<div class="registered-at">' + escapeHtml(regAt || '') + '</div>';
html += '</td>';
// Status: warning icon when the last sync errored.
html += '<td class="col-status">';
if (table.last_sync_error) {
html += '<span title="' + escapeHtml(table.last_sync_error) + '">';
html += '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="color: var(--error);"><path d="M10.29 3.86 1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"/><line x1="12" y1="9" x2="12" y2="13"/><line x1="12" y1="17" x2="12.01" y2="17"/></svg>';
html += '</span>';
}
html += '</td>';
html += '<td class="col-actions"><div style="display:flex; gap:4px; justify-content:flex-end;">';
html += '<button class="btn-icon" title="Edit" onclick=\'openEditModal(' + JSON.stringify(table).replace(/'/g, "\\'") + ')\'>';
html += '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg>';
html += '</button>';
@ -2618,7 +2793,7 @@
html += '<button class="btn-icon danger" title="Delete" onclick="deleteTable(\'' + escapeHtml(table.id).replace(/'/g, "\\'") + '\')">';
html += '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="3 6 5 6 21 6"/><path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/></svg>';
html += '</button>';
html += '</td></tr>';
html += '</div></td></tr>';
});
html += '</tbody></table>';
target.innerHTML = html;

View file

@ -1,6 +1,6 @@
[project]
name = "agnes-the-ai-analyst"
version = "0.38.2"
version = "0.38.3"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"

View file

@ -0,0 +1,142 @@
"""One-shot cleanup for ``table_registry.description`` rows corrupted by shell-quoting.
Background
----------
Some operators registered tables via shell/curl invocations whose quoting
injected literal backslash escapes into the JSON payload e.g. ``Don\\'t
confuse...``, ``it\\'s...``, and embedded ``\\n`` instead of real newlines.
The backend stored those bytes verbatim and the admin UI rendered them
verbatim too. ``app/api/admin.py`` now applies ``_unescape_shell_quoting``
on register/update so newly-saved descriptions are clean, but rows that
were registered before that fix landed still hold the corrupted text.
This script rewrites every affected ``table_registry.description`` to its
unescaped form. Idempotent once normalized, a second run is a no-op
because the helper has nothing left to substitute.
Usage
-----
# 1) Preview the changes that would be made (default).
python scripts/fix_description_escapes.py
# 2) Apply for real once the diff looks right.
python scripts/fix_description_escapes.py --apply
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
# Add project root to path so ``src`` is importable when invoked as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.logging_config import setup_logging # noqa: E402
from src.db import get_system_db # noqa: E402
setup_logging(__name__)
logger = logging.getLogger(__name__)
def _unescape_shell_quoting(s: str | None) -> str | None:
"""Mirror of ``app.api.admin._unescape_shell_quoting``.
Kept inline (rather than imported) so this script stays runnable as a
standalone one-shot even if ``app.api.admin`` grows imports that an
operator's cleanup environment can't satisfy.
"""
if not s:
return s
SENTINEL = "\x00"
return (
s.replace("\\\\", SENTINEL)
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace("\\t", "\t")
.replace("\\'", "'")
.replace('\\"', '"')
.replace(SENTINEL, "\\")
)
def _preview(text: str, width: int = 80) -> str:
"""Single-line preview of a possibly multi-line description."""
flat = text.replace("\n", " \\n ").replace("\r", " ").replace("\t", " ")
if len(flat) > width:
flat = flat[: width - 1] + ""
return flat
def main() -> int:
parser = argparse.ArgumentParser(
description=(
"Fix table_registry.description rows corrupted by shell-quoting "
"backslash-escapes. Defaults to dry-run; pass --apply to write."
)
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--dry-run",
dest="dry_run",
action="store_true",
default=True,
help="Print the diff but do not write (default).",
)
group.add_argument(
"--apply",
dest="dry_run",
action="store_false",
help="Apply the UPDATE statements.",
)
args = parser.parse_args()
conn = get_system_db()
try:
rows = conn.execute(
"SELECT id, name, description FROM table_registry "
"WHERE description IS NOT NULL"
).fetchall()
finally:
# get_system_db returns a cursor over a shared connection; closing
# the cursor is safe and does not close the underlying handle.
try:
conn.close()
except Exception:
pass
changed = 0
for table_id, name, description in rows:
normalized = _unescape_shell_quoting(description)
if normalized == description:
continue
changed += 1
print(f"{table_id} | {name} | {_preview(normalized or '')}")
if not args.dry_run:
write_conn = get_system_db()
try:
write_conn.execute(
"UPDATE table_registry SET description = ? WHERE id = ?",
[normalized, table_id],
)
finally:
try:
write_conn.close()
except Exception:
pass
if changed == 0:
print("No rows need normalization.")
else:
action = "would update" if args.dry_run else "updated"
print(f"\n{action} {changed} row(s).")
if args.dry_run:
print("Re-run with --apply to write the changes.")
return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,119 @@
"""Unit tests for `app.api.admin._unescape_shell_quoting`.
Pinning the contract: descriptions arriving with bash-style backslash
escapes (`Don\\'t`, `\\n`, `\\t`, `\\"`, etc.) are normalized at register /
update time so the row in `table_registry` carries the resolved text and
the UI doesn't have to render the literal escape bytes.
The JS mirror (`unescapeShellQuoting` in
`app/web/templates/admin_tables.html`) uses the same NUL-byte sentinel
to protect real backslashes during the unescape pass these tests
indirectly pin the symmetry by anchoring the Python end of it.
"""
from __future__ import annotations
import pytest
from app.api.admin import _unescape_shell_quoting
class TestNoOpInputs:
@pytest.mark.parametrize("value", [None, ""])
def test_passes_through(self, value):
assert _unescape_shell_quoting(value) == value
def test_plain_text_unchanged(self):
assert _unescape_shell_quoting("hello world") == "hello world"
def test_real_apostrophes_unchanged(self):
assert _unescape_shell_quoting("Don't worry") == "Don't worry"
def test_real_newline_unchanged(self):
assert _unescape_shell_quoting("line1\nline2") == "line1\nline2"
class TestStandardEscapes:
def test_backslash_apostrophe(self):
assert _unescape_shell_quoting(r"Don\'t") == "Don't"
def test_backslash_n_to_real_newline(self):
assert _unescape_shell_quoting(r"a\nb") == "a\nb"
def test_backslash_t_to_real_tab(self):
assert _unescape_shell_quoting(r"a\tb") == "a\tb"
def test_backslash_r_to_real_cr(self):
assert _unescape_shell_quoting(r"a\rb") == "a\rb"
def test_backslash_double_quote(self):
assert _unescape_shell_quoting(r'say \"hi\"') == 'say "hi"'
def test_multiple_in_one_string(self):
src = r"Don\'t do this:\nfoo\tbar"
assert _unescape_shell_quoting(src) == "Don't do this:\nfoo\tbar"
class TestNulSentinel:
"""Real backslashes must survive the unescape pass — the helper uses a
NUL-byte sentinel to protect them. Tests target that path explicitly
so a refactor that breaks the sentinel order is caught immediately.
"""
def test_double_backslash_becomes_single(self):
"""`\\\\` (4 chars: `\\` `\\`) → `\\` (1 char)."""
assert _unescape_shell_quoting("\\\\") == "\\"
def test_real_backslash_followed_by_n_is_preserved(self):
"""A real backslash + literal `n` (`\\\\n`) must not collapse to a
newline the sentinel pass protects the leading backslash."""
assert _unescape_shell_quoting(r"\\n") == r"\n"
def test_real_backslash_followed_by_apostrophe_is_preserved(self):
assert _unescape_shell_quoting(r"\\'") == r"\'"
class TestIdempotency:
"""After one pass, the well-known escape *digraphs* (``\\n``, ``\\t``,
``\\'``, ``\\"``) are gone. A second pass on **canonical** output must
be a no-op that's what the migration script relies on so re-runs are
safe.
Caveat: a raw single backslash followed by ``n`` / ``r`` / ``t`` / ``'``
/ ``"`` is ambiguous with the digraph form, so an input shaped like
``\\not`` (single backslash + `not`) is NOT idempotent under repeated
application the second pass would unescape ``\\n`` to a newline.
Documented as a known limitation; the migration script's `--dry-run`
surface lets operators preview before committing.
"""
@pytest.mark.parametrize(
"src",
[
r"Don\'t do this",
r"a\nb\tc",
r"mix \'ed \\ stuff \n here",
"plain text",
"real\nnewline",
"Don't worry — apostrophes are fine",
],
)
def test_second_pass_is_no_op_on_canonical(self, src):
once = _unescape_shell_quoting(src)
twice = _unescape_shell_quoting(once)
assert once == twice
def test_documented_non_idempotent_case(self):
"""Anchor the known-limitation behavior so a refactor can't silently
change it. ``\\\\not`` (4 chars: two backslashes + `not`) collapses
to ``\\not`` (4 chars: backslash + `not`) on the first pass; a
second pass would then read ``\\n`` as a newline escape. Operators
running the migration script with `--dry-run` see this preview
before committing."""
first = _unescape_shell_quoting(r"\\not")
assert first == r"\not"
second = _unescape_shell_quoting(first)
# Second pass DOES change the value — because `\not` reads as
# `\n` + `ot` to the unescape logic.
assert second != first
assert second == "\not" # newline + 'ot'