From c1c3ba5fef552348ed366f70cb15b9a1c4a9e6c7 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Wed, 6 May 2026 10:14:23 +0200 Subject: [PATCH] fix(admin/tables): script to clean already-corrupted descriptions in registry --- scripts/fix_description_escapes.py | 142 +++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 scripts/fix_description_escapes.py diff --git a/scripts/fix_description_escapes.py b/scripts/fix_description_escapes.py new file mode 100644 index 0000000..887fbaf --- /dev/null +++ b/scripts/fix_description_escapes.py @@ -0,0 +1,142 @@ +"""One-shot cleanup for ``table_registry.description`` rows corrupted by shell-quoting. + +Background +---------- +Some operators registered tables via shell/curl invocations whose quoting +injected literal backslash escapes into the JSON payload — e.g. ``Don\\'t +confuse...``, ``it\\'s...``, and embedded ``\\n`` instead of real newlines. +The backend stored those bytes verbatim and the admin UI rendered them +verbatim too. ``app/api/admin.py`` now applies ``_unescape_shell_quoting`` +on register/update so newly-saved descriptions are clean, but rows that +were registered before that fix landed still hold the corrupted text. + +This script rewrites every affected ``table_registry.description`` to its +unescaped form. Idempotent — once normalized, a second run is a no-op +because the helper has nothing left to substitute. + +Usage +----- + # 1) Preview the changes that would be made (default). + python scripts/fix_description_escapes.py + + # 2) Apply for real once the diff looks right. + python scripts/fix_description_escapes.py --apply +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +# Add project root to path so ``src`` is importable when invoked as a script. +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from app.logging_config import setup_logging # noqa: E402 +from src.db import get_system_db # noqa: E402 + +setup_logging(__name__) +logger = logging.getLogger(__name__) + + +def _unescape_shell_quoting(s: str | None) -> str | None: + """Mirror of ``app.api.admin._unescape_shell_quoting``. + + Kept inline (rather than imported) so this script stays runnable as a + standalone one-shot even if ``app.api.admin`` grows imports that an + operator's cleanup environment can't satisfy. + """ + if not s: + return s + SENTINEL = "\x00" + return ( + s.replace("\\\\", SENTINEL) + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\'", "'") + .replace('\\"', '"') + .replace(SENTINEL, "\\") + ) + + +def _preview(text: str, width: int = 80) -> str: + """Single-line preview of a possibly multi-line description.""" + flat = text.replace("\n", " \\n ").replace("\r", " ").replace("\t", " ") + if len(flat) > width: + flat = flat[: width - 1] + "…" + return flat + + +def main() -> int: + parser = argparse.ArgumentParser( + description=( + "Fix table_registry.description rows corrupted by shell-quoting " + "backslash-escapes. Defaults to dry-run; pass --apply to write." + ) + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + default=True, + help="Print the diff but do not write (default).", + ) + group.add_argument( + "--apply", + dest="dry_run", + action="store_false", + help="Apply the UPDATE statements.", + ) + args = parser.parse_args() + + conn = get_system_db() + try: + rows = conn.execute( + "SELECT id, name, description FROM table_registry " + "WHERE description IS NOT NULL" + ).fetchall() + finally: + # get_system_db returns a cursor over a shared connection; closing + # the cursor is safe and does not close the underlying handle. + try: + conn.close() + except Exception: + pass + + changed = 0 + for table_id, name, description in rows: + normalized = _unescape_shell_quoting(description) + if normalized == description: + continue + changed += 1 + print(f"{table_id} | {name} | {_preview(normalized or '')}") + + if not args.dry_run: + write_conn = get_system_db() + try: + write_conn.execute( + "UPDATE table_registry SET description = ? WHERE id = ?", + [normalized, table_id], + ) + finally: + try: + write_conn.close() + except Exception: + pass + + if changed == 0: + print("No rows need normalization.") + else: + action = "would update" if args.dry_run else "updated" + print(f"\n{action} {changed} row(s).") + if args.dry_run: + print("Re-run with --apply to write the changes.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main())