fix(admin/tables): script to clean already-corrupted descriptions in registry
This commit is contained in:
parent
05e535d743
commit
c1c3ba5fef
1 changed files with 142 additions and 0 deletions
142
scripts/fix_description_escapes.py
Normal file
142
scripts/fix_description_escapes.py
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
"""One-shot cleanup for ``table_registry.description`` rows corrupted by shell-quoting.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Some operators registered tables via shell/curl invocations whose quoting
|
||||||
|
injected literal backslash escapes into the JSON payload — e.g. ``Don\\'t
|
||||||
|
confuse...``, ``it\\'s...``, and embedded ``\\n`` instead of real newlines.
|
||||||
|
The backend stored those bytes verbatim and the admin UI rendered them
|
||||||
|
verbatim too. ``app/api/admin.py`` now applies ``_unescape_shell_quoting``
|
||||||
|
on register/update so newly-saved descriptions are clean, but rows that
|
||||||
|
were registered before that fix landed still hold the corrupted text.
|
||||||
|
|
||||||
|
This script rewrites every affected ``table_registry.description`` to its
|
||||||
|
unescaped form. Idempotent — once normalized, a second run is a no-op
|
||||||
|
because the helper has nothing left to substitute.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
# 1) Preview the changes that would be made (default).
|
||||||
|
python scripts/fix_description_escapes.py
|
||||||
|
|
||||||
|
# 2) Apply for real once the diff looks right.
|
||||||
|
python scripts/fix_description_escapes.py --apply
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add project root to path so ``src`` is importable when invoked as a script.
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.logging_config import setup_logging # noqa: E402
|
||||||
|
from src.db import get_system_db # noqa: E402
|
||||||
|
|
||||||
|
setup_logging(__name__)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _unescape_shell_quoting(s: str | None) -> str | None:
|
||||||
|
"""Mirror of ``app.api.admin._unescape_shell_quoting``.
|
||||||
|
|
||||||
|
Kept inline (rather than imported) so this script stays runnable as a
|
||||||
|
standalone one-shot even if ``app.api.admin`` grows imports that an
|
||||||
|
operator's cleanup environment can't satisfy.
|
||||||
|
"""
|
||||||
|
if not s:
|
||||||
|
return s
|
||||||
|
SENTINEL = "\x00"
|
||||||
|
return (
|
||||||
|
s.replace("\\\\", SENTINEL)
|
||||||
|
.replace("\\n", "\n")
|
||||||
|
.replace("\\r", "\r")
|
||||||
|
.replace("\\t", "\t")
|
||||||
|
.replace("\\'", "'")
|
||||||
|
.replace('\\"', '"')
|
||||||
|
.replace(SENTINEL, "\\")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _preview(text: str, width: int = 80) -> str:
|
||||||
|
"""Single-line preview of a possibly multi-line description."""
|
||||||
|
flat = text.replace("\n", " \\n ").replace("\r", " ").replace("\t", " ")
|
||||||
|
if len(flat) > width:
|
||||||
|
flat = flat[: width - 1] + "…"
|
||||||
|
return flat
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description=(
|
||||||
|
"Fix table_registry.description rows corrupted by shell-quoting "
|
||||||
|
"backslash-escapes. Defaults to dry-run; pass --apply to write."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
group = parser.add_mutually_exclusive_group()
|
||||||
|
group.add_argument(
|
||||||
|
"--dry-run",
|
||||||
|
dest="dry_run",
|
||||||
|
action="store_true",
|
||||||
|
default=True,
|
||||||
|
help="Print the diff but do not write (default).",
|
||||||
|
)
|
||||||
|
group.add_argument(
|
||||||
|
"--apply",
|
||||||
|
dest="dry_run",
|
||||||
|
action="store_false",
|
||||||
|
help="Apply the UPDATE statements.",
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
conn = get_system_db()
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT id, name, description FROM table_registry "
|
||||||
|
"WHERE description IS NOT NULL"
|
||||||
|
).fetchall()
|
||||||
|
finally:
|
||||||
|
# get_system_db returns a cursor over a shared connection; closing
|
||||||
|
# the cursor is safe and does not close the underlying handle.
|
||||||
|
try:
|
||||||
|
conn.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
changed = 0
|
||||||
|
for table_id, name, description in rows:
|
||||||
|
normalized = _unescape_shell_quoting(description)
|
||||||
|
if normalized == description:
|
||||||
|
continue
|
||||||
|
changed += 1
|
||||||
|
print(f"{table_id} | {name} | {_preview(normalized or '')}")
|
||||||
|
|
||||||
|
if not args.dry_run:
|
||||||
|
write_conn = get_system_db()
|
||||||
|
try:
|
||||||
|
write_conn.execute(
|
||||||
|
"UPDATE table_registry SET description = ? WHERE id = ?",
|
||||||
|
[normalized, table_id],
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
write_conn.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if changed == 0:
|
||||||
|
print("No rows need normalization.")
|
||||||
|
else:
|
||||||
|
action = "would update" if args.dry_run else "updated"
|
||||||
|
print(f"\n{action} {changed} row(s).")
|
||||||
|
if args.dry_run:
|
||||||
|
print("Re-run with --apply to write the changes.")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
Loading…
Reference in a new issue