fix(admin/tables): script to clean already-corrupted descriptions in registry
This commit is contained in:
parent
05e535d743
commit
c1c3ba5fef
1 changed files with 142 additions and 0 deletions
142
scripts/fix_description_escapes.py
Normal file
142
scripts/fix_description_escapes.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
"""One-shot cleanup for ``table_registry.description`` rows corrupted by shell-quoting.
|
||||
|
||||
Background
|
||||
----------
|
||||
Some operators registered tables via shell/curl invocations whose quoting
|
||||
injected literal backslash escapes into the JSON payload — e.g. ``Don\\'t
|
||||
confuse...``, ``it\\'s...``, and embedded ``\\n`` instead of real newlines.
|
||||
The backend stored those bytes verbatim and the admin UI rendered them
|
||||
verbatim too. ``app/api/admin.py`` now applies ``_unescape_shell_quoting``
|
||||
on register/update so newly-saved descriptions are clean, but rows that
|
||||
were registered before that fix landed still hold the corrupted text.
|
||||
|
||||
This script rewrites every affected ``table_registry.description`` to its
|
||||
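unescaped form. Re-running is a no-op for the usual corruption (``\\'``,
``\\n``, ...) because the helper has nothing left to substitute. Caveat: an
escaped backslash followed by an escapable character (e.g. ``\\\\n``) loses
one level of escaping per run, so review the dry-run output before every
``--apply``.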
|
||||
|
||||
Usage
|
||||
-----
|
||||
# 1) Preview the changes that would be made (default).
|
||||
python scripts/fix_description_escapes.py
|
||||
|
||||
# 2) Apply for real once the diff looks right.
|
||||
python scripts/fix_description_escapes.py --apply
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add project root to path so ``app`` and ``src`` are importable when invoked as a script.
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from app.logging_config import setup_logging # noqa: E402
|
||||
from src.db import get_system_db # noqa: E402
|
||||
|
||||
setup_logging(__name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _unescape_shell_quoting(s: str | None) -> str | None:
|
||||
"""Mirror of ``app.api.admin._unescape_shell_quoting``.
|
||||
|
||||
Kept inline (rather than imported) so this script stays runnable as a
|
||||
standalone one-shot even if ``app.api.admin`` grows imports that an
|
||||
operator's cleanup environment can't satisfy.
|
||||
"""
|
||||
if not s:
|
||||
return s
|
||||
SENTINEL = "\x00"
|
||||
return (
|
||||
s.replace("\\\\", SENTINEL)
|
||||
.replace("\\n", "\n")
|
||||
.replace("\\r", "\r")
|
||||
.replace("\\t", "\t")
|
||||
.replace("\\'", "'")
|
||||
.replace('\\"', '"')
|
||||
.replace(SENTINEL, "\\")
|
||||
)
|
||||
|
||||
|
||||
def _preview(text: str, width: int = 80) -> str:
|
||||
"""Single-line preview of a possibly multi-line description."""
|
||||
flat = text.replace("\n", " \\n ").replace("\r", " ").replace("\t", " ")
|
||||
if len(flat) > width:
|
||||
flat = flat[: width - 1] + "…"
|
||||
return flat
|
||||
|
||||
|
||||
def _close_quietly(cursor) -> None:
    """Close *cursor* as best-effort cleanup, logging instead of raising."""
    try:
        cursor.close()
    except Exception:
        # A failed close must not mask the outcome of the real work, but it
        # should still leave a trace for whoever is debugging.
        logger.debug("Ignoring error while closing cursor", exc_info=True)


def main(argv: list[str] | None = None) -> int:
    """Report, and with ``--apply`` rewrite, corrupted registry descriptions.

    Reads every non-NULL ``table_registry.description``, runs it through
    ``_unescape_shell_quoting`` and prints one ``id | name | preview`` line
    for each row whose text would change. Rows are only updated when
    ``--apply`` is given; the default is a dry run.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` in all cases; database errors propagate as exceptions.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Fix table_registry.description rows corrupted by shell-quoting "
            "backslash-escapes. Defaults to dry-run; pass --apply to write."
        )
    )
    # Both flags write to the same ``dry_run`` destination, so the
    # mutually-exclusive group is what rejects ``--dry-run --apply``.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=True,
        help="Print the diff but do not write (default).",
    )
    group.add_argument(
        "--apply",
        dest="dry_run",
        action="store_false",
        help="Apply the UPDATE statements.",
    )
    args = parser.parse_args(argv)

    # get_system_db returns a cursor over a shared connection; closing
    # the cursor is safe and does not close the underlying handle.
    read_cursor = get_system_db()
    try:
        rows = read_cursor.execute(
            "SELECT id, name, description FROM table_registry "
            "WHERE description IS NOT NULL"
        ).fetchall()
    finally:
        _close_quietly(read_cursor)

    # One write cursor for the whole run instead of one per changed row.
    write_cursor = None if args.dry_run else get_system_db()
    changed = 0
    try:
        for table_id, name, description in rows:
            normalized = _unescape_shell_quoting(description)
            if normalized == description:
                continue
            changed += 1
            print(f"{table_id} | {name} | {_preview(normalized or '')}")

            if write_cursor is not None:
                write_cursor.execute(
                    "UPDATE table_registry SET description = ? WHERE id = ?",
                    [normalized, table_id],
                )
    finally:
        if write_cursor is not None:
            _close_quietly(write_cursor)

    if changed == 0:
        print("No rows need normalization.")
    else:
        action = "would update" if args.dry_run else "updated"
        print(f"\n{action} {changed} row(s).")
        if args.dry_run:
            print("Re-run with --apply to write the changes.")

    return 0
|
||||
|
||||
|
||||
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Loading…
Reference in a new issue