Extract 4 self-contained services into services/ module: - server/telegram_bot/ -> services/telegram_bot/ - server/ws_gateway/ -> services/ws_gateway/ - server/corporate_memory/ -> services/corporate_memory/ - server/session_collector.py -> services/session_collector/ Each service now has its own systemd/ directory with .service and .timer files. deploy.sh updated to auto-discover service units from services/*/systemd/*. server/ now contains only deployment infrastructure (deploy.sh, setup scripts, bin/ management tools, sudoers, nginx config). All imports updated: webapp/app.py, server/bin/ scripts, systemd ExecStart paths.
189 lines
5.5 KiB
Python
189 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Collect Claude Code session transcripts from all user home directories.
|
|
|
|
This script runs as a systemd service (session-collector.service) triggered by
|
|
session-collector.timer. It scans all /home/*/user/sessions/ directories and
|
|
copies session transcript files to /data/user_sessions/$user/ for centralized
|
|
storage and analysis.
|
|
|
|
Design principles:
|
|
- Must run as root (or user with read access to all /home/*)
|
|
- Preserves file metadata (timestamps, permissions)
|
|
- Idempotent - safe to run multiple times (skips existing files)
|
|
- Atomic operations - uses tempfile + os.replace for safety
|
|
- Logs to stdout (captured by journalctl)
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Iterator
|
|
|
|
# Root of the central store; each user gets a sub-directory named after them.
TARGET_BASE = Path("/data/user_sessions")

# Location of the transcripts inside a home directory (relative to the home).
USER_SESSIONS_DIR = "user/sessions"

# Everything goes to stdout so that journald captures it when run by systemd.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def find_user_home_dirs() -> Iterator[Path]:
    """Iterate over the non-hidden directories found directly under /home.

    Logs a warning and yields nothing when /home itself is missing.
    """
    home_root = Path("/home")

    if home_root.exists():
        # Lazily filter the listing: directories only, dot-entries excluded.
        yield from (
            candidate
            for candidate in home_root.iterdir()
            if candidate.is_dir() and not candidate.name.startswith(".")
        )
    else:
        logger.warning(f"{home_root} does not exist")
|
|
|
|
|
|
def find_session_files(user_home: Path) -> Iterator[Path]:
    """Yield every ``*.jsonl`` regular file in the user's sessions directory.

    The sessions directory is ``user_home / USER_SESSIONS_DIR``. Only direct
    children are considered; sub-directories are not descended into.

    Args:
        user_home: Home directory of the user to scan.

    Yields:
        Paths of the session transcript files found.

    Errors while probing or listing the directory are logged and end the
    iteration instead of propagating, so one unreadable home directory does
    not abort the whole collection run.
    """
    sessions_dir = user_home / USER_SESSIONS_DIR

    try:
        # The existence probe lives inside the try block on purpose:
        # Path.exists() can itself raise PermissionError when a parent
        # directory is not searchable, and that must be handled the same way
        # as a failure to list the directory.
        if not sessions_dir.exists():
            return

        for entry in sessions_dir.iterdir():
            if entry.is_file() and entry.suffix == ".jsonl":
                yield entry
    except PermissionError:
        logger.warning(f"Permission denied reading {sessions_dir}")
    except Exception as e:
        logger.error(f"Error scanning {sessions_dir}: {e}")
|
|
|
|
|
|
def copy_session_file(source: Path, target: Path, dry_run: bool = False) -> bool:
    """Copy a session file to ``target`` atomically, preserving metadata.

    The data is first copied (with ``shutil.copy2``) to a temporary file in
    the target directory and then moved into place with ``os.replace``.
    Existing targets are never re-copied, so writing directly to ``target``
    would let a copy that failed half-way leave a truncated file that every
    later run treats as "already collected".

    Args:
        source: Session file to collect.
        target: Final destination path; its parent directory is created
            if needed.
        dry_run: When true, only log what would be copied.

    Returns:
        True if the file was copied (or would be copied in dry-run mode),
        False if it was skipped because ``target`` already exists or because
        the copy failed (the failure is logged).
    """
    # Function-scope import, in line with main(), so the module's import
    # block does not need to change.
    import tempfile

    if target.exists():
        # Already collected, skip
        return False

    if dry_run:
        logger.info(f"[DRY-RUN] Would copy: {source} -> {target}")
        return True

    tmp_path = None
    try:
        # Ensure target directory exists
        target.parent.mkdir(parents=True, exist_ok=True)

        # The temp file must be in the same directory as the target so that
        # os.replace() is a same-filesystem rename. The leading dot and the
        # ".tmp" suffix keep it from being mistaken for a collected session.
        fd, tmp_name = tempfile.mkstemp(
            dir=target.parent, prefix=f".{target.name}.", suffix=".tmp"
        )
        os.close(fd)
        tmp_path = Path(tmp_name)

        # Copy with metadata preserved, then publish under the final name.
        shutil.copy2(source, tmp_path)
        os.replace(tmp_path, target)
        tmp_path = None  # ownership moved to target; nothing to clean up

        logger.info(f"Collected: {source} -> {target}")
        return True
    except Exception as e:
        logger.error(f"Failed to copy {source} to {target}: {e}")
        return False
    finally:
        # Remove a leftover temp file after a failed copy (best effort).
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temporary file {tmp_path}: {cleanup_error}"
                )
|
|
|
|
|
|
def collect_user_sessions(username: str, user_home: Path, dry_run: bool = False) -> tuple[int, int]:
    """Copy every session file of one user into that user's central directory.

    Files are placed under ``TARGET_BASE / username`` and keep their original
    file name.

    Returns a ``(files_copied, files_skipped)`` tuple, where "skipped" covers
    every file for which ``copy_session_file`` returned a false value.
    """
    destination = TARGET_BASE / username

    # One truthy/falsy outcome per discovered session file, in scan order.
    outcomes = [
        bool(copy_session_file(found, destination / found.name, dry_run=dry_run))
        for found in find_session_files(user_home)
    ]

    n_copied = sum(outcomes)
    n_skipped = len(outcomes) - n_copied
    return n_copied, n_skipped
|
|
|
|
|
|
def _prepare_target_base() -> bool:
    """Create TARGET_BASE and restrict access to it; return False on failure.

    The directory gets mode 2770 and, when the group exists, group ownership
    ``data-ops``. Failing to set the group is only a warning; failing to
    create the directory or set its mode is an error.
    """
    import grp

    try:
        TARGET_BASE.mkdir(parents=True, exist_ok=True)
        # Set permissions: root:data-ops, 2770 (admins only, sessions are sensitive)
        os.chmod(TARGET_BASE, 0o2770)
    except OSError as e:
        logger.error(f"Failed to create target directory {TARGET_BASE}: {e}")
        return False

    # Try to set group ownership to data-ops if it exists
    try:
        dataops_gid = grp.getgrnam("data-ops").gr_gid
        os.chown(TARGET_BASE, -1, dataops_gid)
    except KeyError:
        logger.warning("Group 'data-ops' not found, using default group")
    except Exception as e:
        logger.warning(f"Could not set group ownership: {e}")

    return True


def main() -> int:
    """Collect session transcripts for every regular user under /home.

    Command-line options:
        --dry-run      Log what would be copied; the filesystem is not touched
                       (the target base directory is neither created nor
                       re-permissioned).
        --verbose, -v  Also log, at DEBUG level, which home directories are
                       skipped and why.

    Returns:
        Process exit code: 0 on success, 1 if the target base directory could
        not be prepared.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Collect Claude Code session transcripts from all users"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview what would be copied without actually copying"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output"
    )

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    logger.info("Starting session transcript collection")

    # A dry run is a preview only, so it must not create or chmod/chown the
    # target directory. copy_session_file() copes with a missing target tree.
    if not args.dry_run and not _prepare_target_base():
        return 1

    total_copied = 0
    total_skipped = 0
    users_processed = 0

    for user_home in find_user_home_dirs():
        username = user_home.name

        # Skip system users: home directories owned by a UID below 1000.
        try:
            uid = user_home.stat().st_uid
        except OSError as e:
            logger.debug(f"Skipping {user_home}: cannot stat ({e})")
            continue
        if uid < 1000:
            logger.debug(f"Skipping {user_home}: owner uid {uid} is below 1000")
            continue

        copied, skipped = collect_user_sessions(username, user_home, dry_run=args.dry_run)

        # Users without any session files are left out of the report.
        if copied > 0 or skipped > 0:
            users_processed += 1
            total_copied += copied
            total_skipped += skipped
            logger.info(f"User {username}: {copied} copied, {skipped} skipped")

    logger.info(
        f"Collection complete: {users_processed} users, "
        f"{total_copied} files copied, {total_skipped} files skipped"
    )

    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|