Security fixes: sanitize dev_docs, harden sudoers and config validation
H1 - Sanitize dev_docs/ for public release:
- Replace all real employee names with generic placeholders
(padak->admin1, matejkys->admin2, dasa->admin3, petr->john, etc.)
- Replace GCP project ID (kids-ai-data-analysis -> your-gcp-project)
- Replace server hostname (data-broker-for-claude -> your-server)
- Replace real IP address (34.88.8.46 -> YOUR_SERVER_IP)
- Replace internal FQDN with placeholder
- Covers: security.md, server.md, disaster-recovery.md, desktop-app.md,
session_explore.md, plan-rsync-fix.md, draft/*.md
H3 - webapp-setup.sh: validate sudoers syntax BEFORE copying to /etc/sudoers.d
- Prevents broken sudo if syntax is invalid
- Uses install -m 440 to copy the file and set correct permissions in a single step
M1 - setup.sh: deploy user created with /usr/sbin/nologin instead of /bin/bash
- CI/CD service account does not need interactive shell
M2 - config/loader.py: warn on missing env vars, validate webapp_secret_key
- _resolve_env_refs now logs warnings for unset ${ENV_VAR} references
- _validate_config checks auth.webapp_secret_key is non-empty
- Prevents Flask signing sessions with empty secret key
All 118 tests pass.
This commit is contained in:
parent
26c4e0934d
commit
485ac0a742
11 changed files with 129 additions and 107 deletions
|
|
@ -24,24 +24,43 @@ CONFIG_DIR = Path(os.environ.get("CONFIG_DIR", "./config"))
|
|||
_ENV_PATTERN = re.compile(r"\$\{([^}]+)\}")
|
||||
|
||||
|
||||
def _resolve_env_refs(value: Any) -> Any:
|
||||
def _resolve_env_refs(value: Any, _path: str = "") -> Any:
|
||||
"""Resolve ${ENV_VAR} references in config values.
|
||||
|
||||
Walks the config tree recursively. String values containing ${VAR}
|
||||
are replaced with the corresponding environment variable value
|
||||
(empty string if not set). Non-string values pass through unchanged.
|
||||
are replaced with the corresponding environment variable value.
|
||||
Unset variables resolve to an empty string and are logged as a warning so misconfiguration is visible.
|
||||
Non-string values pass through unchanged.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
missing_vars: list[str] = []
|
||||
|
||||
def replacer(match: re.Match) -> str:
|
||||
env_key = match.group(1)
|
||||
return os.environ.get(env_key, "")
|
||||
env_val = os.environ.get(env_key)
|
||||
if env_val is None:
|
||||
missing_vars.append(env_key)
|
||||
return ""
|
||||
return env_val
|
||||
|
||||
return _ENV_PATTERN.sub(replacer, value)
|
||||
resolved = _ENV_PATTERN.sub(replacer, value)
|
||||
for var in missing_vars:
|
||||
logger.warning(
|
||||
"Environment variable %s not set (referenced in config %s)",
|
||||
var,
|
||||
_path or "value",
|
||||
)
|
||||
return resolved
|
||||
if isinstance(value, dict):
|
||||
return {k: _resolve_env_refs(v) for k, v in value.items()}
|
||||
return {
|
||||
k: _resolve_env_refs(v, _path=f"{_path}.{k}" if _path else k)
|
||||
for k, v in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [_resolve_env_refs(item) for item in value]
|
||||
return [
|
||||
_resolve_env_refs(item, _path=f"{_path}[{i}]")
|
||||
for i, item in enumerate(value)
|
||||
]
|
||||
return value
|
||||
|
||||
|
||||
|
|
@ -94,8 +113,13 @@ def _validate_config(config: dict) -> None:
|
|||
("server", "hostname"),
|
||||
]
|
||||
|
||||
# Secret fields that must resolve to non-empty values (from .env)
|
||||
required_secrets = [
|
||||
("auth", "webapp_secret_key"),
|
||||
]
|
||||
|
||||
missing = []
|
||||
for keys in required_paths:
|
||||
for keys in required_paths + required_secrets:
|
||||
value = config
|
||||
path_str = ".".join(keys)
|
||||
for key in keys:
|
||||
|
|
@ -110,7 +134,7 @@ def _validate_config(config: dict) -> None:
|
|||
if missing:
|
||||
raise ValueError(
|
||||
f"Missing required instance config fields: {', '.join(missing)}. "
|
||||
f"Check config/instance.yaml"
|
||||
f"Check config/instance.yaml and .env"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ Server: `wss://your-instance.example.com/ws/notifications`
|
|||
|
||||
```
|
||||
Client -> Server: {"type":"auth","token":"eyJ..."}
|
||||
Server -> Client: {"type":"auth_ok","username":"petr"}
|
||||
Server -> Client: {"type":"auth_ok","username":"john"}
|
||||
Server -> Client: {"type":"notification","id":"uuid","title":"Revenue Drop","message":"...","image_url":"/api/notifications/images/abc.png","script":"revenue_check","timestamp":"2026-01-30T10:00:00Z"}
|
||||
Server -> Client: {"type":"ping"}
|
||||
Client -> Server: {"type":"pong"}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Disaster Recovery
|
||||
|
||||
Recovery procedures for the Data Broker Server (`data-broker-for-claude`).
|
||||
Recovery procedures for the Data Broker Server (`your-server`).
|
||||
|
||||
## Overview
|
||||
|
||||
|
|
@ -40,8 +40,8 @@ Disk Layout:
|
|||
1. **Create new VM** (same zone, attach existing disks):
|
||||
```bash
|
||||
# Create new instance with existing disks
|
||||
gcloud compute instances create data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances create your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--machine-type=e2-medium \
|
||||
--image-family=debian-12 \
|
||||
|
|
@ -50,13 +50,13 @@ Disk Layout:
|
|||
--tags=http-server,https-server
|
||||
|
||||
# Attach existing data disks
|
||||
gcloud compute instances attach-disk data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances attach-disk your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--disk=data-disk
|
||||
|
||||
gcloud compute instances attach-disk data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances attach-disk your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--disk=home-disk
|
||||
```
|
||||
|
|
@ -159,19 +159,19 @@ Disk Layout:
|
|||
|
||||
```bash
|
||||
# Find latest snapshot
|
||||
gcloud compute snapshots list --project=kids-ai-data-analysis \
|
||||
gcloud compute snapshots list --project=your-gcp-project \
|
||||
--filter="sourceDisk:data-disk" --sort-by=~creationTimestamp --limit=5
|
||||
|
||||
# Create new disk from snapshot
|
||||
gcloud compute disks create data-disk \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--source-snapshot=SNAPSHOT_NAME \
|
||||
--type=pd-balanced
|
||||
|
||||
# Attach to VM (may need to stop VM first)
|
||||
gcloud compute instances attach-disk data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances attach-disk your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--disk=data-disk
|
||||
|
||||
|
|
@ -184,7 +184,7 @@ ssh kids "sudo mount /dev/sdb /data"
|
|||
```bash
|
||||
# Create fresh disk
|
||||
gcloud compute disks create data-disk \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--size=30GB \
|
||||
--type=pd-balanced
|
||||
|
|
@ -209,19 +209,19 @@ ssh kids "cd /opt/data-analyst/repo && ./scripts/update.sh"
|
|||
|
||||
```bash
|
||||
# Find latest snapshot
|
||||
gcloud compute snapshots list --project=kids-ai-data-analysis \
|
||||
gcloud compute snapshots list --project=your-gcp-project \
|
||||
--filter="sourceDisk:home-disk" --sort-by=~creationTimestamp --limit=5
|
||||
|
||||
# Create new disk from snapshot
|
||||
gcloud compute disks create home-disk \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--source-snapshot=SNAPSHOT_NAME \
|
||||
--type=pd-balanced
|
||||
|
||||
# Attach to VM
|
||||
gcloud compute instances attach-disk data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances attach-disk your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--disk=home-disk
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ def _get_server_username(webapp_username) -> str # Reuse WEBAPP_TO
|
|||
Storage format (`connections.json`):
|
||||
```json
|
||||
{
|
||||
"petr": {
|
||||
"john": {
|
||||
"purchase_orders": {
|
||||
"connected": true,
|
||||
"api_key": "pk_live_abc123...",
|
||||
|
|
@ -126,7 +126,7 @@ What each external service needs to implement:
|
|||
```
|
||||
POST /api/internal/token-exchange
|
||||
Authorization: Bearer <shared_secret>
|
||||
Body: {"user_email": "petr@your-domain.com", "ttl_days": 90}
|
||||
Body: {"user_email": "john@your-domain.com", "ttl_days": 90}
|
||||
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
||||
|
||||
POST /api/internal/token-revoke
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ SKILLS_DIR = Path(os.environ.get("SC_SKILLS_DIR", "/data/docs/service_connector_
|
|||
# Username mapping (reuse existing pattern)
|
||||
WEBAPP_TO_SERVER_USERNAME = {
|
||||
# Add overrides here if webapp username != server username
|
||||
# "dasa.damaskova": "dasa",
|
||||
# "jane.smith": "jane",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -503,7 +503,7 @@ What each internal service needs to implement (simple Bearer + JSON):
|
|||
POST /api/internal/token-exchange
|
||||
Authorization: Bearer <shared_secret>
|
||||
Content-Type: application/json
|
||||
Body: {"user_email": "petr@your-domain.com", "ttl_days": 365}
|
||||
Body: {"user_email": "john@your-domain.com", "ttl_days": 365}
|
||||
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
||||
|
||||
POST /api/internal/token-revoke
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
## Problem
|
||||
|
||||
Rsync from GCP server (34.88.8.46) hangs after 1-5 minutes. Process exists but has 0% CPU and no network activity. 100% reproducible with ~7000 parquet files.
|
||||
Rsync from GCP server (YOUR_SERVER_IP) hangs after 1-5 minutes. Process exists but has 0% CPU and no network activity. 100% reproducible with ~7000 parquet files.
|
||||
|
||||
## Root Cause Analysis
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Security Audit Report: Data Broker Server
|
||||
|
||||
**Date:** 2026-01-30
|
||||
**Server:** `data-broker-for-claude` (YOUR_SERVER_IP), Debian 12 (bookworm), GCP e2-medium
|
||||
**Server:** `your-server` (YOUR_SERVER_IP), Debian 12 (bookworm), GCP e2-medium
|
||||
**Auditors:** Claude Opus 4.5 (primary) + Perplexity Sonar (validation) + OpenAI Codex (second opinion)
|
||||
**Scope:** Linux server security, user isolation, CI/CD pipeline, notification system, desktop app attack surface
|
||||
**Status:** Read-only audit -- no changes were made to the server
|
||||
|
|
@ -44,8 +44,8 @@ These findings were independently validated by Perplexity (CVE references, Unix
|
|||
|
||||
| Parameter | Value |
|
||||
|-----------|-------|
|
||||
| Hostname | data-broker-for-claude |
|
||||
| GCP Project | kids-ai-data-analysis |
|
||||
| Hostname | your-server |
|
||||
| GCP Project | your-gcp-project |
|
||||
| Zone | europe-north1-a |
|
||||
| OS | Debian 12 (bookworm) |
|
||||
| External IP | YOUR_SERVER_IP |
|
||||
|
|
@ -55,9 +55,9 @@ These findings were independently validated by Perplexity (CVE references, Unix
|
|||
|
||||
| Group | Members | Purpose |
|
||||
|-------|---------|---------|
|
||||
| `dataread` | padak, matejkys, dasa, petr, fisa, dasa.damaskova, martin.lepka, pavel.dolezal, martin.matejka, jiri.manas | Public data read access |
|
||||
| `data-private` | padak, matejkys, dasa | Private/sensitive data access |
|
||||
| `data-ops` | deploy, padak, matejkys, dasa, www-data | Application deployment and operations |
|
||||
| `dataread` | admin1, admin2, admin3, john, analyst1, jane.smith, bob.jones, alice.wilson, mike.brown, tom.davis | Public data read access |
|
||||
| `data-private` | admin1, admin2, admin3 | Private/sensitive data access |
|
||||
| `data-ops` | deploy, admin1, admin2, admin3, www-data | Application deployment and operations |
|
||||
|
||||
### Services
|
||||
|
||||
|
|
@ -203,7 +203,7 @@ Source: SentinelOne vulnerability database, Compass Security research on dangero
|
|||
|
||||
#### Description
|
||||
|
||||
The directory `/data/src_data/parquet/private/` is intended to be accessible only to members of the `data-private` group (3 privileged users: padak, matejkys, dasa). However, its POSIX ACL also grants access to the `dataread` group, which contains **all 10 analysts**:
|
||||
The directory `/data/src_data/parquet/private/` is intended to be accessible only to members of the `data-private` group (3 privileged users: admin1, admin2, admin3). However, its POSIX ACL also grants access to the `dataread` group, which contains **all 10 analysts**:
|
||||
|
||||
```
|
||||
# getfacl /data/src_data/parquet/private/
|
||||
|
|
@ -224,13 +224,13 @@ The POSIX ACL mask (`mask::rwx`) does not restrict the `dataread` entry because
|
|||
|
||||
#### Proof of Exploitation
|
||||
|
||||
Tested directly on server with user `fisa` (standard analyst, member of `dataread` only, NOT `data-private`):
|
||||
Tested directly on server with user `analyst1` (standard analyst, member of `dataread` only, NOT `data-private`):
|
||||
|
||||
```bash
|
||||
$ sudo -u fisa ls -la /data/src_data/parquet/private/
|
||||
$ sudo -u analyst1 ls -la /data/src_data/parquet/private/
|
||||
total 16
|
||||
drwxrws---+ 2 padak data-ops 4096 Jan 21 14:29 .
|
||||
drwxrws---+ 7 padak data-ops 4096 Jan 23 18:29 ..
|
||||
drwxrws---+ 2 admin1 data-ops 4096 Jan 21 14:29 .
|
||||
drwxrws---+ 7 admin1 data-ops 4096 Jan 23 18:29 ..
|
||||
# Exit code: 0 (access granted)
|
||||
```
|
||||
|
||||
|
|
@ -265,7 +265,7 @@ sudo setfacl -R -m u::rwx,g::rwx,g:data-private:r-x,g:data-ops:rwx,o::--- /data/
|
|||
sudo setfacl -R -d -m u::rwx,g::rwx,g:data-private:r-x,g:data-ops:rwx,o::--- /data/src_data/parquet/private/
|
||||
|
||||
# Verify:
|
||||
sudo -u fisa ls /data/src_data/parquet/private/
|
||||
sudo -u analyst1 ls /data/src_data/parquet/private/
|
||||
# Expected: "ls: cannot open directory 'private/': Permission denied"
|
||||
```
|
||||
|
||||
|
|
@ -461,7 +461,7 @@ The server may be reachable on port 25 from the internet, potentially allowing:
|
|||
|
||||
```
|
||||
# /etc/postfix/main.cf
|
||||
myhostname = data-broker-for-claude.c.kids-ai-data-analysis.internal
|
||||
myhostname = your-server.c.your-gcp-project.internal
|
||||
mydestination = $myhostname, localhost
|
||||
inet_interfaces = all
|
||||
|
||||
|
|
@ -751,14 +751,14 @@ The following findings were identified by the OpenAI Codex second opinion review
|
|||
- `webapp/desktop_auth.py`, `webapp/user_service.py` (auth flows)
|
||||
- `.github/workflows/deploy.yml` (CI/CD configuration)
|
||||
|
||||
2. **Live server inspection** (read-only, via SSH as padak):
|
||||
2. **Live server inspection** (read-only, via SSH as admin1):
|
||||
- File permissions: `ls -la`, `stat`, `getfacl` on all critical paths
|
||||
- Socket permissions: `/run/notify-bot/`, `/run/ws-gateway/`
|
||||
- Group memberships: `getent group` for dataread, data-private, data-ops
|
||||
- Service status: `systemctl list-units`
|
||||
- Network: `ss -tlnp`, iptables, SSH config, nginx config
|
||||
- Crontabs: all users checked
|
||||
- Access control test: `sudo -u fisa ls` on private directory
|
||||
- Access control test: `sudo -u analyst1 ls` on private directory
|
||||
|
||||
3. **Validation**: Perplexity Sonar search for CVE references and best practices on:
|
||||
- Unix socket 0666 security (dirty_sock CVE-2019-7304)
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ Central server for distributing data to AI analytical systems.
|
|||
|
||||
| Parameter | Value |
|
||||
|-----------|-------|
|
||||
| Name | data-broker-for-claude |
|
||||
| GCP Project | kids-ai-data-analysis |
|
||||
| Name | your-server |
|
||||
| GCP Project | your-gcp-project |
|
||||
| Zone | europe-north1-a |
|
||||
| Type | e2-medium |
|
||||
| OS | Debian 12 (bookworm) |
|
||||
|
|
@ -36,13 +36,13 @@ Requires SSH config:
|
|||
```
|
||||
Host kids
|
||||
HostName YOUR_SERVER_IP
|
||||
User padak
|
||||
User admin1
|
||||
IdentityFile ~/.ssh/google_compute_engine
|
||||
```
|
||||
|
||||
Or via gcloud:
|
||||
```bash
|
||||
gcloud compute ssh data-broker-for-claude --project=kids-ai-data-analysis --zone=europe-north1-a
|
||||
gcloud compute ssh your-server --project=your-gcp-project --zone=europe-north1-a
|
||||
```
|
||||
|
||||
## Data Structure
|
||||
|
|
@ -62,7 +62,7 @@ gcloud compute ssh data-broker-for-claude --project=kids-ai-data-analysis --zone
|
|||
├── docs/ # Documentation (deployed from repo)
|
||||
│ └── schema.yml # Auto-generated table schemas (from data sync)
|
||||
├── scripts/ # Helper scripts (deployed from repo)
|
||||
├── examples/ # Example notification scripts (padak:data-ops, 755)
|
||||
├── examples/ # Example notification scripts (admin1:data-ops, 755)
|
||||
│ └── notifications/ # Example notification scripts for analysts
|
||||
├── notifications/ # Notification data (deploy:data-ops, 2770 setgid)
|
||||
│ ├── telegram_users.json # username -> {chat_id, linked_at} mapping
|
||||
|
|
@ -116,7 +116,7 @@ Three-tier permission model:
|
|||
Data in `/data/src_data/` uses ACL for granular access:
|
||||
|
||||
```
|
||||
/data/src_data/ owner: padak, group: data-ops
|
||||
/data/src_data/ owner: admin1, group: data-ops
|
||||
├── raw/ data-ops: rwx, dataread: r-x
|
||||
├── parquet/ data-ops: rwx, dataread: r-x
|
||||
│ └── private/ data-ops: rwx, data-private: r-x
|
||||
|
|
@ -211,20 +211,20 @@ sudo add-analyst novak "ssh-rsa AAAAB3... jan.novak@example.com"
|
|||
sudo add-analyst ceo "ssh-rsa AAAAB3... ceo@example.com" --private
|
||||
|
||||
# Server administrator
|
||||
sudo add-admin matejkys "ssh-rsa AAAAB3... matejkys@example.com"
|
||||
sudo add-admin dasa "ssh-ed25519 AAAAC3... dasa@your-domain.com"
|
||||
sudo add-admin admin2 "ssh-rsa AAAAB3... admin2@example.com"
|
||||
sudo add-admin admin3 "ssh-ed25519 AAAAC3... admin3@your-domain.com"
|
||||
```
|
||||
|
||||
Output for admin:
|
||||
```
|
||||
Admin matejkys created successfully
|
||||
Admin admin2 created successfully
|
||||
- Added to group: sudo (server administration)
|
||||
- Added to group: dataread (public data access)
|
||||
- Added to group: data-private (private data access)
|
||||
- Added to group: data-ops (application deployment)
|
||||
- Added to resource limits (unlimited)
|
||||
- Workspace: /home/matejkys/workspace
|
||||
- Data link: /home/matejkys/data -> /data/src_data
|
||||
- Workspace: /home/admin2/workspace
|
||||
- Data link: /home/admin2/data -> /data/src_data
|
||||
```
|
||||
|
||||
## SSH Configuration
|
||||
|
|
@ -531,7 +531,7 @@ ssh kids "du -sh /data/*"
|
|||
|
||||
| Disk | Mount | Size | Purpose | Backup |
|
||||
|------|-------|------|---------|--------|
|
||||
| `data-broker-for-claude` (sda) | `/` | 10 GB | OS, packages, app | Expendable (rebuild from git) |
|
||||
| `your-server` (sda) | `/` | 10 GB | OS, packages, app | Expendable (rebuild from git) |
|
||||
| `data-disk` (sdb) | `/data` | 30 GB | Parquet data, docs, scripts | Daily GCP snapshots |
|
||||
| `home-disk` (sdc) | `/home` | 30 GB | User homes, SSH keys, workspaces | Daily GCP snapshots |
|
||||
| `tmp-disk` (sdd) | `/tmp` | 100 GB | Temporary files | Expendable (not snapshotted) |
|
||||
|
|
@ -543,14 +543,14 @@ Both `data-disk` and `home-disk` have daily GCP snapshot schedules with 14-day r
|
|||
```bash
|
||||
# Check snapshot schedule status
|
||||
gcloud compute resource-policies describe daily-backup \
|
||||
--project=kids-ai-data-analysis --region=europe-north1
|
||||
--project=your-gcp-project --region=europe-north1
|
||||
|
||||
# List existing snapshots
|
||||
gcloud compute snapshots list --project=kids-ai-data-analysis
|
||||
gcloud compute snapshots list --project=your-gcp-project
|
||||
|
||||
# Manual snapshot (if needed)
|
||||
gcloud compute disks snapshot data-disk home-disk \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--snapshot-names=data-disk-$(date +%Y%m%d),home-disk-$(date +%Y%m%d)
|
||||
```
|
||||
|
|
@ -696,9 +696,9 @@ sudo /opt/data-analyst/repo/server/setup.sh
|
|||
|
||||
**5. Add existing admins to data-ops group:**
|
||||
```bash
|
||||
sudo usermod -aG data-ops padak
|
||||
sudo usermod -aG data-ops matejkys
|
||||
sudo usermod -aG data-ops dasa
|
||||
sudo usermod -aG data-ops admin1
|
||||
sudo usermod -aG data-ops admin2
|
||||
sudo usermod -aG data-ops admin3
|
||||
```
|
||||
|
||||
### GitHub Secrets Required
|
||||
|
|
@ -918,7 +918,7 @@ This is handled in `webapp/user_service.py` and `server/telegram_bot/runner.py`.
|
|||
Username is generated from email address: the part before `@` converted to lowercase.
|
||||
|
||||
Examples:
|
||||
- `Petr.Simecek@your-domain.com` -> `petr.simecek`
|
||||
- `John.Doe@your-domain.com` -> `john.doe`
|
||||
- `john@your-domain.com` -> `john`
|
||||
|
||||
If a username conflicts with a reserved system name or existing non-analyst account, the user sees an error and must contact an admin to create the account manually with a different username.
|
||||
|
|
@ -929,7 +929,7 @@ If a username conflicts with a reserved system name or existing non-analyst acco
|
|||
```bash
|
||||
# Allow HTTP/HTTPS traffic (required for Let's Encrypt and webapp)
|
||||
gcloud compute firewall-rules create allow-http-data-broker \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--direction=INGRESS \
|
||||
--priority=1000 \
|
||||
--network=default \
|
||||
|
|
@ -939,8 +939,8 @@ gcloud compute firewall-rules create allow-http-data-broker \
|
|||
--target-tags=http-server,https-server
|
||||
|
||||
# Add tags to VM
|
||||
gcloud compute instances add-tags data-broker-for-claude \
|
||||
--project=kids-ai-data-analysis \
|
||||
gcloud compute instances add-tags your-server \
|
||||
--project=your-gcp-project \
|
||||
--zone=europe-north1-a \
|
||||
--tags=http-server,https-server
|
||||
```
|
||||
|
|
@ -1192,7 +1192,7 @@ Users can configure which optional datasets to sync via the web portal at `https
|
|||
**sync_settings.json format:**
|
||||
```json
|
||||
{
|
||||
"petr.simecek": {
|
||||
"john.doe": {
|
||||
"datasets": {
|
||||
"jira": true,
|
||||
"jira_attachments": false
|
||||
|
|
@ -1524,7 +1524,7 @@ Cron (update.sh, 3x daily)
|
|||
Step 3: python -m src.profiler → profiles.json
|
||||
│
|
||||
▼
|
||||
/data/src_data/metadata/profiles.json (mode 644, padak:data-ops)
|
||||
/data/src_data/metadata/profiles.json (mode 644, admin1:data-ops)
|
||||
│
|
||||
▼
|
||||
Webapp: GET /api/catalog/profile/<table_name>
|
||||
|
|
@ -1756,23 +1756,23 @@ The webapp runs as `www-data` which cannot write to `/home/{user}/` directories
|
|||
|
||||
### Username Mapping
|
||||
|
||||
The webapp uses email-derived usernames (e.g., `petr.simecek`) while the server uses Linux home directory names (e.g., `petr`). Most users match, only Petr differs.
|
||||
The webapp uses email-derived usernames (e.g., `john.doe`) while the server uses Linux home directory names (e.g., `john`). Most users match directly; add overrides when they differ.
|
||||
|
||||
Mapping is in `webapp/corporate_memory_service.py`:
|
||||
```python
|
||||
WEBAPP_TO_SERVER_USERNAME = {
|
||||
"petr.simecek": "petr",
|
||||
"john.doe": "john",
|
||||
}
|
||||
```
|
||||
|
||||
Display names for avatars (initials + tooltip):
|
||||
```python
|
||||
USER_DISPLAY_NAMES = {
|
||||
"petr": {"name": "Petr Simecek", "initials": "PS"},
|
||||
"dasa.damaskova": {"name": "Dasa Damaskova", "initials": "DD"},
|
||||
"martin.matejka": {"name": "Martin Matejka", "initials": "MM"},
|
||||
"jiri.manas": {"name": "Jiri Manas", "initials": "JM"},
|
||||
"pavel.dolezal": {"name": "Pavel Dolezal", "initials": "PD"},
|
||||
"john": {"name": "John Doe", "initials": "JD"},
|
||||
"jane.smith": {"name": "Jane Smith", "initials": "JS"},
|
||||
"mike.brown": {"name": "Mike Brown", "initials": "MB"},
|
||||
"tom.davis": {"name": "Tom Davis", "initials": "TD"},
|
||||
"alice.wilson": {"name": "Alice Wilson", "initials": "AW"},
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -1802,7 +1802,7 @@ USER_DISPLAY_NAMES = {
|
|||
"content": "Always read schema before queries...",
|
||||
"category": "workflow",
|
||||
"tags": ["duckdb", "best-practices"],
|
||||
"source_users": ["petr"],
|
||||
"source_users": ["john"],
|
||||
"extracted_at": "2026-02-05T21:54:18Z",
|
||||
"updated_at": "2026-02-05T21:54:18Z"
|
||||
}
|
||||
|
|
@ -1817,7 +1817,7 @@ USER_DISPLAY_NAMES = {
|
|||
**votes.json structure:**
|
||||
```json
|
||||
{
|
||||
"petr": {
|
||||
"john": {
|
||||
"km_abc123": 1,
|
||||
"km_def456": -1
|
||||
}
|
||||
|
|
@ -1940,7 +1940,7 @@ cat /data/corporate-memory/votes.json | python3 -m json.tool
|
|||
cat /data/corporate-memory/user_hashes.json | python3 -m json.tool
|
||||
|
||||
# View a user's synced rules
|
||||
ls -la /home/petr/.claude_rules/
|
||||
ls -la /home/john/.claude_rules/
|
||||
```
|
||||
|
||||
### Webapp Integration
|
||||
|
|
@ -2092,7 +2092,7 @@ Custom metrics derived from logs for trend analysis:
|
|||
- Real-time CPU, Memory, Disk, Network graphs
|
||||
- Systemd service failures
|
||||
- Health endpoint status
|
||||
- URL: https://console.cloud.google.com/monitoring/dashboards/custom/09cdd94b-a0ed-4458-952f-3cca2bd5ba6e?project=kids-ai-data-analysis
|
||||
- URL: https://console.cloud.google.com/monitoring/dashboards/custom/09cdd94b-a0ed-4458-952f-3cca2bd5ba6e?project=your-gcp-project
|
||||
|
||||
### Health Endpoint & Uptime Monitoring
|
||||
|
||||
|
|
@ -2135,7 +2135,7 @@ Returns detailed server status in JSON format:
|
|||
### Viewing Logs
|
||||
|
||||
**Cloud Logging Console:**
|
||||
https://console.cloud.google.com/logs?project=kids-ai-data-analysis
|
||||
https://console.cloud.google.com/logs?project=your-gcp-project
|
||||
|
||||
**Useful log queries:**
|
||||
|
||||
|
|
@ -2168,7 +2168,7 @@ resource.labels.instance_id="656c1763-11a1-49bb-bbc3-9782acf15aef"
|
|||
### Viewing Metrics
|
||||
|
||||
**Cloud Monitoring Console:**
|
||||
https://console.cloud.google.com/monitoring?project=kids-ai-data-analysis
|
||||
https://console.cloud.google.com/monitoring?project=your-gcp-project
|
||||
|
||||
**Metrics Explorer** - Useful metric queries:
|
||||
- CPU: `compute.googleapis.com/instance/cpu/utilization`
|
||||
|
|
@ -2191,21 +2191,21 @@ Significantly cheaper than Datadog (~$15-31/host/month).
|
|||
**List alert policies:**
|
||||
```bash
|
||||
gcloud alpha monitoring policies list \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--format="table(displayName,enabled,conditions[0].conditionThreshold.thresholdValue)"
|
||||
```
|
||||
|
||||
**Disable an alert:**
|
||||
```bash
|
||||
gcloud alpha monitoring policies update POLICY_ID \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--no-enabled
|
||||
```
|
||||
|
||||
**Add notification channel:**
|
||||
```bash
|
||||
gcloud alpha monitoring channels create \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--display-name="New Person" \
|
||||
--type=email \
|
||||
--channel-labels=email_address=person@your-domain.com
|
||||
|
|
@ -2234,7 +2234,7 @@ When investigating server issues (like the 2026-02-13 systemd-journald crash):
|
|||
```bash
|
||||
# Export logs to file
|
||||
gcloud logging read "resource.labels.instance_id=\"656c1763-11a1-49bb-bbc3-9782acf15aef\"" \
|
||||
--project=kids-ai-data-analysis \
|
||||
--project=your-gcp-project \
|
||||
--limit=1000 \
|
||||
--format=json \
|
||||
--freshness=1d > server_logs.json
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Guide for exploring Claude Code session transcripts to identify friction points
|
|||
|
||||
## Session Data Location
|
||||
|
||||
**Server:** `data-broker-for-claude` (alias: `kids`)
|
||||
**Server:** `your-server` (alias: `kids`)
|
||||
**Path:** `/data/user_sessions/`
|
||||
|
||||
Sessions are collected by systemd service `session-collector.timer` (runs every 30 minutes).
|
||||
|
|
@ -14,12 +14,12 @@ Sessions are collected by systemd service `session-collector.timer` (runs every
|
|||
Sessions are organized by user:
|
||||
```
|
||||
/data/user_sessions/
|
||||
├── petr/
|
||||
├── john/
|
||||
│ ├── 2026-02-10_49898dbe-5045-45f5-9177-2ff10917de4a.jsonl
|
||||
│ └── ...
|
||||
├── martin.matejka/
|
||||
├── mike.brown/
|
||||
│ └── ...
|
||||
└── jakub.sochan/
|
||||
└── sam.taylor/
|
||||
└── ...
|
||||
```
|
||||
|
||||
|
|
@ -29,10 +29,10 @@ Session files are owned by `root:data-ops` with `-rw-------` permissions, making
|
|||
|
||||
```bash
|
||||
# This FAILS
|
||||
scp kids:/data/user_sessions/petr/session.jsonl .
|
||||
scp kids:/data/user_sessions/john/session.jsonl .
|
||||
|
||||
# Use this instead
|
||||
ssh kids "sudo cat /data/user_sessions/petr/session.jsonl" > session.jsonl
|
||||
ssh kids "sudo cat /data/user_sessions/john/session.jsonl" > session.jsonl
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -79,7 +79,7 @@ total_time = sum(event['data']['elapsedTimeSeconds'] for event in bash_events)
|
|||
|
||||
**Example:**
|
||||
```
|
||||
Session petr.hunka_2026-02-09_19c0a02f:
|
||||
Session john.doe_2026-02-09_19c0a02f:
|
||||
First event: 2026-02-05 16:06:52
|
||||
Last event: 2026-02-09 15:18:50
|
||||
Span: 3 days, 23 hours
|
||||
|
|
@ -166,7 +166,7 @@ for cmd_id, times in commands.items():
|
|||
**Example:**
|
||||
```
|
||||
Issue #84 fixed: 2026-02-06 21:37:49
|
||||
Session file: petr.hunka_2026-02-09_19c0a02f.jsonl
|
||||
Session file: john.doe_2026-02-09_19c0a02f.jsonl
|
||||
File mtime: 2026-02-09 20:50 (within 48h filter)
|
||||
|
||||
BUT: Session started 2026-02-05 15:56 (BEFORE fix!)
|
||||
|
|
@ -339,17 +339,17 @@ ssh kids "ls /data/user_sessions/ | wc -l"
|
|||
ssh kids "find /data/user_sessions -name '*.jsonl' -mtime -7"
|
||||
|
||||
# Find sessions by user
|
||||
ssh kids "ls /data/user_sessions/ | grep '^petr-'"
|
||||
ssh kids "ls /data/user_sessions/ | grep '^john-'"
|
||||
```
|
||||
|
||||
### Download Sessions for Local Analysis
|
||||
|
||||
```bash
|
||||
# Download specific session
|
||||
scp kids:/data/user_sessions/petr-2024-12-15-abc123.jsonl .
|
||||
scp kids:/data/user_sessions/john-2024-12-15-abc123.jsonl .
|
||||
|
||||
# Download all sessions for a user
|
||||
scp kids:/data/user_sessions/petr-*.jsonl ./sessions/
|
||||
scp kids:/data/user_sessions/john-*.jsonl ./sessions/
|
||||
|
||||
# Download recent sessions
|
||||
ssh kids "find /data/user_sessions -mtime -7" | xargs -I {} scp kids:{} ./sessions/
|
||||
|
|
@ -828,7 +828,7 @@ def calculate_active_time(session_file, gap_threshold_minutes=10):
|
|||
}
|
||||
|
||||
# Example usage
|
||||
session = Path("~/session-analysis/raw/petr.hunka_2026-02-09_19c0a02f.jsonl").expanduser()
|
||||
session = Path("~/session-analysis/raw/john.doe_2026-02-09_19c0a02f.jsonl").expanduser()
|
||||
result = calculate_active_time(session)
|
||||
|
||||
print(f"Total span: {result['total_span_hours']:.2f} hours")
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ done
|
|||
|
||||
# Create deploy user (for CI/CD automated deployment)
|
||||
if ! id deploy > /dev/null 2>&1; then
|
||||
useradd -r -m -s /bin/bash -G data-ops deploy
|
||||
useradd -r -m -s /usr/sbin/nologin -G data-ops deploy
|
||||
echo "Created deploy user"
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -84,17 +84,15 @@ echo "Adding www-data to data-ops group..."
|
|||
usermod -aG data-ops www-data
|
||||
|
||||
# Install sudoers rules for www-data (from repo, includes all required rules)
|
||||
# Validate BEFORE copying to prevent broken sudo if syntax is invalid
|
||||
echo "Configuring sudoers..."
|
||||
SUDOERS_FILE="/etc/sudoers.d/webapp"
|
||||
cp "${REPO_DIR}/server/sudoers-webapp" "$SUDOERS_FILE"
|
||||
chmod 440 "$SUDOERS_FILE"
|
||||
|
||||
# Validate sudoers syntax
|
||||
if ! visudo -cf "$SUDOERS_FILE"; then
|
||||
echo "ERROR: Invalid sudoers syntax"
|
||||
rm -f "$SUDOERS_FILE"
|
||||
SUDOERS_SRC="${REPO_DIR}/server/sudoers-webapp"
|
||||
if ! visudo -cf "$SUDOERS_SRC"; then
|
||||
echo "ERROR: Invalid sudoers syntax in $SUDOERS_SRC"
|
||||
exit 1
|
||||
fi
|
||||
install -m 440 "$SUDOERS_SRC" "$SUDOERS_FILE"
|
||||
|
||||
# Install systemd service
|
||||
echo "Installing systemd service..."
|
||||
|
|
|
|||
Loading…
Reference in a new issue