Security fixes: sanitize dev_docs, harden sudoers and config validation
H1 - Sanitize dev_docs/ for public release:
- Replace all real employee names with generic placeholders
(padak->admin1, matejkys->admin2, dasa->admin3, petr->john, etc.)
- Replace GCP project ID (kids-ai-data-analysis -> your-gcp-project)
- Replace server hostname (data-broker-for-claude -> your-server)
- Replace real IP address (34.88.8.46 -> YOUR_SERVER_IP)
- Replace internal FQDN with placeholder
- Covers: security.md, server.md, disaster-recovery.md, desktop-app.md,
session_explore.md, plan-rsync-fix.md, draft/*.md
H3 - webapp-setup.sh: validate sudoers syntax BEFORE copying to /etc/sudoers.d
- Prevents broken sudo if syntax is invalid
- Uses install -m 440 for atomic copy with correct permissions
M1 - setup.sh: deploy user created with /usr/sbin/nologin instead of /bin/bash
- CI/CD service account does not need interactive shell
M2 - config/loader.py: warn on missing env vars, validate webapp_secret_key
- _resolve_env_refs now logs warnings for unset ${ENV_VAR} references
- _validate_config checks auth.webapp_secret_key is non-empty
- Prevents Flask signing sessions with empty secret key
All 118 tests pass.
This commit is contained in:
parent
26c4e0934d
commit
485ac0a742
11 changed files with 129 additions and 107 deletions
|
|
@ -24,24 +24,43 @@ CONFIG_DIR = Path(os.environ.get("CONFIG_DIR", "./config"))
|
||||||
_ENV_PATTERN = re.compile(r"\$\{([^}]+)\}")
|
_ENV_PATTERN = re.compile(r"\$\{([^}]+)\}")
|
||||||
|
|
||||||
|
|
||||||
def _resolve_env_refs(value: Any) -> Any:
|
def _resolve_env_refs(value: Any, _path: str = "") -> Any:
|
||||||
"""Resolve ${ENV_VAR} references in config values.
|
"""Resolve ${ENV_VAR} references in config values.
|
||||||
|
|
||||||
Walks the config tree recursively. String values containing ${VAR}
|
Walks the config tree recursively. String values containing ${VAR}
|
||||||
are replaced with the corresponding environment variable value
|
are replaced with the corresponding environment variable value.
|
||||||
(empty string if not set). Non-string values pass through unchanged.
|
Unset variables resolve to an empty string and are logged as a warning so misconfiguration is visible.
|
||||||
|
Non-string values pass through unchanged.
|
||||||
"""
|
"""
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
|
missing_vars: list[str] = []
|
||||||
|
|
||||||
def replacer(match: re.Match) -> str:
|
def replacer(match: re.Match) -> str:
|
||||||
env_key = match.group(1)
|
env_key = match.group(1)
|
||||||
return os.environ.get(env_key, "")
|
env_val = os.environ.get(env_key)
|
||||||
|
if env_val is None:
|
||||||
|
missing_vars.append(env_key)
|
||||||
|
return ""
|
||||||
|
return env_val
|
||||||
|
|
||||||
return _ENV_PATTERN.sub(replacer, value)
|
resolved = _ENV_PATTERN.sub(replacer, value)
|
||||||
|
for var in missing_vars:
|
||||||
|
logger.warning(
|
||||||
|
"Environment variable %s not set (referenced in config %s)",
|
||||||
|
var,
|
||||||
|
_path or "value",
|
||||||
|
)
|
||||||
|
return resolved
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
return {k: _resolve_env_refs(v) for k, v in value.items()}
|
return {
|
||||||
|
k: _resolve_env_refs(v, _path=f"{_path}.{k}" if _path else k)
|
||||||
|
for k, v in value.items()
|
||||||
|
}
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
return [_resolve_env_refs(item) for item in value]
|
return [
|
||||||
|
_resolve_env_refs(item, _path=f"{_path}[{i}]")
|
||||||
|
for i, item in enumerate(value)
|
||||||
|
]
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -94,8 +113,13 @@ def _validate_config(config: dict) -> None:
|
||||||
("server", "hostname"),
|
("server", "hostname"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Secret fields that must resolve to non-empty values (from .env)
|
||||||
|
required_secrets = [
|
||||||
|
("auth", "webapp_secret_key"),
|
||||||
|
]
|
||||||
|
|
||||||
missing = []
|
missing = []
|
||||||
for keys in required_paths:
|
for keys in required_paths + required_secrets:
|
||||||
value = config
|
value = config
|
||||||
path_str = ".".join(keys)
|
path_str = ".".join(keys)
|
||||||
for key in keys:
|
for key in keys:
|
||||||
|
|
@ -110,7 +134,7 @@ def _validate_config(config: dict) -> None:
|
||||||
if missing:
|
if missing:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Missing required instance config fields: {', '.join(missing)}. "
|
f"Missing required instance config fields: {', '.join(missing)}. "
|
||||||
f"Check config/instance.yaml"
|
f"Check config/instance.yaml and .env"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ Server: `wss://your-instance.example.com/ws/notifications`
|
||||||
|
|
||||||
```
|
```
|
||||||
Client -> Server: {"type":"auth","token":"eyJ..."}
|
Client -> Server: {"type":"auth","token":"eyJ..."}
|
||||||
Server -> Client: {"type":"auth_ok","username":"petr"}
|
Server -> Client: {"type":"auth_ok","username":"john"}
|
||||||
Server -> Client: {"type":"notification","id":"uuid","title":"Revenue Drop","message":"...","image_url":"/api/notifications/images/abc.png","script":"revenue_check","timestamp":"2026-01-30T10:00:00Z"}
|
Server -> Client: {"type":"notification","id":"uuid","title":"Revenue Drop","message":"...","image_url":"/api/notifications/images/abc.png","script":"revenue_check","timestamp":"2026-01-30T10:00:00Z"}
|
||||||
Server -> Client: {"type":"ping"}
|
Server -> Client: {"type":"ping"}
|
||||||
Client -> Server: {"type":"pong"}
|
Client -> Server: {"type":"pong"}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# Disaster Recovery
|
# Disaster Recovery
|
||||||
|
|
||||||
Recovery procedures for the Data Broker Server (`data-broker-for-claude`).
|
Recovery procedures for the Data Broker Server (`your-server`).
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
|
|
@ -40,8 +40,8 @@ Disk Layout:
|
||||||
1. **Create new VM** (same zone, attach existing disks):
|
1. **Create new VM** (same zone, attach existing disks):
|
||||||
```bash
|
```bash
|
||||||
# Create new instance with existing disks
|
# Create new instance with existing disks
|
||||||
gcloud compute instances create data-broker-for-claude \
|
gcloud compute instances create your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--machine-type=e2-medium \
|
--machine-type=e2-medium \
|
||||||
--image-family=debian-12 \
|
--image-family=debian-12 \
|
||||||
|
|
@ -50,13 +50,13 @@ Disk Layout:
|
||||||
--tags=http-server,https-server
|
--tags=http-server,https-server
|
||||||
|
|
||||||
# Attach existing data disks
|
# Attach existing data disks
|
||||||
gcloud compute instances attach-disk data-broker-for-claude \
|
gcloud compute instances attach-disk your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--disk=data-disk
|
--disk=data-disk
|
||||||
|
|
||||||
gcloud compute instances attach-disk data-broker-for-claude \
|
gcloud compute instances attach-disk your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--disk=home-disk
|
--disk=home-disk
|
||||||
```
|
```
|
||||||
|
|
@ -159,19 +159,19 @@ Disk Layout:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Find latest snapshot
|
# Find latest snapshot
|
||||||
gcloud compute snapshots list --project=kids-ai-data-analysis \
|
gcloud compute snapshots list --project=your-gcp-project \
|
||||||
--filter="sourceDisk:data-disk" --sort-by=~creationTimestamp --limit=5
|
--filter="sourceDisk:data-disk" --sort-by=~creationTimestamp --limit=5
|
||||||
|
|
||||||
# Create new disk from snapshot
|
# Create new disk from snapshot
|
||||||
gcloud compute disks create data-disk \
|
gcloud compute disks create data-disk \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--source-snapshot=SNAPSHOT_NAME \
|
--source-snapshot=SNAPSHOT_NAME \
|
||||||
--type=pd-balanced
|
--type=pd-balanced
|
||||||
|
|
||||||
# Attach to VM (may need to stop VM first)
|
# Attach to VM (may need to stop VM first)
|
||||||
gcloud compute instances attach-disk data-broker-for-claude \
|
gcloud compute instances attach-disk your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--disk=data-disk
|
--disk=data-disk
|
||||||
|
|
||||||
|
|
@ -184,7 +184,7 @@ ssh kids "sudo mount /dev/sdb /data"
|
||||||
```bash
|
```bash
|
||||||
# Create fresh disk
|
# Create fresh disk
|
||||||
gcloud compute disks create data-disk \
|
gcloud compute disks create data-disk \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--size=30GB \
|
--size=30GB \
|
||||||
--type=pd-balanced
|
--type=pd-balanced
|
||||||
|
|
@ -209,19 +209,19 @@ ssh kids "cd /opt/data-analyst/repo && ./scripts/update.sh"
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Find latest snapshot
|
# Find latest snapshot
|
||||||
gcloud compute snapshots list --project=kids-ai-data-analysis \
|
gcloud compute snapshots list --project=your-gcp-project \
|
||||||
--filter="sourceDisk:home-disk" --sort-by=~creationTimestamp --limit=5
|
--filter="sourceDisk:home-disk" --sort-by=~creationTimestamp --limit=5
|
||||||
|
|
||||||
# Create new disk from snapshot
|
# Create new disk from snapshot
|
||||||
gcloud compute disks create home-disk \
|
gcloud compute disks create home-disk \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--source-snapshot=SNAPSHOT_NAME \
|
--source-snapshot=SNAPSHOT_NAME \
|
||||||
--type=pd-balanced
|
--type=pd-balanced
|
||||||
|
|
||||||
# Attach to VM
|
# Attach to VM
|
||||||
gcloud compute instances attach-disk data-broker-for-claude \
|
gcloud compute instances attach-disk your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--disk=home-disk
|
--disk=home-disk
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ def _get_server_username(webapp_username) -> str # Reuse WEBAPP_TO
|
||||||
Storage format (`connections.json`):
|
Storage format (`connections.json`):
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"petr": {
|
"john": {
|
||||||
"purchase_orders": {
|
"purchase_orders": {
|
||||||
"connected": true,
|
"connected": true,
|
||||||
"api_key": "pk_live_abc123...",
|
"api_key": "pk_live_abc123...",
|
||||||
|
|
@ -126,7 +126,7 @@ What each external service needs to implement:
|
||||||
```
|
```
|
||||||
POST /api/internal/token-exchange
|
POST /api/internal/token-exchange
|
||||||
Authorization: Bearer <shared_secret>
|
Authorization: Bearer <shared_secret>
|
||||||
Body: {"user_email": "petr@your-domain.com", "ttl_days": 90}
|
Body: {"user_email": "john@your-domain.com", "ttl_days": 90}
|
||||||
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
||||||
|
|
||||||
POST /api/internal/token-revoke
|
POST /api/internal/token-revoke
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ SKILLS_DIR = Path(os.environ.get("SC_SKILLS_DIR", "/data/docs/service_connector_
|
||||||
# Username mapping (reuse existing pattern)
|
# Username mapping (reuse existing pattern)
|
||||||
WEBAPP_TO_SERVER_USERNAME = {
|
WEBAPP_TO_SERVER_USERNAME = {
|
||||||
# Add overrides here if webapp username != server username
|
# Add overrides here if webapp username != server username
|
||||||
# "dasa.damaskova": "dasa",
|
# "jane.smith": "jane",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -503,7 +503,7 @@ What each internal service needs to implement (simple Bearer + JSON):
|
||||||
POST /api/internal/token-exchange
|
POST /api/internal/token-exchange
|
||||||
Authorization: Bearer <shared_secret>
|
Authorization: Bearer <shared_secret>
|
||||||
Content-Type: application/json
|
Content-Type: application/json
|
||||||
Body: {"user_email": "petr@your-domain.com", "ttl_days": 365}
|
Body: {"user_email": "john@your-domain.com", "ttl_days": 365}
|
||||||
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
Response: {"status": "ok", "api_key": "...", "token_id": "...", "expires_at": "..."}
|
||||||
|
|
||||||
POST /api/internal/token-revoke
|
POST /api/internal/token-revoke
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
## Problem
|
## Problem
|
||||||
|
|
||||||
Rsync from GCP server (34.88.8.46) hangs after 1-5 minutes. Process exists but has 0% CPU and no network activity. 100% reproducible with ~7000 parquet files.
|
Rsync from GCP server (YOUR_SERVER_IP) hangs after 1-5 minutes. Process exists but has 0% CPU and no network activity. 100% reproducible with ~7000 parquet files.
|
||||||
|
|
||||||
## Root Cause Analysis
|
## Root Cause Analysis
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# Security Audit Report: Data Broker Server
|
# Security Audit Report: Data Broker Server
|
||||||
|
|
||||||
**Date:** 2026-01-30
|
**Date:** 2026-01-30
|
||||||
**Server:** `data-broker-for-claude` (YOUR_SERVER_IP), Debian 12 (bookworm), GCP e2-medium
|
**Server:** `your-server` (YOUR_SERVER_IP), Debian 12 (bookworm), GCP e2-medium
|
||||||
**Auditors:** Claude Opus 4.5 (primary) + Perplexity Sonar (validation) + OpenAI Codex (second opinion)
|
**Auditors:** Claude Opus 4.5 (primary) + Perplexity Sonar (validation) + OpenAI Codex (second opinion)
|
||||||
**Scope:** Linux server security, user isolation, CI/CD pipeline, notification system, desktop app attack surface
|
**Scope:** Linux server security, user isolation, CI/CD pipeline, notification system, desktop app attack surface
|
||||||
**Status:** Read-only audit -- no changes were made to the server
|
**Status:** Read-only audit -- no changes were made to the server
|
||||||
|
|
@ -44,8 +44,8 @@ These findings were independently validated by Perplexity (CVE references, Unix
|
||||||
|
|
||||||
| Parameter | Value |
|
| Parameter | Value |
|
||||||
|-----------|-------|
|
|-----------|-------|
|
||||||
| Hostname | data-broker-for-claude |
|
| Hostname | your-server |
|
||||||
| GCP Project | kids-ai-data-analysis |
|
| GCP Project | your-gcp-project |
|
||||||
| Zone | europe-north1-a |
|
| Zone | europe-north1-a |
|
||||||
| OS | Debian 12 (bookworm) |
|
| OS | Debian 12 (bookworm) |
|
||||||
| External IP | YOUR_SERVER_IP |
|
| External IP | YOUR_SERVER_IP |
|
||||||
|
|
@ -55,9 +55,9 @@ These findings were independently validated by Perplexity (CVE references, Unix
|
||||||
|
|
||||||
| Group | Members | Purpose |
|
| Group | Members | Purpose |
|
||||||
|-------|---------|---------|
|
|-------|---------|---------|
|
||||||
| `dataread` | padak, matejkys, dasa, petr, fisa, dasa.damaskova, martin.lepka, pavel.dolezal, martin.matejka, jiri.manas | Public data read access |
|
| `dataread` | admin1, admin2, admin3, john, analyst1, jane.smith, bob.jones, alice.wilson, mike.brown, tom.davis | Public data read access |
|
||||||
| `data-private` | padak, matejkys, dasa | Private/sensitive data access |
|
| `data-private` | admin1, admin2, admin3 | Private/sensitive data access |
|
||||||
| `data-ops` | deploy, padak, matejkys, dasa, www-data | Application deployment and operations |
|
| `data-ops` | deploy, admin1, admin2, admin3, www-data | Application deployment and operations |
|
||||||
|
|
||||||
### Services
|
### Services
|
||||||
|
|
||||||
|
|
@ -203,7 +203,7 @@ Source: SentinelOne vulnerability database, Compass Security research on dangero
|
||||||
|
|
||||||
#### Description
|
#### Description
|
||||||
|
|
||||||
The directory `/data/src_data/parquet/private/` is intended to be accessible only to members of the `data-private` group (3 privileged users: padak, matejkys, dasa). However, its POSIX ACL also grants access to the `dataread` group, which contains **all 10 analysts**:
|
The directory `/data/src_data/parquet/private/` is intended to be accessible only to members of the `data-private` group (3 privileged users: admin1, admin2, admin3). However, its POSIX ACL also grants access to the `dataread` group, which contains **all 10 analysts**:
|
||||||
|
|
||||||
```
|
```
|
||||||
# getfacl /data/src_data/parquet/private/
|
# getfacl /data/src_data/parquet/private/
|
||||||
|
|
@ -224,13 +224,13 @@ The POSIX ACL mask (`mask::rwx`) does not restrict the `dataread` entry because
|
||||||
|
|
||||||
#### Proof of Exploitation
|
#### Proof of Exploitation
|
||||||
|
|
||||||
Tested directly on server with user `fisa` (standard analyst, member of `dataread` only, NOT `data-private`):
|
Tested directly on server with user `analyst1` (standard analyst, member of `dataread` only, NOT `data-private`):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ sudo -u fisa ls -la /data/src_data/parquet/private/
|
$ sudo -u analyst1 ls -la /data/src_data/parquet/private/
|
||||||
total 16
|
total 16
|
||||||
drwxrws---+ 2 padak data-ops 4096 Jan 21 14:29 .
|
drwxrws---+ 2 admin1 data-ops 4096 Jan 21 14:29 .
|
||||||
drwxrws---+ 7 padak data-ops 4096 Jan 23 18:29 ..
|
drwxrws---+ 7 admin1 data-ops 4096 Jan 23 18:29 ..
|
||||||
# Exit code: 0 (access granted)
|
# Exit code: 0 (access granted)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -265,7 +265,7 @@ sudo setfacl -R -m u::rwx,g::rwx,g:data-private:r-x,g:data-ops:rwx,o::--- /data/
|
||||||
sudo setfacl -R -d -m u::rwx,g::rwx,g:data-private:r-x,g:data-ops:rwx,o::--- /data/src_data/parquet/private/
|
sudo setfacl -R -d -m u::rwx,g::rwx,g:data-private:r-x,g:data-ops:rwx,o::--- /data/src_data/parquet/private/
|
||||||
|
|
||||||
# Verify:
|
# Verify:
|
||||||
sudo -u fisa ls /data/src_data/parquet/private/
|
sudo -u analyst1 ls /data/src_data/parquet/private/
|
||||||
# Expected: "ls: cannot open directory 'private/': Permission denied"
|
# Expected: "ls: cannot open directory 'private/': Permission denied"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -461,7 +461,7 @@ The server may be reachable on port 25 from the internet, potentially allowing:
|
||||||
|
|
||||||
```
|
```
|
||||||
# /etc/postfix/main.cf
|
# /etc/postfix/main.cf
|
||||||
myhostname = data-broker-for-claude.c.kids-ai-data-analysis.internal
|
myhostname = your-server.c.your-gcp-project.internal
|
||||||
mydestination = $myhostname, localhost
|
mydestination = $myhostname, localhost
|
||||||
inet_interfaces = all
|
inet_interfaces = all
|
||||||
|
|
||||||
|
|
@ -751,14 +751,14 @@ The following findings were identified by the OpenAI Codex second opinion review
|
||||||
- `webapp/desktop_auth.py`, `webapp/user_service.py` (auth flows)
|
- `webapp/desktop_auth.py`, `webapp/user_service.py` (auth flows)
|
||||||
- `.github/workflows/deploy.yml` (CI/CD configuration)
|
- `.github/workflows/deploy.yml` (CI/CD configuration)
|
||||||
|
|
||||||
2. **Live server inspection** (read-only, via SSH as padak):
|
2. **Live server inspection** (read-only, via SSH as admin1):
|
||||||
- File permissions: `ls -la`, `stat`, `getfacl` on all critical paths
|
- File permissions: `ls -la`, `stat`, `getfacl` on all critical paths
|
||||||
- Socket permissions: `/run/notify-bot/`, `/run/ws-gateway/`
|
- Socket permissions: `/run/notify-bot/`, `/run/ws-gateway/`
|
||||||
- Group memberships: `getent group` for dataread, data-private, data-ops
|
- Group memberships: `getent group` for dataread, data-private, data-ops
|
||||||
- Service status: `systemctl list-units`
|
- Service status: `systemctl list-units`
|
||||||
- Network: `ss -tlnp`, iptables, SSH config, nginx config
|
- Network: `ss -tlnp`, iptables, SSH config, nginx config
|
||||||
- Crontabs: all users checked
|
- Crontabs: all users checked
|
||||||
- Access control test: `sudo -u fisa ls` on private directory
|
- Access control test: `sudo -u analyst1 ls` on private directory
|
||||||
|
|
||||||
3. **Validation**: Perplexity Sonar search for CVE references and best practices on:
|
3. **Validation**: Perplexity Sonar search for CVE references and best practices on:
|
||||||
- Unix socket 0666 security (dirty_sock CVE-2019-7304)
|
- Unix socket 0666 security (dirty_sock CVE-2019-7304)
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@ Central server for distributing data to AI analytical systems.
|
||||||
|
|
||||||
| Parameter | Value |
|
| Parameter | Value |
|
||||||
|-----------|-------|
|
|-----------|-------|
|
||||||
| Name | data-broker-for-claude |
|
| Name | your-server |
|
||||||
| GCP Project | kids-ai-data-analysis |
|
| GCP Project | your-gcp-project |
|
||||||
| Zone | europe-north1-a |
|
| Zone | europe-north1-a |
|
||||||
| Type | e2-medium |
|
| Type | e2-medium |
|
||||||
| OS | Debian 12 (bookworm) |
|
| OS | Debian 12 (bookworm) |
|
||||||
|
|
@ -36,13 +36,13 @@ Requires SSH config:
|
||||||
```
|
```
|
||||||
Host kids
|
Host kids
|
||||||
HostName YOUR_SERVER_IP
|
HostName YOUR_SERVER_IP
|
||||||
User padak
|
User admin1
|
||||||
IdentityFile ~/.ssh/google_compute_engine
|
IdentityFile ~/.ssh/google_compute_engine
|
||||||
```
|
```
|
||||||
|
|
||||||
Or via gcloud:
|
Or via gcloud:
|
||||||
```bash
|
```bash
|
||||||
gcloud compute ssh data-broker-for-claude --project=kids-ai-data-analysis --zone=europe-north1-a
|
gcloud compute ssh your-server --project=your-gcp-project --zone=europe-north1-a
|
||||||
```
|
```
|
||||||
|
|
||||||
## Data Structure
|
## Data Structure
|
||||||
|
|
@ -62,7 +62,7 @@ gcloud compute ssh data-broker-for-claude --project=kids-ai-data-analysis --zone
|
||||||
├── docs/ # Documentation (deployed from repo)
|
├── docs/ # Documentation (deployed from repo)
|
||||||
│ └── schema.yml # Auto-generated table schemas (from data sync)
|
│ └── schema.yml # Auto-generated table schemas (from data sync)
|
||||||
├── scripts/ # Helper scripts (deployed from repo)
|
├── scripts/ # Helper scripts (deployed from repo)
|
||||||
├── examples/ # Example notification scripts (padak:data-ops, 755)
|
├── examples/ # Example notification scripts (admin1:data-ops, 755)
|
||||||
│ └── notifications/ # Example notification scripts for analysts
|
│ └── notifications/ # Example notification scripts for analysts
|
||||||
├── notifications/ # Notification data (deploy:data-ops, 2770 setgid)
|
├── notifications/ # Notification data (deploy:data-ops, 2770 setgid)
|
||||||
│ ├── telegram_users.json # username -> {chat_id, linked_at} mapping
|
│ ├── telegram_users.json # username -> {chat_id, linked_at} mapping
|
||||||
|
|
@ -116,7 +116,7 @@ Three-tier permission model:
|
||||||
Data in `/data/src_data/` uses ACL for granular access:
|
Data in `/data/src_data/` uses ACL for granular access:
|
||||||
|
|
||||||
```
|
```
|
||||||
/data/src_data/ owner: padak, group: data-ops
|
/data/src_data/ owner: admin1, group: data-ops
|
||||||
├── raw/ data-ops: rwx, dataread: r-x
|
├── raw/ data-ops: rwx, dataread: r-x
|
||||||
├── parquet/ data-ops: rwx, dataread: r-x
|
├── parquet/ data-ops: rwx, dataread: r-x
|
||||||
│ └── private/ data-ops: rwx, data-private: r-x
|
│ └── private/ data-ops: rwx, data-private: r-x
|
||||||
|
|
@ -211,20 +211,20 @@ sudo add-analyst novak "ssh-rsa AAAAB3... jan.novak@example.com"
|
||||||
sudo add-analyst ceo "ssh-rsa AAAAB3... ceo@example.com" --private
|
sudo add-analyst ceo "ssh-rsa AAAAB3... ceo@example.com" --private
|
||||||
|
|
||||||
# Server administrator
|
# Server administrator
|
||||||
sudo add-admin matejkys "ssh-rsa AAAAB3... matejkys@example.com"
|
sudo add-admin admin2 "ssh-rsa AAAAB3... admin2@example.com"
|
||||||
sudo add-admin dasa "ssh-ed25519 AAAAC3... dasa@your-domain.com"
|
sudo add-admin admin3 "ssh-ed25519 AAAAC3... admin3@your-domain.com"
|
||||||
```
|
```
|
||||||
|
|
||||||
Output for admin:
|
Output for admin:
|
||||||
```
|
```
|
||||||
Admin matejkys created successfully
|
Admin admin2 created successfully
|
||||||
- Added to group: sudo (server administration)
|
- Added to group: sudo (server administration)
|
||||||
- Added to group: dataread (public data access)
|
- Added to group: dataread (public data access)
|
||||||
- Added to group: data-private (private data access)
|
- Added to group: data-private (private data access)
|
||||||
- Added to group: data-ops (application deployment)
|
- Added to group: data-ops (application deployment)
|
||||||
- Added to resource limits (unlimited)
|
- Added to resource limits (unlimited)
|
||||||
- Workspace: /home/matejkys/workspace
|
- Workspace: /home/admin2/workspace
|
||||||
- Data link: /home/matejkys/data -> /data/src_data
|
- Data link: /home/admin2/data -> /data/src_data
|
||||||
```
|
```
|
||||||
|
|
||||||
## SSH Configuration
|
## SSH Configuration
|
||||||
|
|
@ -531,7 +531,7 @@ ssh kids "du -sh /data/*"
|
||||||
|
|
||||||
| Disk | Mount | Size | Purpose | Backup |
|
| Disk | Mount | Size | Purpose | Backup |
|
||||||
|------|-------|------|---------|--------|
|
|------|-------|------|---------|--------|
|
||||||
| `data-broker-for-claude` (sda) | `/` | 10 GB | OS, packages, app | Expendable (rebuild from git) |
|
| `your-server` (sda) | `/` | 10 GB | OS, packages, app | Expendable (rebuild from git) |
|
||||||
| `data-disk` (sdb) | `/data` | 30 GB | Parquet data, docs, scripts | Daily GCP snapshots |
|
| `data-disk` (sdb) | `/data` | 30 GB | Parquet data, docs, scripts | Daily GCP snapshots |
|
||||||
| `home-disk` (sdc) | `/home` | 30 GB | User homes, SSH keys, workspaces | Daily GCP snapshots |
|
| `home-disk` (sdc) | `/home` | 30 GB | User homes, SSH keys, workspaces | Daily GCP snapshots |
|
||||||
| `tmp-disk` (sdd) | `/tmp` | 100 GB | Temporary files | Expendable (not snapshotted) |
|
| `tmp-disk` (sdd) | `/tmp` | 100 GB | Temporary files | Expendable (not snapshotted) |
|
||||||
|
|
@ -543,14 +543,14 @@ Both `data-disk` and `home-disk` have daily GCP snapshot schedules with 14-day r
|
||||||
```bash
|
```bash
|
||||||
# Check snapshot schedule status
|
# Check snapshot schedule status
|
||||||
gcloud compute resource-policies describe daily-backup \
|
gcloud compute resource-policies describe daily-backup \
|
||||||
--project=kids-ai-data-analysis --region=europe-north1
|
--project=your-gcp-project --region=europe-north1
|
||||||
|
|
||||||
# List existing snapshots
|
# List existing snapshots
|
||||||
gcloud compute snapshots list --project=kids-ai-data-analysis
|
gcloud compute snapshots list --project=your-gcp-project
|
||||||
|
|
||||||
# Manual snapshot (if needed)
|
# Manual snapshot (if needed)
|
||||||
gcloud compute disks snapshot data-disk home-disk \
|
gcloud compute disks snapshot data-disk home-disk \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--snapshot-names=data-disk-$(date +%Y%m%d),home-disk-$(date +%Y%m%d)
|
--snapshot-names=data-disk-$(date +%Y%m%d),home-disk-$(date +%Y%m%d)
|
||||||
```
|
```
|
||||||
|
|
@ -696,9 +696,9 @@ sudo /opt/data-analyst/repo/server/setup.sh
|
||||||
|
|
||||||
**5. Add existing admins to data-ops group:**
|
**5. Add existing admins to data-ops group:**
|
||||||
```bash
|
```bash
|
||||||
sudo usermod -aG data-ops padak
|
sudo usermod -aG data-ops admin1
|
||||||
sudo usermod -aG data-ops matejkys
|
sudo usermod -aG data-ops admin2
|
||||||
sudo usermod -aG data-ops dasa
|
sudo usermod -aG data-ops admin3
|
||||||
```
|
```
|
||||||
|
|
||||||
### GitHub Secrets Required
|
### GitHub Secrets Required
|
||||||
|
|
@ -918,7 +918,7 @@ This is handled in `webapp/user_service.py` and `server/telegram_bot/runner.py`.
|
||||||
Username is generated from email address: the part before `@` converted to lowercase.
|
Username is generated from email address: the part before `@` converted to lowercase.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
- `Petr.Simecek@your-domain.com` -> `petr.simecek`
|
- `John.Doe@your-domain.com` -> `john.doe`
|
||||||
- `john@your-domain.com` -> `john`
|
- `john@your-domain.com` -> `john`
|
||||||
|
|
||||||
If a username conflicts with a reserved system name or existing non-analyst account, the user sees an error and must contact an admin to create the account manually with a different username.
|
If a username conflicts with a reserved system name or existing non-analyst account, the user sees an error and must contact an admin to create the account manually with a different username.
|
||||||
|
|
@ -929,7 +929,7 @@ If a username conflicts with a reserved system name or existing non-analyst acco
|
||||||
```bash
|
```bash
|
||||||
# Allow HTTP/HTTPS traffic (required for Let's Encrypt and webapp)
|
# Allow HTTP/HTTPS traffic (required for Let's Encrypt and webapp)
|
||||||
gcloud compute firewall-rules create allow-http-data-broker \
|
gcloud compute firewall-rules create allow-http-data-broker \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--direction=INGRESS \
|
--direction=INGRESS \
|
||||||
--priority=1000 \
|
--priority=1000 \
|
||||||
--network=default \
|
--network=default \
|
||||||
|
|
@ -939,8 +939,8 @@ gcloud compute firewall-rules create allow-http-data-broker \
|
||||||
--target-tags=http-server,https-server
|
--target-tags=http-server,https-server
|
||||||
|
|
||||||
# Add tags to VM
|
# Add tags to VM
|
||||||
gcloud compute instances add-tags data-broker-for-claude \
|
gcloud compute instances add-tags your-server \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--zone=europe-north1-a \
|
--zone=europe-north1-a \
|
||||||
--tags=http-server,https-server
|
--tags=http-server,https-server
|
||||||
```
|
```
|
||||||
|
|
@ -1192,7 +1192,7 @@ Users can configure which optional datasets to sync via the web portal at `https
|
||||||
**sync_settings.json format:**
|
**sync_settings.json format:**
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"petr.simecek": {
|
"john.doe": {
|
||||||
"datasets": {
|
"datasets": {
|
||||||
"jira": true,
|
"jira": true,
|
||||||
"jira_attachments": false
|
"jira_attachments": false
|
||||||
|
|
@ -1524,7 +1524,7 @@ Cron (update.sh, 3x daily)
|
||||||
Step 3: python -m src.profiler → profiles.json
|
Step 3: python -m src.profiler → profiles.json
|
||||||
│
|
│
|
||||||
▼
|
▼
|
||||||
/data/src_data/metadata/profiles.json (mode 644, padak:data-ops)
|
/data/src_data/metadata/profiles.json (mode 644, admin1:data-ops)
|
||||||
│
|
│
|
||||||
▼
|
▼
|
||||||
Webapp: GET /api/catalog/profile/<table_name>
|
Webapp: GET /api/catalog/profile/<table_name>
|
||||||
|
|
@ -1756,23 +1756,23 @@ The webapp runs as `www-data` which cannot write to `/home/{user}/` directories
|
||||||
|
|
||||||
### Username Mapping
|
### Username Mapping
|
||||||
|
|
||||||
The webapp uses email-derived usernames (e.g., `petr.simecek`) while the server uses Linux home directory names (e.g., `petr`). Most users match, only Petr differs.
|
The webapp uses email-derived usernames (e.g., `john.doe`) while the server uses Linux home directory names (e.g., `john`). Most users match directly; add overrides when they differ.
|
||||||
|
|
||||||
Mapping is in `webapp/corporate_memory_service.py`:
|
Mapping is in `webapp/corporate_memory_service.py`:
|
||||||
```python
|
```python
|
||||||
WEBAPP_TO_SERVER_USERNAME = {
|
WEBAPP_TO_SERVER_USERNAME = {
|
||||||
"petr.simecek": "petr",
|
"john.doe": "john",
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Display names for avatars (initials + tooltip):
|
Display names for avatars (initials + tooltip):
|
||||||
```python
|
```python
|
||||||
USER_DISPLAY_NAMES = {
|
USER_DISPLAY_NAMES = {
|
||||||
"petr": {"name": "Petr Simecek", "initials": "PS"},
|
"john": {"name": "John Doe", "initials": "JD"},
|
||||||
"dasa.damaskova": {"name": "Dasa Damaskova", "initials": "DD"},
|
"jane.smith": {"name": "Jane Smith", "initials": "JS"},
|
||||||
"martin.matejka": {"name": "Martin Matejka", "initials": "MM"},
|
"mike.brown": {"name": "Mike Brown", "initials": "MB"},
|
||||||
"jiri.manas": {"name": "Jiri Manas", "initials": "JM"},
|
"tom.davis": {"name": "Tom Davis", "initials": "TD"},
|
||||||
"pavel.dolezal": {"name": "Pavel Dolezal", "initials": "PD"},
|
"alice.wilson": {"name": "Alice Wilson", "initials": "AW"},
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -1802,7 +1802,7 @@ USER_DISPLAY_NAMES = {
|
||||||
"content": "Always read schema before queries...",
|
"content": "Always read schema before queries...",
|
||||||
"category": "workflow",
|
"category": "workflow",
|
||||||
"tags": ["duckdb", "best-practices"],
|
"tags": ["duckdb", "best-practices"],
|
||||||
"source_users": ["petr"],
|
"source_users": ["john"],
|
||||||
"extracted_at": "2026-02-05T21:54:18Z",
|
"extracted_at": "2026-02-05T21:54:18Z",
|
||||||
"updated_at": "2026-02-05T21:54:18Z"
|
"updated_at": "2026-02-05T21:54:18Z"
|
||||||
}
|
}
|
||||||
|
|
@ -1817,7 +1817,7 @@ USER_DISPLAY_NAMES = {
|
||||||
**votes.json structure:**
|
**votes.json structure:**
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"petr": {
|
"john": {
|
||||||
"km_abc123": 1,
|
"km_abc123": 1,
|
||||||
"km_def456": -1
|
"km_def456": -1
|
||||||
}
|
}
|
||||||
|
|
@ -1940,7 +1940,7 @@ cat /data/corporate-memory/votes.json | python3 -m json.tool
|
||||||
cat /data/corporate-memory/user_hashes.json | python3 -m json.tool
|
cat /data/corporate-memory/user_hashes.json | python3 -m json.tool
|
||||||
|
|
||||||
# View a user's synced rules
|
# View a user's synced rules
|
||||||
ls -la /home/petr/.claude_rules/
|
ls -la /home/john/.claude_rules/
|
||||||
```
|
```
|
||||||
|
|
||||||
### Webapp Integration
|
### Webapp Integration
|
||||||
|
|
@ -2092,7 +2092,7 @@ Custom metrics derived from logs for trend analysis:
|
||||||
- Real-time CPU, Memory, Disk, Network graphs
|
- Real-time CPU, Memory, Disk, Network graphs
|
||||||
- Systemd service failures
|
- Systemd service failures
|
||||||
- Health endpoint status
|
- Health endpoint status
|
||||||
- URL: https://console.cloud.google.com/monitoring/dashboards/custom/09cdd94b-a0ed-4458-952f-3cca2bd5ba6e?project=kids-ai-data-analysis
|
- URL: https://console.cloud.google.com/monitoring/dashboards/custom/09cdd94b-a0ed-4458-952f-3cca2bd5ba6e?project=your-gcp-project
|
||||||
|
|
||||||
### Health Endpoint & Uptime Monitoring
|
### Health Endpoint & Uptime Monitoring
|
||||||
|
|
||||||
|
|
@ -2135,7 +2135,7 @@ Returns detailed server status in JSON format:
|
||||||
### Viewing Logs
|
### Viewing Logs
|
||||||
|
|
||||||
**Cloud Logging Console:**
|
**Cloud Logging Console:**
|
||||||
https://console.cloud.google.com/logs?project=kids-ai-data-analysis
|
https://console.cloud.google.com/logs?project=your-gcp-project
|
||||||
|
|
||||||
**Useful log queries:**
|
**Useful log queries:**
|
||||||
|
|
||||||
|
|
@ -2168,7 +2168,7 @@ resource.labels.instance_id="656c1763-11a1-49bb-bbc3-9782acf15aef"
|
||||||
### Viewing Metrics
|
### Viewing Metrics
|
||||||
|
|
||||||
**Cloud Monitoring Console:**
|
**Cloud Monitoring Console:**
|
||||||
https://console.cloud.google.com/monitoring?project=kids-ai-data-analysis
|
https://console.cloud.google.com/monitoring?project=your-gcp-project
|
||||||
|
|
||||||
**Metrics Explorer** - Useful metric queries:
|
**Metrics Explorer** - Useful metric queries:
|
||||||
- CPU: `compute.googleapis.com/instance/cpu/utilization`
|
- CPU: `compute.googleapis.com/instance/cpu/utilization`
|
||||||
|
|
@ -2191,21 +2191,21 @@ Significantly cheaper than Datadog (~$15-31/host/month).
|
||||||
**List alert policies:**
|
**List alert policies:**
|
||||||
```bash
|
```bash
|
||||||
gcloud alpha monitoring policies list \
|
gcloud alpha monitoring policies list \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--format="table(displayName,enabled,conditions[0].conditionThreshold.thresholdValue)"
|
--format="table(displayName,enabled,conditions[0].conditionThreshold.thresholdValue)"
|
||||||
```
|
```
|
||||||
|
|
||||||
**Disable an alert:**
|
**Disable an alert:**
|
||||||
```bash
|
```bash
|
||||||
gcloud alpha monitoring policies update POLICY_ID \
|
gcloud alpha monitoring policies update POLICY_ID \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--no-enabled
|
--no-enabled
|
||||||
```
|
```
|
||||||
|
|
||||||
**Add notification channel:**
|
**Add notification channel:**
|
||||||
```bash
|
```bash
|
||||||
gcloud alpha monitoring channels create \
|
gcloud alpha monitoring channels create \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--display-name="New Person" \
|
--display-name="New Person" \
|
||||||
--type=email \
|
--type=email \
|
||||||
--channel-labels=email_address=person@your-domain.com
|
--channel-labels=email_address=person@your-domain.com
|
||||||
|
|
@ -2234,7 +2234,7 @@ When investigating server issues (like the 2026-02-13 systemd-journald crash):
|
||||||
```bash
|
```bash
|
||||||
# Export logs to file
|
# Export logs to file
|
||||||
gcloud logging read "resource.labels.instance_id=\"656c1763-11a1-49bb-bbc3-9782acf15aef\"" \
|
gcloud logging read "resource.labels.instance_id=\"656c1763-11a1-49bb-bbc3-9782acf15aef\"" \
|
||||||
--project=kids-ai-data-analysis \
|
--project=your-gcp-project \
|
||||||
--limit=1000 \
|
--limit=1000 \
|
||||||
--format=json \
|
--format=json \
|
||||||
--freshness=1d > server_logs.json
|
--freshness=1d > server_logs.json
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ Guide for exploring Claude Code session transcripts to identify friction points
|
||||||
|
|
||||||
## Session Data Location
|
## Session Data Location
|
||||||
|
|
||||||
**Server:** `data-broker-for-claude` (alias: `kids`)
|
**Server:** `your-server` (alias: `kids`)
|
||||||
**Path:** `/data/user_sessions/`
|
**Path:** `/data/user_sessions/`
|
||||||
|
|
||||||
Sessions are collected by systemd service `session-collector.timer` (runs every 30 minutes).
|
Sessions are collected by systemd service `session-collector.timer` (runs every 30 minutes).
|
||||||
|
|
@ -14,12 +14,12 @@ Sessions are collected by systemd service `session-collector.timer` (runs every
|
||||||
Sessions are organized by user:
|
Sessions are organized by user:
|
||||||
```
|
```
|
||||||
/data/user_sessions/
|
/data/user_sessions/
|
||||||
├── petr/
|
├── john/
|
||||||
│ ├── 2026-02-10_49898dbe-5045-45f5-9177-2ff10917de4a.jsonl
|
│ ├── 2026-02-10_49898dbe-5045-45f5-9177-2ff10917de4a.jsonl
|
||||||
│ └── ...
|
│ └── ...
|
||||||
├── martin.matejka/
|
├── mike.brown/
|
||||||
│ └── ...
|
│ └── ...
|
||||||
└── jakub.sochan/
|
└── sam.taylor/
|
||||||
└── ...
|
└── ...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -29,10 +29,10 @@ Session files are owned by `root:data-ops` with `-rw-------` permissions, making
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# This FAILS
|
# This FAILS
|
||||||
scp kids:/data/user_sessions/petr/session.jsonl .
|
scp kids:/data/user_sessions/john/session.jsonl .
|
||||||
|
|
||||||
# Use this instead
|
# Use this instead
|
||||||
ssh kids "sudo cat /data/user_sessions/petr/session.jsonl" > session.jsonl
|
ssh kids "sudo cat /data/user_sessions/john/session.jsonl" > session.jsonl
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -79,7 +79,7 @@ total_time = sum(event['data']['elapsedTimeSeconds'] for event in bash_events)
|
||||||
|
|
||||||
**Example:**
|
**Example:**
|
||||||
```
|
```
|
||||||
Session petr.hunka_2026-02-09_19c0a02f:
|
Session john.doe_2026-02-09_19c0a02f:
|
||||||
First event: 2026-02-05 16:06:52
|
First event: 2026-02-05 16:06:52
|
||||||
Last event: 2026-02-09 15:18:50
|
Last event: 2026-02-09 15:18:50
|
||||||
Span: 3 days, 23 hours
|
Span: 3 days, 23 hours
|
||||||
|
|
@ -166,7 +166,7 @@ for cmd_id, times in commands.items():
|
||||||
**Example:**
|
**Example:**
|
||||||
```
|
```
|
||||||
Issue #84 fixed: 2026-02-06 21:37:49
|
Issue #84 fixed: 2026-02-06 21:37:49
|
||||||
Session file: petr.hunka_2026-02-09_19c0a02f.jsonl
|
Session file: john.doe_2026-02-09_19c0a02f.jsonl
|
||||||
File mtime: 2026-02-09 20:50 (within 48h filter)
|
File mtime: 2026-02-09 20:50 (within 48h filter)
|
||||||
|
|
||||||
BUT: Session started 2026-02-05 15:56 (BEFORE fix!)
|
BUT: Session started 2026-02-05 15:56 (BEFORE fix!)
|
||||||
|
|
@ -339,17 +339,17 @@ ssh kids "ls /data/user_sessions/ | wc -l"
|
||||||
ssh kids "find /data/user_sessions -name '*.jsonl' -mtime -7"
|
ssh kids "find /data/user_sessions -name '*.jsonl' -mtime -7"
|
||||||
|
|
||||||
# Find sessions by user
|
# Find sessions by user
|
||||||
ssh kids "ls /data/user_sessions/ | grep '^petr-'"
|
ssh kids "ls /data/user_sessions/ | grep '^john-'"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Download Sessions for Local Analysis
|
### Download Sessions for Local Analysis
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Download specific session
|
# Download specific session
|
||||||
scp kids:/data/user_sessions/petr-2024-12-15-abc123.jsonl .
|
scp kids:/data/user_sessions/john-2024-12-15-abc123.jsonl .
|
||||||
|
|
||||||
# Download all sessions for a user
|
# Download all sessions for a user
|
||||||
scp kids:/data/user_sessions/petr-*.jsonl ./sessions/
|
scp kids:/data/user_sessions/john-*.jsonl ./sessions/
|
||||||
|
|
||||||
# Download recent sessions
|
# Download recent sessions
|
||||||
ssh kids "find /data/user_sessions -mtime -7" | xargs -I {} scp kids:{} ./sessions/
|
ssh kids "find /data/user_sessions -mtime -7" | xargs -I {} scp kids:{} ./sessions/
|
||||||
|
|
@ -828,7 +828,7 @@ def calculate_active_time(session_file, gap_threshold_minutes=10):
|
||||||
}
|
}
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
session = Path("~/session-analysis/raw/petr.hunka_2026-02-09_19c0a02f.jsonl").expanduser()
|
session = Path("~/session-analysis/raw/john.doe_2026-02-09_19c0a02f.jsonl").expanduser()
|
||||||
result = calculate_active_time(session)
|
result = calculate_active_time(session)
|
||||||
|
|
||||||
print(f"Total span: {result['total_span_hours']:.2f} hours")
|
print(f"Total span: {result['total_span_hours']:.2f} hours")
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ done
|
||||||
|
|
||||||
# Create deploy user (for CI/CD automated deployment)
|
# Create deploy user (for CI/CD automated deployment)
|
||||||
if ! id deploy > /dev/null 2>&1; then
|
if ! id deploy > /dev/null 2>&1; then
|
||||||
useradd -r -m -s /bin/bash -G data-ops deploy
|
useradd -r -m -s /usr/sbin/nologin -G data-ops deploy
|
||||||
echo "Created deploy user"
|
echo "Created deploy user"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,17 +84,15 @@ echo "Adding www-data to data-ops group..."
|
||||||
usermod -aG data-ops www-data
|
usermod -aG data-ops www-data
|
||||||
|
|
||||||
# Install sudoers rules for www-data (from repo, includes all required rules)
|
# Install sudoers rules for www-data (from repo, includes all required rules)
|
||||||
|
# Validate BEFORE copying to prevent broken sudo if syntax is invalid
|
||||||
echo "Configuring sudoers..."
|
echo "Configuring sudoers..."
|
||||||
SUDOERS_FILE="/etc/sudoers.d/webapp"
|
SUDOERS_FILE="/etc/sudoers.d/webapp"
|
||||||
cp "${REPO_DIR}/server/sudoers-webapp" "$SUDOERS_FILE"
|
SUDOERS_SRC="${REPO_DIR}/server/sudoers-webapp"
|
||||||
chmod 440 "$SUDOERS_FILE"
|
if ! visudo -cf "$SUDOERS_SRC"; then
|
||||||
|
echo "ERROR: Invalid sudoers syntax in $SUDOERS_SRC"
|
||||||
# Validate sudoers syntax
|
|
||||||
if ! visudo -cf "$SUDOERS_FILE"; then
|
|
||||||
echo "ERROR: Invalid sudoers syntax"
|
|
||||||
rm -f "$SUDOERS_FILE"
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
install -m 440 "$SUDOERS_SRC" "$SUDOERS_FILE"
|
||||||
|
|
||||||
# Install systemd service
|
# Install systemd service
|
||||||
echo "Installing systemd service..."
|
echo "Installing systemd service..."
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue