agnes-the-ai-analyst/webapp/app.py
ZdenekSrotyr b502bd8bdd refactor: delete old sync pipeline — 9,500 lines removed
Phase 5 cleanup: remove all code replaced by extract.duckdb architecture.

Deleted modules:
- src/config.py (653) — replaced by DuckDB table_registry
- src/parquet_manager.py (755) — replaced by DuckDB COPY TO
- src/data_sync.py (734) — replaced by SyncOrchestrator
- src/remote_query.py (636) — replaced by DuckDB BigQuery ATTACH
- src/table_registry.py (464) — replaced by DuckDB repository
- connectors/keboola/adapter.py (820) — replaced by extractor.py
- connectors/bigquery/adapter.py (665) — replaced by extractor.py
- connectors/bigquery/client.py (644) — replaced by DuckDB BQ extension

Updated all imports in webapp, catalog_export, enricher, router,
sync_settings_service, generate_sample_data. Kept keboola/client.py
as fallback (removed src.config dependency).

704 tests passing.
2026-03-31 07:50:37 +02:00

2084 lines
79 KiB
Python

"""
Flask application for Google SSO user management.
Allows users to:
1. Log in with Google (allowed domain only)
2. View their account status if they exist
3. Create a new analyst account with their SSH key
"""
import json
import logging
import os
from datetime import datetime
from pathlib import Path
import yaml
from flask import Flask, flash, jsonify, redirect, render_template, request, session, url_for
from .auth import admin_required, auth_bp, km_admin_required, login_required
from .config import Config
from .desktop_auth import require_desktop_auth
from .notification_images import images_bp
from .account_service import get_account_details
from .sync_settings_service import get_sync_settings, update_sync_settings, get_table_subscriptions, update_table_subscriptions
# Jira connector is optional - only loaded if configured
try:
from connectors.jira.webhook import jira_bp
JIRA_AVAILABLE = True
except ImportError:
JIRA_AVAILABLE = False
jira_bp = None
from .telegram_service import get_telegram_status, link_telegram, unlink_telegram
from .corporate_memory_service import (
get_knowledge,
get_stats as get_memory_stats,
get_user_stats as get_memory_user_stats,
get_user_votes,
vote as memory_vote,
is_km_admin,
get_governance_mode,
get_groups as get_memory_groups,
approve_item,
reject_item,
mandate_item,
revoke_item,
edit_item,
batch_action,
get_pending_queue,
get_audit_log,
migrate_existing_items,
VALID_STATUSES,
)
from .user_service import (
UserInfo,
check_user_exists,
create_user,
get_webapp_username,
is_username_available,
validate_ssh_key,
)
# Optional OpenMetadata catalog enrichment
try:
from connectors.openmetadata.enricher import CatalogEnricher
_CATALOG_ENRICHER_AVAILABLE = True
except ImportError:
_CATALOG_ENRICHER_AVAILABLE = False
CatalogEnricher = None
# Metric parser for modal detail rendering
try:
from webapp.utils.metric_parser import MetricParser
except ImportError:
MetricParser = None
# Shared OpenMetadata transformer (catalog -> dict)
try:
from connectors.openmetadata.transformer import (
has_tag as _transformer_has_tag,
metric_to_detail_dict as _transformer_metric_detail,
metric_to_display_dict as _transformer_metric_display,
)
_TRANSFORMER_AVAILABLE = True
except ImportError:
_TRANSFORMER_AVAILABLE = False
# Configure logging
# Root logger: INFO level, "timestamp - logger name - level - message" format.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Module-level logger used by every helper and route in this file.
logger = logging.getLogger(__name__)
# Global catalog enricher (initialized in create_app)
# Stays None when the optional OpenMetadata connector is missing or its setup fails.
_catalog_enricher = None
# Metric discovery settings read from the "openmetadata" section of the instance
# config in create_app; the data product takes precedence over the tag filter
# when metrics are loaded from the catalog.
_catalog_filter_tag = ""
_catalog_data_product = ""
def get_git_commit_hash() -> str:
    """Return the short git hash of HEAD for cache busting, or ``"dev"`` on failure.

    ``git rev-parse --short HEAD`` is run from the directory two levels above
    this file with a 2 second timeout. Any error (git missing, timeout,
    non-zero exit code) yields the ``"dev"`` placeholder.
    """
    try:
        import subprocess

        git_command = ['git', 'rev-parse', '--short', 'HEAD']
        completed = subprocess.run(
            git_command,
            cwd=Path(__file__).parent.parent,
            capture_output=True,
            text=True,
            timeout=2,
        )
    except Exception:
        return "dev"
    if completed.returncode != 0:
        return "dev"
    return completed.stdout.strip()
def create_app() -> Flask:
    """Application factory: build the Flask app and wire up everything it needs.

    Steps, in order: load ``Config``, log configuration warnings (skipped in
    debug mode), initialise the optional OpenMetadata catalog enricher,
    register the auth blueprint and every discovered auth provider, register
    the remaining blueprints and the main routes, and install the template
    context processors (``now``, ``config``, ``static_url``).

    Returns:
        The configured Flask application.
    """
    global _catalog_enricher, _catalog_filter_tag, _catalog_data_product

    app = Flask(__name__)
    app.config.from_object(Config)

    # Configuration problems are reported as warnings only, never raised.
    config_errors = Config.validate()
    if config_errors and not app.debug:
        for problem in config_errors:
            logger.warning(f"Configuration warning: {problem}")

    # Optional OpenMetadata catalog enricher; any failure disables it.
    if _CATALOG_ENRICHER_AVAILABLE:
        try:
            from config.loader import load_instance_config

            instance_config = load_instance_config()
            _catalog_enricher = CatalogEnricher(instance_config)
            if _catalog_enricher.enabled:
                logger.info("OpenMetadata catalog enricher initialized")
                # Remember how metrics should be discovered in the catalog.
                om_settings = instance_config.get("openmetadata", {})
                _catalog_data_product = om_settings.get("data_product", "").strip()
                _catalog_filter_tag = om_settings.get("filter_tag", "").strip()
                if _catalog_data_product:
                    logger.info(f"Catalog metric discovery: data product '{_catalog_data_product}'")
                elif _catalog_filter_tag:
                    logger.info(f"Catalog metric filter tag: {_catalog_filter_tag}")
        except Exception as e:
            logger.warning(f"Failed to initialize catalog enricher: {e}")
            _catalog_enricher = None

    # Core auth blueprint (login_required, login page, logout).
    app.register_blueprint(auth_bp)

    # Auth providers are discovered at runtime; each one contributes a blueprint.
    from auth import discover_providers

    for provider in discover_providers():
        provider.init_app(app)
        app.register_blueprint(provider.get_blueprint())

    # Remaining blueprints; the Jira webhook is only present when importable.
    app.register_blueprint(images_bp)
    if JIRA_AVAILABLE and jira_bp:
        app.register_blueprint(jira_bp)

    # Main application routes.
    register_routes(app)

    # Template helpers: current-time callable and the Config class.
    @app.context_processor
    def inject_now():
        return {"now": datetime.now}

    @app.context_processor
    def inject_config():
        return {"config": Config}

    # Template helper that appends the file mtime to static URLs for cache busting.
    @app.context_processor
    def inject_static_cache_buster():
        def static_url(filename: str) -> str:
            """Generate static URL with cache-busting query parameter."""
            asset_path = Path(app.static_folder) / filename
            if not asset_path.exists():
                return url_for("static", filename=filename)
            modified_at = int(asset_path.stat().st_mtime)
            return url_for("static", filename=filename, v=modified_at)

        return {"static_url": static_url}

    return app
# Unix domain socket of the notification bot; _send_welcome_message posts to it.
NOTIFY_SOCKET_PATH = "/data/notifications/bot.sock"
# Path to sync state (written by data sync process)
# Its parent directory is also where other metadata files are looked up
# (see _resolve_metadata_path).
SYNC_STATE_PATH = Path("/data/src_data/metadata/sync_state.json")
# Local development: fall back to dev_data/metadata/ relative to project root
_DEV_METADATA_PATH = Path(__file__).parent.parent / "dev_data" / "metadata"
def _build_activity_data() -> dict:
    """Return the data structure consumed by activity_center.html.

    Only empty-state defaults are produced for now: every counter is 0, every
    list is empty and textual indicators are ``"-"``. A fresh dict is built on
    each call, so callers may mutate the result freely.
    """
    executive_summary = {
        "active_today": 0,
        "active_this_week": 0,
        "teams_active": 0,
        "business_processes_identified": 0,
        "decisions_supported_this_week": 0,
        "avg_success_rate": 0,
        "adoption_trend": "-",
    }
    roadmap_summary = {
        "overall_score": 0,
        "optimized_count": 0,
        "mature_count": 0,
        "developing_count": 0,
        "total_potential_value": "-",
    }
    activity = {
        "executive_summary": executive_summary,
        "maturity_roadmap": {"summary": roadmap_summary, "categories": []},
    }
    # Sections that have no data source yet are plain empty lists.
    for section in ("business_processes", "teams", "activity_feed", "data_opportunities"):
        activity[section] = []
    return activity
def _resolve_metadata_path(filename: str) -> Path:
    """Locate a metadata file, preferring production over the dev checkout.

    The file is looked up next to ``SYNC_STATE_PATH`` first. When it does not
    exist there, the path under ``_DEV_METADATA_PATH`` is returned instead,
    whether or not that file exists, so callers must still check existence.
    """
    production_candidate = SYNC_STATE_PATH.parent / filename
    return production_candidate if production_candidate.exists() else _DEV_METADATA_PATH / filename
# Fallback stats (used when sync_state.json is unavailable)
# _load_data_stats returns a shallow copy of this dict when no table data can
# be read, and reuses individual entries ("columns", "unstructured_*",
# "highlights") when building stats from sync_state.json.
FALLBACK_DATA_STATS = {
"tables": 0,
"columns": 0,
"rows": 0,
# Pre-formatted row count shown in the UI; "-" means "unknown".
"rows_display": "-",
"size_mb": 0,
"size_display": "0 MB",
"uncompressed_mb": 0,
"unstructured_gb": 0,
"unstructured_display": "",
# No sync timestamp is known in the fallback case.
"last_updated": None,
"highlights": {},
}
def _generate_setup_instructions(username: str) -> str:
    """Generate clipboard-ready setup instructions from bootstrap.yaml.

    Reads the structured YAML at ``docs/setup/bootstrap.yaml``, substitutes
    ``{placeholder}`` tokens with values from ``Config`` and renders plain text
    that users paste into Claude Code.

    Args:
        username: Server username substituted for ``{username}``.

    Returns:
        The rendered instructions: optional header, optional "Connection
        details" section, numbered "Steps" and an optional hint for existing
        projects, joined with newlines.

    Raises:
        OSError: If bootstrap.yaml cannot be opened.
        yaml.YAMLError: If bootstrap.yaml is not valid YAML.
    """
    bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml")
    with open(bootstrap_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat that as "no content".
        bootstrap = yaml.safe_load(f) or {}

    webapp_url = f"https://{Config.SERVER_HOSTNAME}" if Config.SERVER_HOSTNAME else ""
    raw_placeholders = {
        "{username}": username,
        "{server_host}": Config.SERVER_HOST,
        "{server_hostname}": Config.SERVER_HOSTNAME,
        "{ssh_alias}": Config.SSH_ALIAS,
        "{ssh_key}": Config.SSH_KEY,
        "{project_dir}": Config.PROJECT_DIR,
        "{webapp_url}": webapp_url,
    }
    # str.replace() rejects non-string replacements, so unset (None) config
    # values become empty strings and everything else is stringified.
    placeholders = {
        key: "" if val is None else str(val)
        for key, val in raw_placeholders.items()
    }

    def sub(text) -> str:
        """Replace every known placeholder in *text* (non-strings are stringified)."""
        text = "" if text is None else str(text)
        for key, val in placeholders.items():
            text = text.replace(key, val)
        return text

    # Whole-word label substitutions: "server_host" -> "server IP", but
    # "server_hostname" stays "server hostname" instead of "server IPname".
    label_words = {"host": "IP", "url": "URL"}

    lines = []

    # Header
    if "header" in bootstrap:
        lines.append(sub(bootstrap["header"]).strip())
        lines.append("")

    # Connection details
    conn = bootstrap.get("connection") or {}
    if conn:
        lines.append("Connection details:")
        for key, val in conn.items():
            label = " ".join(label_words.get(word, word) for word in str(key).split("_"))
            display_val = sub(val)
            if key == "ssh_key":
                display_val += " (already generated)"
            lines.append(f"  {label}: {display_val}")
        lines.append("")

    # Steps
    lines.append("Steps:")
    lines.append("")
    for i, step in enumerate(bootstrap.get("steps") or [], 1):
        name = sub(step.get("name", ""))
        condition = step.get("condition", "")
        if condition:
            lines.append(f"{i}. {name} ({sub(condition)}):")
        else:
            lines.append(f"{i}. {name}:")

        # Description (free text instructions for Claude)
        desc = step.get("description", "")
        if desc:
            lines.extend(f"   {line}" for line in sub(desc).strip().splitlines())

        # Commands (executable shell commands)
        lines.extend(f"   {sub(cmd)}" for cmd in step.get("commands") or [])

        # Note
        note = step.get("note", "")
        if note:
            lines.append(f"   Note: {sub(note)}")
        lines.append("")

    # Existing project hint
    existing = bootstrap.get("existing_project") or {}
    msg = existing.get("message", "") if existing else ""
    if msg:
        lines.append("If this directory already has CLAUDE.md with 'AI Data Analyst':")
        lines.extend(f"  {line}" for line in sub(msg).strip().splitlines())

    return "\n".join(lines)
def _stats_rows_display(total_rows) -> str:
    """Format a row count compactly, e.g. ``"12M+"``, ``"340K+"`` or ``"87"``."""
    if total_rows >= 1_000_000:
        return f"{total_rows / 1_000_000:.0f}M+"
    if total_rows >= 1_000:
        return f"{total_rows / 1_000:.0f}K+"
    return str(total_rows)


def _stats_size_display(size_mb) -> str:
    """Format a size in whole megabytes as ``"N MB"``, or ``"N.N GB"`` from 1000 MB up."""
    if size_mb >= 1000:
        return f"{size_mb / 1000:.1f} GB"
    return f"{size_mb} MB"


def _stats_sum(tables_data: dict, field: str):
    """Sum one numeric field over all table entries, counting missing/null values as 0."""
    return sum((t.get(field) or 0) for t in tables_data.values() if isinstance(t, dict))


def _stats_fallback() -> dict:
    """Return an independent copy of FALLBACK_DATA_STATS with the full key set."""
    stats = dict(FALLBACK_DATA_STATS)
    # Copy the nested dict so callers cannot mutate the module-level constant.
    stats["highlights"] = dict(FALLBACK_DATA_STATS.get("highlights") or {})
    table_count = stats.get("tables", 0)
    stats.setdefault("total_tables", table_count)
    stats.setdefault("local_tables", table_count)
    stats.setdefault("remote_tables", 0)
    stats.setdefault("last_updated_iso", None)
    return stats


def _stats_last_updated(state: dict, tables_data: dict) -> tuple:
    """Return ``(display, iso)`` for the most recent sync time, or ``(None, None)``.

    The root-level ``last_updated`` wins; otherwise the maximum ``last_sync``
    over all tables is used. Values that are not ISO timestamps are shown
    truncated to 16 characters, with no ISO form.
    """
    last_updated = state.get("last_updated")
    if not last_updated:
        sync_times = [
            str(t["last_sync"])
            for t in tables_data.values()
            if isinstance(t, dict) and t.get("last_sync")
        ]
        if sync_times:
            last_updated = max(sync_times)
    if not last_updated:
        return None, None
    try:
        dt = datetime.fromisoformat(last_updated)
    except (ValueError, TypeError):
        # str() keeps the slice safe for non-string values (e.g. epoch ints).
        return str(last_updated)[:16], None
    # NOTE(review): the "UTC" suffix is appended without converting the
    # timestamp; this assumes sync timestamps are written in UTC - confirm.
    return dt.strftime("%b %d, %H:%M") + " UTC", dt.isoformat()


def _stats_count_query_modes(default_local: int) -> tuple:
    """Return ``(local_tables, remote_tables)`` from docs/data_description.md.

    Falls back to ``(default_local, 0)`` when the file is missing or cannot be
    parsed; this lookup is best-effort and never raises.
    """
    local_tables, remote_tables = default_local, 0
    try:
        desc_path = Path(os.path.dirname(__file__)) / ".." / "docs" / "data_description.md"
        if desc_path.exists():
            import re

            with open(desc_path, encoding="utf-8") as f:
                dd_content = f.read()
            all_dd_tables = []
            for block in re.findall(r'```yaml\s*\n(.*?)```', dd_content, re.DOTALL):
                parsed = yaml.safe_load(block)
                if parsed and "tables" in parsed:
                    all_dd_tables.extend(parsed["tables"])
            remote = sum(1 for t in all_dd_tables if t.get("query_mode") == "remote")
            local_tables, remote_tables = len(all_dd_tables) - remote, remote
    except Exception as e:
        logger.warning(f"Could not read query modes from data_description.md: {e}")
    return local_tables, remote_tables


def _load_data_stats() -> dict:
    """Load aggregate data stats from sync_state.json, with hardcoded fallback.

    Sources are tried in order:

    1. ``sync_state.json`` (either ``{"tables": {...}}`` or the flat format
       with table ids at the top level),
    2. ``profiles.json`` (covers sample data / no-sync setups),
    3. ``FALLBACK_DATA_STATS``.

    Returns:
        A dict with the keys ``tables``, ``total_tables``, ``local_tables``,
        ``remote_tables``, ``columns``, ``rows``, ``rows_display``,
        ``size_mb``, ``size_display``, ``uncompressed_mb``,
        ``unstructured_gb``, ``unstructured_display``, ``last_updated``,
        ``last_updated_iso`` and ``highlights``. Every source yields the same
        key set. Never raises; read errors are logged as warnings.
    """
    try:
        sync_path = _resolve_metadata_path("sync_state.json")
        if sync_path.exists():
            with open(sync_path, encoding="utf-8") as f:
                state = json.load(f)
            tables_data = state.get("tables", {})
            # Support flat format (table_id at top level, no "tables" wrapper)
            if not tables_data and any(isinstance(v, dict) and "rows" in v for v in state.values()):
                tables_data = {k: v for k, v in state.items() if isinstance(v, dict) and "rows" in v}
            if not tables_data:
                return _stats_fallback()

            total_tables = len(tables_data)
            total_columns = _stats_sum(tables_data, "columns")
            total_rows = _stats_sum(tables_data, "rows")
            size_mb = round(_stats_sum(tables_data, "file_size_mb"))
            total_uncompressed_mb = _stats_sum(tables_data, "uncompressed_mb")

            last_updated_display, last_updated_iso = _stats_last_updated(state, tables_data)
            # Count tables by query_mode from data_description.md
            local_tables, remote_tables = _stats_count_query_modes(total_tables)

            return {
                "tables": total_tables,
                "total_tables": local_tables + remote_tables,
                "local_tables": local_tables,
                "remote_tables": remote_tables,
                "columns": total_columns if total_columns > 0 else FALLBACK_DATA_STATS["columns"],
                "rows": total_rows,
                "rows_display": _stats_rows_display(total_rows),
                "size_mb": size_mb,
                "size_display": _stats_size_display(size_mb),
                "uncompressed_mb": round(total_uncompressed_mb),
                "unstructured_gb": FALLBACK_DATA_STATS["unstructured_gb"],
                "unstructured_display": FALLBACK_DATA_STATS["unstructured_display"],
                "last_updated": last_updated_display,
                "last_updated_iso": last_updated_iso,
                "highlights": dict(FALLBACK_DATA_STATS["highlights"]),
            }
    except Exception as e:
        logger.warning(f"Could not load data stats from sync_state.json: {e}")

    # Fallback: derive stats from profiles.json (covers sample data / no-sync setups)
    try:
        profiles_path = _resolve_metadata_path("profiles.json")
        if profiles_path.exists():
            with open(profiles_path, encoding="utf-8") as f:
                profiles = json.load(f)
            tables_data = profiles.get("tables", {})
            if tables_data:
                total_tables = len(tables_data)
                total_rows = _stats_sum(tables_data, "row_count")
                size_mb = round(_stats_sum(tables_data, "file_size_mb"))
                return {
                    "tables": total_tables,
                    # profiles.json carries no query_mode, so all tables count as local.
                    "total_tables": total_tables,
                    "local_tables": total_tables,
                    "remote_tables": 0,
                    "columns": _stats_sum(tables_data, "column_count"),
                    "rows": total_rows,
                    "rows_display": _stats_rows_display(total_rows),
                    "size_mb": size_mb,
                    "size_display": _stats_size_display(size_mb),
                    "uncompressed_mb": 0,
                    "unstructured_gb": 0,
                    "unstructured_display": "",
                    "last_updated": None,
                    "last_updated_iso": None,
                    "highlights": {},
                }
    except Exception as e:
        logger.warning(f"Could not load data stats from profiles.json: {e}")

    return _stats_fallback()
def _catalog_rows_display(rows, query_mode: str, table: dict) -> tuple:
    """Return ``(rows_display, rows_large)`` for one catalog table.

    Remote tables without synced rows show the configured ``volume.rows_per_day``
    estimate (``"~3M/day"``, ``"~250K/day"``, ``"~800/day"``) or ``"Live"`` when
    no estimate exists. All other tables show their synced row count.
    """
    if query_mode == "remote" and rows == 0:
        volume = table.get("volume") or {}
        est_rows = volume.get("rows_per_day", 0)
        if not est_rows:
            return "Live", False
        if est_rows >= 1_000_000:
            return f"~{est_rows / 1_000_000:.0f}M/day", True
        # Below one million the "M" unit would round to "~0M/day".
        if est_rows >= 1_000:
            return f"~{est_rows / 1_000:.0f}K/day", True
        return f"~{est_rows}/day", True
    # Format rows for local/hybrid tables
    if rows >= 1_000_000:
        return f"{rows / 1_000_000:.1f}M", True
    if rows >= 1_000:
        return f"{rows:,}", False
    return (str(rows) if rows > 0 else "-"), False


def _load_catalog_data() -> list:
    """Load catalog data by merging data_description.md (YAML) with sync_state.json.

    Table definitions come from every fenced ``yaml`` block of
    ``docs/data_description.md``; row counts and sync times come from
    ``sync_state.json``; category labels, icons and ordering come from the
    instance config (``catalog.categories`` / ``catalog.order``). When the
    config yields no ordering at all, the folders discovered in the YAML are
    used in first-seen order. Tables are optionally enriched with
    OpenMetadata data when the catalog enricher is set up.

    Returns:
        List of category dicts:
        ``[{name, icon_type, count, tables: [{name, description, rows,
        rows_display, rows_large, query_mode, last_sync, ...}]}]``.
        Never raises; on error a warning is logged and whatever was built so
        far (usually an empty list) is returned.
    """
    import re
    from types import SimpleNamespace

    catalog = []
    try:
        # Parse data_description.md YAML block
        desc_path = Path(os.path.dirname(__file__)) / ".." / "docs" / "data_description.md"
        if not desc_path.exists():
            return catalog
        with open(desc_path, encoding="utf-8") as f:
            content = f.read()

        # Extract ALL YAML blocks between ```yaml and ```
        yaml_blocks = re.findall(r'```yaml\s*\n(.*?)```', content, re.DOTALL)
        if not yaml_blocks:
            return catalog

        # Merge tables and folder_mappings from all blocks
        tables = []
        folder_mapping = {}
        for block in yaml_blocks:
            parsed = yaml.safe_load(block)
            if not isinstance(parsed, dict):
                continue
            tables.extend(parsed.get("tables") or [])
            folder_mapping.update(parsed.get("folder_mapping") or {})
        if not tables:
            return catalog

        # Load sync state for row counts and timestamps (best-effort)
        sync_data = {}
        try:
            sync_path = _resolve_metadata_path("sync_state.json")
            if sync_path.exists():
                with open(sync_path, encoding="utf-8") as f:
                    state = json.load(f)
                sync_data = state.get("tables", {})
                # Support flat format (table_id at top level, no "tables" wrapper)
                if not sync_data and any(isinstance(v, dict) and "rows" in v for v in state.values()):
                    sync_data = {k: v for k, v in state.items() if isinstance(v, dict) and "rows" in v}
        except Exception as e:
            logger.warning(f"Could not read sync state for catalog: {e}")
            sync_data = {}

        # Load category mappings from instance config, with empty fallback
        get_instance_value = None
        catalog_config = None
        try:
            from config.loader import load_instance_config, get_instance_value

            catalog_config = load_instance_config()
            catalog_categories = get_instance_value(catalog_config, "catalog", "categories", default={})
            folder_to_category = {k: v.get("label", k) for k, v in catalog_categories.items()}
            folder_to_icon = {k: v.get("icon", k) for k, v in catalog_categories.items()}
        except Exception:
            folder_to_category = {}
            folder_to_icon = {}

        # Category order: explicit config value, else the configured categories.
        category_order = list(folder_to_category)
        if get_instance_value is not None and catalog_config is not None:
            try:
                category_order = get_instance_value(
                    catalog_config, "catalog", "order", default=list(folder_to_category)
                ) or list(folder_to_category)
            except Exception:
                category_order = list(folder_to_category)

        # Group tables by category (folder)
        categories = {}
        for table in tables:
            if not isinstance(table, dict):
                continue
            table_id = table.get("id") or ""
            # Extract bucket from table_id (e.g., "in.c-crm.company" -> "in.c-crm")
            bucket_id, separator, _ = table_id.rpartition(".")
            if not separator:
                bucket_id = ""
            folder = folder_mapping.get(bucket_id, "other")

            # Get sync info and query mode
            query_mode = table.get("query_mode", "local")
            sync_info = sync_data.get(table_id) or {}
            # A null row count in the sync state counts as "no rows yet".
            rows = sync_info.get("rows") or 0
            rows_display, rows_large = _catalog_rows_display(rows, query_mode, table)

            # Parse last_sync timestamp for display
            last_sync_display = None
            last_sync = sync_info.get("last_sync")
            if last_sync:
                try:
                    last_sync_display = datetime.fromisoformat(last_sync).strftime("%b %d, %H:%M") + " UTC"
                except (ValueError, TypeError):
                    last_sync_display = None

            table_info = {
                "name": table.get("name", ""),
                "description": table.get("description", ""),
                "rows": rows,
                "rows_display": rows_display,
                "rows_large": rows_large,
                "query_mode": query_mode,
                "last_sync": last_sync_display,
            }

            # Enrich with catalog metadata (OpenMetadata)
            if _catalog_enricher:
                try:
                    # Create lightweight config for enrichment (enricher uses .id, .name, .catalog_fqn)
                    table_config = SimpleNamespace(
                        id=table_id,
                        name=table.get("name", ""),
                        catalog_fqn=table.get("catalog_fqn"),
                    )
                    catalog_data = _catalog_enricher.enrich_table(table_config)
                    if catalog_data:
                        table_info["catalog_tags"] = catalog_data.tags
                        table_info["catalog_tier"] = catalog_data.tier
                        table_info["catalog_owners"] = catalog_data.owners
                        table_info["catalog_url"] = catalog_data.catalog_url
                        # Override description if catalog has one
                        if catalog_data.description:
                            table_info["description"] = catalog_data.description
                except Exception as e:
                    logger.warning(f"Error enriching {table.get('name')}: {e}")

            categories.setdefault(folder, []).append(table_info)

        # Build ordered catalog (from instance config or use discovered folders)
        if not category_order:
            category_order = list(categories)
        for folder in category_order:
            if folder in categories:
                catalog.append({
                    "name": folder_to_category.get(folder, folder),
                    "icon_type": folder_to_icon.get(folder, folder),
                    "tables": categories[folder],
                    "count": len(categories[folder]),
                })
    except Exception as e:
        logger.warning(f"Could not load catalog data: {e}")
    return catalog
# Category metadata for Business Metrics card
# Keys are category identifiers: the metrics sub-directory name for YAML
# metrics, or the category parsed from catalog tags. 'order' sorts the known
# categories ascending; categories not listed here are appended afterwards
# with a title-cased label and their own key as the 'css' value.
METRIC_CATEGORY_META = {
'revenue': {'label': 'Revenue', 'css': 'sales', 'order': 1},
'customers': {'label': 'Customers', 'css': 'hr', 'order': 2},
'marketing': {'label': 'Marketing', 'css': 'telemetry', 'order': 3},
'support': {'label': 'Support', 'css': 'support', 'order': 4},
}
def _load_metrics_data():
    """Return business metric definitions grouped by category for the catalog UI.

    Metrics from the OpenMetadata catalog win when any are available.
    Otherwise every ``<category>/<metric>.yml`` file below the metrics
    directory is read (``/data/docs/metrics`` in production, ``docs/metrics``
    in a local checkout).

    Returns:
        List of category dicts, known categories first in METRIC_CATEGORY_META
        order, unknown ones afterwards sorted by key:
        [{'key': 'revenue', 'label': 'Revenue', 'css': '...', 'metrics': [...]}, ...]
        An empty list when no metrics directory exists.
    """
    # Catalog first (Phase 2); YAML files are only the fallback.
    from_catalog = _load_metrics_from_catalog()
    if from_catalog:
        return from_catalog

    # Production location first, then the local development checkout.
    metrics_dir = Path("/data/docs/metrics")
    if not metrics_dir.exists():
        metrics_dir = Path(__file__).parent.parent / "docs" / "metrics"
        if not metrics_dir.exists():
            return []

    grouped = {}
    for metric_file in sorted(metrics_dir.glob("*/*.yml")):
        try:
            with open(metric_file, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
            # A file holds either a single mapping or a list whose first item is used.
            if isinstance(loaded, list) and loaded:
                definition = loaded[0]
            elif isinstance(loaded, dict):
                definition = loaded
            else:
                continue
            category_key = metric_file.parent.name
            grouped.setdefault(category_key, []).append({
                'name': definition.get('name', metric_file.stem),
                'display_name': definition.get('display_name', metric_file.stem),
                'description': definition.get('description', ''),
                'grain': definition.get('grain', ''),
                'path': f"{category_key}/{metric_file.name}",
            })
        except Exception as e:
            logger.warning(f"Could not parse metric {metric_file}: {e}")

    # Known categories in their configured order ...
    known_in_order = sorted(METRIC_CATEGORY_META.items(), key=lambda item: item[1]['order'])
    ordered = [
        {
            'key': category_key,
            'label': meta['label'],
            'css': meta['css'],
            'metrics': grouped[category_key],
        }
        for category_key, meta in known_in_order
        if category_key in grouped
    ]
    # ... followed by any categories without metadata, alphabetically.
    ordered.extend(
        {
            'key': category_key,
            'label': category_key.replace('_', ' ').title(),
            'css': category_key,
            'metrics': category_metrics,
        }
        for category_key, category_metrics in sorted(grouped.items())
        if category_key not in METRIC_CATEGORY_META
    )
    return ordered
def _parse_om_metric(raw_metric: dict) -> dict:
    """
    Parse raw OpenMetadata metric dict into format for metric list display.

    Delegates to the shared transformer module when it is importable, so the
    webapp and the catalog export parse metrics identically. Otherwise an
    inline fallback is used, which tolerates null or missing fields.

    Args:
        raw_metric: Raw metric dict from OpenMetadata

    Returns:
        Dict with keys: name, display_name, description, grain, category, path.
        In the fallback, ``category`` comes from the last ``MetricCategory.*``
        or ``Category.*`` tag and defaults to ``"general"``; ``grain`` is
        lower-cased; ``path`` is ``"catalog:<fullyQualifiedName>"``.
    """
    if _TRANSFORMER_AVAILABLE:
        return _transformer_metric_display(raw_metric)

    # Inline fallback if transformer module not available
    # "or" guards cover keys that are present but null in the API payload.
    fqn = raw_metric.get("fullyQualifiedName") or ""
    name = raw_metric.get("name") or ""
    display_name = raw_metric.get("displayName") or name
    description = raw_metric.get("description") or ""
    grain = raw_metric.get("granularity") or ""

    category = "general"
    for tag in raw_metric.get("tags") or []:
        if not isinstance(tag, dict):
            continue
        tag_fqn = tag.get("tagFQN") or ""
        if tag_fqn.startswith(("MetricCategory.", "Category.")):
            # Everything after the first dot is the category key.
            category = tag_fqn.split(".", 1)[1]

    return {
        "name": name,
        "display_name": display_name,
        "description": description,
        "grain": grain.lower(),
        "category": category,
        "path": f"catalog:{fqn}",
    }
def _load_metrics_from_catalog() -> list:
    """
    Fetch business metrics from the OpenMetadata catalog, grouped by category.

    Metrics are selected by the configured data product when one is set;
    otherwise all metrics are fetched and, if a filter tag is configured and
    the transformer module is available, narrowed to metrics carrying that tag.

    Returns:
        Same structure as _load_metrics_data(), for UI compatibility:
        [
            {'key': 'revenue', 'label': '...', 'css': '...', 'metrics': [...]},
            {'key': 'marketing', 'label': '...', 'css': '...', 'metrics': [...]}
        ]
        An empty list if the catalog is disabled, has no metrics, or fails.
    """
    enricher = _catalog_enricher
    if not enricher or not enricher.enabled:
        return []

    try:
        # Data product filter takes precedence over the tag filter.
        if _catalog_data_product:
            fetched = enricher.get_metrics_by_data_product(_catalog_data_product)
        else:
            fetched = enricher.get_metrics()
            if _catalog_filter_tag and _TRANSFORMER_AVAILABLE:
                fetched = [
                    candidate for candidate in fetched
                    if _transformer_has_tag(candidate.get("tags", []), _catalog_filter_tag)
                ]

        if not fetched:
            logger.debug("No metrics found in catalog")
            return []

        # Parse every metric; one broken metric must not hide the others.
        by_category = {}
        for raw in fetched:
            try:
                parsed = _parse_om_metric(raw)
                by_category.setdefault(parsed["category"], []).append(parsed)
            except Exception as e:
                logger.warning(f"Failed to parse metric {raw.get('name', '?')}: {e}")

        # Known categories in METRIC_CATEGORY_META order ...
        known_in_order = sorted(METRIC_CATEGORY_META.items(), key=lambda item: item[1]["order"])
        grouped = [
            {
                "key": category_key,
                "label": meta["label"],
                "css": meta["css"],
                "metrics": by_category[category_key],
            }
            for category_key, meta in known_in_order
            if category_key in by_category
        ]
        # ... then the categories without metadata, alphabetically.
        grouped.extend(
            {
                "key": category_key,
                "label": category_key.replace("_", " ").title(),
                "css": category_key,
                "metrics": category_metrics,
            }
            for category_key, category_metrics in sorted(by_category.items())
            if category_key not in METRIC_CATEGORY_META
        )

        logger.info(f"Loaded {sum(len(c['metrics']) for c in grouped)} metrics from catalog")
        return grouped
    except Exception as e:
        logger.warning(f"Failed to load metrics from catalog: {e}")
        return []
def _build_om_metric_detail(raw_metric: dict) -> dict:
    """
    Convert raw OpenMetadata metric into MetricParser-compatible JSON for modal.

    Delegates to the shared transformer module when it is importable.
    Otherwise an inline fallback builds the structure; it tolerates null
    fields and recognises the same category tag prefixes as _parse_om_metric
    (``MetricCategory.*`` and ``Category.*``), so the list view and the detail
    modal agree on a metric's category.

    Args:
        raw_metric: Raw metric dict from OpenMetadata

    Returns:
        Dict matching MetricParser._structure_metric_data() format
    """
    category_colors = MetricParser.CATEGORY_COLORS if MetricParser else {}
    if _TRANSFORMER_AVAILABLE:
        return _transformer_metric_detail(raw_metric, category_colors=category_colors)

    # Inline fallback if transformer module not available
    name = raw_metric.get("name") or ""
    display_name = raw_metric.get("displayName") or name
    description = (raw_metric.get("description") or "").strip()

    # The expression is either nested in metricExpression (dict or plain
    # string) or, as a fallback, a top-level field (OpenMetadata format varies).
    expression = ""
    metric_expr = raw_metric.get("metricExpression", {})
    if isinstance(metric_expr, dict):
        expression = metric_expr.get("expression", "") or ""
    elif isinstance(metric_expr, str):
        expression = metric_expr
    if not expression:
        expression = raw_metric.get("expression", "") or ""

    metric_type = raw_metric.get("metricType", "") or ""
    unit = raw_metric.get("unitOfMeasurement", "") or ""
    grain = raw_metric.get("granularity", "") or ""

    category = "general"
    dimensions = []
    for tag in raw_metric.get("tags") or []:
        if not isinstance(tag, dict):
            continue
        tag_fqn = tag.get("tagFQN") or ""
        if tag_fqn.startswith(("MetricCategory.", "Category.")):
            category = tag_fqn.split(".", 1)[1]
        elif tag_fqn.startswith("Dimension."):
            dimensions.append(tag_fqn.split(".", 1)[1])

    if expression:
        sql_examples = {
            "expression": {"title": "Metric Expression", "query": expression, "complexity": "simple"}
        }
    else:
        sql_examples = {}

    return {
        "name": name,
        "display_name": display_name,
        "category": category,
        # Neutral grey when the category has no configured colour.
        "category_color": category_colors.get(category, "#6B7280"),
        "metadata": {"type": metric_type, "unit": unit, "grain": grain, "time_column": ""},
        "overview": {"description": description, "description_html": description, "key_insights": []},
        "validation": None,
        "dimensions": dimensions,
        "notes": {"all": [], "key_insights": []},
        "sql_examples": sql_examples,
        "technical": {"table": "", "expression": expression, "synonyms": [], "data_sources": []},
        "special_sections": {},
    }
def _send_welcome_message(username: str) -> None:
    """Post a Markdown welcome message for *username* to the notification bot.

    The request goes over the Unix domain socket at NOTIFY_SOCKET_PATH with a
    10 second timeout. Delivery is best-effort: any failure is logged as a
    warning and never raised to the caller.
    """
    welcome_text = (
        f"Account linked!\n\n"
        f"Your server login: *{username}*\n"
        f"Notifications dir: `~/user/notifications/`\n\n"
        f"To create notification scripts, ask your local AI assistant "
        f"(Claude Code). It knows how to build them for you.\n\n"
        f"You will receive alerts from your scripts here."
    )
    payload = {
        "user": username,
        "text": welcome_text,
        "parse_mode": "Markdown",
    }
    try:
        import httpx

        socket_transport = httpx.HTTPTransport(uds=NOTIFY_SOCKET_PATH)
        with httpx.Client(transport=socket_transport, timeout=10) as bot_client:
            bot_client.post("http://localhost/send", json=payload)
    except Exception as e:
        logger.warning(f"Failed to send welcome message to {username}: {e}")
def register_routes(app: Flask) -> None:
"""Register main application routes."""
@app.route("/")
def index():
"""Redirect to dashboard or login."""
if "user" in session:
return redirect(url_for("dashboard"))
return redirect(url_for("auth.login"))
@app.route("/dashboard")
@login_required
def dashboard():
"""Show user dashboard with account info or registration form."""
user = session.get("user", {})
email = user.get("email", "")
username = get_webapp_username(email)
# Check if user exists on the system
user_info = check_user_exists(username)
# Check if username is available (for new registrations)
username_available, username_error = is_username_available(username)
# Generate setup instructions from bootstrap.yaml
setup_instructions = ""
try:
setup_instructions = _generate_setup_instructions(username)
except Exception as e:
logger.warning(f"Could not generate setup instructions: {e}")
# Get Telegram link status
telegram_status = get_telegram_status(username)
# Get desktop app link status
from .desktop_auth import get_desktop_status
desktop_status = get_desktop_status(username)
# Load data stats
data_stats = _load_data_stats()
catalog_data = _load_catalog_data()
# Load sync settings (for existing users)
sync_settings = get_sync_settings(username) if user_info.exists else None
# Add subscription status to catalog tables
if user_info.exists:
subs = get_table_subscriptions(username)
table_mode = subs.get("table_mode", "all")
table_subs = subs.get("tables", {})
for cat in catalog_data:
for table in cat.get("tables", []):
if table_mode == "all":
table["subscribed"] = True
else:
table["subscribed"] = table_subs.get(table["name"], False)
# Gather account widget details (notification scripts, cron, last sync)
account_details = get_account_details(username) if user_info.exists else None
# Activity Center summary for dashboard widget (empty fallback)
activity_summary = {}
# Load business metrics for dashboard widget
metrics_data = _load_metrics_data()
return render_template(
"dashboard.html",
user=user,
username=username,
user_info=user_info,
username_available=username_available,
username_error=username_error,
server_host=Config.SERVER_HOST,
server_hostname=Config.SERVER_HOSTNAME,
ssh_alias=Config.SSH_ALIAS,
ssh_key=Config.SSH_KEY,
project_dir=Config.PROJECT_DIR,
setup_instructions=setup_instructions,
telegram_status=telegram_status,
desktop_status=desktop_status,
data_stats=data_stats,
catalog_data=catalog_data,
sync_settings=sync_settings,
account_details=account_details,
activity_summary=activity_summary,
metrics_data=metrics_data,
)
@app.route("/catalog")
@login_required
def catalog():
"""Data catalog page."""
user = session.get("user", {})
email = user.get("email", "")
username = get_webapp_username(email)
data_stats = _load_data_stats()
catalog_data = _load_catalog_data()
sync_settings = get_sync_settings(username)
# Add subscription status to catalog tables
subs = get_table_subscriptions(username)
table_mode = subs.get("table_mode", "all")
table_subs = subs.get("tables", {})
for cat in catalog_data:
for table in cat.get("tables", []):
if table_mode == "all":
table["subscribed"] = True
else:
table["subscribed"] = table_subs.get(table["name"], False)
metrics_data = _load_metrics_data()
return render_template(
"catalog.html",
data_stats=data_stats,
catalog_data=catalog_data,
sync_settings=sync_settings,
metrics_data=metrics_data,
git_version=get_git_commit_hash(),
)
@app.route("/api/catalog/profile/<table_name>")
@login_required
def catalog_profile(table_name):
    """Serve the profiler entry of one table as JSON, with optional catalog enrichment.

    Responses:
        200 with the table's profile dict (plus a ``catalog`` key when the
            catalog enricher is enabled and returned data for the table);
        404 when ``profiles.json`` is missing or has no entry for the table;
        500 when reading or parsing the profile data fails.

    Enrichment is best effort: any error raised while enriching is logged as
    a warning and the plain profile is returned.
    """
    profiles_path = _resolve_metadata_path("profiles.json")

    def _merge_catalog_metadata(profile):
        """Add catalog metadata to ``profile`` in place if the table is declared in data_description.md."""
        from types import SimpleNamespace
        from config.loader import load_instance_config

        # The return value is not used here; the call is kept so that a
        # failure to load the instance config still aborts enrichment.
        load_instance_config()

        description_file = Path(os.path.dirname(__file__)) / ".." / "docs" / "data_description.md"
        if not description_file.exists():
            return
        with open(description_file) as handle:
            markdown = handle.read()

        import re
        fenced_yaml = re.search(r'```yaml\s*\n(.*?)```', markdown, re.DOTALL)
        if not fenced_yaml:
            return

        import yaml
        declared = yaml.safe_load(fenced_yaml.group(1))
        if not declared or "tables" not in declared:
            return

        for definition in declared["tables"]:
            if definition.get("name") != table_name:
                continue
            # Lightweight config (enricher uses .id, .name, .catalog_fqn)
            lightweight_config = SimpleNamespace(
                id=definition.get("id", ""),
                name=definition.get("name", ""),
                catalog_fqn=definition.get("catalog_fqn"),
            )
            enrichment = _catalog_enricher.enrich_table(lightweight_config)
            if enrichment:
                profile["catalog"] = {
                    "description": enrichment.description,
                    "tags": enrichment.tags,
                    "tier": enrichment.tier,
                    "owners": enrichment.owners,
                    "url": enrichment.catalog_url,
                }
                # A non-empty catalog description replaces the profiler's one.
                if enrichment.description:
                    profile["description"] = enrichment.description
            # Only the first definition with a matching name is considered.
            return

    try:
        if not profiles_path.exists():
            return jsonify({"error": "Profiler data not available yet"}), 404
        with open(profiles_path) as handle:
            all_profiles = json.load(handle)
        table_profile = all_profiles.get("tables", {}).get(table_name)
        if not table_profile:
            return jsonify({"error": f"No profile for table '{table_name}'"}), 404

        if _catalog_enricher and _catalog_enricher.enabled:
            try:
                _merge_catalog_metadata(table_profile)
            except Exception as e:
                logger.warning(f"Error enriching profile for {table_name}: {e}")
        return jsonify(table_profile)
    except Exception as e:
        logger.error(f"Error loading profile for {table_name}: {e}")
        return jsonify({"error": "Failed to load profile data"}), 500
@app.route("/api/metrics/<path:metric_path>")
@login_required
def api_metric(metric_path):
    """Serve one metric definition as structured JSON.

    Args:
        metric_path: Relative path of the form ``category/file.yml`` (lowercase
            letters and underscores only).

    Returns:
        The parsed metric as JSON; 400 for a malformed path, 404 when the file
        does not exist inside the metrics directory, 500 when parsing fails.
    """
    import re

    # Whole-string match on purpose: with re.match and a trailing "$" the
    # pattern would also accept "category/file.yml\n", because "$" matches
    # just before a final newline.
    if not re.fullmatch(r"[a-z_]+/[a-z_]+\.yml", metric_path):
        return jsonify({"error": "Invalid metric path"}), 400

    # Try production path first, fall back to local dev path
    docs_dir = Path("/data/docs/metrics")
    if not docs_dir.exists():
        # Local development: use docs/metrics relative to project root
        docs_dir = Path(__file__).parent.parent / "docs" / "metrics"
    file_path = docs_dir / metric_path

    # Security check: ensure path stays within docs_dir (also after symlink resolution)
    try:
        if not file_path.is_file() or not file_path.resolve().is_relative_to(
            docs_dir.resolve()
        ):
            return jsonify({"error": "Metric file not found"}), 404
    except (ValueError, OSError):
        return jsonify({"error": "Invalid path"}), 400

    # Parse metric YAML and return structured JSON
    try:
        from webapp.utils.metric_parser import MetricParser

        parser = MetricParser(docs_dir)
        metric_data = parser.parse_metric(metric_path)
        return jsonify(metric_data)
    except Exception as e:
        logger.error(f"Error parsing metric {metric_path}: {e}")
        return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500
@app.route("/api/catalog/metrics/<path:metric_fqn>")
@login_required
def api_catalog_metric(metric_fqn):
    """
    API endpoint to serve metric from OpenMetadata catalog as structured JSON.

    Args:
        metric_fqn: Fully qualified metric name taken from the URL path. It
            arrives already percent-decoded (a request for
            ``Active2%20Customers`` is received here as ``Active2 Customers``).

    Returns:
        JSON matching MetricParser format for modal rendering; 503 when the
        catalog enricher is missing or disabled, 500 when the lookup or the
        conversion fails.
    """
    global _catalog_enricher
    if not _catalog_enricher or not _catalog_enricher.enabled:
        return jsonify({"error": "Catalog not available"}), 503
    try:
        # Use the path parameter as-is. Decoding it a second time would corrupt
        # any name that legitimately contains a literal "%XX" sequence.
        raw = _catalog_enricher._client.get_metric_by_fqn(metric_fqn)
        # Convert to MetricParser format
        metric_data = _build_om_metric_detail(raw)
        return jsonify(metric_data)
    except Exception as e:
        logger.error(f"Error fetching catalog metric {metric_fqn}: {e}")
        return jsonify({"error": f"Failed to fetch metric: {str(e)}"}), 500
@app.route("/docs/metrics/<path:metric_path>")
@login_required
def serve_metric(metric_path):
    """Serve metric definition YAML files (legacy endpoint for backward compatibility).

    Args:
        metric_path: Relative path of the form ``category/file.yml`` (lowercase
            letters and underscores only).

    Returns:
        The file contents as ``text/plain``, or the rendered error page with
        status 400 (malformed path) or 404 (no such file under the metrics
        directory).
    """
    import re

    # Whole-string match on purpose: with re.match and a trailing "$" the
    # pattern would also accept "category/file.yml\n", because "$" matches
    # just before a final newline.
    if not re.fullmatch(r"[a-z_]+/[a-z_]+\.yml", metric_path):
        return render_template("error.html", error="Invalid metric path", code=400), 400

    docs_dir = Path("/data/docs/metrics")
    file_path = docs_dir / metric_path

    # Security check: ensure path stays within docs_dir (also after symlink resolution)
    try:
        if not file_path.is_file() or not file_path.resolve().is_relative_to(
            docs_dir.resolve()
        ):
            return (
                render_template("error.html", error="Metric file not found", code=404),
                404,
            )
    except (ValueError, OSError):
        return render_template("error.html", error="Invalid path", code=400), 400

    from flask import send_file as flask_send_file

    return flask_send_file(file_path, mimetype="text/plain")
@app.route("/register", methods=["POST"])
@login_required
def register():
    """Create an analyst account for the logged-in user from the submitted SSH key.

    Always redirects back to the dashboard; the outcome is reported through a
    flash message.
    """
    email = session.get("user", {}).get("email", "")
    username = get_webapp_username(email)
    dashboard_url_name = "dashboard"

    # Nothing to do when the account is already there.
    if check_user_exists(username).exists:
        flash("Your account already exists.", "info")
        return redirect(url_for(dashboard_url_name))

    # Pasted keys often carry line breaks from terminal wrapping, so every run
    # of whitespace (newlines, tabs, repeated spaces) is collapsed to one space.
    submitted_key = request.form.get("ssh_key", "")
    ssh_key = " ".join(submitted_key.split())
    key_ok, key_error = validate_ssh_key(ssh_key)
    if not key_ok:
        flash(key_error, "error")
        return redirect(url_for(dashboard_url_name))

    created, outcome = create_user(username, ssh_key)
    if not created:
        flash(outcome, "error")
        logger.error(f"Failed to create account for {email}: {outcome}")
        return redirect(url_for(dashboard_url_name))

    flash(outcome, "success")
    logger.info(f"Account created for {email} (username: {username})")
    return redirect(url_for(dashboard_url_name))
@app.route("/api/telegram/verify", methods=["POST"])
@login_required
def telegram_verify():
    """Verify a Telegram verification code and link the account.

    Expects a JSON object with a ``code`` field. The code may be sent as a
    string or as a JSON integer; any other type, a missing field, or a body
    that is not a JSON object is treated as "no code" and answered with 400
    instead of crashing the request.

    Returns:
        ``{"ok": True, "message": ...}`` on success, otherwise
        ``{"error": ...}`` with status 400.
    """
    user = session.get("user", {})
    email = user.get("email", "")
    username = get_webapp_username(email)

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_code = data.get("code", "")
    # Accept numeric codes sent as a JSON number (bool is excluded explicitly
    # because it is a subclass of int).
    if isinstance(raw_code, int) and not isinstance(raw_code, bool):
        raw_code = str(raw_code)
    code = raw_code.strip() if isinstance(raw_code, str) else ""
    if not code:
        return jsonify({"error": "Verification code is required"}), 400

    success, message = link_telegram(username, code)
    if success:
        logger.info(f"Telegram linked for {username}")
        # Send welcome message via bot socket
        _send_welcome_message(username)
        return jsonify({"ok": True, "message": message})
    return jsonify({"error": message}), 400
@app.route("/api/telegram/unlink", methods=["POST"])
@login_required
def telegram_unlink():
    """Remove the Telegram link of the logged-in user's account.

    Returns ``{"ok": True, "message": ...}`` on success, otherwise
    ``{"error": ...}`` with status 400.
    """
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)

    unlinked, detail = unlink_telegram(username)
    if not unlinked:
        return jsonify({"error": detail}), 400

    logger.info(f"Telegram unlinked for {username}")
    return jsonify({"ok": True, "message": detail})
@app.route("/api/telegram/status")
@login_required
def telegram_status():
    """Return the Telegram link status of the logged-in user as JSON."""
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)
    return jsonify(get_telegram_status(username))
@app.route("/download/<filename>")
@login_required
def download(filename):
    """Serve downloadable files (e.g., desktop app) from ``/data/downloads``.

    Args:
        filename: Bare file name made of letters, digits, ``_`` and ``-`` with
            a ``.zip`` or ``.dmg`` extension.

    Returns:
        The file as an attachment, or the rendered error page with status 400
        (name rejected) or 404 (no such file).
    """
    import re

    # Whole-string match on purpose: with re.match and a trailing "$" a name
    # ending in a newline ("app.zip\n") would pass validation, because "$"
    # matches just before a final newline.
    if not re.fullmatch(r"[a-zA-Z0-9_\-]+\.(zip|dmg)", filename):
        return render_template("error.html", error="Invalid filename", code=400), 400

    download_dir = Path("/data/downloads")
    file_path = download_dir / filename
    if not file_path.is_file():
        return render_template("error.html", error="File not found", code=404), 404

    from flask import send_file as flask_send_file

    return flask_send_file(file_path, as_attachment=True)
@app.route("/api/desktop/scripts")
def desktop_scripts():
    """Return the notification scripts of the authenticated desktop user as JSON."""
    # Authenticate first; the username it yields selects whose scripts are listed.
    desktop_user = require_desktop_auth()

    from services.telegram_bot.status import get_script_list_structured

    return jsonify(get_script_list_structured(desktop_user))
@app.route("/api/desktop/scripts/run", methods=["POST"])
def desktop_run_script():
    """Run a notification script on-demand for the authenticated desktop user.

    Expects a JSON object with a string ``name`` field. A missing, empty or
    non-string name, or a body that is not a JSON object, is answered with 400
    instead of crashing the request.

    Returns:
        ``{"ok": True}`` when the script ran; 500 with an error message when
        ``run_user_script`` returned ``None``.
    """
    username = require_desktop_auth()

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_name = data.get("name", "")
    script_name = raw_name.strip() if isinstance(raw_name, str) else ""
    if not script_name:
        return jsonify({"error": "Missing 'name' field"}), 400

    from services.telegram_bot.runner import run_user_script
    from services.telegram_bot.dispatch import dispatch_to_ws_gateway

    output = run_user_script(username, script_name)
    if output is None:
        return jsonify({"error": f"Script '{script_name}' failed or not found"}), 500

    # Only forward the result to the gateway when the script output asks for it.
    if output.get("notify", False):
        dispatch_to_ws_gateway(username, output, script_name)
    return jsonify({"ok": True})
@app.route("/api/sync-settings")
@login_required
def sync_settings_get():
    """Return the logged-in user's sync settings as JSON."""
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)
    return jsonify(get_sync_settings(username))
@app.route("/api/sync-settings", methods=["POST"])
@login_required
def sync_settings_update():
    """Store new sync settings for the logged-in user.

    Expects JSON with a non-empty ``datasets`` field. Returns
    ``{"ok": True, "message": ...}`` on success, otherwise ``{"error": ...}``
    with status 400.
    """
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)

    payload = request.get_json(silent=True) or {}
    datasets = payload.get("datasets", {})
    if not datasets:
        return jsonify({"error": "Missing datasets field"}), 400

    saved, detail = update_sync_settings(username, datasets)
    if not saved:
        return jsonify({"error": detail}), 400

    logger.info(f"Sync settings updated for {username}")
    return jsonify({"ok": True, "message": detail})
@app.route("/api/table-subscriptions")
@login_required
def table_subscriptions_get():
    """Return the logged-in user's per-table subscriptions as JSON."""
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)
    return jsonify(get_table_subscriptions(username))
@app.route("/api/table-subscriptions", methods=["POST"])
@login_required
def table_subscriptions_update():
    """Store the logged-in user's per-table subscriptions.

    Expects JSON with ``table_mode`` (``"all"`` or ``"explicit"``, default
    ``"all"``) and ``tables`` (default ``{}``). Returns
    ``{"ok": True, "message": ...}`` on success, otherwise ``{"error": ...}``
    with status 400.
    """
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)

    payload = request.get_json(silent=True) or {}
    requested_mode = payload.get("table_mode", "all")
    requested_tables = payload.get("tables", {})
    if requested_mode not in ("all", "explicit"):
        return jsonify({"error": "table_mode must be 'all' or 'explicit'"}), 400

    saved, detail = update_table_subscriptions(username, requested_mode, requested_tables)
    if not saved:
        return jsonify({"error": detail}), 400

    logger.info(f"Table subscriptions updated for {username}")
    return jsonify({"ok": True, "message": detail})
# ─────────────────────────────────────────────────────────────────
# Corporate Memory routes
# ─────────────────────────────────────────────────────────────────
@app.route("/corporate-memory")
@login_required
def corporate_memory():
    """Corporate Memory knowledge browser page.

    Renders the first page of knowledge items (20 per page) together with the
    global stats, the user's own stats and votes, and a governance context
    used by the template for admin features.
    """
    user = session.get("user", {})
    email = user.get("email", "")
    username = get_webapp_username(email)

    # Get stats for header
    stats = get_memory_stats()
    user_stats = get_memory_user_stats(username)

    # Get user's votes for highlighting
    user_votes = get_user_votes(username)

    # Get initial page of knowledge
    knowledge = get_knowledge(page=0, per_page=20)

    # Governance context for admin features. The pending count is read from
    # the stats fetched above rather than fetching the stats a second time,
    # so header and badge come from the same snapshot.
    _is_admin = is_km_admin(email) if email else False
    governance = {
        "mode": get_governance_mode(),
        "is_km_admin": _is_admin,
        "pending_count": stats.get("pending_count", 0) if _is_admin else 0,
    }
    return render_template(
        "corporate_memory.html",
        stats=stats,
        user_stats=user_stats,
        user_votes=user_votes,
        knowledge=knowledge,
        governance=governance,
    )
@app.route("/corporate-memory/admin")
@login_required
@km_admin_required
def corporate_memory_admin():
    """Corporate Memory admin review queue page.

    Passes the global stats, the governance mode and a list of groups
    (name + member count) for the audience dropdown to the template.
    """
    stats = get_memory_stats()
    groups = get_memory_groups()

    # Build groups list for audience dropdown (name + member count);
    # entries whose value is not a dict are skipped.
    groups_list = [
        {"name": name, "members_count": len(g.get("members", []))}
        for name, g in groups.items()
        if isinstance(g, dict)
    ]
    return render_template(
        "corporate_memory_admin.html",
        stats=stats,
        groups=groups_list,
        governance_mode=get_governance_mode(),
    )
# ─────────────────────────────────────────────────────────────────
# Activity Center routes
# ─────────────────────────────────────────────────────────────────
@app.route("/activity-center")
@login_required
def activity_center():
    """Render the Activity Center page from the data built by ``_build_activity_data``."""
    return render_template("activity_center.html", activity=_build_activity_data())
@app.route("/api/corporate-memory/knowledge")
@login_required
def api_corporate_memory_knowledge():
    """Get knowledge items with optional filtering.

    Query parameters:
        category, search: optional filters passed through unchanged.
        page: page index, default 0; negative values are clamped to 0.
        per_page: page size, default 20; clamped to the range 1..100.
        sort: sort key, default ``"score"``.
        my_rules: ``"true"`` (case-insensitive) to enable the my-rules filter.
        status / all_statuses: status filtering, honoured only for km admins.

    Returns:
        The result of ``get_knowledge`` as JSON.
    """
    category = request.args.get("category")
    search = request.args.get("search")
    page = request.args.get("page", 0, type=int)
    per_page = request.args.get("per_page", 20, type=int)
    sort = request.args.get("sort", "score")
    my_rules = request.args.get("my_rules", "").lower() == "true"

    # Get username for my_rules filter
    user = session.get("user", {})
    email = user.get("email", "")
    username = get_webapp_username(email)

    # Bound both paging values: an upper limit alone lets zero or negative
    # numbers from the query string reach the pagination code.
    page = max(page, 0)
    per_page = max(1, min(per_page, 100))

    # Admin status filter (only km_admins can filter by status)
    status = request.args.get("status")
    include_statuses = None
    if is_km_admin(email):
        if status:
            include_statuses = {status}
        elif request.args.get("all_statuses", "").lower() == "true":
            # Admin requesting all statuses (for admin "All Items" view)
            include_statuses = set(VALID_STATUSES)

    result = get_knowledge(
        category=category,
        search=search,
        page=page,
        per_page=per_page,
        sort=sort,
        username=username,
        my_rules=my_rules,
        include_statuses=include_statuses,
    )
    return jsonify(result)
@app.route("/api/corporate-memory/stats")
@login_required
def api_corporate_memory_stats():
    """Return global and per-user corporate memory statistics merged into one JSON object.

    Keys present in both sets take the per-user value.
    """
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)

    global_stats = get_memory_stats()
    personal_stats = get_memory_user_stats(username)

    merged = {**global_stats, **personal_stats}
    return jsonify(merged)
@app.route("/api/corporate-memory/vote", methods=["POST"])
@login_required
def api_corporate_memory_vote():
    """Record the logged-in user's vote on a knowledge item.

    Expects JSON with ``item_id`` and an integer-convertible ``vote``
    (default 0). Voting is refused with 400 while the governance mode is
    ``"mandatory_only"``.
    """
    if get_governance_mode() == "mandatory_only":
        return jsonify({"ok": False, "error": "Voting is disabled in mandatory-only mode"}), 400

    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)

    payload = request.get_json(silent=True) or {}
    item_id = payload.get("item_id")
    raw_vote = payload.get("vote", 0)
    if not item_id:
        return jsonify({"error": "Missing item_id"}), 400

    try:
        vote_value = int(raw_vote)
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid vote value"}), 400

    recorded, detail = memory_vote(username, item_id, vote_value)
    if not recorded:
        return jsonify({"error": detail}), 400
    return jsonify({"ok": True, "message": detail})
@app.route("/api/corporate-memory/my-votes")
@login_required
def api_corporate_memory_my_votes():
    """Return the logged-in user's votes as ``{"votes": ...}``."""
    account_email = session.get("user", {}).get("email", "")
    username = get_webapp_username(account_email)
    return jsonify({"votes": get_user_votes(username)})
# ─────────────────────────────────────────────────────────────────
# Corporate Memory Admin API
# ─────────────────────────────────────────────────────────────────
@app.route("/api/corporate-memory/admin/approve", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_approve():
    """Approve the knowledge item named by ``item_id`` in the JSON body.

    Returns ``{"ok": True, "message": ...}`` on success, 400 when ``item_id``
    is missing or the approval is refused, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    if "item_id" not in payload:
        return jsonify({"ok": False, "error": "item_id is required"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        approved, detail = approve_item(admin_email, payload["item_id"])
        if approved:
            return jsonify({"ok": True, "message": detail})
        return jsonify({"ok": False, "error": detail}), 400
    except Exception as exc:
        logger.exception("Error approving item")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/reject", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_reject():
    """Reject the knowledge item named by ``item_id``, with an optional ``reason``.

    Returns ``{"ok": True, "message": ...}`` on success, 400 when ``item_id``
    is missing or the rejection is refused, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    if "item_id" not in payload:
        return jsonify({"ok": False, "error": "item_id is required"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        rejected, detail = reject_item(
            admin_email,
            payload["item_id"],
            reason=payload.get("reason"),
        )
        if rejected:
            return jsonify({"ok": True, "message": detail})
        return jsonify({"ok": False, "error": detail}), 400
    except Exception as exc:
        logger.exception("Error rejecting item")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/mandate", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_mandate():
    """Mark the knowledge item named by ``item_id`` as mandatory.

    The JSON body must also carry a non-blank ``mandatory_reason``;
    ``audience`` is optional and defaults to ``"all"``. Returns
    ``{"ok": True, "message": ...}`` on success, 400 for missing input or a
    refused action, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    if "item_id" not in payload:
        return jsonify({"ok": False, "error": "item_id is required"}), 400

    justification = payload.get("mandatory_reason", "")
    # Empty and whitespace-only reasons are both rejected.
    if not (justification and justification.strip()):
        return jsonify({"ok": False, "error": "mandatory_reason is required"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        mandated, detail = mandate_item(
            admin_email,
            payload["item_id"],
            mandatory_reason=justification,
            audience=payload.get("audience", "all"),
        )
        if mandated:
            return jsonify({"ok": True, "message": detail})
        return jsonify({"ok": False, "error": detail}), 400
    except Exception as exc:
        logger.exception("Error mandating item")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/revoke", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_revoke():
    """Revoke the mandatory knowledge item named by ``item_id``, with an optional ``reason``.

    Returns ``{"ok": True, "message": ...}`` on success, 400 when ``item_id``
    is missing or the revocation is refused, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    if "item_id" not in payload:
        return jsonify({"ok": False, "error": "item_id is required"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        revoked, detail = revoke_item(
            admin_email,
            payload["item_id"],
            reason=payload.get("reason"),
        )
        if revoked:
            return jsonify({"ok": True, "message": detail})
        return jsonify({"ok": False, "error": detail}), 400
    except Exception as exc:
        logger.exception("Error revoking item")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/edit", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_edit():
    """Change the title and/or content of the knowledge item named by ``item_id``.

    At least one of ``title`` and ``content`` must be present in the JSON
    body. Returns ``{"ok": True, "message": ...}`` on success, 400 for missing
    input or a refused edit, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    if "item_id" not in payload:
        return jsonify({"ok": False, "error": "item_id is required"}), 400

    new_title = payload.get("title")
    new_content = payload.get("content")
    if new_title is None and new_content is None:
        return jsonify({"ok": False, "error": "At least one of title or content must be provided"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        edited, detail = edit_item(
            admin_email,
            payload["item_id"],
            title=new_title,
            content=new_content,
        )
        if edited:
            return jsonify({"ok": True, "message": detail})
        return jsonify({"ok": False, "error": detail}), 400
    except Exception as exc:
        logger.exception("Error editing item")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/batch", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_batch():
    """Apply one governance ``action`` to every item in ``item_ids``.

    ``item_ids`` must be a non-empty list and ``action`` must be set;
    ``mandatory_reason``, ``audience`` and ``reason`` are forwarded when
    present. Returns ``{"ok": True, ...}`` merged with the result of
    ``batch_action``, 400 for invalid input, 500 on an unexpected error.
    """
    payload = request.get_json(silent=True) or {}
    targets = payload.get("item_ids")
    requested_action = payload.get("action")

    if not targets or not isinstance(targets, list):
        return jsonify({"ok": False, "error": "item_ids must be a non-empty list"}), 400
    if not requested_action:
        return jsonify({"ok": False, "error": "action is required"}), 400

    admin_email = session.get("user", {}).get("email", "")
    try:
        outcome = batch_action(
            admin_email,
            targets,
            requested_action,
            mandatory_reason=payload.get("mandatory_reason"),
            audience=payload.get("audience"),
            reason=payload.get("reason"),
        )
        return jsonify({"ok": True, **outcome})
    except Exception as exc:
        logger.exception("Error in batch action")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/pending")
@login_required
@km_admin_required
def corporate_memory_admin_pending():
    """Get pending knowledge items awaiting review.

    Query parameters:
        category: optional filter passed through unchanged.
        page: page index, default 0; negative values are clamped to 0.
        per_page: page size, default 20; clamped to the range 1..100.

    Returns:
        The result of ``get_pending_queue`` as JSON, or 500 with an error
        message when fetching fails.
    """
    category = request.args.get("category")
    page = request.args.get("page", 0, type=int)
    per_page = request.args.get("per_page", 20, type=int)

    # Bound both paging values: an upper limit alone lets zero or negative
    # numbers from the query string reach the pagination code.
    page = max(page, 0)
    per_page = max(1, min(per_page, 100))

    try:
        result = get_pending_queue(
            category=category, page=page, per_page=per_page,
        )
        return jsonify(result)
    except Exception as e:
        logger.exception("Error fetching pending queue")
        return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/corporate-memory/admin/audit")
@login_required
@km_admin_required
def corporate_memory_admin_audit():
    """Get the governance audit log.

    Query parameters:
        page: page index, default 0; negative values are clamped to 0.
        per_page: page size, default 50; clamped to the range 1..100.
        admin, action: optional filters passed through unchanged.

    Returns:
        The result of ``get_audit_log`` as JSON, or 500 with an error message
        when fetching fails.
    """
    page = request.args.get("page", 0, type=int)
    per_page = request.args.get("per_page", 50, type=int)
    admin_filter = request.args.get("admin")
    action_filter = request.args.get("action")

    # Bound both paging values: an upper limit alone lets zero or negative
    # numbers from the query string reach the pagination code.
    page = max(page, 0)
    per_page = max(1, min(per_page, 100))

    try:
        result = get_audit_log(
            page=page,
            per_page=per_page,
            admin=admin_filter,
            action=action_filter,
        )
        return jsonify(result)
    except Exception as e:
        logger.exception("Error fetching audit log")
        return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/corporate-memory/admin/migrate", methods=["POST"])
@login_required
@km_admin_required
def corporate_memory_admin_migrate():
    """Run ``migrate_existing_items`` (items without status become approved) and report the count.

    Returns ``{"ok": True, "migrated": <count>}``, or 500 with an error
    message when the migration raises.
    """
    admin_email = session.get("user", {}).get("email", "")
    try:
        migrated = migrate_existing_items()
        logger.info(f"Migration triggered by {admin_email}: {migrated} items migrated")
        return jsonify({"ok": True, "migrated": migrated})
    except Exception as exc:
        logger.exception("Error migrating items")
        return jsonify({"ok": False, "error": str(exc)}), 500
@app.route("/api/corporate-memory/admin/config")
@login_required
@km_admin_required
def corporate_memory_admin_config():
    """Return the governance mode and the groups (name + member count) as JSON.

    Responds with 500 and an error message when reading the configuration
    raises.
    """
    try:
        group_summaries = []
        for group_name, group in get_memory_groups().items():
            # Entries whose value is not a dict are left out.
            if not isinstance(group, dict):
                continue
            group_summaries.append(
                {"name": group_name, "members_count": len(group.get("members", []))}
            )
        return jsonify({
            "ok": True,
            "governance_mode": get_governance_mode(),
            "groups": group_summaries,
        })
    except Exception as exc:
        logger.exception("Error fetching governance config")
        return jsonify({"ok": False, "error": str(exc)}), 500
# ─────────────────────────────────────────────────────────────────
# Admin pages
# ─────────────────────────────────────────────────────────────────
@app.route("/admin/tables")
@login_required
@admin_required
def admin_tables():
    """Render the admin table management page (template only, no context)."""
    template_name = "admin_tables.html"
    return render_template(template_name)
# ─────────────────────────────────────────────────────────────────
# Admin API routes
# ─────────────────────────────────────────────────────────────────
@app.route("/api/admin/discover-tables")
@login_required
@admin_required
def admin_discover_tables():
    """Discover all available tables from the data source.

    Only the ``"keboola"`` source type performs a discovery here; for any
    other source type the result is an empty list of buckets. Each discovered
    table gets an ``is_registered`` flag and tables are grouped by
    ``bucket_id`` (``"other"`` when a table has none).

    Returns:
        ``{"ok": True, "total": <table count>, "buckets": [...]}``, or 500
        with an error message when discovery fails.
    """
    try:
        from app.instance_config import get_data_source_type, get_value

        source_type = get_data_source_type()
        raw_tables = []
        if source_type == "keboola":
            from connectors.keboola.client import KeboolaClient

            url = get_value("keboola", "url", default="")
            # The config names the environment variable that holds the token.
            token_env = get_value("keboola", "token_env", default="KEBOOLA_STORAGE_TOKEN")
            token = os.environ.get(token_env, "")
            client = KeboolaClient(token=token, url=url)
            raw_tables = client.discover_all_tables()

        # Check which tables are already registered. This stays best effort
        # (discovery still succeeds without it), but the failure is logged
        # instead of being silently dropped.
        registered_ids = set()
        try:
            from src.db import get_system_db
            from src.repositories.table_registry import TableRegistryRepository

            conn = get_system_db()
            repo = TableRegistryRepository(conn)
            registered_ids = {t["id"] for t in repo.list_all()}
        except Exception as e:
            logger.warning(
                f"Could not read table registry during discovery; "
                f"all tables will be reported as unregistered: {e}"
            )

        # Group by bucket
        buckets: dict = {}
        for t in raw_tables:
            bid = t.get("bucket_id", "other")
            if bid not in buckets:
                buckets[bid] = {
                    "bucket_id": bid,
                    "bucket_name": t.get("bucket_name", bid),
                    "tables": [],
                }
            # .get: a discovered table without an id counts as unregistered
            # rather than failing the whole request with a KeyError.
            t["is_registered"] = t.get("id") in registered_ids
            buckets[bid]["tables"].append(t)

        return jsonify({
            "ok": True,
            "total": len(raw_tables),
            "buckets": list(buckets.values()),
        })
    except Exception as e:
        logger.error(f"Discovery failed: {e}")
        return jsonify({"error": str(e)}), 500
@app.route("/api/admin/registry")
@login_required
@admin_required
def admin_registry_list():
    """Return every registered table as JSON.

    ``version`` is always 0 and ``folder_mapping`` is always empty in the
    response; only ``tables`` is read from the registry. Responds with 500 and
    an error message when the registry cannot be read.
    """
    try:
        from src.db import get_system_db
        from src.repositories.table_registry import TableRegistryRepository

        registry = TableRegistryRepository(get_system_db())
        response_body = {
            "ok": True,
            "version": 0,
            "folder_mapping": {},
            "tables": registry.list_all(),
        }
        return jsonify(response_body)
    except Exception as exc:
        logger.error(f"Registry list failed: {exc}")
        return jsonify({"error": str(exc)}), 500
@app.route("/api/admin/register-table", methods=["POST"])
@login_required
@admin_required
def admin_register_table():
    """Register a table in the registry from the posted JSON description.

    ``id`` is required. ``name`` defaults to ``""``, ``query_mode`` to
    ``"local"`` and ``profile_after_sync`` to ``True``; every other field
    defaults to ``None``. The logged-in user's email is stored as
    ``registered_by``.

    Returns ``{"ok": True}`` on success, 400 for a missing id or a
    ``ValueError`` from the repository, 500 on any other error.
    """
    from src.db import get_system_db
    from src.repositories.table_registry import TableRegistryRepository

    admin_email = session.get("user", {}).get("email", "")
    payload = request.get_json(silent=True) or {}
    if not payload.get("id"):
        return jsonify({"error": "Missing table 'id'"}), 400

    try:
        registry = TableRegistryRepository(get_system_db())
        registry.register(
            id=payload["id"],
            name=payload.get("name", ""),
            folder=payload.get("folder"),
            sync_strategy=payload.get("sync_strategy"),
            primary_key=payload.get("primary_key"),
            description=payload.get("description"),
            registered_by=admin_email,
            source_type=payload.get("source_type"),
            bucket=payload.get("bucket"),
            source_table=payload.get("source_table"),
            query_mode=payload.get("query_mode", "local"),
            sync_schedule=payload.get("sync_schedule"),
            profile_after_sync=payload.get("profile_after_sync", True),
        )
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        logger.error(f"Register table failed: {exc}")
        return jsonify({"error": str(exc)}), 500
    else:
        return jsonify({"ok": True})
@app.route("/api/admin/registry/<path:table_id>", methods=["PUT"])
@login_required
@admin_required
def admin_update_table(table_id):
    """Update a registered table by merging the posted fields over the stored record.

    For each field the posted value wins when the key is present in the JSON
    body; otherwise the stored value is kept. A posted ``version`` key is
    dropped. The logged-in user's email is stored as ``registered_by``.

    Returns ``{"ok": True}`` on success, 404 when the table is not
    registered, 400 for a ``ValueError`` from the repository, 500 on any
    other error.
    """
    from src.db import get_system_db
    from src.repositories.table_registry import TableRegistryRepository

    admin_email = session.get("user", {}).get("email", "")
    updates = request.get_json(silent=True) or {}
    updates.pop("version", None)  # Not used by DuckDB repo

    try:
        registry = TableRegistryRepository(get_system_db())
        current = registry.get(table_id)
        if not current:
            return jsonify({"error": f"Table '{table_id}' not found"}), 404

        def merged(field, fallback=None):
            """Posted value if the key was sent, else the stored value, else ``fallback``."""
            return updates.get(field, current.get(field, fallback))

        registry.register(
            id=table_id,
            name=merged("name", ""),
            folder=merged("folder"),
            sync_strategy=merged("sync_strategy"),
            primary_key=merged("primary_key"),
            description=merged("description"),
            registered_by=admin_email,
            source_type=merged("source_type"),
            bucket=merged("bucket"),
            source_table=merged("source_table"),
            query_mode=merged("query_mode", "local"),
            sync_schedule=merged("sync_schedule"),
            profile_after_sync=merged("profile_after_sync", True),
        )
        return jsonify({"ok": True})
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        logger.error(f"Update table failed: {exc}")
        return jsonify({"error": str(exc)}), 500
@app.route("/api/admin/registry/<path:table_id>", methods=["DELETE"])
@login_required
@admin_required
def admin_unregister_table(table_id):
    """Remove a table from the registry and drop it from users' subscriptions.

    Subscription cleanup is best effort: a failure there is logged as a
    warning and the request still succeeds.

    Returns ``{"ok": True}`` on success, 400 for a ``ValueError`` from the
    repository, 500 on any other error.
    """
    from src.db import get_system_db
    from src.repositories.table_registry import TableRegistryRepository

    try:
        registry = TableRegistryRepository(get_system_db())

        # The name has to be read before the record is deleted, because the
        # subscription cleanup works by table name.
        record = registry.get(table_id)
        removed_name = record["name"] if record else None
        registry.unregister(table_id)

        if removed_name:
            try:
                _cleanup_table_subscriptions(removed_name)
            except Exception as cleanup_error:
                logger.warning(f"Subscription cleanup for {removed_name} failed: {cleanup_error}")
        return jsonify({"ok": True})
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        logger.error(f"Unregister table failed: {exc}")
        return jsonify({"error": str(exc)}), 500
def _cleanup_table_subscriptions(table_name: str) -> None:
    """Delete ``table_name`` from the ``tables`` mapping of every user in the sync settings file.

    The file is rewritten (and an info line logged) only when at least one
    user's settings actually contained the table.
    """
    from webapp.sync_settings_service import _read_json, _write_json, SYNC_SETTINGS_FILE

    settings_by_user = _read_json(SYNC_SETTINGS_FILE)
    touched = False
    for user_settings in settings_by_user.values():
        per_table = user_settings.get("tables", {})
        if table_name not in per_table:
            continue
        del per_table[table_name]
        touched = True

    if not touched:
        return
    _write_json(SYNC_SETTINGS_FILE, settings_by_user)
    logger.info(f"Cleaned up subscriptions for removed table: {table_name}")
@app.route("/health")
def health():
    """
    Health check endpoint for monitoring (no login required).

    Delegates to ``health_check`` and passes its response and status code
    through unchanged. Reports detailed status of services, disk, load, and
    recent activity; 200 if healthy, 503 if degraded.
    """
    from webapp.health_service import health_check

    body, http_status = health_check()
    return body, http_status
@app.errorhandler(404)
def not_found(e):
    """Render the shared error page for 404 responses."""
    page = render_template("error.html", error="Page not found", code=404)
    return page, 404
@app.errorhandler(500)
def server_error(e):
    """Log the failure with its traceback and render the shared error page for 500 responses."""
    logger.exception("Server error")
    page = render_template("error.html", error="Internal server error", code=500)
    return page, 500
# Module-level application object, created at import time for Gunicorn.
app = create_app()

if __name__ == "__main__":
    # Running this file directly starts the development server on loopback only.
    app.run(
        debug=True,
        host="127.0.0.1",
        port=5000,
    )