agnes-the-ai-analyst/webapp/app.py
Petr 86edd27655 Extract Jira into connectors/jira module
Move all Jira-specific code into a self-contained connector module:
- 22 files moved via git mv (transform, service, webhook, scripts,
  systemd units, tests, docs, bin helper)
- All imports updated to use connectors.jira.* paths
- Jira is now conditional: auto-detected via JIRA_DOMAIN env var
- Webapp registers Jira blueprint only when available
- Health service monitors Jira timers only when enabled
- Profiler loads Jira tables dynamically from filesystem
- Sync settings uses config-driven dependency validation
- Renamed keboola_platform_url -> custom_url in transform
- Updated deploy.sh, sudoers-deploy, backfill_gap.sh paths
- Fixed pytest.ini to skip live tests by default
2026-03-09 11:17:50 +01:00

848 lines
30 KiB
Python

"""
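Flask application for Google SSO user management.
Allows users to:
1. Log in with Google (allowed domain only)
2. View their account status if they exist
3. Create a new analyst account with their SSH key

Beyond account management, this module also registers the routes for the
data catalog, metric definitions, Telegram linking, desktop notification
scripts, sync settings, Corporate Memory, the Activity Center and the
health check.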
"""
import json
import logging
import os
from datetime import datetime
from pathlib import Path
from flask import Flask, flash, jsonify, redirect, render_template, request, session, url_for
from .auth import auth_bp, init_oauth, login_required
from .config import Config
from .desktop_auth import desktop_bp, require_desktop_auth
from .notification_images import images_bp
from .account_service import get_account_details
from .sync_settings_service import get_sync_settings, update_sync_settings
# Jira connector is optional - only loaded if configured
try:
from connectors.jira.webhook import jira_bp
JIRA_AVAILABLE = True
except ImportError:
JIRA_AVAILABLE = False
jira_bp = None
# Password auth is optional - requires SENDGRID_API_KEY
try:
from .password_auth import password_auth_bp
PASSWORD_AUTH_AVAILABLE = True
except ImportError:
PASSWORD_AUTH_AVAILABLE = False
password_auth_bp = None
from .telegram_service import get_telegram_status, link_telegram, unlink_telegram
from .corporate_memory_service import (
get_knowledge,
get_stats as get_memory_stats,
get_user_stats as get_memory_user_stats,
get_user_votes,
vote as memory_vote,
)
from .user_service import (
UserInfo,
check_user_exists,
create_user,
get_username_from_email,
is_username_available,
validate_ssh_key,
)
# Root logging setup: INFO and above, one "<time> - <logger> - <level> - <message>" line per record.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the helpers and routes in this file.
logger = logging.getLogger(__name__)
def create_app() -> Flask:
    """Create and configure the Flask application.

    Loads settings from ``Config``, reports configuration problems, sets up
    OAuth, registers the always-on blueprints plus the optional Jira and
    password-auth blueprints (only when their imports succeeded), attaches
    the main routes and installs the template context processors.

    Returns:
        The configured Flask application.
    """
    app = Flask(__name__)
    app.config.from_object(Config)

    # Configuration problems are only logged as warnings, never fatal, and
    # they are not logged at all when the app is in debug mode.
    errors = Config.validate()
    if errors and not app.debug:
        for error in errors:
            logger.warning(f"Configuration warning: {error}")

    # Initialize OAuth
    init_oauth(app)

    # Register blueprints
    app.register_blueprint(auth_bp)
    app.register_blueprint(desktop_bp)
    app.register_blueprint(images_bp)
    if JIRA_AVAILABLE and jira_bp:
        app.register_blueprint(jira_bp)
    if PASSWORD_AUTH_AVAILABLE and password_auth_bp:
        app.register_blueprint(password_auth_bp)

    # Register main routes
    register_routes(app)

    @app.context_processor
    def inject_now():
        """Expose ``now`` (the ``datetime.now`` callable) to templates."""
        return {"now": datetime.now}

    @app.context_processor
    def inject_config():
        """Expose the ``Config`` class to templates as ``config``."""
        return {"config": Config}

    @app.context_processor
    def inject_static_cache_buster():
        """Expose ``static_url()``, a cache-busting variant of ``url_for('static', ...)``."""

        def static_url(filename: str) -> str:
            """Generate static URL with cache-busting query parameter."""
            static_path = Path(app.static_folder) / filename
            try:
                # A single stat() instead of exists() + stat(): the file can
                # no longer vanish between the check and the read.
                mtime = int(static_path.stat().st_mtime)
            except (OSError, ValueError):
                # Missing or unreadable file: plain URL without the version.
                return url_for("static", filename=filename)
            return url_for("static", filename=filename, v=mtime)

        return {"static_url": static_url}

    return app
# Unix domain socket that _send_welcome_message() posts bot messages to.
NOTIFY_SOCKET_PATH = "/data/notifications/bot.sock"
# Path to sync state (written by data sync process); its parent directory is
# also where _resolve_metadata_path() looks for other metadata files.
SYNC_STATE_PATH = Path("/data/src_data/metadata/sync_state.json")
# Local development: fall back to dev_data/metadata/ relative to project root
# (two directory levels above this file).
_DEV_METADATA_PATH = Path(__file__).parent.parent / "dev_data" / "metadata"
def _resolve_metadata_path(filename: str) -> Path:
    """Locate a metadata file, preferring the production directory.

    The file is looked up next to ``SYNC_STATE_PATH`` first. When it is not
    present there, the path under ``_DEV_METADATA_PATH`` is returned instead,
    whether or not that file actually exists.
    """
    candidate = SYNC_STATE_PATH.parent / filename
    return candidate if candidate.exists() else _DEV_METADATA_PATH / filename
# Fallback stats (used when sync_state.json is unavailable).
# _load_data_stats() returns a copy of this dict when the sync state is
# missing, empty or unreadable, and also reads the "columns",
# "unstructured_*" and "highlights" entries from it when building live stats.
FALLBACK_DATA_STATS = {
    "tables": 0,
    "columns": 0,
    "rows": 0,
    "rows_display": "-",  # pre-formatted row count for display
    "size_mb": 0,
    "size_display": "0 MB",  # pre-formatted size for display
    "uncompressed_mb": 0,
    "unstructured_gb": 0,
    "unstructured_display": "",
    "last_updated": None,  # display string, or None when unknown
    "highlights": {},
}
def _format_row_total(total_rows) -> str:
    """Abbreviate a row total as ``"<n>M+"`` / ``"<n>K+"``, rounding down.

    Rounding down keeps the trailing "+" truthful: 1,600,000 rows is shown as
    "1M+", never "2M+".
    """
    if total_rows >= 1_000_000:
        return f"{int(total_rows // 1_000_000)}M+"
    if total_rows >= 1_000:
        return f"{int(total_rows // 1_000)}K+"
    return str(total_rows)


def _format_last_updated(raw):
    """Render the sync timestamp as ``"YYYY-MM-DD HH:MM UTC"``.

    Returns None for a missing/empty value. A value that cannot be parsed as
    ISO 8601 is returned truncated to its first 16 characters.
    """
    from datetime import timezone

    if not raw:
        return None
    text = str(raw)
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    iso_text = text[:-1] + "+00:00" if text.endswith("Z") else text
    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        return text[:16]
    if parsed.tzinfo is not None:
        # Convert offset-aware timestamps so the "UTC" label is actually true.
        parsed = parsed.astimezone(timezone.utc)
    # NOTE(review): naive timestamps are labeled UTC unchanged - assumes the
    # sync process writes UTC; confirm against the writer.
    return parsed.strftime("%Y-%m-%d %H:%M") + " UTC"


def _load_data_stats() -> dict:
    """Load aggregate data stats from sync_state.json, with hardcoded fallback.

    Sums the per-table ``columns``, ``rows``, ``file_size_mb`` and
    ``uncompressed_mb`` values found under the ``tables`` key of the sync
    state and adds display-ready strings. Missing or null per-table values
    count as 0.

    Returns:
        A dict with the same keys as ``FALLBACK_DATA_STATS``. A copy of
        ``FALLBACK_DATA_STATS`` is returned when the file is absent, lists no
        tables, or cannot be read/parsed (the latter is logged as a warning).
    """
    try:
        sync_path = _resolve_metadata_path("sync_state.json")
        if not sync_path.exists():
            return dict(FALLBACK_DATA_STATS)

        with open(sync_path, encoding="utf-8") as f:
            state = json.load(f)

        tables_data = state.get("tables", {})
        if not tables_data:
            return dict(FALLBACK_DATA_STATS)

        def _total(key: str):
            # "or 0" tolerates keys that are present but null in the JSON.
            return sum((t.get(key) or 0) for t in tables_data.values())

        total_columns = _total("columns")
        total_rows = _total("rows")

        # Format size for display
        size_mb = round(_total("file_size_mb"))
        if size_mb >= 1000:
            size_display = f"{size_mb / 1000:.1f} GB"
        else:
            size_display = f"{size_mb} MB"

        return {
            "tables": len(tables_data),
            "columns": total_columns if total_columns > 0 else FALLBACK_DATA_STATS["columns"],
            "rows": total_rows,
            "rows_display": _format_row_total(total_rows),
            "size_mb": size_mb,
            "size_display": size_display,
            "uncompressed_mb": round(_total("uncompressed_mb")),
            # Not derivable from the sync state; taken from the fallback values.
            "unstructured_gb": FALLBACK_DATA_STATS["unstructured_gb"],
            "unstructured_display": FALLBACK_DATA_STATS["unstructured_display"],
            "last_updated": _format_last_updated(state.get("last_updated")),
            "highlights": FALLBACK_DATA_STATS["highlights"],
        }
    except Exception as e:
        # The dashboard must still render without stats, so degrade to the fallback.
        logger.warning(f"Could not load data stats from sync_state.json: {e}")
        return dict(FALLBACK_DATA_STATS)
def _format_table_rows(rows) -> str:
    """Format one table's row count for the catalog ("-" when zero/unknown)."""
    if rows >= 1_000_000:
        return f"{rows / 1_000_000:.1f}M"
    if rows >= 1_000:
        return f"{rows:,}"
    return str(rows) if rows > 0 else "-"


def _load_catalog_data() -> list:
    """Load catalog data by merging data_description.md (YAML) with sync_state.json.

    Table definitions come from the first ```yaml block of
    ``docs/data_description.md``; row counts come from ``sync_state.json``
    (optional); category labels, icons and ordering come from the instance
    config (optional).

    Returns:
        List of category dicts:
        ``[{name, icon_type, count, tables: [{name, description, rows,
        rows_display, rows_large}]}]``.
        Categories follow the configured order; when no order is available the
        folders are listed in the order they were discovered. An empty list is
        returned when the description file or its YAML block is missing, or on
        any unexpected error (logged as a warning).
    """
    import re

    import yaml

    catalog = []
    try:
        # Parse data_description.md YAML block
        desc_path = Path(__file__).parent / ".." / "docs" / "data_description.md"
        if not desc_path.exists():
            return catalog
        with open(desc_path, encoding="utf-8") as f:
            content = f.read()

        # Extract YAML block between ```yaml and ```
        yaml_match = re.search(r'```yaml\s*\n(.*?)```', content, re.DOTALL)
        if not yaml_match:
            return catalog
        yaml_data = yaml.safe_load(yaml_match.group(1))
        if not yaml_data or "tables" not in yaml_data:
            return catalog

        # Load sync state for row counts; the catalog is still useful without them.
        sync_data = {}
        try:
            sync_path = _resolve_metadata_path("sync_state.json")
            if sync_path.exists():
                with open(sync_path, encoding="utf-8") as f:
                    sync_data = json.load(f).get("tables") or {}
        except Exception as e:
            logger.warning(f"Could not load sync state for catalog: {e}")

        # Load category mappings from instance config, with empty fallback
        folder_to_category = {}
        folder_to_icon = {}
        config_loaded = False
        try:
            from config.loader import load_instance_config, get_instance_value

            instance_config = load_instance_config()
            config_loaded = True
            configured = get_instance_value(instance_config, "catalog", "categories", default={})
            folder_to_category = {k: v.get("label", k) for k, v in configured.items()}
            folder_to_icon = {k: v.get("icon", k) for k, v in configured.items()}
        except Exception as e:
            logger.debug(f"Catalog categories not available from instance config: {e}")
            folder_to_category = {}
            folder_to_icon = {}

        # Bucket id -> folder name, straight from the YAML document.
        bucket_to_folder = yaml_data.get("folder_mapping") or {}

        # Group tables by category (folder)
        categories = {}
        for table in yaml_data["tables"] or []:
            table_id = table.get("id", "")
            # Extract bucket from table_id (e.g., "in.c-crm.company" -> "in.c-crm");
            # an id without a dot has no bucket.
            bucket_id = table_id.rpartition(".")[0]
            folder = bucket_to_folder.get(bucket_id, "other")

            # "or" guards tolerate null entries in the sync state.
            rows = (sync_data.get(table_id) or {}).get("rows") or 0
            categories.setdefault(folder, []).append({
                "name": table.get("name", ""),
                "description": table.get("description", ""),
                "rows": rows,
                "rows_display": _format_table_rows(rows),
                # Drives the "large" badge
                "rows_large": rows >= 1_000_000,
            })

        # Build ordered catalog (from instance config or use discovered folders)
        category_order = list(folder_to_category.keys())
        if config_loaded:
            try:
                category_order = get_instance_value(
                    instance_config, "catalog", "order", default=category_order
                )
            except Exception as e:
                logger.debug(f"Catalog order not available from instance config: {e}")
        if not category_order:
            # Nothing configured: show every folder that actually has tables.
            category_order = list(categories)

        for folder in category_order:
            if folder in categories:
                catalog.append({
                    "name": folder_to_category.get(folder, folder),
                    "icon_type": folder_to_icon.get(folder, folder),
                    "tables": categories[folder],
                    "count": len(categories[folder]),
                })
    except Exception as e:
        logger.warning(f"Could not load catalog data: {e}")
    return catalog
def _send_welcome_message(username: str) -> None:
    """Send a welcome message to the user via bot socket after linking.

    Posts to ``http://localhost/send`` over the Unix domain socket at
    ``NOTIFY_SOCKET_PATH``. Delivery is best effort: any failure, including a
    non-2xx reply, is logged as a warning and never raised to the caller.

    Args:
        username: Value sent as the message's "user" field and shown in the text.
    """
    try:
        import httpx

        # NOTE(review): the text is sent with parse_mode "Markdown"; a username
        # containing "_" or "*" may need escaping - verify against the bot.
        text = (
            "Account linked!\n\n"
            f"Your server login: *{username}*\n"
            "Notifications dir: `~/user/notifications/`\n\n"
            "To create notification scripts, ask your local AI assistant "
            "(Claude Code). It knows how to build them for you.\n\n"
            "You will receive alerts from your scripts here."
        )
        transport = httpx.HTTPTransport(uds=NOTIFY_SOCKET_PATH)
        with httpx.Client(transport=transport, timeout=10) as client:
            response = client.post(
                "http://localhost/send",
                json={
                    "user": username,
                    "text": text,
                    "parse_mode": "Markdown",
                },
            )
            # Surface 4xx/5xx replies in the log instead of ignoring them.
            response.raise_for_status()
    except Exception as e:
        logger.warning(f"Failed to send welcome message to {username}: {e}")
def register_routes(app: Flask) -> None:
    """Register main application routes on ``app``.

    Covers the dashboard and registration flow, the data catalog and metric
    endpoints, Telegram and desktop notification APIs, sync settings,
    Corporate Memory, the Activity Center, the health check and the 404/500
    error handlers.

    Args:
        app: Flask application the routes and error handlers are attached to.
    """
    import re

    from flask import send_file as flask_send_file

    # Whitelists for user-supplied path fragments, compiled once rather than
    # on every request.
    metric_path_pattern = re.compile(r"^[a-z_]+/[a-z_]+\.yml$")
    download_name_pattern = re.compile(r"^[a-zA-Z0-9_\-]+\.(zip|dmg)$")

    def _current_username() -> str:
        """Derive the system username from the e-mail stored in the session."""
        email = session.get("user", {}).get("email", "")
        return get_username_from_email(email)

    def _json_object() -> dict:
        """Return the request's JSON body, or {} when it is missing or not a JSON object."""
        payload = request.get_json(silent=True)
        return payload if isinstance(payload, dict) else {}

    def _text_field(payload: dict, key: str) -> str:
        """Return ``payload[key]`` as a stripped string.

        Numbers are accepted (clients may send a numeric code as a JSON
        number); null, missing and any other type yield "".
        """
        value = payload.get(key)
        if isinstance(value, bool) or not isinstance(value, (str, int)):
            return ""
        return str(value).strip()

    def _metrics_docs_dir() -> Path:
        """Return the metrics docs directory: production path, else the local dev copy."""
        docs_dir = Path("/data/docs/metrics")
        if not docs_dir.exists():
            # Local development: use docs/metrics relative to project root
            docs_dir = Path(__file__).parent.parent / "docs" / "metrics"
        return docs_dir

    def _is_file_within(file_path: Path, base_dir: Path) -> bool:
        """True when ``file_path`` is an existing file that resolves inside ``base_dir``."""
        return file_path.is_file() and file_path.resolve().is_relative_to(base_dir.resolve())

    @app.route("/")
    def index():
        """Redirect to dashboard or login."""
        if "user" in session:
            return redirect(url_for("dashboard"))
        return redirect(url_for("auth.login"))

    @app.route("/dashboard")
    @login_required
    def dashboard():
        """Show user dashboard with account info or registration form."""
        user = session.get("user", {})
        username = _current_username()

        # Check if user exists on the system
        user_info = check_user_exists(username)
        # Check if username is available (for new registrations)
        username_available, username_error = is_username_available(username)

        # Read bootstrap YAML for Claude Code setup instructions
        bootstrap_yaml = ""
        try:
            bootstrap_path = os.path.join(os.path.dirname(__file__), "..", "docs", "setup", "bootstrap.yaml")
            with open(bootstrap_path, "r", encoding="utf-8") as f:
                bootstrap_yaml_template = f.read()
            # "or ''" keeps str.replace() from raising should a Config value
            # be None (Config is not visible here - presumably possible,
            # since the original already guarded SERVER_HOSTNAME).
            server_host = Config.SERVER_HOST or ""
            server_hostname = Config.SERVER_HOSTNAME or ""
            webapp_url = f"https://{server_hostname}" if server_hostname else ""
            # Substitute everything first, assign once: a failure can no
            # longer leave a half-filled template in bootstrap_yaml.
            bootstrap_yaml = (
                bootstrap_yaml_template
                .replace("{username}", username)
                .replace("{server_host}", server_host)
                .replace("{server_hostname}", server_hostname)
                .replace("{webapp_url}", webapp_url)
            )
        except Exception as e:
            logger.warning(f"Could not read bootstrap.yaml: {e}")

        # Get Telegram link status
        telegram_status = get_telegram_status(username)

        # Get desktop app link status
        from .desktop_auth import get_desktop_status
        desktop_status = get_desktop_status(username)

        # Load data stats
        data_stats = _load_data_stats()
        catalog_data = _load_catalog_data()

        # Load sync settings (for existing users)
        sync_settings = get_sync_settings(username) if user_info.exists else None
        # Gather account widget details (notification scripts, cron, last sync)
        account_details = get_account_details(username) if user_info.exists else None
        # Activity Center summary for dashboard widget (empty fallback)
        activity_summary = {}

        return render_template(
            "dashboard.html",
            user=user,
            username=username,
            user_info=user_info,
            username_available=username_available,
            username_error=username_error,
            server_host=Config.SERVER_HOST,
            server_hostname=Config.SERVER_HOSTNAME,
            bootstrap_yaml=bootstrap_yaml,
            telegram_status=telegram_status,
            desktop_status=desktop_status,
            data_stats=data_stats,
            catalog_data=catalog_data,
            sync_settings=sync_settings,
            account_details=account_details,
            activity_summary=activity_summary,
        )

    @app.route("/catalog")
    @login_required
    def catalog():
        """Data catalog page."""
        username = _current_username()
        return render_template(
            "catalog.html",
            data_stats=_load_data_stats(),
            catalog_data=_load_catalog_data(),
            sync_settings=get_sync_settings(username),
        )

    @app.route("/api/catalog/profile/<table_name>")
    @login_required
    def catalog_profile(table_name):
        """Return profiler data for a single table."""
        profiles_path = _resolve_metadata_path("profiles.json")
        try:
            if not profiles_path.exists():
                return jsonify({"error": "Profiler data not available yet"}), 404
            with open(profiles_path, encoding="utf-8") as f:
                profiles = json.load(f)
            table_profile = profiles.get("tables", {}).get(table_name)
            if not table_profile:
                return jsonify({"error": f"No profile for table '{table_name}'"}), 404
            return jsonify(table_profile)
        except Exception as e:
            logger.error(f"Error loading profile for {table_name}: {e}")
            return jsonify({"error": "Failed to load profile data"}), 500

    @app.route("/api/metrics/<path:metric_path>")
    @login_required
    def api_metric(metric_path):
        """API endpoint to serve metric definition as structured JSON."""
        # Validate path to prevent directory traversal (allow category/file.yml pattern)
        if not metric_path_pattern.match(metric_path):
            return jsonify({"error": "Invalid metric path"}), 400

        docs_dir = _metrics_docs_dir()
        file_path = docs_dir / metric_path

        # Security check: ensure path stays within docs_dir
        try:
            if not _is_file_within(file_path, docs_dir):
                return jsonify({"error": "Metric file not found"}), 404
        except (ValueError, OSError):
            return jsonify({"error": "Invalid path"}), 400

        # Parse metric YAML and return structured JSON
        try:
            from webapp.utils.metric_parser import MetricParser

            parser = MetricParser(docs_dir)
            metric_data = parser.parse_metric(metric_path)
            return jsonify(metric_data)
        except Exception as e:
            logger.error(f"Error parsing metric {metric_path}: {e}")
            return jsonify({"error": f"Failed to parse metric: {str(e)}"}), 500

    @app.route("/docs/metrics/<path:metric_path>")
    @login_required
    def serve_metric(metric_path):
        """Serve metric definition YAML files (legacy endpoint for backward compatibility)."""
        # Validate path to prevent directory traversal (allow category/file.yml pattern)
        if not metric_path_pattern.match(metric_path):
            return render_template("error.html", error="Invalid metric path", code=400), 400

        # Same directory resolution as /api/metrics, so both endpoints also
        # work in local development.
        docs_dir = _metrics_docs_dir()
        file_path = docs_dir / metric_path

        # Security check: ensure path stays within docs_dir
        try:
            if not _is_file_within(file_path, docs_dir):
                return (
                    render_template("error.html", error="Metric file not found", code=404),
                    404,
                )
        except (ValueError, OSError):
            return render_template("error.html", error="Invalid path", code=400), 400

        return flask_send_file(file_path, mimetype="text/plain")

    @app.route("/register", methods=["POST"])
    @login_required
    def register():
        """Create a new analyst account."""
        email = session.get("user", {}).get("email", "")
        username = get_username_from_email(email)

        # Check if user already exists
        user_info = check_user_exists(username)
        if user_info.exists:
            flash("Your account already exists.", "info")
            return redirect(url_for("dashboard"))

        # Get and validate SSH key
        # Normalize whitespace: collapse newlines/tabs/multiple spaces to single spaces
        # Users often paste keys with line breaks from terminal wrapping
        ssh_key = " ".join(request.form.get("ssh_key", "").split())
        is_valid, error = validate_ssh_key(ssh_key)
        if not is_valid:
            flash(error, "error")
            return redirect(url_for("dashboard"))

        # Create the user
        success, message = create_user(username, ssh_key)
        if success:
            flash(message, "success")
            logger.info(f"Account created for {email} (username: {username})")
        else:
            flash(message, "error")
            logger.error(f"Failed to create account for {email}: {message}")
        return redirect(url_for("dashboard"))

    @app.route("/api/telegram/verify", methods=["POST"])
    @login_required
    def telegram_verify():
        """Verify a Telegram verification code and link the account."""
        username = _current_username()
        code = _text_field(_json_object(), "code")
        if not code:
            return jsonify({"error": "Verification code is required"}), 400

        success, message = link_telegram(username, code)
        if success:
            logger.info(f"Telegram linked for {username}")
            # Send welcome message via bot socket
            _send_welcome_message(username)
            return jsonify({"ok": True, "message": message})
        return jsonify({"error": message}), 400

    @app.route("/api/telegram/unlink", methods=["POST"])
    @login_required
    def telegram_unlink():
        """Unlink Telegram from the account."""
        username = _current_username()
        success, message = unlink_telegram(username)
        if success:
            logger.info(f"Telegram unlinked for {username}")
            return jsonify({"ok": True, "message": message})
        return jsonify({"error": message}), 400

    @app.route("/api/telegram/status")
    @login_required
    def telegram_status():
        """Get Telegram link status."""
        return jsonify(get_telegram_status(_current_username()))

    @app.route("/download/<filename>")
    @login_required
    def download(filename):
        """Serve downloadable files (e.g., desktop app)."""
        # The whitelist allows no path separators, so the file cannot leave download_dir.
        if not download_name_pattern.match(filename):
            return render_template("error.html", error="Invalid filename", code=400), 400
        download_dir = Path("/data/downloads")
        file_path = download_dir / filename
        if not file_path.is_file():
            return render_template("error.html", error="File not found", code=404), 404
        return flask_send_file(file_path, as_attachment=True)

    @app.route("/api/desktop/scripts")
    def desktop_scripts():
        """List notification scripts for the authenticated desktop user."""
        # No @login_required here: the username comes from require_desktop_auth().
        username = require_desktop_auth()
        from server.telegram_bot.status import get_script_list_structured

        scripts = get_script_list_structured(username)
        return jsonify(scripts)

    @app.route("/api/desktop/scripts/run", methods=["POST"])
    def desktop_run_script():
        """Run a notification script on-demand for the authenticated desktop user."""
        username = require_desktop_auth()
        script_name = _text_field(_json_object(), "name")
        if not script_name:
            return jsonify({"error": "Missing 'name' field"}), 400

        from server.telegram_bot.runner import run_user_script
        from server.telegram_bot.dispatch import dispatch_to_ws_gateway

        output = run_user_script(username, script_name)
        if output is None:
            return jsonify({"error": f"Script '{script_name}' failed or not found"}), 500
        # Forward the result only when the script's output asks for it.
        if output.get("notify", False):
            dispatch_to_ws_gateway(username, output, script_name)
        return jsonify({"ok": True})

    @app.route("/api/sync-settings")
    @login_required
    def sync_settings_get():
        """Get sync settings for current user."""
        return jsonify(get_sync_settings(_current_username()))

    @app.route("/api/sync-settings", methods=["POST"])
    @login_required
    def sync_settings_update():
        """Update sync settings for current user."""
        username = _current_username()
        datasets = _json_object().get("datasets", {})
        if not datasets:
            return jsonify({"error": "Missing datasets field"}), 400

        success, message = update_sync_settings(username, datasets)
        if success:
            logger.info(f"Sync settings updated for {username}")
            return jsonify({"ok": True, "message": message})
        return jsonify({"error": message}), 400

    # ─────────────────────────────────────────────────────────────────
    # Corporate Memory routes
    # ─────────────────────────────────────────────────────────────────
    @app.route("/corporate-memory")
    @login_required
    def corporate_memory():
        """Corporate Memory knowledge browser page."""
        username = _current_username()
        return render_template(
            "corporate_memory.html",
            # Stats for header
            stats=get_memory_stats(),
            user_stats=get_memory_user_stats(username),
            # User's votes for highlighting
            user_votes=get_user_votes(username),
            # Initial page of knowledge
            knowledge=get_knowledge(page=0, per_page=20),
        )

    # ─────────────────────────────────────────────────────────────────
    # Activity Center routes
    # ─────────────────────────────────────────────────────────────────
    @app.route("/activity-center")
    @login_required
    def activity_center():
        """Activity Center page - enterprise data intelligence overview."""
        activity = {}
        return render_template("activity_center.html", activity=activity)

    @app.route("/api/corporate-memory/knowledge")
    @login_required
    def api_corporate_memory_knowledge():
        """Get knowledge items with optional filtering."""
        category = request.args.get("category")
        search = request.args.get("search")
        sort = request.args.get("sort", "score")
        my_rules = request.args.get("my_rules", "").lower() == "true"

        # Clamp paging to sane bounds: no negative pages, 1..100 items per page.
        page = max(request.args.get("page", 0, type=int), 0)
        per_page = max(1, min(request.args.get("per_page", 20, type=int), 100))

        result = get_knowledge(
            category=category,
            search=search,
            page=page,
            per_page=per_page,
            sort=sort,
            # Username is needed for the my_rules filter
            username=_current_username(),
            my_rules=my_rules,
        )
        return jsonify(result)

    @app.route("/api/corporate-memory/stats")
    @login_required
    def api_corporate_memory_stats():
        """Get corporate memory statistics for dashboard."""
        username = _current_username()
        stats = get_memory_stats()
        user_stats = get_memory_user_stats(username)
        # On a key clash the per-user value wins (later unpacking overrides).
        return jsonify({
            **stats,
            **user_stats,
        })

    @app.route("/api/corporate-memory/vote", methods=["POST"])
    @login_required
    def api_corporate_memory_vote():
        """Vote on a knowledge item."""
        username = _current_username()
        data = _json_object()
        item_id = data.get("item_id")
        if not item_id:
            return jsonify({"error": "Missing item_id"}), 400
        try:
            vote_value = int(data.get("vote", 0))
        except (TypeError, ValueError):
            return jsonify({"error": "Invalid vote value"}), 400

        success, message = memory_vote(username, item_id, vote_value)
        if success:
            return jsonify({"ok": True, "message": message})
        return jsonify({"error": message}), 400

    @app.route("/api/corporate-memory/my-votes")
    @login_required
    def api_corporate_memory_my_votes():
        """Get current user's votes."""
        return jsonify({"votes": get_user_votes(_current_username())})

    @app.route("/health")
    def health():
        """
        Health check endpoint for monitoring.
        Returns detailed status of services, disk, load, and recent activity.
        Returns 200 if healthy, 503 if degraded.
        """
        from webapp.health_service import health_check

        response, status_code = health_check()
        return response, status_code

    @app.errorhandler(404)
    def not_found(e):
        """Handle 404 errors."""
        return render_template("error.html", error="Page not found", code=404), 404

    @app.errorhandler(500)
    def server_error(e):
        """Handle 500 errors."""
        logger.exception("Server error")
        return render_template("error.html", error="Internal server error", code=500), 500
# Create the app instance for Gunicorn. This runs at import time, so merely
# importing the module builds the fully configured application.
app = create_app()

if __name__ == "__main__":
    # Development server: Flask's built-in server with debug mode enabled,
    # bound to localhost only.
    app.run(debug=True, host="127.0.0.1", port=5000)