refactor: final cleanup — delete legacy auth, clean deps, fix hash, migrate to uv
- Delete root auth/ directory (legacy Flask providers, orphaned)
- Clean requirements.txt: remove Flask, gunicorn, authlib, sendgrid, anthropic, openai, argon2-cffi (9 unused deps)
- Fix hash computation in orchestrator: MD5 of parquet mtime+size (CLI sync now skips unchanged tables correctly)
- Migrate pip → uv in CLAUDE.md, scripts/init.sh, pyproject.toml
- Sync pyproject.toml dependencies with requirements.txt

578 tests passing.
This commit is contained in:
parent
2b7348a773
commit
5ee12d78e7
14 changed files with 77 additions and 784 deletions
14
CLAUDE.md
14
CLAUDE.md
|
|
@ -47,7 +47,7 @@ docker compose --profile full up # Include telegram bot
|
|||
│ ├── bigquery/ # BigQuery: extractor.py (remote-only via DuckDB BQ extension)
|
||||
│ └── jira/ # Jira: webhook + incremental parquet → extract.duckdb
|
||||
├── cli/ # CLI tool (`da sync`, `da query`, `da admin`)
|
||||
├── auth/ # Authentication providers (google, email, password, desktop)
|
||||
├── app/auth/ # Authentication (FastAPI-based providers)
|
||||
├── services/ # Standalone services (scheduler, telegram_bot, ws_gateway, etc.)
|
||||
├── server/ # Legacy deployment infrastructure
|
||||
├── scripts/ # Utility + migration scripts
|
||||
|
|
@ -105,7 +105,7 @@ Table definitions: DuckDB `table_registry` table in `system.duckdb`.
|
|||
```bash
|
||||
# Setup
|
||||
python3 -m venv .venv && source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
uv pip install -r requirements.txt
|
||||
|
||||
# Run FastAPI locally
|
||||
uvicorn app.main:app --reload
|
||||
|
|
@ -128,12 +128,10 @@ Must create `_meta` table with columns: table_name, description, rows, size_byte
|
|||
Orchestrator ATTACHes it automatically.
|
||||
|
||||
### Authentication
|
||||
Pluggable auth providers in `auth/`:
|
||||
- **Google** (`google`): OAuth via Google
|
||||
- **Email** (`email`): Email magic link (itsdangerous token)
|
||||
- **Password** (`password`): Username/password
|
||||
- **Desktop** (`desktop`): JWT for API
|
||||
- New provider = `auth/<name>/provider.py` implementing `AuthProvider`
|
||||
Auth providers in `app/auth/` (FastAPI-based):
|
||||
- **Google**: OAuth via Google
|
||||
- **Email**: Email magic link (itsdangerous token)
|
||||
- **Desktop**: JWT for API
|
||||
|
||||
## Key Implementation Details
|
||||
|
||||
|
|
|
|||
111
auth/__init__.py
111
auth/__init__.py
|
|
@ -1,111 +0,0 @@
|
|||
"""
|
||||
Pluggable authentication provider system.
|
||||
|
||||
Each auth provider lives in auth/<name>/provider.py and implements AuthProvider.
|
||||
Providers are auto-discovered and registered with the Flask app.
|
||||
|
||||
To add a new provider (e.g., Okta):
|
||||
1. Create auth/okta/provider.py
|
||||
2. Implement AuthProvider subclass
|
||||
3. Export `provider` instance at module level
|
||||
4. That's it - no changes to core code needed.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from flask import Blueprint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AuthProvider(ABC):
    """Contract that every pluggable authentication provider fulfils.

    Subclasses must supply an internal name, a Flask blueprint and a
    login-button description; the remaining hooks ship with defaults.
    """

    @abstractmethod
    def get_name(self) -> str:
        """Return the provider's internal identifier (e.g. 'google', 'password')."""

    @abstractmethod
    def get_blueprint(self) -> Blueprint:
        """Return the Flask blueprint that carries this provider's auth routes."""

    @abstractmethod
    def get_login_button(self) -> dict:
        """Describe the button shown for this provider on the login page.

        The returned dict carries:
            text: str - Button label (e.g., "Sign in with Google")
            url: str - Route URL (e.g., "/login/google")
            icon_html: str - SVG or HTML for the icon
            subtitle: str - Optional help text below button
            order: int - Sort order (lower = higher on page)
            css_class: str - Optional CSS class for the button (e.g., "btn-google")
            visible: bool - Whether to show on login page (default True)
        """

    def is_available(self) -> bool:
        """Report whether the provider is configured and usable.

        The base implementation always answers True. Subclasses override it
        to inspect env vars, API keys, etc.; returning False makes discovery
        skip the provider.
        """
        return True

    def get_display_name(self) -> str:
        """Return the UI-facing name: the internal name in title case."""
        internal_name = self.get_name()
        return internal_name.title()

    def init_app(self, app) -> None:
        """Hook for binding the provider to the Flask app (e.g. OAuth setup).

        The default does nothing.
        """
        return None
|
||||
|
||||
|
||||
def discover_providers() -> list[AuthProvider]:
    """Collect the usable auth providers found under auth/*/provider.py.

    Every sub-directory of this package whose name does not start with "_"
    and that contains a ``provider.py`` is imported. The module must expose
    a module-level ``provider`` that is an ``AuthProvider`` instance.
    Providers named in the ``auth.disabled_providers`` config value, and
    providers whose ``is_available()`` is falsy, are left out. A provider
    that fails to import or inspect is logged and skipped rather than
    aborting discovery.

    Returns:
        The accepted providers, ordered by their login button's "order"
        value (50 when the button does not define one).
    """
    from app.instance_config import get_value

    configured = get_value("auth", "disabled_providers", default=[])
    disabled = [entry.lower() for entry in (configured or [])]
    accepted = []
    package_dir = Path(__file__).parent

    for candidate in sorted(package_dir.iterdir()):
        if not candidate.is_dir() or candidate.name.startswith("_"):
            continue
        if not (candidate / "provider.py").exists():
            continue

        try:
            module = importlib.import_module(f"auth.{candidate.name}.provider")
            instance = getattr(module, "provider", None)

            if not (instance and isinstance(instance, AuthProvider)):
                logger.warning(
                    f"Auth provider {candidate.name} has no 'provider' instance"
                )
                continue

            if instance.get_name().lower() in disabled:
                logger.info(
                    f"Auth provider disabled by config: {instance.get_name()}"
                )
                continue

            if not instance.is_available():
                logger.debug(
                    f"Auth provider skipped (not available): {candidate.name}"
                )
                continue

            accepted.append(instance)
            logger.info(f"Auth provider loaded: {instance.get_name()}")
        except Exception as exc:
            # Best-effort discovery: one broken provider must not block the rest.
            logger.warning(f"Failed to load auth provider {candidate.name}: {exc}")

    def _button_order(item):
        return item.get_login_button().get("order", 50)

    accepted.sort(key=_button_order)
    return accepted
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
"""
|
||||
Desktop JWT authentication provider.
|
||||
|
||||
Desktop JWT authentication (Flask blueprint).
|
||||
This is NOT a login provider (no login button) - it provides
|
||||
JWT-based API authentication for the native desktop application.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from flask import Blueprint
|
||||
|
||||
import os
|
||||
|
||||
from auth import AuthProvider
|
||||
|
||||
|
||||
class _Config:
    """Environment-derived settings for the desktop JWT provider."""

    # Read once at import time; empty string when the variable is unset.
    DESKTOP_JWT_SECRET = os.getenv("DESKTOP_JWT_SECRET", "")
|
||||
|
||||
|
||||
Config = _Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DesktopAuthProvider(AuthProvider):
    """JWT authentication provider for the desktop app (no visible login button)."""

    def get_name(self) -> str:
        """Return the internal identifier, "desktop"."""
        return "desktop"

    def get_display_name(self) -> str:
        """Return the UI label, "Desktop App"."""
        return "Desktop App"

    def get_blueprint(self) -> Blueprint:
        """Return an empty placeholder blueprint named "desktop_auth"."""
        # Legacy Flask blueprint — removed with webapp/
        placeholder = Blueprint("desktop_auth", __name__)
        return placeholder

    def get_login_button(self) -> dict:
        """Return a blank, hidden button definition (visible is False)."""
        button = dict.fromkeys(
            ("text", "url", "icon_html", "subtitle"), ""
        )
        button["order"] = 100
        button["css_class"] = ""
        button["visible"] = False
        return button

    def is_available(self) -> bool:
        """Report True only when DESKTOP_JWT_SECRET is non-empty."""
        secret = Config.DESKTOP_JWT_SECRET
        return bool(secret)

    def init_app(self, app) -> None:
        """Do nothing; this provider needs no app-level setup."""
        return None
|
||||
|
||||
|
||||
# Module-level provider instance for auto-discovery
|
||||
provider = DesktopAuthProvider()
|
||||
|
|
@ -1,314 +0,0 @@
|
|||
"""
|
||||
Email magic link authentication provider.
|
||||
|
||||
Users enter their email, receive a magic link, click it and they're logged in.
|
||||
No passwords needed. Domain restriction ensures only allowed users can access.
|
||||
|
||||
Email delivery modes:
|
||||
1. SMTP relay (recommended) - configure SMTP_HOST, SMTP_PORT, etc. in .env
|
||||
2. Console mode (development) - link printed to server log, shown in browser
|
||||
"""
|
||||
|
||||
import logging
|
||||
import smtplib
|
||||
import time
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
|
||||
from flask import (
|
||||
Blueprint,
|
||||
current_app,
|
||||
flash,
|
||||
redirect,
|
||||
render_template,
|
||||
request,
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from itsdangerous import BadSignature, SignatureExpired, URLSafeTimedSerializer
|
||||
|
||||
import os
|
||||
|
||||
from auth import AuthProvider
|
||||
from app.instance_config import get_allowed_domains, get_value
|
||||
|
||||
_ALLOWED_DOMAINS = get_allowed_domains()
|
||||
_ALLOWED_EMAILS = [
|
||||
e.strip().lower()
|
||||
for e in os.environ.get("ALLOWED_EMAILS", "").split(",")
|
||||
if e.strip()
|
||||
]
|
||||
|
||||
|
||||
def validate_email_domain(email: str) -> bool:
    """Decide whether *email* is allowed to sign in.

    An address is accepted when, compared case-insensitively, it is listed
    in ``_ALLOWED_EMAILS`` or its domain (the text after the last "@") is
    listed in ``_ALLOWED_DOMAINS``.

    Args:
        email: The address to check; may be empty.

    Returns:
        True when the address is allowed, False otherwise. Values that are
        not shaped like ``local@domain`` (no "@", empty local part, or empty
        domain) are rejected unless they match ``_ALLOWED_EMAILS`` exactly.
    """
    if not email:
        return False

    email_lower = email.lower()
    if email_lower in _ALLOWED_EMAILS:
        return True

    # Splitting with split("@")[-1] would treat a bare "example.com" as an
    # address in that domain; require a real local part and separator.
    local_part, separator, domain = email_lower.rpartition("@")
    if not (local_part and separator and domain):
        return False
    return domain in _ALLOWED_DOMAINS
|
||||
|
||||
|
||||
class _Config:
    """Import-time settings for the email magic-link provider.

    Values come from environment variables, with instance configuration
    (``get_value``) used for the sender fallback and the instance name.
    """

    # SECURITY NOTE(review): this key signs the magic-link tokens. If
    # WEBAPP_SECRET_KEY is unset the well-known fallback below is used and
    # tokens become forgeable — make sure it is set in every deployment.
    SECRET_KEY = os.environ.get("WEBAPP_SECRET_KEY", "dev-secret-key-change-me")
    ALLOWED_DOMAINS = _ALLOWED_DOMAINS
    SMTP_HOST = os.environ.get("SMTP_HOST", "")
    # "SMTP_PORT=" (set but empty) must fall back to 587 instead of
    # crashing the import with int("").
    SMTP_PORT = int(os.environ.get("SMTP_PORT") or "587")
    SMTP_USER = os.environ.get("SMTP_USER", "")
    SMTP_PASSWORD = os.environ.get("SMTP_PASSWORD", "")
    # Sender precedence: SMTP_FROM, then SMTP_USER, then the configured
    # email.from_address, then noreply@example.com.
    SMTP_FROM = os.environ.get(
        "SMTP_FROM",
        os.environ.get(
            "SMTP_USER",
            get_value("email", "from_address", default="noreply@example.com"),
        ),
    )
    # Only the literal value "true" (any case) enables STARTTLS.
    SMTP_USE_TLS = os.environ.get("SMTP_USE_TLS", "true").lower() == "true"
    INSTANCE_NAME = get_value("instance", "name", default="AI Data Analyst")
|
||||
|
||||
|
||||
Config = _Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
email_bp = Blueprint("email_auth", __name__)
|
||||
|
||||
# SVG envelope icon for the login button
|
||||
_EMAIL_ICON_HTML = (
|
||||
'<svg width="24" height="24" viewBox="0 0 24 24" fill="none" '
|
||||
'stroke="currentColor" stroke-width="2" stroke-linecap="round" '
|
||||
'stroke-linejoin="round">'
|
||||
'<rect x="2" y="4" width="20" height="16" rx="2"/>'
|
||||
'<path d="m22 7-8.97 5.7a1.94 1.94 0 0 1-2.06 0L2 7"/>'
|
||||
"</svg>"
|
||||
)
|
||||
|
||||
|
||||
def _get_serializer() -> URLSafeTimedSerializer:
    """Build the timed serializer that signs and verifies magic-link tokens.

    It is keyed with ``Config.SECRET_KEY`` and salted with "email-magic-link".
    """
    signing_key = Config.SECRET_KEY
    return URLSafeTimedSerializer(signing_key, salt="email-magic-link")
|
||||
|
||||
|
||||
def _generate_magic_token(email: str) -> str:
    """Return a signed token embedding the lower-cased email and issue time."""
    serializer = _get_serializer()
    payload = {"email": email.lower(), "t": int(time.time())}
    return serializer.dumps(payload)
|
||||
|
||||
|
||||
def _verify_magic_token(token: str, max_age_seconds: int = 900) -> str | None:
    """Check a magic-link token and extract the email it carries.

    Args:
        token: Signed token taken from the magic link URL.
        max_age_seconds: How long a token stays valid (default 900 s = 15 min).

    Returns:
        The "email" entry of the token payload, or None when the token has
        expired or its signature is invalid (both cases are logged).
    """
    serializer = _get_serializer()
    try:
        payload = serializer.loads(token, max_age=max_age_seconds)
    except SignatureExpired:
        # Listed before BadSignature so expiry gets its own log message.
        logger.warning("Magic link token expired")
        return None
    except BadSignature:
        logger.warning("Invalid magic link token")
        return None
    return payload.get("email")
|
||||
|
||||
|
||||
def _send_magic_email(email: str, magic_url: str) -> bool:
    """Send the magic-link email through the configured SMTP relay.

    The message is multipart/alternative with a plain-text and an HTML part.
    Port 465 uses implicit TLS (``SMTP_SSL``); any other port uses plain SMTP
    upgraded with STARTTLS when ``Config.SMTP_USE_TLS`` is set. Login is
    attempted only when both SMTP user and password are configured.

    Args:
        email: Recipient address.
        magic_url: Absolute sign-in URL to embed in the message.

    Returns:
        True when the message was handed to the SMTP server; False when no
        SMTP host is configured or when sending raised any exception (the
        failure is logged, never propagated).
    """
    # Imported locally so the block does not depend on a new file-level import.
    from html import escape

    smtp_host = Config.SMTP_HOST
    if not smtp_host:
        return False

    msg = MIMEMultipart("alternative")
    msg["Subject"] = f"Sign in to {Config.INSTANCE_NAME}"
    msg["From"] = Config.SMTP_FROM
    msg["To"] = email

    text_body = (
        f"Sign in to {Config.INSTANCE_NAME}\n\n"
        f"Click the link below to sign in:\n{magic_url}\n\n"
        f"This link expires in 15 minutes.\n"
        f"If you didn't request this, ignore this email."
    )

    # Escape everything interpolated into markup: "&" in the URL query and
    # any markup characters in the instance name must not break the HTML.
    safe_name = escape(str(Config.INSTANCE_NAME))
    safe_url = escape(magic_url, quote=True)

    html_body = f"""<!DOCTYPE html>
<html>
<body style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; max-width: 480px; margin: 0 auto; padding: 20px;">
<h2 style="color: #1a1a2e;">Sign in to {safe_name}</h2>
<p>Click the button below to sign in:</p>
<p style="text-align: center; margin: 30px 0;">
<a href="{safe_url}"
style="background: #4361ee; color: white; padding: 12px 32px;
text-decoration: none; border-radius: 6px; font-weight: 500;">
Sign In
</a>
</p>
<p style="color: #666; font-size: 14px;">
This link expires in 15 minutes.<br>
If you didn't request this, ignore this email.
</p>
<hr style="border: none; border-top: 1px solid #eee; margin: 20px 0;">
<p style="color: #999; font-size: 12px;">
Or copy and paste this URL into your browser:<br>
<code style="word-break: break-all;">{safe_url}</code>
</p>
</body>
</html>"""

    msg.attach(MIMEText(text_body, "plain"))
    msg.attach(MIMEText(html_body, "html"))

    try:
        smtp_port = Config.SMTP_PORT
        implicit_tls = smtp_port == 465
        connect = smtplib.SMTP_SSL if implicit_tls else smtplib.SMTP

        # The context manager issues QUIT and closes the socket even when
        # starttls/login/sendmail raises, so a failed send no longer leaks
        # the connection.
        with connect(smtp_host, smtp_port, timeout=10) as server:
            if Config.SMTP_USE_TLS and not implicit_tls:
                server.starttls()

            smtp_user = Config.SMTP_USER
            smtp_password = Config.SMTP_PASSWORD
            if smtp_user and smtp_password:
                server.login(smtp_user, smtp_password)

            server.sendmail(Config.SMTP_FROM, [email], msg.as_string())

        logger.info("Magic link email sent to %s via SMTP", email)
        return True

    except Exception as e:
        # Delivery is best-effort from the caller's point of view: report
        # failure through the return value instead of raising.
        logger.error("Failed to send magic link email to %s: %s", email, e)
        return False
|
||||
|
||||
|
||||
# --- Routes ---
|
||||
|
||||
|
||||
@email_bp.route("/login/email")
def login_email_form():
    """Render the page where the user types their email address."""
    template_context = {"allowed_domains": Config.ALLOWED_DOMAINS}
    return render_template("login_magic_link.html", **template_context)
|
||||
|
||||
|
||||
@email_bp.route("/login/email/send", methods=["POST"])
def send_magic_link():
    """Validate the submitted email and deliver a magic sign-in link.

    Flow:
      1. Reject an empty address or one that fails ``validate_email_domain``
         (flash an error and return to the form).
      2. Generate a signed token and the absolute verification URL.
      3. Try to deliver it by SMTP and render the "sent" page on success.
      4. If no SMTP host is configured (console/development mode), log the
         link and show it on the page.
      5. If SMTP IS configured but delivery failed, flash an error and return
         to the form. The link is never shown in that case, because whoever
         submitted the form could then sign in as any allowed address.
    """
    email = request.form.get("email", "").strip().lower()

    if not email:
        flash("Please enter your email address.", "error")
        return redirect(url_for("email_auth.login_email_form"))

    if not validate_email_domain(email):
        domains_str = ", ".join(f"@{d}" for d in Config.ALLOWED_DOMAINS)
        flash(
            f"Only {domains_str} email addresses are allowed.",
            "error",
        )
        return redirect(url_for("email_auth.login_email_form"))

    # Generate magic link
    token = _generate_magic_token(email)
    magic_url = url_for("email_auth.verify_magic_link", token=token, _external=True)

    if _send_magic_email(email, magic_url):
        flash("Check your email for the sign-in link.", "info")
        return render_template(
            "login_magic_link_sent.html",
            email=email,
            console_mode=False,
        )

    if Config.SMTP_HOST:
        # A relay is configured, so this is a delivery failure, not dev mode.
        # Falling back to console mode here would be an authentication bypass.
        logger.error("Magic link delivery failed for %s; link not disclosed", email)
        flash("We could not send the sign-in email. Please try again later.", "error")
        return redirect(url_for("email_auth.login_email_form"))

    # Console/development mode (no SMTP host configured) - show link directly
    logger.info("MAGIC LINK for %s: %s", email, magic_url)
    return render_template(
        "login_magic_link_sent.html",
        email=email,
        magic_url=magic_url,
        console_mode=True,
    )
|
||||
|
||||
|
||||
@email_bp.route("/login/email/verify/<token>")
def verify_magic_link(token: str):
    """Consume a magic-link token and start the user's session.

    An invalid or expired token sends the user back to the email form. A
    valid token whose address no longer passes ``validate_email_domain``
    sends them to the main login page. Otherwise ``session["user"]`` is
    populated and the user is redirected to the dashboard.
    """
    verified_email = _verify_magic_token(token)
    if not verified_email:
        flash("This sign-in link has expired or is invalid. Please try again.", "error")
        return redirect(url_for("email_auth.login_email_form"))

    # Re-validate: the allow-list may have changed after the token was issued.
    if not validate_email_domain(verified_email):
        flash("Your email is no longer authorized.", "error")
        return redirect(url_for("auth.login"))

    # Display name is derived from the local part: "jane.doe" -> "Jane Doe".
    local_part = verified_email.split("@")[0]
    display_name = local_part.replace(".", " ").title()

    # Same session shape as the other auth providers use.
    session["user"] = dict(email=verified_email, name=display_name, picture="")

    logger.info("User logged in via magic link: %s", verified_email)
    return redirect(url_for("dashboard"))
|
||||
|
||||
|
||||
# --- Provider class ---
|
||||
|
||||
|
||||
class EmailAuthProvider(AuthProvider):
    """Provider that signs users in through an emailed magic link."""

    def get_name(self) -> str:
        """Return the internal identifier, "email"."""
        return "email"

    def get_display_name(self) -> str:
        """Return the UI label, "Email"."""
        return "Email"

    def get_blueprint(self) -> Blueprint:
        """Return the module-level blueprint holding the /login/email routes."""
        return email_bp

    def get_login_button(self) -> dict:
        """Build the login-page button, with a subtitle naming the allowed domains."""
        domains = Config.ALLOWED_DOMAINS
        if not domains:
            domain_str = ""
        elif len(domains) > 1:
            domain_str = ", ".join(f"@{d}" for d in domains)
        else:
            domain_str = f"@{domains[0]}"

        if domain_str:
            subtitle = f'For <strong>{domain_str}</strong> email addresses.'
        else:
            subtitle = ""

        return {
            "text": "Sign in with Email",
            "url": "/login/email",
            "icon_html": _EMAIL_ICON_HTML,
            "subtitle": subtitle,
            "order": 20,
            "css_class": "btn-email",
            "visible": True,
        }

    def is_available(self) -> bool:
        """Report True when at least one allowed domain is configured."""
        domain_count = len(Config.ALLOWED_DOMAINS)
        return domain_count > 0

    def init_app(self, app) -> None:
        """Do nothing; this provider needs no app-level setup."""
        return None
|
||||
|
||||
|
||||
# Module-level provider instance for auto-discovery
|
||||
provider = EmailAuthProvider()
|
||||
|
|
@ -1,157 +0,0 @@
|
|||
"""
|
||||
Google OAuth authentication provider.
|
||||
|
||||
Handles Google Sign-In flow with domain validation.
|
||||
Google OAuth flow with domain validation (Flask blueprint).
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from authlib.integrations.flask_client import OAuth
|
||||
from flask import Blueprint, flash, redirect, session, url_for
|
||||
|
||||
import os
|
||||
|
||||
from auth import AuthProvider
|
||||
from app.instance_config import get_allowed_domains
|
||||
|
||||
_ALLOWED_DOMAINS = get_allowed_domains()
|
||||
_ALLOWED_EMAILS = [
|
||||
e.strip().lower()
|
||||
for e in os.environ.get("ALLOWED_EMAILS", "").split(",")
|
||||
if e.strip()
|
||||
]
|
||||
|
||||
|
||||
def validate_email_domain(email: str) -> bool:
    """Decide whether *email* is allowed to sign in.

    An address is accepted when, compared case-insensitively, it is listed
    in ``_ALLOWED_EMAILS`` or its domain (the text after the last "@") is
    listed in ``_ALLOWED_DOMAINS``.

    Args:
        email: The address to check; may be empty.

    Returns:
        True when the address is allowed, False otherwise. Values that are
        not shaped like ``local@domain`` (no "@", empty local part, or empty
        domain) are rejected unless they match ``_ALLOWED_EMAILS`` exactly.
    """
    if not email:
        return False

    email_lower = email.lower()
    if email_lower in _ALLOWED_EMAILS:
        return True

    # Splitting with split("@")[-1] would treat a bare "example.com" as an
    # address in that domain; require a real local part and separator.
    local_part, separator, domain = email_lower.rpartition("@")
    if not (local_part and separator and domain):
        return False
    return domain in _ALLOWED_DOMAINS
|
||||
|
||||
|
||||
class _Config:
    """Import-time settings for the Google OAuth provider."""

    ALLOWED_DOMAINS = _ALLOWED_DOMAINS
    # OAuth client credentials; empty strings when the variables are unset.
    GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID", "")
    GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET", "")
|
||||
|
||||
|
||||
Config = _Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
google_bp = Blueprint("google_auth", __name__)
|
||||
oauth = OAuth()
|
||||
|
||||
# Google SVG icon for the login button
|
||||
_GOOGLE_ICON_HTML = (
|
||||
'<svg class="google-icon" viewBox="0 0 24 24" width="24" height="24">'
|
||||
'<path fill="#4285F4" d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 '
|
||||
"1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z\"/>"
|
||||
'<path fill="#34A853" d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 '
|
||||
"1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z\"/>"
|
||||
'<path fill="#FBBC05" d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07'
|
||||
'H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z"/>'
|
||||
'<path fill="#EA4335" d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 '
|
||||
'14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z"/>'
|
||||
"</svg>"
|
||||
)
|
||||
|
||||
|
||||
@google_bp.route("/login/google")
def login_google():
    """Start the Google OAuth flow by redirecting to Google's consent page."""
    # Google sends the user back to the absolute URL of the authorize route.
    callback_url = url_for("google_auth.authorize", _external=True)
    google_client = oauth.google
    return google_client.authorize_redirect(callback_url)
|
||||
|
||||
|
||||
@google_bp.route("/authorize")
def authorize():
    """Handle the OAuth callback from Google and establish the session.

    The access token is exchanged, then the ``userinfo`` claims are checked:
    they must be present, the email must not be flagged as unverified, and
    the email must pass ``validate_email_domain``. On success
    ``session["user"]`` is populated and the user is redirected to the
    dashboard; every failure flashes an error and redirects to the login
    page. Any exception raised during the exchange is logged with its
    traceback and reported as a generic authentication failure.
    """
    try:
        token = oauth.google.authorize_access_token()
        userinfo = token.get("userinfo")

        if not userinfo:
            logger.warning("No userinfo in OAuth response")
            flash("Failed to get user information from Google.", "error")
            return redirect(url_for("auth.login"))

        email = userinfo.get("email", "")
        name = userinfo.get("name", "")

        # Authorization is based on the email's domain, so an address Google
        # explicitly reports as unverified must not be trusted. A missing
        # claim keeps the previous behavior (treated as verified).
        if not userinfo.get("email_verified", True):
            logger.warning("Login attempt with unverified Google email: %s", email)
            flash("Your Google email address is not verified.", "error")
            return redirect(url_for("auth.login"))

        # Validate domain
        if not validate_email_domain(email):
            logger.warning("Login attempt from non-allowed domain: %s", email)
            domains_str = ", ".join(f"@{d}" for d in Config.ALLOWED_DOMAINS)
            flash(
                f"Only {domains_str} email addresses are allowed.", "error"
            )
            return redirect(url_for("auth.login"))

        # Store user info in session (shared contract across all providers)
        session["user"] = {
            "email": email,
            "name": name,
            "picture": userinfo.get("picture", ""),
        }

        logger.info("User logged in via Google: %s", email)
        return redirect(url_for("dashboard"))

    except Exception as e:
        # Request boundary: never surface OAuth internals to the user.
        logger.exception("OAuth error: %s", e)
        flash("Authentication failed. Please try again.", "error")
        return redirect(url_for("auth.login"))
|
||||
|
||||
|
||||
class GoogleAuthProvider(AuthProvider):
    """Provider that signs users in with Google OAuth."""

    def get_name(self) -> str:
        """Return the internal identifier, "google"."""
        return "google"

    def get_display_name(self) -> str:
        """Return the UI label, "Google"."""
        return "Google"

    def get_blueprint(self) -> Blueprint:
        """Return the module-level blueprint holding the Google auth routes."""
        return google_bp

    def get_login_button(self) -> dict:
        """Build the login-page button, with a subtitle naming the allowed domains."""
        domains = Config.ALLOWED_DOMAINS
        if len(domains) > 1:
            domain_str = ", ".join(f"@{d}" for d in domains)
        elif domains:
            domain_str = f"@{domains[0]}"
        else:
            domain_str = ""

        if domain_str:
            subtitle = f'For <strong>{domain_str}</strong> email addresses.'
        else:
            subtitle = ""

        return {
            "text": "Sign in with Google",
            "url": "/login/google",
            "icon_html": _GOOGLE_ICON_HTML,
            "subtitle": subtitle,
            "order": 10,
            "css_class": "btn-google",
            "visible": True,
        }

    def is_available(self) -> bool:
        """Report True only when GOOGLE_CLIENT_ID is non-empty."""
        client_id = Config.GOOGLE_CLIENT_ID
        return bool(client_id)

    def init_app(self, app) -> None:
        """Bind the shared OAuth registry to *app* and register the Google client."""
        oauth.init_app(app)
        # Endpoints are resolved from Google's OpenID discovery document.
        discovery_url = "https://accounts.google.com/.well-known/openid-configuration"
        requested_scope = {"scope": "openid email profile"}
        oauth.register(
            name="google",
            client_id=Config.GOOGLE_CLIENT_ID,
            client_secret=Config.GOOGLE_CLIENT_SECRET,
            server_metadata_url=discovery_url,
            client_kwargs=requested_scope,
        )
|
||||
|
||||
|
||||
# Module-level provider instance for auto-discovery
|
||||
provider = GoogleAuthProvider()
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
"""
|
||||
Email/password authentication provider.
|
||||
|
||||
Email/password authentication (Flask blueprint).
|
||||
Available only when SENDGRID_API_KEY is configured.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from flask import Blueprint
|
||||
|
||||
import os
|
||||
|
||||
from auth import AuthProvider
|
||||
|
||||
|
||||
class _Config:
    """Environment-derived settings for the password provider."""

    # Read once at import time; empty string when the variable is unset.
    SENDGRID_API_KEY = os.getenv("SENDGRID_API_KEY", "")
|
||||
|
||||
|
||||
Config = _Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PasswordAuthProvider(AuthProvider):
    """Email/password provider intended for external users."""

    def get_name(self) -> str:
        """Return the internal identifier, "password"."""
        return "password"

    def get_display_name(self) -> str:
        """Return the UI label, "Email"."""
        return "Email"

    def get_blueprint(self) -> Blueprint:
        """Return an empty placeholder blueprint named "password_auth"."""
        # Legacy Flask blueprint — removed with webapp/
        placeholder = Blueprint("password_auth", __name__)
        return placeholder

    def get_login_button(self) -> dict:
        """Return the login-page button definition for this provider."""
        button = dict(
            text="Sign in with Email",
            url="/login/email",
            icon_html="",
            subtitle="For external users (investors, partners).",
            order=20,
            css_class="btn-secondary",
            visible=True,
        )
        return button

    def is_available(self) -> bool:
        """Report True only when SENDGRID_API_KEY is non-empty."""
        api_key = Config.SENDGRID_API_KEY
        return bool(api_key)

    def init_app(self, app) -> None:
        """Do nothing; this provider needs no app-level setup."""
        return None
|
||||
|
||||
|
||||
# Module-level provider instance for auto-discovery
|
||||
provider = PasswordAuthProvider()
|
||||
|
|
@ -6,33 +6,43 @@ requires-python = ">=3.9"
|
|||
license = "MIT"
|
||||
|
||||
dependencies = [
|
||||
# Core database
|
||||
"duckdb>=0.9.0",
|
||||
# Web framework (FastAPI)
|
||||
"fastapi>=0.115.0",
|
||||
"uvicorn[standard]>=0.32.0",
|
||||
"python-multipart>=0.0.9",
|
||||
"jinja2>=3.1.0",
|
||||
# Authentication
|
||||
"PyJWT>=2.8.0",
|
||||
"itsdangerous>=2.1.0",
|
||||
# HTTP client
|
||||
"httpx>=0.27.0",
|
||||
# CLI
|
||||
"typer>=0.12.0",
|
||||
"rich>=13.0.0",
|
||||
# Configuration
|
||||
"python-dotenv>=1.0.0",
|
||||
"pyyaml>=6.0",
|
||||
# Data processing
|
||||
"pandas>=2.0.0",
|
||||
"pyarrow>=12.0.0",
|
||||
"pytz>=2024.1",
|
||||
# Data source connectors
|
||||
"kbcstorage>=0.9.0",
|
||||
"google-cloud-bigquery>=3.0.0",
|
||||
"google-cloud-bigquery-storage>=2.0.0",
|
||||
# Profiler visualizations
|
||||
"matplotlib>=3.8.0",
|
||||
"numpy>=1.24.0",
|
||||
# Sample data generation
|
||||
"faker>=24.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
da = "cli.main:app"
|
||||
|
||||
[project.optional-dependencies]
|
||||
connectors = [
|
||||
"kbcstorage>=0.9.0",
|
||||
"google-cloud-bigquery>=3.0.0",
|
||||
"google-cloud-bigquery-storage>=2.0.0",
|
||||
"pandas>=2.0.0",
|
||||
"pyarrow>=12.0.0",
|
||||
]
|
||||
telegram = [
|
||||
"aiohttp>=3.9.0",
|
||||
]
|
||||
dev = [
|
||||
"pytest>=7.0.0",
|
||||
"pytest-mock>=3.0.0",
|
||||
|
|
|
|||
|
|
@ -1,69 +1,40 @@
|
|||
# Data source adapters (install only what you need)
|
||||
kbcstorage>=0.9.0 # For Keboola adapter
|
||||
google-cloud-bigquery>=3.0.0 # For BigQuery adapter
|
||||
google-cloud-bigquery-storage>=2.0.0 # For BigQuery adapter (fast Arrow transfer)
|
||||
|
||||
# Data processing
|
||||
# pandas - core tabular data processing library
|
||||
# pyarrow - Parquet format support and fast operations
|
||||
# pytz - timezone support required by DuckDB for reading timezone-aware Parquet columns
|
||||
pandas>=2.0.0
|
||||
pyarrow>=12.0.0
|
||||
pytz>=2024.1
|
||||
|
||||
# Analytical database
|
||||
# DuckDB - in-process SQL OLAP database for analytical queries
|
||||
# Core database
|
||||
duckdb>=0.9.0
|
||||
|
||||
# Configuration
|
||||
# python-dotenv - loading environment variables from .env files
|
||||
# pyyaml - parsing YAML configuration from data_description.md
|
||||
python-dotenv>=1.0.0
|
||||
pyyaml>=6.0
|
||||
|
||||
# Progress tracking and logging
|
||||
# tqdm - progress bars for long-running operations (download, sync)
|
||||
tqdm>=4.65.0
|
||||
|
||||
# Web application (Google SSO portal)
|
||||
# flask - web framework for self-service portal (legacy, being replaced by FastAPI)
|
||||
# authlib - OAuth 2.0 / OpenID Connect library for Google SSO
|
||||
# gunicorn - WSGI server for production deployment
|
||||
flask>=3.0.0
|
||||
authlib>=1.3.0
|
||||
gunicorn>=21.0.0
|
||||
|
||||
# FastAPI - new unified web framework (API + web UI)
|
||||
# Web framework (FastAPI)
|
||||
fastapi>=0.115.0
|
||||
uvicorn[standard]>=0.32.0
|
||||
python-multipart>=0.0.9
|
||||
jinja2>=3.1.0
|
||||
|
||||
# Telegram notification bot
|
||||
# httpx - async HTTP client for Telegram API and unix socket communication
|
||||
# aiohttp - async HTTP server for bot's internal send API
|
||||
# Authentication
|
||||
PyJWT>=2.8.0
|
||||
itsdangerous>=2.1.0
|
||||
|
||||
# HTTP client
|
||||
httpx>=0.27.0
|
||||
aiohttp>=3.9.0
|
||||
|
||||
# CLI
|
||||
typer>=0.12.0
|
||||
rich>=13.0.0
|
||||
|
||||
# Configuration
|
||||
python-dotenv>=1.0.0
|
||||
pyyaml>=6.0
|
||||
|
||||
# Data processing
|
||||
pandas>=2.0.0
|
||||
pyarrow>=12.0.0
|
||||
pytz>=2024.1
|
||||
|
||||
# Data source connectors
|
||||
kbcstorage>=0.9.0
|
||||
google-cloud-bigquery>=3.0.0
|
||||
google-cloud-bigquery-storage>=2.0.0
|
||||
|
||||
# Profiler visualizations
|
||||
matplotlib>=3.8.0
|
||||
numpy>=1.24.0
|
||||
|
||||
# Desktop app authentication
|
||||
# PyJWT - JWT token creation and validation for desktop app auth
|
||||
PyJWT>=2.8.0
|
||||
|
||||
# Password authentication for external users
|
||||
# argon2-cffi - modern password hashing algorithm (Argon2id)
|
||||
# sendgrid - email service for setup/reset links
|
||||
argon2-cffi>=23.1.0
|
||||
sendgrid>=6.11.0
|
||||
|
||||
# Corporate Memory knowledge extraction
|
||||
# anthropic - Claude API client for HAIKU-based knowledge extraction
|
||||
anthropic>=0.39.0
|
||||
|
||||
# OpenAI-compatible API client for LLM proxy routing (LiteLLM, OpenRouter, etc.)
|
||||
openai>=1.0.0
|
||||
|
||||
# Sample data generation (development/testing)
|
||||
# faker - realistic synthetic data for demo datasets
|
||||
# Sample data generation
|
||||
faker>=24.0.0
|
||||
|
|
|
|||
|
|
@ -39,16 +39,19 @@ echo "3️⃣ Activating virtual environment..."
|
|||
source .venv/bin/activate
|
||||
echo " ✅ Virtual environment activated"
|
||||
|
||||
# Upgrade pip
|
||||
# Install uv if not available
|
||||
echo ""
|
||||
echo "4️⃣ Upgrading pip..."
|
||||
pip install --upgrade pip --quiet
|
||||
echo " ✅ pip upgraded"
|
||||
echo "4️⃣ Checking uv..."
|
||||
if ! command -v uv &> /dev/null; then
echo " Installing uv..."
curl -LsSf https://astral.sh/uv/install.sh | sh
# The installer puts the binary in a per-user bin directory that is not
# necessarily on this shell's PATH yet (presumably ~/.local/bin, or
# ~/.cargo/bin for older releases — confirm against the uv docs).
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
# "curl | sh" hides a failed download; make sure uv is usable before the
# dependency-install step below relies on it.
if ! command -v uv &> /dev/null; then
echo " ❌ uv installation failed (uv not found on PATH)" >&2
exit 1
fi
fi
|
||||
echo " ✅ uv available"
|
||||
|
||||
# Install dependencies
|
||||
echo ""
|
||||
echo "5️⃣ Installing dependencies from requirements.txt..."
|
||||
pip install -r requirements.txt --quiet
|
||||
uv pip install -r requirements.txt --quiet
|
||||
echo " ✅ Dependencies installed"
|
||||
|
||||
# Create folders
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Sync orchestrator — ATTACHes extract.duckdb files into master analytics.duckdb."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
|
|
@ -144,28 +145,39 @@ class SyncOrchestrator:
|
|||
tables.append(table_name)
|
||||
|
||||
# Update sync_state in system DB
|
||||
self._update_sync_state(meta_rows)
|
||||
self._update_sync_state(meta_rows, source_name)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to attach %s: %s", source_name, e)
|
||||
|
||||
return tables
|
||||
|
||||
def _update_sync_state(self, meta_rows: list) -> None:
|
||||
def _update_sync_state(self, meta_rows: list, source_name: str) -> None:
|
||||
"""Update sync_state table in system.duckdb from _meta entries."""
|
||||
try:
|
||||
from src.db import get_system_db
|
||||
from src.repositories.sync_state import SyncStateRepository
|
||||
|
||||
extracts_dir = _get_extracts_dir()
|
||||
sys_conn = get_system_db()
|
||||
try:
|
||||
repo = SyncStateRepository(sys_conn)
|
||||
for table_name, rows, size_bytes, query_mode in meta_rows:
|
||||
# Compute hash from parquet file stats (fast, no file read)
|
||||
pq_path = extracts_dir / source_name / "data" / f"{table_name}.parquet"
|
||||
if pq_path.exists():
|
||||
stat = pq_path.stat()
|
||||
file_hash = hashlib.md5(
|
||||
f"{stat.st_mtime_ns}:{stat.st_size}".encode()
|
||||
).hexdigest()[:12]
|
||||
else:
|
||||
file_hash = ""
|
||||
|
||||
repo.update_sync(
|
||||
table_id=table_name,
|
||||
rows=rows or 0,
|
||||
file_size_bytes=size_bytes or 0,
|
||||
hash="", # TODO: compute from parquet file
|
||||
hash=file_hash,
|
||||
)
|
||||
finally:
|
||||
sys_conn.close()
|
||||
|
|
|
|||
Loading…
Reference in a new issue