BigQuery connector that syncs BQ tables to local Parquet files via PyArrow (no CSV intermediate step). Supports full refresh, timestamp-based incremental (via incremental_column), and partition-based sync strategies. - connectors/bigquery/client.py: BQ API wrapper with ADC auth, parameterized queries, metadata cache, cross-project support (job project != data project) - connectors/bigquery/adapter.py: DataSource implementation with merge/dedup - src/config.py: Add incremental_column field to TableConfig - 72 unit tests (mocked, no GCP SDK required)
129 lines
4.7 KiB
Text
129 lines
4.7 KiB
Text
# AI Data Analyst - Instance Configuration
# ==========================================
# This is the main configuration file for your instance.
# Copy to instance.yaml and fill in your values.
#
# SECRET VALUES use ${ENV_VAR} syntax - the actual values go in the .env file.
# Non-secret values are set directly here.

# --- Instance branding ---
# Branding values for this instance. All keys below are children of `instance`.
instance:
  name: "AI Data Analyst"
  subtitle: "Your Organization"
  copyright: "Your Organization"
  # logo_svg: Full <svg> element for header logo (optional, default: Keboola logo)
  # Example: '<svg width="120" height="30" viewBox="0 0 100 30" xmlns="http://www.w3.org/2000/svg"><text y="22" font-size="24" fill="#333">Logo</text></svg>'

# --- Server ---
# Where the instance is installed. `hostname` and `host` are empty placeholders
# to be filled in.
server:
  hostname: ""                  # DNS name (e.g., "data.acme.com")
  host: ""                      # IP address
  app_dir: "/opt/data-analyst"  # Installation directory

# --- Admin users ---
# Admins manage the server, own data files, and get unlimited resource limits.
# SSH keys are used by server/setup.sh during provisioning.
# This is a list: each `- username` item starts one admin entry, and
# `ssh_public_key` belongs to that same entry.
admins:
  - username: "admin"
    ssh_public_key: "ssh-ed25519 AAAA..."

# --- Deployment ---
# How the application code is deployed and which repository/branch is used.
deployment:
  method: "manual"  # manual | github_actions
  repo_url: ""      # e.g., "git@github.com:acme/ai-data-analyst.git"
  branch: "main"

# --- Authentication ---
# At minimum, set allowed_domain and webapp_secret_key.
# Email magic link auth works out of the box (no external service needed).
# Google OAuth is optional - add credentials to enable it.
auth:
  # Email domain(s) for login, comma-separated
  # (e.g., "acme.com" or "acme.com, partner.org")
  allowed_domain: ""
  webapp_secret_key: "${WEBAPP_SECRET_KEY}"
  # Optional: Google OAuth (if not set, only email magic link is available)
  google_client_id: "${GOOGLE_CLIENT_ID}"
  google_client_secret: "${GOOGLE_CLIENT_SECRET}"

# --- Theme (optional) ---
# Customize colors, fonts, and shape to match your brand.
# All values are optional - defaults provide a clean blue theme.
# See docs/theme-reference.html for a visual guide.
#
# Every option below is commented out, so `theme:` currently parses as null.
# Uncomment a line (keeping its 2-space indent) to override that value.
# NOTE(review): confirm the config loader treats a null `theme` as "use defaults".
theme:
  # primary: "#0073D1"                         # Main brand color (buttons, links, accents)
  # primary_dark: "#005BA3"                    # Hover/active state of primary
  # primary_light: "rgba(0, 115, 209, 0.1)"    # Light tint backgrounds
  # text_primary: "#1A253C"                    # Main text color
  # text_secondary: "#6B7280"                  # Muted/secondary text
  # background: "#F5F7FA"                      # Page background
  # surface: "#FFFFFF"                         # Card/panel background
  # border: "#E5E7EB"                          # Borders and dividers
  # font_primary: "'Inter', system-ui, sans-serif"
  # font_url: "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap"
  # radius: "6px"                              # Border radius (cards, buttons, inputs)
  # success: "#10B77F"
  # warning: "#F59F0A"
  # error: "#EA580C"

# --- Data source ---
# `type` names the connector to use; `keboola` and `bigquery` below hold the
# connector-specific settings as nested mappings.
data_source:
  type: "keboola"  # keboola | bigquery | local
  keboola:
    storage_token: "${KEBOOLA_STORAGE_TOKEN}"
    stack_url: ""  # e.g., "https://connection.keboola.com"
    project_id: ""
  bigquery:
    project: "${BIGQUERY_PROJECT}"    # GCP project for job execution/billing
    location: "${BIGQUERY_LOCATION}"  # BigQuery location (e.g., "us-central1", "US")
    # Uses ADC (Application Default Credentials) - VM service account on GCP
    # Data can live in a different project -- use fully-qualified table IDs in data_description.md

# --- Email delivery (optional, for magic link auth) ---
# Without SMTP, magic links are shown directly in browser (development mode).
# For production, configure any SMTP relay (Gmail, Mailgun, SendGrid SMTP, etc.)
email:
  from_address: "noreply@example.com"
  from_name: "AI Data Analyst"
  smtp_host: "${SMTP_HOST}"  # e.g., "smtp.gmail.com"
  smtp_port: 587             # 587 for STARTTLS, 465 for SSL
  smtp_user: "${SMTP_USER}"
  smtp_password: "${SMTP_PASSWORD}"

# --- Desktop app (optional) ---
# JWT issuer/secret and URL scheme for the desktop app; the secret comes from
# the environment via ${DESKTOP_JWT_SECRET}.
desktop:
  jwt_issuer: "data-analyst"
  jwt_secret: "${DESKTOP_JWT_SECRET}"
  url_scheme: "data-analyst"

# --- Telegram notifications (optional) ---
# The bot token is a secret and comes from the environment; the other two
# values are empty placeholders.
telegram:
  bot_token: "${TELEGRAM_BOT_TOKEN}"
  bot_username: ""
  domain_suffix: ""

# --- Jira integration (optional) ---
# Tokens and the webhook secret come from the environment; the remaining
# values are empty placeholders.
jira:
  domain: ""
  email: ""
  api_token: "${JIRA_API_TOKEN}"
  webhook_secret: "${JIRA_WEBHOOK_SECRET}"
  sla_email: ""
  sla_api_token: "${JIRA_SLA_API_TOKEN}"
  cloud_id: ""

# --- Corporate Memory AI (optional) ---
# The API key is a secret and comes from the environment.
ai:
  anthropic_api_key: "${ANTHROPIC_API_KEY}"

# --- User display (for Corporate Memory avatars) ---
# Empty mapping by default.
users: {}

# --- Username mapping (webapp email -> server username, only if different) ---
# Empty mapping by default.
username_mapping: {}

# --- Optional datasets (sync settings UI) ---
# Empty mapping by default.
datasets: {}

# --- Data catalog ---
# `categories` is an empty mapping and `order` an empty list by default.
catalog:
  categories: {}
  order: []