infra: refactor Terraform into reusable customer-instance module

Breaking changes:
- infra/main.tf, variables.tf, outputs.tf, terraform.tfvars.example removed
- Single-file monolith replaced by reusable module + example

New structure:
- infra/modules/customer-instance/ — the module:
  - main.tf: VMs, disks, firewall, Secret Manager, dedicated VM SA
  - variables.tf: prod_instance + dev_instances flexible schema
  - outputs.tf: IPs, SA email, JWT secret reference
  - startup-script.sh.tpl: bootstraps VM, fetches secrets, runs compose,
    adds Watchtower for auto-upgrade
- infra/examples/minimal/ — OSS self-host quickstart using the module

Supports:
- Per-customer GCP project isolation
- Branch-aware dev VMs via dev_instances list (any image_tag)
- Persistent /data disk (rebuild-safe)
- OS Login (no per-user SSH keys)
- Caddy TLS mode (opt-in via tls_mode="caddy" + domain)
- Watchtower auto-upgrade (opt-in via upgrade_mode="auto")
This commit is contained in:
ZdenekSrotyr 2026-04-21 15:43:27 +02:00
parent 0dd8b13d62
commit a2c05a5d97
9 changed files with 408 additions and 307 deletions

View file

@ -0,0 +1,54 @@
# Minimal example: single-VM Agnes deploy.
# For OSS self-hosters who want a single prod VM with no dev VMs and no TLS.
# Toolchain pins for this example: Terraform 1.5 or newer and the 5.x line of
# the hashicorp/google provider.
terraform {
  required_providers {
    google = {
      version = "~> 5.0"
      source  = "hashicorp/google"
    }
  }

  required_version = ">= 1.5"
}
# Google provider: the project comes from the caller, the region is fixed.
provider "google" {
  region  = "europe-west1"
  project = var.gcp_project_id
}
# Required input: the GCP project used by the provider and passed to the module.
variable "gcp_project_id" {
  type        = string
  description = "GCP project ID (must have billing enabled)"
}
# Required input: forwarded to the module as seed_admin_email.
variable "admin_email" {
  type        = string
  description = "Email for first admin user"
}
# Instantiates the customer-instance module with one prod VM and no dev VMs.
module "agnes" {
  source = "../../modules/customer-instance"

  customer_name    = "self-hosted"
  gcp_project_id   = var.gcp_project_id
  seed_admin_email = var.admin_email

  # Customize below for your setup
  data_source = "keboola"

  # No dev VMs in the minimal setup.
  dev_instances = []

  prod_instance = {
    name         = "agnes"
    image_tag    = "stable"
    machine_type = "e2-small"
    data_disk_gb = 30
    upgrade_mode = "auto"

    # TLS is switched off here: mode "none" and no domain.
    tls_mode = "none"
    domain   = ""
  }
}
# Re-exports the module's prod_ip output.
output "agnes_ip" {
  value       = module.agnes.prod_ip
  description = "SSH in via: ssh <user>@<ip>; UI at http://<ip>:8000"
}

View file

@ -1,170 +0,0 @@
# Terraform settings: version floor, remote state in a GCS bucket, and the
# google + random providers.
terraform {
  required_version = ">= 1.5"

  required_providers {
    random = {
      version = "~> 3.0"
      source  = "hashicorp/random"
    }
    google = {
      version = "~> 5.0"
      source  = "hashicorp/google"
    }
  }

  # State is stored under the "instances" prefix of this bucket.
  backend "gcs" {
    prefix = "instances"
    bucket = "agnes-terraform-state"
  }
}
# Google provider configured entirely from input variables.
provider "google" {
  zone    = var.zone
  region  = var.region
  project = var.project_id
}
# --- Auto-generated secrets ---
# 48-character password without special characters; consumed by the startup
# script as JWT_SECRET_KEY.
resource "random_password" "jwt_secret" {
  special = false
  length  = 48
}
# --- Network ---
# Opens TCP 22, 80, 443 and 8000 from any source address to instances that
# carry the instance-name network tag, on the "default" network.
resource "google_compute_firewall" "data_analyst" {
  name    = "${var.instance_name}-allow-web"
  network = "default"

  target_tags   = [var.instance_name]
  source_ranges = ["0.0.0.0/0"]

  allow {
    ports    = ["22", "80", "443", "8000"]
    protocol = "tcp"
  }
}
# --- Static IP ---
# Reserved regional address named after the instance.
resource "google_compute_address" "data_analyst" {
  region = var.region
  name   = "${var.instance_name}-ip"
}
# --- Startup script ---
# Builds the VM boot script as a single heredoc string (local.startup_script).
# Terraform interpolates the var.* / resource references while rendering the
# string, i.e. before the script ever runs on the VM. As the echo banners show,
# the script: installs Docker and the compose plugin if missing, clones or
# updates the repository in /opt/data-analyst, writes .env and
# config/instance.yaml, prepares /data, and starts Docker Compose.
# NOTE(review): the JWT secret and the Keboola token are interpolated into the
# script text in plain form.
locals {
# Everything between the SCRIPT markers is the script body; do not add HCL
# comments inside it, they would become part of the script.
startup_script = <<-SCRIPT
#!/bin/bash
set -euo pipefail
exec > /var/log/startup.log 2>&1
echo "=== Installing Docker ==="
if ! command -v docker &> /dev/null; then
curl -fsSL https://get.docker.com | sh
usermod -aG docker ${var.ssh_user}
fi
# Install docker compose plugin
if ! docker compose version &> /dev/null; then
apt-get update && apt-get install -y docker-compose-plugin
fi
echo "=== Cloning repository ==="
APP_DIR="/opt/data-analyst"
if [ ! -d "$APP_DIR" ]; then
git clone https://github.com/keboola/agnes-the-ai-analyst.git "$APP_DIR"
cd "$APP_DIR"
git checkout main
else
cd "$APP_DIR"
git pull origin main || true
fi
echo "=== Creating .env ==="
cat > "$APP_DIR/.env" << 'ENVEOF'
JWT_SECRET_KEY=${random_password.jwt_secret.result}
DATA_DIR=/data
DATA_SOURCE=${var.keboola_token != "" ? "keboola" : "local"}
KEBOOLA_STORAGE_TOKEN=${var.keboola_token}
KEBOOLA_STACK_URL=${var.keboola_stack_url}
KEBOOLA_PROJECT_ID=${var.keboola_project_id}
SEED_ADMIN_EMAIL=${var.admin_email}
LOG_LEVEL=info
ENVEOF
# Strip leading whitespace from heredoc
sed -i 's/^ //' "$APP_DIR/.env"
chmod 600 "$APP_DIR/.env"
echo "=== Creating instance.yaml ==="
mkdir -p "$APP_DIR/config"
cat > "$APP_DIR/config/instance.yaml" << YAMLEOF
instance:
name: "${var.instance_name}"
subtitle: "Data Analytics Platform"
server:
host: "${google_compute_address.data_analyst.address}"
hostname: "${var.domain != "" ? var.domain : google_compute_address.data_analyst.address}"
port: 8000
auth:
allowed_domain: ""
data_source:
type: "${var.keboola_token != "" ? "keboola" : "local"}"
YAMLEOF
echo "=== Creating data directory ==="
mkdir -p /data/state /data/analytics /data/extracts
chown -R 1000:1000 /data
echo "=== Starting Docker Compose ==="
cd "$APP_DIR"
docker compose pull 2>/dev/null || true
docker compose build
docker compose up -d
echo "=== Startup complete ==="
docker compose ps
SCRIPT
}
# --- VM Instance ---
# Single Ubuntu 24.04 VM that boots with local.startup_script and is exposed
# on the reserved static address.
resource "google_compute_instance" "data_analyst" {
  name         = var.instance_name
  zone         = var.zone
  machine_type = var.machine_type

  # Same value the firewall rule uses in target_tags.
  tags = [var.instance_name]

  labels = {
    managed = "terraform"
    app     = "data-analyst"
  }

  boot_disk {
    initialize_params {
      type  = "pd-ssd"
      size  = var.disk_size_gb
      image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64"
    }
  }

  network_interface {
    network = "default"

    # Attach the reserved static address as the external IP.
    access_config {
      nat_ip = google_compute_address.data_analyst.address
    }
  }

  # The public key is read from the local filesystem at plan time.
  metadata = {
    ssh-keys = "${var.ssh_user}:${file(pathexpand(var.ssh_public_key_path))}"
  }

  metadata_startup_script = local.startup_script

  service_account {
    scopes = ["cloud-platform"]
  }
}

View file

@ -0,0 +1,163 @@
# Module requirements: Terraform 1.5+, google 5.x and random 3.x providers.
terraform {
  required_providers {
    random = {
      version = "~> 3.0"
      source  = "hashicorp/random"
    }
    google = {
      version = "~> 5.0"
      source  = "hashicorp/google"
    }
  }

  required_version = ">= 1.5"
}
locals {
  # One flat list holding the prod VM followed by every dev VM, so that the
  # disk, address and VM resources can share a single for_each. Each entry is
  # tagged with its role.
  all_instances = concat(
    [
      merge(var.prod_instance, { role = "prod" })
    ],
    [
      for dev in var.dev_instances : merge(dev, {
        # Dev VMs always get these fixed settings.
        role         = "dev"
        domain       = ""
        tls_mode     = "caddy"
        upgrade_mode = "auto"
        data_disk_gb = 20
        disk_size_gb = 30
      })
    ]
  )
}
# --- Secrets ---
# Secret Manager container for the per-customer JWT secret; the value itself
# lives in the secret version resource.
resource "google_secret_manager_secret" "jwt" {
  project   = var.gcp_project_id
  secret_id = "agnes-${var.customer_name}-jwt-secret"

  # Automatic replication.
  replication {
    auto {}
  }
}
# Generated JWT signing secret: 48 characters, no special characters.
resource "random_password" "jwt" {
  special = false
  length  = 48
}
# Stores the generated password as a version of the JWT secret.
resource "google_secret_manager_secret_version" "jwt" {
  secret_data = random_password.jwt.result
  secret      = google_secret_manager_secret.jwt.id
}
# --- VM service account (dedicated, read-only access to Secret Manager) ---
# Identity the Agnes VMs run as.
resource "google_service_account" "vm" {
  project      = var.gcp_project_id
  account_id   = "agnes-${var.customer_name}-vm"
  display_name = "Agnes VM runtime SA (${var.customer_name})"
}
# Grants the VM service account the Secret Manager accessor role at project
# level.
resource "google_project_iam_member" "vm_secrets" {
  member  = "serviceAccount:${google_service_account.vm.email}"
  role    = "roles/secretmanager.secretAccessor"
  project = var.gcp_project_id
}
# --- Network ---
# Opens TCP 22, 80, 443 and 8000 from any source address to instances tagged
# "agnes-<customer_name>" on the "default" network.
resource "google_compute_firewall" "web" {
  project = var.gcp_project_id
  name    = "agnes-${var.customer_name}-allow-web"
  network = "default"

  target_tags   = ["agnes-${var.customer_name}"]
  source_ranges = ["0.0.0.0/0"]

  allow {
    ports    = ["22", "80", "443", "8000"]
    protocol = "tcp"
  }
}
# --- Persistent data disks + VMs (prod + dev) ---
# One SSD data disk per instance, keyed by instance name; it is a separate
# resource from the VM's boot disk.
resource "google_compute_disk" "data" {
  for_each = { for spec in local.all_instances : spec.name => spec }

  project = var.gcp_project_id
  zone    = var.zone
  name    = "${each.value.name}-data"
  type    = "pd-ssd"
  size    = each.value.data_disk_gb
}
# One reserved regional address per instance, keyed by instance name.
resource "google_compute_address" "ip" {
  for_each = { for spec in local.all_instances : spec.name => spec }

  project = var.gcp_project_id
  region  = var.region
  name    = "${each.value.name}-ip"
}
# One VM per entry in local.all_instances (prod + dev), keyed by instance name.
# Each VM gets its own data disk and reserved address, runs as the dedicated
# VM service account, and boots with the rendered startup-script.sh.tpl.
resource "google_compute_instance" "vm" {
  for_each = { for inst in local.all_instances : inst.name => inst }

  name         = each.value.name
  project      = var.gcp_project_id
  machine_type = each.value.machine_type
  zone         = var.zone

  # Same tag the firewall rule targets.
  tags = ["agnes-${var.customer_name}"]

  boot_disk {
    initialize_params {
      image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64"
      size  = each.value.disk_size_gb
      type  = "pd-ssd"
    }
  }

  # device_name "data" is what the startup script looks for under
  # /dev/disk/by-id/google-data.
  attached_disk {
    source      = google_compute_disk.data[each.key].self_link
    device_name = "data"
  }

  network_interface {
    network = "default"
    access_config {
      nat_ip = google_compute_address.ip[each.key].address
    }
  }

  metadata = {
    enable-oslogin = "TRUE"
  }

  metadata_startup_script = templatefile("${path.module}/startup-script.sh.tpl", {
    customer_name     = var.customer_name
    image_repo        = var.image_repo
    image_tag         = each.value.image_tag
    upgrade_mode      = each.value.upgrade_mode
    tls_mode          = each.value.tls_mode
    domain            = each.value.domain
    data_source       = var.data_source
    keboola_stack_url = var.keboola_stack_url
    seed_admin_email  = var.seed_admin_email
    role              = each.value.role
  })

  service_account {
    email  = google_service_account.vm.email
    scopes = ["cloud-platform"]
  }

  labels = {
    app      = "agnes"
    customer = var.customer_name
    role     = each.value.role
    managed  = "terraform"
  }

  # The startup script reads the JWT secret from Secret Manager on first boot
  # and aborts if that read fails. The only implicit dependency is on the
  # service account itself, so without this list Terraform may create the VM in
  # parallel with the secret version and the IAM grant, and the first boot can
  # run before either exists.
  depends_on = [
    google_secret_manager_secret_version.jwt,
    google_project_iam_member.vm_secrets,
  ]

  # A change to the startup script does not affect a running VM (the script
  # only runs at boot). To apply changes the VM has to be restarted or
  # recreated.
  lifecycle {
    ignore_changes = [metadata_startup_script]
  }
}

View file

@ -0,0 +1,19 @@
# Map of instance name to its reserved external address, for all instances.
output "instance_ips" {
  value       = { for name, addr in google_compute_address.ip : name => addr.address }
  description = "Mapa { name => external IP }"
}
# External address of the prod instance, looked up by the prod instance name.
output "prod_ip" {
  value       = google_compute_address.ip[var.prod_instance.name].address
  description = "External IP prod instance"
}
# Email of the VM service account, so callers can attach further IAM bindings.
output "vm_service_account" {
  value       = google_service_account.vm.email
  description = "Email VM SA (pro další IAM bindings, např. BigQuery)"
}
# The `name` attribute of the JWT secret resource in Secret Manager.
output "jwt_secret_name" {
  value       = google_secret_manager_secret.jwt.name
  description = "Plný název JWT secretu v Secret Manageru"
}

View file

@ -0,0 +1,100 @@
#!/bin/bash
# Agnes VM startup script — templated by Terraform.
# Idempotent — runs on every boot.
#
# Template note: Terraform substitutes the dollar-brace placeholders below
# while rendering this file; a doubled dollar sign is the escape that leaves a
# literal shell parameter expansion in the rendered script.
set -euo pipefail
exec > /var/log/agnes-startup.log 2>&1

CUSTOMER_NAME="${customer_name}"
# NOTE(review): IMAGE_REPO is assigned but not referenced anywhere below.
IMAGE_REPO="${image_repo}"
IMAGE_TAG="${image_tag}"
UPGRADE_MODE="${upgrade_mode}"
TLS_MODE="${tls_mode}"
DOMAIN="${domain}"
DATA_SOURCE="${data_source}"
KEBOOLA_STACK_URL="${keboola_stack_url}"
SEED_ADMIN_EMAIL="${seed_admin_email}"
ROLE="${role}"

echo "=== [Agnes $CUSTOMER_NAME $ROLE] Startup at $(date) ==="

# --- 1. Docker (install if missing) ---
if ! command -v docker &>/dev/null; then
  curl -fsSL https://get.docker.com | sh
fi
if ! docker compose version &>/dev/null; then
  apt-get update && apt-get install -y docker-compose-plugin
fi

# --- 2. Persistent data disk mount ---
DATA_DEV="/dev/disk/by-id/google-data"
DATA_MNT="/data"
if [ -b "$DATA_DEV" ]; then
  # Format only when the device does not already carry an ext4 filesystem.
  if ! blkid "$DATA_DEV" | grep -q ext4; then
    mkfs.ext4 -F "$DATA_DEV"
  fi
  mkdir -p "$DATA_MNT"
  mountpoint -q "$DATA_MNT" || mount -o discard,defaults "$DATA_DEV" "$DATA_MNT"
  # Add the fstab entry once; later boots find it and skip the append.
  grep -qF "$DATA_DEV" /etc/fstab || echo "$DATA_DEV $DATA_MNT ext4 discard,defaults,nofail 0 2" >> /etc/fstab
  mkdir -p "$DATA_MNT/state" "$DATA_MNT/analytics" "$DATA_MNT/extracts"
fi

# --- 3. App directory + docker-compose files from public repo ---
APP_DIR="/opt/agnes"
mkdir -p "$APP_DIR"
cd "$APP_DIR"

# Fetch minimal docker-compose from public repo (main branch — stable)
curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/docker-compose.yml" -o docker-compose.yml
curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/docker-compose.prod.yml" -o docker-compose.prod.yml

# TLS overlay (Caddy + Let's Encrypt) — only when needed
if [ "$TLS_MODE" = "caddy" ] && [ -n "$DOMAIN" ]; then
  curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/Caddyfile" -o Caddyfile 2>/dev/null || true
fi

# --- 4. Fetch secrets from Secret Manager ---
KEBOOLA_TOKEN=""
if [ "$DATA_SOURCE" = "keboola" ]; then
  # Best effort: if the token secret cannot be read, the value stays empty.
  KEBOOLA_TOKEN=$(gcloud secrets versions access latest --secret=keboola-storage-token 2>/dev/null || echo "")
fi

# The JWT secret is mandatory. On a freshly created VM the IAM grant for the
# VM service account may not be effective yet, so retry a few times before
# giving up instead of aborting on the first failed read.
JWT_KEY=""
for attempt in 1 2 3 4 5 6; do
  if JWT_KEY=$(gcloud secrets versions access latest --secret="agnes-$${CUSTOMER_NAME}-jwt-secret"); then
    break
  fi
  echo "JWT secret not readable yet (attempt $attempt of 6), retrying in 10s"
  sleep 10
done
if [ -z "$JWT_KEY" ]; then
  echo "ERROR: could not read the JWT secret from Secret Manager"
  exit 1
fi

# Create the file with mode 600 BEFORE any secret is written into it, so it is
# never readable by other users, not even briefly.
install -m 600 /dev/null "$APP_DIR/.env"
cat > "$APP_DIR/.env" <<ENVEOF
JWT_SECRET_KEY=$JWT_KEY
DATA_DIR=$DATA_MNT
DATA_SOURCE=$DATA_SOURCE
KEBOOLA_STORAGE_TOKEN=$KEBOOLA_TOKEN
KEBOOLA_STACK_URL=$KEBOOLA_STACK_URL
SEED_ADMIN_EMAIL=$SEED_ADMIN_EMAIL
LOG_LEVEL=info
DOMAIN=$DOMAIN
AGNES_TAG=$IMAGE_TAG
ACME_EMAIL=admin@$${DOMAIN#*.}
ENVEOF
chmod 600 "$APP_DIR/.env"

# --- 5. Start Agnes ---
COMPOSE_PROFILES_ARG=""
if [ "$TLS_MODE" = "caddy" ] && [ -n "$DOMAIN" ]; then
  COMPOSE_PROFILES_ARG="--profile tls"
fi
# COMPOSE_PROFILES_ARG is left unquoted on purpose: it must split into two
# words, or vanish entirely when empty.
docker compose -f docker-compose.yml -f docker-compose.prod.yml $COMPOSE_PROFILES_ARG pull
docker compose -f docker-compose.yml -f docker-compose.prod.yml $COMPOSE_PROFILES_ARG up -d

# --- 6. Watchtower (auto-pull new images) ---
if [ "$UPGRADE_MODE" = "auto" ]; then
  # Remove an existing watchtower container first (for idempotency)
  docker rm -f agnes-watchtower 2>/dev/null || true
  docker run -d \
    --name agnes-watchtower \
    --restart=unless-stopped \
    -v /var/run/docker.sock:/var/run/docker.sock \
    containrrr/watchtower \
    --interval 300 \
    --cleanup \
    --include-restarting
fi

echo "=== [Agnes $CUSTOMER_NAME $ROLE] Startup complete at $(date) ==="
docker compose ps

View file

@ -0,0 +1,72 @@
# Required: the GCP project every resource in this module is created in.
variable "gcp_project_id" {
  type        = string
  description = "GCP project ID kde bude instance nasazená"
}
# Region used for the reserved addresses.
variable "region" {
  type        = string
  default     = "europe-west1"
  description = "GCP region"
}
# Zone used for the VMs and their data disks.
variable "zone" {
  type        = string
  default     = "europe-west1-b"
  description = "GCP zone"
}
# Short customer identifier embedded in resource names (secret id, service
# account id, firewall rule, network tag). Must be a lowercase letter followed
# by 1-20 lowercase letters, digits or hyphens.
variable "customer_name" {
  type        = string
  description = "Krátký identifikátor zákazníka (např. keboola, grpn). Použije se v prefixu resourců."

  validation {
    error_message = "customer_name musí být lowercase, začínat písmenem, 2-21 znaků."
    condition     = can(regex("^[a-z][a-z0-9-]{1,20}$", var.customer_name))
  }
}
# Configuration of the single prod VM. Only `name` is required; every other
# attribute falls back to the default given here.
variable "prod_instance" {
  description = "Prod VM konfigurace"

  type = object({
    name = string

    # VM sizing
    machine_type = optional(string, "e2-small")
    disk_size_gb = optional(number, 30)
    data_disk_gb = optional(number, 50)

    # Application image and upgrade behaviour
    image_tag    = optional(string, "stable")
    upgrade_mode = optional(string, "auto")

    # TLS
    tls_mode = optional(string, "caddy")
    domain   = optional(string, "")
  })
}
# Optional dev VMs; an empty list (the default) creates none. Each entry needs
# only a `name`.
variable "dev_instances" {
  description = "Seznam dev VMs. Prázdné pole = žádné dev VMs."
  default     = []

  type = list(object({
    name         = string
    image_tag    = optional(string, "dev")
    machine_type = optional(string, "e2-small")
  }))
}
# Required: passed to the startup script as the seed admin email.
variable "seed_admin_email" {
  type        = string
  description = "Email prvního admin usera"
}
# Data source type handed to the startup script; no validation is applied here.
variable "data_source" {
  type        = string
  default     = "keboola"
  description = "Typ data source — keboola | bigquery | csv"
}
# Keboola Stack URL handed to the startup script; empty by default.
variable "keboola_stack_url" {
  type        = string
  default     = ""
  description = "Keboola Stack URL (pokud data_source = keboola)"
}
# Container image repository handed to the startup script.
variable "image_repo" {
  type        = string
  default     = "ghcr.io/keboola/agnes-the-ai-analyst"
  description = "Docker image repo"
}

View file

@ -1,39 +0,0 @@
# The reserved static address of the server.
output "instance_ip" {
  value       = google_compute_address.data_analyst.address
  description = "Public IP address of the server"
}
# Ready-to-paste SSH command: "ssh <ssh_user>@<static address>".
output "ssh_command" {
  value       = format("ssh %s@%s", var.ssh_user, google_compute_address.data_analyst.address)
  description = "SSH command to connect"
}
# Plain-HTTP URL of the API on port 8000 of the static address.
output "api_url" {
  value       = format("http://%s:8000", google_compute_address.data_analyst.address)
  description = "API URL"
}
# Web UI URL: falls back to http://<address>:8000 when no domain is set,
# otherwise https://<domain>.
output "web_url" {
  value       = var.domain == "" ? "http://${google_compute_address.data_analyst.address}:8000" : "https://${var.domain}"
  description = "Web UI URL"
}
# URL of the /docs page on port 8000 of the static address.
output "swagger_url" {
  value       = format("http://%s:8000/docs", google_compute_address.data_analyst.address)
  description = "Swagger API docs URL"
}
# curl command that POSTs to /auth/bootstrap on the server.
# NOTE(review): the email in the payload is hard-coded rather than taken from
# var.admin_email.
output "bootstrap_command" {
  value       = "curl -X POST http://${google_compute_address.data_analyst.address}:8000/auth/bootstrap -H 'Content-Type: application/json' -d '{\"email\":\"admin@keboola.com\",\"name\":\"Admin\"}'"
  description = "Command to bootstrap first admin user"
}
# Multi-line snippet of `da` CLI commands pointing at this server. The heredoc
# uses the indent-stripping form, so the uniform indentation below is removed
# from the rendered value.
output "cli_setup_commands" {
  description = "Commands to set up local CLI"

  value = <<-EOT
    da setup init --server http://${google_compute_address.data_analyst.address}:8000
    da setup bootstrap admin@keboola.com
    da setup test-connection
    da sync
    EOT
}

View file

@ -1,19 +0,0 @@
# Copy to terraform.tfvars and fill in values

# --- GCP placement ---
project_id = "your-gcp-project"
region     = "europe-north1"
zone       = "europe-north1-a"

# --- VM ---
instance_name = "data-analyst"
machine_type  = "e2-small" # 2 vCPU, 2GB RAM, ~$7/mo
disk_size_gb  = 30

# --- SSH access ---
ssh_user            = "deploy"
ssh_public_key_path = "~/.ssh/id_ed25519.pub"

# JWT secret is auto-generated by Terraform (random_password)

# --- Keboola (optional — leave empty for sample data) ---
keboola_token      = ""
keboola_stack_url  = "https://connection.keboola.com"
keboola_project_id = ""

# --- Domain (optional — leave empty for IP-only access) ---
domain = ""

View file

@ -1,79 +0,0 @@
# Required: project the Google provider operates on.
variable "project_id" {
  type        = string
  description = "GCP project ID"
}
# Region for the provider and the static address.
variable "region" {
  type        = string
  default     = "europe-west1"
  description = "GCP region"
}
# Zone for the provider and the VM.
variable "zone" {
  type        = string
  default     = "europe-west1-b"
  description = "GCP zone"
}
# Machine type of the VM.
variable "machine_type" {
  type        = string
  default     = "e2-small"
  description = "VM machine type"
}
# Size of the VM's boot disk, in GB.
variable "disk_size_gb" {
  type        = number
  default     = 30
  description = "Boot disk size in GB"
}
# VM name; also used as the prefix for the firewall rule and address names and
# as the network tag.
variable "instance_name" {
  type        = string
  default     = "data-analyst"
  description = "Name for the VM instance"
}
# Login name paired with the public key in the instance's ssh-keys metadata.
variable "ssh_user" {
  type        = string
  default     = "deploy"
  description = "SSH username"
}
# Local path of the public key file; a leading "~" is expanded by the caller
# of this variable via pathexpand().
variable "ssh_public_key_path" {
  type        = string
  default     = "~/.ssh/id_ed25519.pub"
  description = "Path to SSH public key file"
}
# App config (JWT secret auto-generated by Terraform)
# Keboola Storage API token; marked sensitive, empty by default.
variable "keboola_token" {
  type        = string
  default     = ""
  sensitive   = true
  description = "Keboola Storage API token"
}
# Keboola Stack URL written into the generated .env file.
variable "keboola_stack_url" {
  type        = string
  default     = "https://connection.keboola.com"
  description = "Keboola Stack URL"
}
# Keboola project ID written into the generated .env file; empty by default.
variable "keboola_project_id" {
  type        = string
  default     = ""
  description = "Keboola project ID"
}
# Seed admin email written into the generated .env file; empty by default.
variable "admin_email" {
  type        = string
  default     = ""
  description = "Admin email for initial seed (e.g., admin@company.com)"
}
# Optional domain name; when empty, the static IP address is used instead.
variable "domain" {
  type        = string
  default     = ""
  description = "Domain name for SSL (optional, empty = IP only)"
}