From a2c05a5d97126fbbfa0114c64899747c6e7bff2a Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Tue, 21 Apr 2026 15:43:27 +0200 Subject: [PATCH] infra: refactor Terraform into reusable customer-instance module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes: - infra/main.tf, variables.tf, outputs.tf, terraform.tfvars.example removed - Single-file monolith replaced by reusable module + example New structure: - infra/modules/customer-instance/ — the module: - main.tf: VMs, disks, firewall, Secret Manager, dedicated VM SA - variables.tf: prod_instance + dev_instances flexible schema - outputs.tf: IPs, SA email, JWT secret reference - startup-script.sh.tpl: bootstraps VM, fetches secrets, runs compose, adds Watchtower for auto-upgrade - infra/examples/minimal/ — OSS self-host quickstart using the module Supports: - Per-customer GCP project isolation - Branch-aware dev VMs via dev_instances list (any image_tag) - Persistent /data disk (rebuild-safe) - OS Login (no per-user SSH keys) - Caddy TLS mode (opt-in via tls_mode="caddy" + domain) - Watchtower auto-upgrade (opt-in via upgrade_mode="auto") --- infra/examples/minimal/main.tf | 54 ++++++ infra/main.tf | 170 ------------------ infra/modules/customer-instance/main.tf | 163 +++++++++++++++++ infra/modules/customer-instance/outputs.tf | 19 ++ .../customer-instance/startup-script.sh.tpl | 100 +++++++++++ infra/modules/customer-instance/variables.tf | 72 ++++++++ infra/outputs.tf | 39 ---- infra/terraform.tfvars.example | 19 -- infra/variables.tf | 79 -------- 9 files changed, 408 insertions(+), 307 deletions(-) create mode 100644 infra/examples/minimal/main.tf delete mode 100644 infra/main.tf create mode 100644 infra/modules/customer-instance/main.tf create mode 100644 infra/modules/customer-instance/outputs.tf create mode 100644 infra/modules/customer-instance/startup-script.sh.tpl create mode 100644 infra/modules/customer-instance/variables.tf delete mode 
100644 infra/outputs.tf
 delete mode 100644 infra/terraform.tfvars.example
 delete mode 100644 infra/variables.tf

diff --git a/infra/examples/minimal/main.tf b/infra/examples/minimal/main.tf
new file mode 100644
index 0000000..166bc2b
--- /dev/null
+++ b/infra/examples/minimal/main.tf
@@ -0,0 +1,54 @@
+# Minimal example: single-VM Agnes deploy.
+# For an OSS self-hoster who wants a prod VM with no dev VMs and no TLS.
+terraform {
+  required_version = ">= 1.5"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.gcp_project_id
+  region  = "europe-west1"
+}
+
+variable "gcp_project_id" {
+  description = "GCP project ID (must have billing enabled)"
+  type        = string
+}
+
+variable "admin_email" {
+  description = "Email for first admin user"
+  type        = string
+}
+
+module "agnes" {
+  source = "../../modules/customer-instance"
+
+  gcp_project_id   = var.gcp_project_id
+  customer_name    = "self-hosted"
+  seed_admin_email = var.admin_email
+
+  prod_instance = {
+    name         = "agnes"
+    machine_type = "e2-small"
+    data_disk_gb = 30
+    image_tag    = "stable"
+    upgrade_mode = "auto"
+    tls_mode     = "none"
+    domain       = ""
+  }
+
+  dev_instances = []
+
+  # Customize below for your setup
+  data_source = "keboola"
+}
+
+output "agnes_ip" {
+  description = "SSH in via: ssh <user>@<ip>; UI at http://<ip>:8000"
+  value       = module.agnes.prod_ip
+}
diff --git a/infra/main.tf b/infra/main.tf
deleted file mode 100644
index 5c6f541..0000000
--- a/infra/main.tf
+++ /dev/null
@@ -1,170 +0,0 @@
-terraform {
-  required_version = ">= 1.5"
-
-  backend "gcs" {
-    bucket = "agnes-terraform-state"
-    prefix = "instances"
-  }
-
-  required_providers {
-    google = {
-      source  = "hashicorp/google"
-      version = "~> 5.0"
-    }
-    random = {
-      source  = "hashicorp/random"
-      version = "~> 3.0"
-    }
-  }
-}
-
-provider "google" {
-  project = var.project_id
-  region  = var.region
-  zone    = var.zone
-}
-
-# --- Auto-generated secrets ---
-
-resource "random_password" "jwt_secret" {
-  length  = 
48 - special = false -} - -# --- Network --- - -resource "google_compute_firewall" "data_analyst" { - name = "${var.instance_name}-allow-web" - network = "default" - - allow { - protocol = "tcp" - ports = ["22", "80", "443", "8000"] - } - - source_ranges = ["0.0.0.0/0"] - target_tags = [var.instance_name] -} - -# --- Static IP --- - -resource "google_compute_address" "data_analyst" { - name = "${var.instance_name}-ip" - region = var.region -} - -# --- Startup script --- - -locals { - startup_script = <<-SCRIPT - #!/bin/bash - set -euo pipefail - exec > /var/log/startup.log 2>&1 - - echo "=== Installing Docker ===" - if ! command -v docker &> /dev/null; then - curl -fsSL https://get.docker.com | sh - usermod -aG docker ${var.ssh_user} - fi - - # Install docker compose plugin - if ! docker compose version &> /dev/null; then - apt-get update && apt-get install -y docker-compose-plugin - fi - - echo "=== Cloning repository ===" - APP_DIR="/opt/data-analyst" - if [ ! -d "$APP_DIR" ]; then - git clone https://github.com/keboola/agnes-the-ai-analyst.git "$APP_DIR" - cd "$APP_DIR" - git checkout main - else - cd "$APP_DIR" - git pull origin main || true - fi - - echo "=== Creating .env ===" - cat > "$APP_DIR/.env" << 'ENVEOF' - JWT_SECRET_KEY=${random_password.jwt_secret.result} - DATA_DIR=/data - DATA_SOURCE=${var.keboola_token != "" ? "keboola" : "local"} - KEBOOLA_STORAGE_TOKEN=${var.keboola_token} - KEBOOLA_STACK_URL=${var.keboola_stack_url} - KEBOOLA_PROJECT_ID=${var.keboola_project_id} - SEED_ADMIN_EMAIL=${var.admin_email} - LOG_LEVEL=info - ENVEOF - # Strip leading whitespace from heredoc - sed -i 's/^ //' "$APP_DIR/.env" - chmod 600 "$APP_DIR/.env" - - echo "=== Creating instance.yaml ===" - mkdir -p "$APP_DIR/config" - cat > "$APP_DIR/config/instance.yaml" << YAMLEOF -instance: - name: "${var.instance_name}" - subtitle: "Data Analytics Platform" -server: - host: "${google_compute_address.data_analyst.address}" - hostname: "${var.domain != "" ? 
var.domain : google_compute_address.data_analyst.address}"
-  port: 8000
-auth:
-  allowed_domain: ""
-data_source:
-  type: "${var.keboola_token != "" ? "keboola" : "local"}"
-YAMLEOF
-
-    echo "=== Creating data directory ==="
-    mkdir -p /data/state /data/analytics /data/extracts
-    chown -R 1000:1000 /data
-
-    echo "=== Starting Docker Compose ==="
-    cd "$APP_DIR"
-    docker compose pull 2>/dev/null || true
-    docker compose build
-    docker compose up -d
-
-    echo "=== Startup complete ==="
-    docker compose ps
-  SCRIPT
-}
-
-# --- VM Instance ---
-
-resource "google_compute_instance" "data_analyst" {
-  name         = var.instance_name
-  machine_type = var.machine_type
-  zone         = var.zone
-
-  tags = [var.instance_name]
-
-  boot_disk {
-    initialize_params {
-      image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64"
-      size  = var.disk_size_gb
-      type  = "pd-ssd"
-    }
-  }
-
-  network_interface {
-    network = "default"
-    access_config {
-      nat_ip = google_compute_address.data_analyst.address
-    }
-  }
-
-  metadata = {
-    ssh-keys = "${var.ssh_user}:${file(pathexpand(var.ssh_public_key_path))}"
-  }
-
-  metadata_startup_script = local.startup_script
-
-  service_account {
-    scopes = ["cloud-platform"]
-  }
-
-  labels = {
-    app     = "data-analyst"
-    managed = "terraform"
-  }
-}
diff --git a/infra/modules/customer-instance/main.tf b/infra/modules/customer-instance/main.tf
new file mode 100644
index 0000000..c32334e
--- /dev/null
+++ b/infra/modules/customer-instance/main.tf
@@ -0,0 +1,174 @@
+terraform {
+  required_version = ">= 1.5"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
+
+locals {
+  # Normalize all instances into a single list so for_each is uniform across prod + dev.
+  all_instances = concat(
+    [merge(var.prod_instance, { role = "prod" })],
+    [for d in var.dev_instances : merge(d, {
+      role         = "dev"
+      disk_size_gb = 30
+      data_disk_gb = 20
+      upgrade_mode = "auto"
+      tls_mode     = "caddy"
+      domain       = ""
+    })]
+  )
+}
+
+# --- Secrets ---
+
+resource "google_secret_manager_secret" "jwt" {
+  secret_id = "agnes-${var.customer_name}-jwt-secret"
+  project   = var.gcp_project_id
+  replication {
+    auto {}
+  }
+}
+
+resource "random_password" "jwt" {
+  length  = 48
+  special = false
+}
+
+resource "google_secret_manager_secret_version" "jwt" {
+  secret      = google_secret_manager_secret.jwt.id
+  secret_data = random_password.jwt.result
+}
+
+# --- VM service account (dedicated, Secret Manager read access only) ---
+
+resource "google_service_account" "vm" {
+  account_id   = "agnes-${var.customer_name}-vm"
+  display_name = "Agnes VM runtime SA (${var.customer_name})"
+  project      = var.gcp_project_id
+}
+
+# Project-level grant: lets the VM SA read every secret in the project (the
+# startup script reads the JWT secret and, for keboola, keboola-storage-token).
+resource "google_project_iam_member" "vm_secrets" {
+  project = var.gcp_project_id
+  role    = "roles/secretmanager.secretAccessor"
+  member  = "serviceAccount:${google_service_account.vm.email}"
+}
+
+# --- Network ---
+
+resource "google_compute_firewall" "web" {
+  name    = "agnes-${var.customer_name}-allow-web"
+  project = var.gcp_project_id
+  network = "default"
+
+  allow {
+    protocol = "tcp"
+    ports    = ["22", "80", "443", "8000"]
+  }
+
+  # NOTE(review): SSH (22) and the app port (8000) are open to the whole
+  # internet; narrow source_ranges where the deployment allows it.
+  source_ranges = ["0.0.0.0/0"]
+  target_tags   = ["agnes-${var.customer_name}"]
+}
+
+# --- Persistent data disks + VMs (prod + dev) ---
+
+resource "google_compute_disk" "data" {
+  for_each = { for inst in local.all_instances : inst.name => inst }
+
+  name    = "${each.value.name}-data"
+  project = var.gcp_project_id
+  zone    = var.zone
+  size    = each.value.data_disk_gb
+  type    = "pd-ssd"
+}
+
+resource "google_compute_address" "ip" {
+  for_each = { for inst in local.all_instances : inst.name => inst }
+
+  name    = "${each.value.name}-ip"
+  project = var.gcp_project_id
+  region  = var.region
+}
+
+resource "google_compute_instance" "vm" {
+  for_each = { for inst in local.all_instances : inst.name => inst }
+
+  name         = each.value.name
+  project      = var.gcp_project_id
+  machine_type = each.value.machine_type
+  zone         = var.zone
+  tags         = ["agnes-${var.customer_name}"]
+
+  boot_disk {
+    initialize_params {
+      image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64"
+      size  = each.value.disk_size_gb
+      type  = "pd-ssd"
+    }
+  }
+
+  attached_disk {
+    source      = google_compute_disk.data[each.key].self_link
+    device_name = "data"
+  }
+
+  network_interface {
+    network = "default"
+    access_config {
+      nat_ip = google_compute_address.ip[each.key].address
+    }
+  }
+
+  metadata = {
+    enable-oslogin = "TRUE"
+  }
+
+  metadata_startup_script = templatefile("${path.module}/startup-script.sh.tpl", {
+    customer_name     = var.customer_name
+    image_repo        = var.image_repo
+    image_tag         = each.value.image_tag
+    upgrade_mode      = each.value.upgrade_mode
+    tls_mode          = each.value.tls_mode
+    domain            = each.value.domain
+    data_source       = var.data_source
+    keboola_stack_url = var.keboola_stack_url
+    seed_admin_email  = var.seed_admin_email
+    role              = each.value.role
+  })
+
+  service_account {
+    email  = google_service_account.vm.email
+    scopes = ["cloud-platform"]
+  }
+
+  labels = {
+    app      = "agnes"
+    customer = var.customer_name
+    role     = each.value.role
+    managed  = "terraform"
+  }
+
+  # The startup script reads the JWT secret on first boot and aborts on error,
+  # so the secret version and the IAM grant must exist before the VM is created.
+  depends_on = [
+    google_secret_manager_secret_version.jwt,
+    google_project_iam_member.vm_secrets,
+  ]
+
+  # Changing the startup script does not affect a running VM (it only runs at boot).
+  # To apply changes, the VM has to be restarted or recreated.
+  lifecycle {
+    ignore_changes = [metadata_startup_script]
+  }
+}
diff --git a/infra/modules/customer-instance/outputs.tf b/infra/modules/customer-instance/outputs.tf
new file mode 100644
index 0000000..5fe605c
--- /dev/null
+++ b/infra/modules/customer-instance/outputs.tf
@@ -0,0 +1,19 @@
+output "instance_ips" {
+  description = "Mapa { name => external IP }"
+  value       = { for k, v in google_compute_address.ip : k => v.address }
+}
+
+output "prod_ip" {
+  description = "External IP prod instance"
+  value       = google_compute_address.ip[var.prod_instance.name].address
+}
+
+output "vm_service_account" {
+  description = "Email VM SA (pro další IAM bindings, např. BigQuery)"
+  value       = google_service_account.vm.email
+}
+
+output "jwt_secret_name" {
+  description = "Plný název JWT secretu v Secret Manageru"
+  value       = google_secret_manager_secret.jwt.name
+}
diff --git a/infra/modules/customer-instance/startup-script.sh.tpl b/infra/modules/customer-instance/startup-script.sh.tpl
new file mode 100644
index 0000000..25be1b9
--- /dev/null
+++ b/infra/modules/customer-instance/startup-script.sh.tpl
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Agnes VM startup script — templated by Terraform.
+# Idempotent — runs on every boot.
+set -euo pipefail
+exec > /var/log/agnes-startup.log 2>&1
+
+CUSTOMER_NAME="${customer_name}"
+IMAGE_REPO="${image_repo}"
+IMAGE_TAG="${image_tag}"
+UPGRADE_MODE="${upgrade_mode}"
+TLS_MODE="${tls_mode}"
+DOMAIN="${domain}"
+DATA_SOURCE="${data_source}"
+KEBOOLA_STACK_URL="${keboola_stack_url}"
+SEED_ADMIN_EMAIL="${seed_admin_email}"
+ROLE="${role}"
+
+echo "=== [Agnes $CUSTOMER_NAME $ROLE] Startup at $(date) ==="
+
+# --- 1. Docker (install if missing) ---
+if ! command -v docker &>/dev/null; then
+  curl -fsSL https://get.docker.com | sh
+fi
+if ! docker compose version &>/dev/null; then
+  apt-get update && apt-get install -y docker-compose-plugin
+fi
+
+# --- 2. Persistent data disk mount ---
+DATA_DEV="/dev/disk/by-id/google-data"
+DATA_MNT="/data"
+if [ -b "$DATA_DEV" ]; then
+  # Format only a blank disk. TYPE is queried directly because `blkid | grep -q`
+  # under `set -o pipefail` can fail spuriously (SIGPIPE) on a formatted disk,
+  # and the negated test would then run mkfs and wipe existing data.
+  FS_TYPE="$(blkid -o value -s TYPE "$DATA_DEV" 2>/dev/null || true)"
+  if [ -z "$FS_TYPE" ]; then
+    mkfs.ext4 -F "$DATA_DEV"
+  fi
+  mkdir -p "$DATA_MNT"
+  mountpoint -q "$DATA_MNT" || mount -o discard,defaults "$DATA_DEV" "$DATA_MNT"
+  grep -qF "$DATA_DEV" /etc/fstab || echo "$DATA_DEV $DATA_MNT ext4 discard,defaults,nofail 0 2" >> /etc/fstab
+  mkdir -p "$DATA_MNT/state" "$DATA_MNT/analytics" "$DATA_MNT/extracts"
+fi
+
+# --- 3. App directory + docker-compose files from public repo ---
+APP_DIR="/opt/agnes"
+mkdir -p "$APP_DIR"
+cd "$APP_DIR"
+
+# Fetch minimal docker-compose from public repo (main branch — stable)
+curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/docker-compose.yml" -o docker-compose.yml
+curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/docker-compose.prod.yml" -o docker-compose.prod.yml
+
+# TLS overlay (Caddy + Let's Encrypt) — only when needed
+if [ "$TLS_MODE" = "caddy" ] && [ -n "$DOMAIN" ]; then
+  curl -fsSL "https://raw.githubusercontent.com/keboola/agnes-the-ai-analyst/main/Caddyfile" -o Caddyfile 2>/dev/null || true
+fi
+
+# --- 4. 
Fetch secrets from Secret Manager --- +KEBOOLA_TOKEN="" +if [ "$DATA_SOURCE" = "keboola" ]; then + KEBOOLA_TOKEN=$(gcloud secrets versions access latest --secret=keboola-storage-token 2>/dev/null || echo "") +fi +JWT_KEY=$(gcloud secrets versions access latest --secret=agnes-$${CUSTOMER_NAME}-jwt-secret) + +cat > "$APP_DIR/.env" </dev/null || true + docker run -d \ + --name agnes-watchtower \ + --restart=unless-stopped \ + -v /var/run/docker.sock:/var/run/docker.sock \ + containrrr/watchtower \ + --interval 300 \ + --cleanup \ + --include-restarting +fi + +echo "=== [Agnes $CUSTOMER_NAME $ROLE] Startup complete at $(date) ===" +docker compose ps diff --git a/infra/modules/customer-instance/variables.tf b/infra/modules/customer-instance/variables.tf new file mode 100644 index 0000000..cc94ee0 --- /dev/null +++ b/infra/modules/customer-instance/variables.tf @@ -0,0 +1,72 @@ +variable "gcp_project_id" { + description = "GCP project ID kde bude instance nasazená" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west1" +} + +variable "zone" { + description = "GCP zone" + type = string + default = "europe-west1-b" +} + +variable "customer_name" { + description = "Krátký identifikátor zákazníka (např. keboola, grpn). Použije se v prefixu resourců." + type = string + validation { + condition = can(regex("^[a-z][a-z0-9-]{1,20}$", var.customer_name)) + error_message = "customer_name musí být lowercase, začínat písmenem, 2-21 znaků." + } +} + +variable "prod_instance" { + description = "Prod VM konfigurace" + type = object({ + name = string + machine_type = optional(string, "e2-small") + disk_size_gb = optional(number, 30) + data_disk_gb = optional(number, 50) + image_tag = optional(string, "stable") + upgrade_mode = optional(string, "auto") + tls_mode = optional(string, "caddy") + domain = optional(string, "") + }) +} + +variable "dev_instances" { + description = "Seznam dev VMs. Prázdné pole = žádné dev VMs." 
+ type = list(object({ + name = string + machine_type = optional(string, "e2-small") + image_tag = optional(string, "dev") + })) + default = [] +} + +variable "seed_admin_email" { + description = "Email prvního admin usera" + type = string +} + +variable "data_source" { + description = "Typ data source — keboola | bigquery | csv" + type = string + default = "keboola" +} + +variable "keboola_stack_url" { + description = "Keboola Stack URL (pokud data_source = keboola)" + type = string + default = "" +} + +variable "image_repo" { + description = "Docker image repo" + type = string + default = "ghcr.io/keboola/agnes-the-ai-analyst" +} diff --git a/infra/outputs.tf b/infra/outputs.tf deleted file mode 100644 index be6d1ae..0000000 --- a/infra/outputs.tf +++ /dev/null @@ -1,39 +0,0 @@ -output "instance_ip" { - description = "Public IP address of the server" - value = google_compute_address.data_analyst.address -} - -output "ssh_command" { - description = "SSH command to connect" - value = "ssh ${var.ssh_user}@${google_compute_address.data_analyst.address}" -} - -output "api_url" { - description = "API URL" - value = "http://${google_compute_address.data_analyst.address}:8000" -} - -output "web_url" { - description = "Web UI URL" - value = var.domain != "" ? 
"https://${var.domain}" : "http://${google_compute_address.data_analyst.address}:8000" -} - -output "swagger_url" { - description = "Swagger API docs URL" - value = "http://${google_compute_address.data_analyst.address}:8000/docs" -} - -output "bootstrap_command" { - description = "Command to bootstrap first admin user" - value = "curl -X POST http://${google_compute_address.data_analyst.address}:8000/auth/bootstrap -H 'Content-Type: application/json' -d '{\"email\":\"admin@keboola.com\",\"name\":\"Admin\"}'" -} - -output "cli_setup_commands" { - description = "Commands to set up local CLI" - value = <<-EOT - da setup init --server http://${google_compute_address.data_analyst.address}:8000 - da setup bootstrap admin@keboola.com - da setup test-connection - da sync - EOT -} diff --git a/infra/terraform.tfvars.example b/infra/terraform.tfvars.example deleted file mode 100644 index d28e121..0000000 --- a/infra/terraform.tfvars.example +++ /dev/null @@ -1,19 +0,0 @@ -# Copy to terraform.tfvars and fill in values -project_id = "your-gcp-project" -region = "europe-north1" -zone = "europe-north1-a" -machine_type = "e2-small" # 2 vCPU, 2GB RAM, ~$7/mo -disk_size_gb = 30 -instance_name = "data-analyst" -ssh_user = "deploy" -ssh_public_key_path = "~/.ssh/id_ed25519.pub" - -# JWT secret is auto-generated by Terraform (random_password) - -# Keboola (optional — leave empty for sample data) -keboola_token = "" -keboola_stack_url = "https://connection.keboola.com" -keboola_project_id = "" - -# Domain (optional — leave empty for IP-only access) -domain = "" diff --git a/infra/variables.tf b/infra/variables.tf deleted file mode 100644 index dd6d239..0000000 --- a/infra/variables.tf +++ /dev/null @@ -1,79 +0,0 @@ -variable "project_id" { - description = "GCP project ID" - type = string -} - -variable "region" { - description = "GCP region" - type = string - default = "europe-west1" -} - -variable "zone" { - description = "GCP zone" - type = string - default = "europe-west1-b" -} - 
-variable "machine_type" { - description = "VM machine type" - type = string - default = "e2-small" -} - -variable "disk_size_gb" { - description = "Boot disk size in GB" - type = number - default = 30 -} - -variable "instance_name" { - description = "Name for the VM instance" - type = string - default = "data-analyst" -} - -variable "ssh_user" { - description = "SSH username" - type = string - default = "deploy" -} - -variable "ssh_public_key_path" { - description = "Path to SSH public key file" - type = string - default = "~/.ssh/id_ed25519.pub" -} - -# App config (JWT secret auto-generated by Terraform) - -variable "keboola_token" { - description = "Keboola Storage API token" - type = string - sensitive = true - default = "" -} - -variable "keboola_stack_url" { - description = "Keboola Stack URL" - type = string - default = "https://connection.keboola.com" -} - -variable "keboola_project_id" { - description = "Keboola project ID" - type = string - default = "" -} - -variable "admin_email" { - description = "Admin email for initial seed (e.g., admin@company.com)" - type = string - default = "" -} - -variable "domain" { - description = "Domain name for SSL (optional, empty = IP only)" - type = string - default = "" -}