From b6a94add67af8b9a0637a207a658efaf22f6e0b1 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr
Date: Mon, 30 Mar 2026 15:55:26 +0200
Subject: [PATCH] feat: add Terraform config for GCP deployment

- GCE e2-small with Ubuntu 24.04 + Docker
- Static IP, firewall rules, SSD boot disk
- Startup script: installs Docker, clones repo, creates .env, starts compose
- Outputs: IP, SSH command, API URL, bootstrap command, CLI setup
- ~7$/month for always-on server
---
 infra/.gitignore               |   6 ++
 infra/main.tf                  | 157 +++++++++++++++++++++++++++++++++
 infra/outputs.tf               |  39 ++++++++
 infra/terraform.tfvars.example |  20 +++++
 infra/variables.tf             |  84 ++++++++++++++++++
 5 files changed, 306 insertions(+)
 create mode 100644 infra/.gitignore
 create mode 100644 infra/main.tf
 create mode 100644 infra/outputs.tf
 create mode 100644 infra/terraform.tfvars.example
 create mode 100644 infra/variables.tf

diff --git a/infra/.gitignore b/infra/.gitignore
new file mode 100644
index 0000000..662ec57
--- /dev/null
+++ b/infra/.gitignore
@@ -0,0 +1,6 @@
+*.tfstate
+*.tfstate.backup
+.terraform/
+# .terraform.lock.hcl is intentionally NOT ignored: commit it so provider versions stay pinned
+terraform.tfvars
+*.auto.tfvars
diff --git a/infra/main.tf b/infra/main.tf
new file mode 100644
index 0000000..1b901d7
--- /dev/null
+++ b/infra/main.tf
@@ -0,0 +1,157 @@
+terraform {
+  required_version = ">= 1.5"
+
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+  zone    = var.zone
+}
+
+# --- Network ---
+
+# Allows inbound TCP 22/80/443/8000 from anywhere to instances carrying the
+# instance-name network tag.
+# NOTE(review): SSH (22) is open to 0.0.0.0/0; consider restricting it to
+# trusted CIDRs.
+resource "google_compute_firewall" "data_analyst" {
+  name    = "${var.instance_name}-allow-web"
+  network = "default"
+
+  allow {
+    protocol = "tcp"
+    ports    = ["22", "80", "443", "8000"]
+  }
+
+  source_ranges = ["0.0.0.0/0"]
+  target_tags   = [var.instance_name]
+}
+
+# --- Static IP ---
+
+# Reserved regional address; attached to the VM's network interface below.
+resource "google_compute_address" "data_analyst" {
+  name   = "${var.instance_name}-ip"
+  region = var.region
+}
+
+# --- Startup script ---
+
+# Bash script passed to the VM as metadata_startup_script. Terraform
+# interpolates the var.* references before the script reaches the VM; plain
+# $NAME shell variables are left untouched.
+locals {
+  startup_script = <<-SCRIPT
+    #!/bin/bash
+    set -euo pipefail
+    exec > /var/log/startup.log 2>&1
+
+    echo "=== Installing Docker ==="
+    if ! command -v docker &> /dev/null; then
+      curl -fsSL https://get.docker.com | sh
+      # The SSH user may not be provisioned yet when this script runs; create
+      # it if missing so usermod cannot abort the whole script under "set -e".
+      if ! id -u "${var.ssh_user}" &> /dev/null; then
+        useradd --create-home --shell /bin/bash "${var.ssh_user}"
+      fi
+      usermod -aG docker "${var.ssh_user}"
+    fi
+
+    # Install docker compose plugin
+    if ! docker compose version &> /dev/null; then
+      apt-get update && apt-get install -y docker-compose-plugin
+    fi
+
+    echo "=== Cloning repository ==="
+    APP_DIR="/opt/data-analyst"
+    if [ ! -d "$APP_DIR" ]; then
+      git clone https://github.com/padak/tmp_oss.git "$APP_DIR"
+      cd "$APP_DIR"
+      git checkout feature/v2-fastapi-duckdb-docker-cli
+    else
+      cd "$APP_DIR"
+      git pull origin feature/v2-fastapi-duckdb-docker-cli || true
+    fi
+
+    echo "=== Creating .env ==="
+    # Pre-create the file with mode 600 so the secrets are never readable by
+    # other users, not even briefly; the redirect below keeps that mode.
+    install -m 600 /dev/null "$APP_DIR/.env"
+    cat > "$APP_DIR/.env" << 'ENVEOF'
+    JWT_SECRET_KEY=${var.jwt_secret}
+    DATA_DIR=/data
+    DATA_SOURCE=${var.keboola_token != "" ? "keboola" : "local"}
+    KEBOOLA_STORAGE_TOKEN=${var.keboola_token}
+    KEBOOLA_STACK_URL=${var.keboola_stack_url}
+    KEBOOLA_PROJECT_ID=${var.keboola_project_id}
+    LOG_LEVEL=info
+    ENVEOF
+    # Strip any leading whitespace left over from heredoc indentation
+    sed -i 's/^[[:space:]]*//' "$APP_DIR/.env"
+    chmod 600 "$APP_DIR/.env"
+
+    echo "=== Creating data directory ==="
+    mkdir -p /data/state /data/analytics /data/src_data/parquet
+    chown -R 1000:1000 /data
+
+    echo "=== Starting Docker Compose ==="
+    cd "$APP_DIR"
+    docker compose pull 2>/dev/null || true
+    docker compose build
+    docker compose up -d
+
+    echo "=== Startup complete ==="
+    docker compose ps
+  SCRIPT
+}
+
+# --- VM Instance ---
+
+resource "google_compute_instance" "data_analyst" {
+  name         = var.instance_name
+  machine_type = var.machine_type
+  zone         = var.zone
+
+  tags = [var.instance_name]
+
+  boot_disk {
+    initialize_params {
+      image = "ubuntu-os-cloud/ubuntu-2404-lts-amd64"
+      size  = var.disk_size_gb
+      type  = "pd-ssd"
+    }
+  }
+
+  network_interface {
+    network = "default"
+    access_config {
+      nat_ip = google_compute_address.data_analyst.address
+    }
+  }
+
+  metadata = {
+    ssh-keys = "${var.ssh_user}:${file(pathexpand(var.ssh_public_key_path))}"
+  }
+
+  # NOTE(review): the rendered script embeds jwt_secret and keboola_token, so
+  # both end up in instance metadata and in Terraform state.
+  metadata_startup_script = local.startup_script
+
+  # NOTE(review): "cloud-platform" is the broadest access scope; narrow it if
+  # the workload does not need to call Google Cloud APIs.
+  service_account {
+    scopes = ["cloud-platform"]
+  }
+
+  labels = {
+    app     = "data-analyst"
+    managed = "terraform"
+  }
+}
diff --git a/infra/outputs.tf b/infra/outputs.tf
new file mode 100644
index 0000000..be6d1ae
--- /dev/null
+++ b/infra/outputs.tf
@@ -0,0 +1,39 @@
+output "instance_ip" {
+  description = "Public IP address of the server"
+  value       = google_compute_address.data_analyst.address
+}
+
+output "ssh_command" {
+  description = "SSH command to connect"
+  value       = "ssh ${var.ssh_user}@${google_compute_address.data_analyst.address}"
+}
+
+output "api_url" {
+  description = "API URL"
+  value       = "http://${google_compute_address.data_analyst.address}:8000"
+}
+
+output "web_url" {
+  description = "Web UI URL"
+  value       = var.domain != "" ? "https://${var.domain}" : "http://${google_compute_address.data_analyst.address}:8000"
+}
+
+output "swagger_url" {
+  description = "Swagger API docs URL"
+  value       = "http://${google_compute_address.data_analyst.address}:8000/docs"
+}
+
+output "bootstrap_command" {
+  description = "Command to bootstrap first admin user"
+  value       = "curl -X POST http://${google_compute_address.data_analyst.address}:8000/auth/bootstrap -H 'Content-Type: application/json' -d '{\"email\":\"admin@keboola.com\",\"name\":\"Admin\"}'"
+}
+
+output "cli_setup_commands" {
+  description = "Commands to set up local CLI"
+  value       = <<-EOT
+    da setup init --server http://${google_compute_address.data_analyst.address}:8000
+    da setup bootstrap admin@keboola.com
+    da setup test-connection
+    da sync
+  EOT
+}
diff --git a/infra/terraform.tfvars.example b/infra/terraform.tfvars.example
new file mode 100644
index 0000000..5f973c0
--- /dev/null
+++ b/infra/terraform.tfvars.example
@@ -0,0 +1,20 @@
+# Copy to terraform.tfvars and fill in values
+project_id          = "your-gcp-project-id"
+region              = "europe-west1"
+zone                = "europe-west1-b"
+machine_type        = "e2-small" # 2 vCPU, 2GB RAM, ~$7/mo
+disk_size_gb        = 30
+instance_name       = "data-analyst"
+ssh_user            = "deploy"
+ssh_public_key_path = "~/.ssh/id_ed25519.pub"
+
+# App secrets
+jwt_secret = "" # Required, min 32 chars. Generate: python3 -c "import secrets; print(secrets.token_urlsafe(32))"
+
+# Keboola (optional — leave empty for sample data)
+keboola_token      = ""
+keboola_stack_url  = "https://connection.keboola.com"
+keboola_project_id = ""
+
+# Domain (optional — leave empty for IP-only access)
+domain = ""
diff --git a/infra/variables.tf b/infra/variables.tf
new file mode 100644
index 0000000..559110c
--- /dev/null
+++ b/infra/variables.tf
@@ -0,0 +1,84 @@
+variable "project_id" {
+  description = "GCP project ID"
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region"
+  type        = string
+  default     = "europe-west1"
+}
+
+variable "zone" {
+  description = "GCP zone"
+  type        = string
+  default     = "europe-west1-b"
+}
+
+variable "machine_type" {
+  description = "VM machine type"
+  type        = string
+  default     = "e2-small"
+}
+
+variable "disk_size_gb" {
+  description = "Boot disk size in GB"
+  type        = number
+  default     = 30
+}
+
+variable "instance_name" {
+  description = "Name for the VM instance"
+  type        = string
+  default     = "data-analyst"
+}
+
+variable "ssh_user" {
+  description = "SSH username"
+  type        = string
+  default     = "deploy"
+}
+
+variable "ssh_public_key_path" {
+  description = "Path to SSH public key file"
+  type        = string
+  default     = "~/.ssh/id_ed25519.pub"
+}
+
+# App config
+variable "jwt_secret" {
+  description = "JWT secret key (min 32 chars)"
+  type        = string
+  sensitive   = true
+
+  # Enforce the documented minimum so an empty or weak secret fails at plan time.
+  validation {
+    condition     = length(var.jwt_secret) >= 32
+    error_message = "The jwt_secret value must be at least 32 characters long."
+  }
+}
+
+variable "keboola_token" {
+  description = "Keboola Storage API token"
+  type        = string
+  sensitive   = true
+  default     = ""
+}
+
+variable "keboola_stack_url" {
+  description = "Keboola Stack URL"
+  type        = string
+  default     = "https://connection.keboola.com"
+}
+
+variable "keboola_project_id" {
+  description = "Keboola project ID"
+  type        = string
+  default     = ""
+}
+
+variable "domain" {
+  description = "Domain name for SSL (optional, empty = IP only)"
+  type        = string
+  default     = ""
+}