From d2e83ce9d015b1604d68ed6a38d502865d370ff9 Mon Sep 17 00:00:00 2001 From: Petr Date: Thu, 12 Mar 2026 11:06:49 +0100 Subject: [PATCH] Set DuckDB memory_limit=4GB in profiler to prevent OOM Server has 8GB RAM with other services running. DuckDB's default memory_limit is 80% of physical RAM, which triggered the OOM killer when profiling large tables (22M rows, 39 cols reached 7.5GB RSS -> killed). Also cap DuckDB at 2 threads. --- src/profiler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/profiler.py b/src/profiler.py index b26c2e2..c4cf69a 100644 --- a/src/profiler.py +++ b/src/profiler.py @@ -672,6 +672,12 @@ def profile_table( """ con = duckdb.connect() + # Limit DuckDB memory to avoid OOM on servers with limited RAM. + # DuckDB's default memory_limit is 80% of physical RAM, which can trigger + # the OOM killer when profiling large tables alongside other services. + con.execute("SET memory_limit = '4GB'") + con.execute("SET threads = 2") + # Determine read expression if parquet_path.is_dir(): read_expr = f"read_parquet('{parquet_path}/*.parquet')"