From 30987eef164860902bb1df6067bfaacaf0a6dc59 Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <zdenek.srotyr@keboola.com>
Date: Thu, 9 Apr 2026 18:42:33 +0200
Subject: [PATCH] fix: add union_by_name=true to read_parquet calls in profiler

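Handles schema evolution across partitions when profiling tables
with multiple parquet files that may have different column sets.
With union_by_name=true DuckDB matches columns across files by name
rather than by position and fills columns absent from a file with NULL.
The option is also passed on the single-file read path.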
---
 src/profiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/profiler.py b/src/profiler.py
index 0cda5f7..ced8778 100644
--- a/src/profiler.py
+++ b/src/profiler.py
@@ -716,9 +716,9 @@ def profile_table(
 
     # Determine read expression
     if parquet_path.is_dir():
-        read_expr = f"read_parquet('{parquet_path}/*.parquet')"
+        read_expr = f"read_parquet('{parquet_path}/*.parquet', union_by_name=true)"
     else:
-        read_expr = f"read_parquet('{parquet_path}')"
+        read_expr = f"read_parquet('{parquet_path}', union_by_name=true)"
 
     # Get row count to decide on sampling
     total_rows = con.execute(f"SELECT COUNT(*) FROM {read_expr}").fetchone()[0]