diff --git a/src/profiler.py b/src/profiler.py index 0cda5f7..ced8778 100644 --- a/src/profiler.py +++ b/src/profiler.py @@ -716,9 +716,9 @@ def profile_table( # Determine read expression if parquet_path.is_dir(): - read_expr = f"read_parquet('{parquet_path}/*.parquet')" + read_expr = f"read_parquet('{parquet_path}/*.parquet', union_by_name=true)" else: - read_expr = f"read_parquet('{parquet_path}')" + read_expr = f"read_parquet('{parquet_path}', union_by_name=true)" # Get row count to decide on sampling total_rows = con.execute(f"SELECT COUNT(*) FROM {read_expr}").fetchone()[0]