fix: add union_by_name=true to read_parquet calls in profiler
Handles schema evolution across partitions when profiling tables with multiple parquet files that may have different column sets.
This commit is contained in:
parent
5e0e4ceb9e
commit
30987eef16
1 changed file with 2 additions and 2 deletions
|
|
@@ -716,9 +716,9 @@ def profile_table(
|
|||
|
||||
# Determine read expression
|
||||
if parquet_path.is_dir():
|
||||
- read_expr = f"read_parquet('{parquet_path}/*.parquet')"
|
||||
+ read_expr = f"read_parquet('{parquet_path}/*.parquet', union_by_name=true)"
|
||||
else:
|
||||
- read_expr = f"read_parquet('{parquet_path}')"
|
||||
+ read_expr = f"read_parquet('{parquet_path}', union_by_name=true)"
|
||||
|
||||
# Get row count to decide on sampling
|
||||
total_rows = con.execute(f"SELECT COUNT(*) FROM {read_expr}").fetchone()[0]
|
||||
|
|
|
|||
Loading…
Reference in a new issue