From 1be0dc53004fcf66a3e03062aa4bce56a4cdd9e3 Mon Sep 17 00:00:00 2001 From: Petr Date: Tue, 10 Mar 2026 22:09:14 +0100 Subject: [PATCH] Add flat parquet fallback to profiler get_parquet_path Tries subfolder path first (Keboola-style layout), then falls back to flat path for simple deployments like sample data. --- src/profiler.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/profiler.py b/src/profiler.py index 3f07731..c49f624 100644 --- a/src/profiler.py +++ b/src/profiler.py @@ -379,13 +379,27 @@ def load_metric_file_map(path: Path) -> Dict[str, str]: def get_parquet_path(table: TableInfo, folder_mapping: Dict[str, str]) -> Path: - """Compute the Parquet file/directory path for a table.""" + """Compute the Parquet file/directory path for a table. + + Tries subfolder path first (Keboola-style: parquet/<folder>/<table>.parquet), + then falls back to flat path (parquet/<table>.parquet) for simple layouts. + """ bucket_name = ".".join(table.id.split(".")[:-1]) folder_name = folder_mapping.get(bucket_name, bucket_name) base = PARQUET_DIR / folder_name if table.is_partitioned(): - return base / table.name # directory - return base / f"{table.name}.parquet" + subfolder_path = base / table.name + if subfolder_path.is_dir(): + return subfolder_path + # Fallback: flat layout (partitioned dir directly under PARQUET_DIR) + flat_path = PARQUET_DIR / table.name + return flat_path if flat_path.is_dir() else subfolder_path + subfolder_path = base / f"{table.name}.parquet" + if subfolder_path.exists(): + return subfolder_path + # Fallback: flat layout (file directly in PARQUET_DIR) + flat_path = PARQUET_DIR / f"{table.name}.parquet" + return flat_path if flat_path.exists() else subfolder_path # ---------------------------------------------------------------------------