Add OpenMetadata REST API connector and enricher to merge table/column metadata
from OpenMetadata catalog at sync and query time.
Changes:
- connectors/openmetadata/client.py: HTTP client for OM API
- connectors/openmetadata/enricher.py: Data enrichment with TTL cache
- tests/test_openmetadata_*: Unit tests for client and enricher
- src/config.py: Add catalog_fqn field to TableConfig
- src/data_sync.py: Use enricher in _generate_schema_yaml (catalog > BQ API > data_description.md)
- webapp/app.py: Initialize enricher, enrich catalog data with tags/tier/owners/url
- config/instance.yaml.example: Document openmetadata section
Features:
- FQN auto-derivation: bigquery.{table.id}
- TTL cache (default 1h) to avoid repeated API calls
- Graceful degradation: disabled if token missing, silent on HTTP errors
- Column description priority: catalog > BQ API > (none)
- Table description priority: catalog > data_description.md
623 lines
23 KiB
Python
623 lines
23 KiB
Python
"""
|
|
Configuration Management
|
|
|
|
This module handles:
|
|
1. Loading environment variables from .env file
|
|
2. Parsing data_description.md (YAML blocks with table definitions)
|
|
3. Validating configuration
|
|
4. Providing structured configuration data for other modules
|
|
|
|
SINGLE SOURCE OF TRUTH is data_description.md - it defines:
|
|
- List of tables to synchronize
|
|
- Sync strategies (full_refresh vs incremental)
|
|
- Primary keys and foreign keys
|
|
- Incremental columns and windows
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta
|
|
|
|
import yaml
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
# Logging setup
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class ForeignKey:
|
|
"""
|
|
Representation of foreign key relationship between tables.
|
|
|
|
Attributes:
|
|
column: Column name in this table (e.g., "company_id")
|
|
references: Reference table and column (e.g., "company.id")
|
|
description: Relationship description
|
|
"""
|
|
column: str
|
|
references: str
|
|
description: Optional[str] = None
|
|
|
|
|
|
@dataclass
|
|
class WhereFilter:
|
|
"""
|
|
Filter for exporting subset of table data.
|
|
|
|
Used with Keboola Storage API whereFilters parameter.
|
|
|
|
Attributes:
|
|
column: Column name to filter on
|
|
operator: Comparison operator (eq, ne, gt, ge, lt, le)
|
|
values: List of values to compare against
|
|
"""
|
|
column: str
|
|
operator: str # eq, ne, gt, ge, lt, le
|
|
values: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class TableConfig:
|
|
"""
|
|
Configuration for a single table.
|
|
|
|
Attributes:
|
|
id: Full table ID in Keboola (e.g., "in.c-sfdc.company")
|
|
name: Short table name (e.g., "company")
|
|
description: Table description
|
|
primary_key: Primary key column name
|
|
sync_strategy: "full_refresh", "incremental", or "partitioned"
|
|
incremental_window_days: Number of days to backtrack for incremental sync
|
|
partition_by: Column name to partition by (for incremental/partitioned with partitions)
|
|
partition_granularity: Partition granularity: "month", "day", or "year"
|
|
foreign_keys: List of foreign key relationships
|
|
where_filters: List of filters to apply when exporting (for downloading subset of data)
|
|
folder: Override folder name (instead of bucket-level folder_mapping)
|
|
max_history_days: Max days of history for initial incremental load (None = download all)
|
|
dataset: Dataset group name for on-demand tables (e.g., "kbc_telemetry_expert")
|
|
initial_load_chunk_days: Chunk size in days for chunked initial load (default: 30)
|
|
"""
|
|
id: str
|
|
name: str
|
|
description: str
|
|
primary_key: str
|
|
sync_strategy: str # "full_refresh", "incremental", or "partitioned"
|
|
incremental_window_days: Optional[int] = None
|
|
partition_by: Optional[str] = None
|
|
partition_granularity: Optional[str] = None # "month", "day", "year"
|
|
foreign_keys: List[ForeignKey] = field(default_factory=list)
|
|
where_filters: List[WhereFilter] = field(default_factory=list)
|
|
folder: Optional[str] = None
|
|
max_history_days: Optional[int] = None
|
|
dataset: Optional[str] = None
|
|
initial_load_chunk_days: int = 30
|
|
incremental_column: Optional[str] = None # Column for timestamp-based incremental sync (BigQuery)
|
|
columns: Optional[List[str]] = None # Subset of columns to sync (None = all)
|
|
row_filter: Optional[str] = None # SQL WHERE clause for filtering (e.g., "event_date >= '2024-01-01'")
|
|
query_mode: str = "local" # "local" (Parquet) | "remote" (BQ direct) | "hybrid" (sync subset, query BQ)
|
|
partition_column_type: str = "TIMESTAMP" # BQ SQL type for partition column: "DATE", "TIMESTAMP", "DATETIME"
|
|
catalog_fqn: Optional[str] = None # Explicit OpenMetadata FQN override (auto-derived if not set)
|
|
|
|
def __post_init__(self):
|
|
"""Validate configuration after initialization."""
|
|
# Validate query_mode
|
|
valid_query_modes = ("local", "remote", "hybrid")
|
|
if self.query_mode not in valid_query_modes:
|
|
raise ValueError(
|
|
f"Invalid query_mode '{self.query_mode}' for table {self.id}. "
|
|
f"Allowed values: {', '.join(valid_query_modes)}"
|
|
)
|
|
|
|
# Validate sync_strategy
|
|
if self.sync_strategy not in ["full_refresh", "incremental", "partitioned"]:
|
|
raise ValueError(
|
|
f"Invalid sync_strategy '{self.sync_strategy}' for table {self.id}. "
|
|
f"Allowed values: 'full_refresh', 'incremental', 'partitioned'"
|
|
)
|
|
|
|
# For incremental strategy:
|
|
# - changedSince is calculated from last sync timestamp (Keboola internal)
|
|
# - partition_by is optional - if set, output will be partitioned
|
|
if self.sync_strategy == "incremental":
|
|
if not self.incremental_window_days:
|
|
# Default 7 days if not specified
|
|
self.incremental_window_days = 7
|
|
logger.warning(
|
|
f"Table {self.id}: incremental_window_days not set, "
|
|
f"using default 7 days"
|
|
)
|
|
# If partition_by is set, validate partition_granularity
|
|
if self.partition_by:
|
|
if not self.partition_granularity:
|
|
self.partition_granularity = "month"
|
|
logger.info(
|
|
f"Table {self.id}: partition_granularity not set, "
|
|
f"using default 'month'"
|
|
)
|
|
if self.partition_granularity not in ["month", "day", "year"]:
|
|
raise ValueError(
|
|
f"Invalid partition_granularity '{self.partition_granularity}' for table {self.id}. "
|
|
f"Allowed values: 'month', 'day', 'year'"
|
|
)
|
|
|
|
# Validate partition_column_type
|
|
valid_column_types = ("DATE", "TIMESTAMP", "DATETIME")
|
|
if self.partition_column_type not in valid_column_types:
|
|
raise ValueError(
|
|
f"Invalid partition_column_type '{self.partition_column_type}' for table {self.id}. "
|
|
f"Allowed values: {', '.join(valid_column_types)}"
|
|
)
|
|
|
|
# For partitioned, partition_by must be defined
|
|
if self.sync_strategy == "partitioned":
|
|
if not self.partition_by:
|
|
raise ValueError(
|
|
f"Table {self.id} has sync_strategy='partitioned', "
|
|
f"but partition_by is missing"
|
|
)
|
|
if not self.partition_granularity:
|
|
self.partition_granularity = "month"
|
|
logger.info(
|
|
f"Table {self.id}: partition_granularity not set, "
|
|
f"using default 'month'"
|
|
)
|
|
if self.partition_granularity not in ["month", "day", "year"]:
|
|
raise ValueError(
|
|
f"Invalid partition_granularity '{self.partition_granularity}' for table {self.id}. "
|
|
f"Allowed values: 'month', 'day', 'year'"
|
|
)
|
|
|
|
def get_primary_key_columns(self) -> List[str]:
|
|
"""
|
|
Get primary key as list of column names.
|
|
|
|
Supports both single and composite primary keys.
|
|
Composite PKs are defined as comma-separated string: "col1, col2"
|
|
|
|
Returns:
|
|
List of column names forming the primary key
|
|
"""
|
|
# Split by comma and strip whitespace
|
|
return [col.strip() for col in self.primary_key.split(",")]
|
|
|
|
def is_partitioned(self) -> bool:
|
|
"""Check if table output should be partitioned.
|
|
|
|
Returns True for:
|
|
- partitioned strategy (always partitioned)
|
|
- incremental strategy with partition_by set
|
|
"""
|
|
if self.sync_strategy == "partitioned":
|
|
return True
|
|
if self.sync_strategy == "incremental" and self.partition_by:
|
|
return True
|
|
return False
|
|
|
|
|
|
class Config:
|
|
"""
|
|
Main configuration class.
|
|
|
|
Loads environment variables and parses data_description.md.
|
|
Provides access to all configuration parameters.
|
|
"""
|
|
|
|
def __init__(self, env_file: Optional[str] = None):
|
|
"""
|
|
Initialize configuration.
|
|
|
|
Args:
|
|
env_file: Path to .env file. If None, looks for .env in project root.
|
|
"""
|
|
# Find project root (folder containing data_description.md)
|
|
self.project_root = self._find_project_root()
|
|
|
|
# Load environment variables
|
|
if env_file is None:
|
|
env_file = self.project_root / ".env"
|
|
|
|
if env_file.exists():
|
|
load_dotenv(env_file)
|
|
logger.info(f"Loaded from .env: {env_file}")
|
|
else:
|
|
logger.warning(
|
|
f".env file not found: {env_file}. "
|
|
f"Use config/.env.template as reference."
|
|
)
|
|
|
|
# Read by connectors/keboola/ if enabled
|
|
self.keboola_token = os.getenv("KEBOOLA_STORAGE_TOKEN")
|
|
self.keboola_stack_url = os.getenv("KEBOOLA_STACK_URL")
|
|
self.keboola_project_id = os.getenv("KEBOOLA_PROJECT_ID")
|
|
self.data_dir = Path(os.getenv("DATA_DIR", "./data"))
|
|
self.docs_output_dir = Path(os.getenv("DOCS_OUTPUT_DIR", "./docs"))
|
|
self.data_source = os.getenv("DATA_SOURCE", "local")
|
|
self.log_level = os.getenv("LOG_LEVEL", "INFO")
|
|
|
|
# Set log level
|
|
logging.getLogger().setLevel(self.log_level)
|
|
|
|
# Validate required environment variables
|
|
self._validate_env_vars()
|
|
|
|
# Parse data_description.md
|
|
self.tables, self.folder_mapping = self._parse_data_description()
|
|
|
|
logger.info(f"Configuration loaded: {len(self.tables)} tables")
|
|
|
|
def _find_project_root(self) -> Path:
|
|
"""
|
|
Find project root (folder containing docs/data_description.md).
|
|
|
|
Searches from current folder upwards.
|
|
|
|
Returns:
|
|
Path to project root
|
|
|
|
Raises:
|
|
FileNotFoundError: If docs/data_description.md is not found
|
|
"""
|
|
current = Path.cwd()
|
|
|
|
# Try current folder first
|
|
if (current / "docs" / "data_description.md").exists():
|
|
return current
|
|
|
|
# Try parent folders (up to 5 levels)
|
|
for _ in range(5):
|
|
current = current.parent
|
|
if (current / "docs" / "data_description.md").exists():
|
|
return current
|
|
|
|
raise FileNotFoundError(
|
|
"docs/data_description.md not found. "
|
|
"Make sure you're running from project root."
|
|
)
|
|
|
|
def _resolve_placeholder(self, value: str) -> str:
|
|
"""
|
|
Resolve placeholders in filter values.
|
|
|
|
Supported placeholders:
|
|
- {{last_week}}: 7 days ago
|
|
- {{last_month}}: 30 days ago
|
|
- {{last_2_months}}: 60 days ago
|
|
- {{last_3_months}}: 90 days ago
|
|
- {{last_6_months}}: 180 days ago
|
|
- {{last_year}}: 365 days ago
|
|
- {{last_2_years}}: 730 days ago
|
|
- {{today}}: Today's date
|
|
|
|
Args:
|
|
value: String that may contain placeholder
|
|
|
|
Returns:
|
|
Resolved string with actual date values
|
|
"""
|
|
if not isinstance(value, str):
|
|
return value
|
|
|
|
today = datetime.now()
|
|
|
|
placeholders = {
|
|
"{{last_week}}": (today - timedelta(days=7)).strftime("%Y-%m-%d"),
|
|
"{{last_month}}": (today - timedelta(days=30)).strftime("%Y-%m-%d"),
|
|
"{{last_2_months}}": (today - timedelta(days=60)).strftime("%Y-%m-%d"),
|
|
"{{last_3_months}}": (today - timedelta(days=90)).strftime("%Y-%m-%d"),
|
|
"{{last_6_months}}": (today - timedelta(days=180)).strftime("%Y-%m-%d"),
|
|
"{{last_year}}": (today - timedelta(days=365)).strftime("%Y-%m-%d"),
|
|
"{{last_2_years}}": (today - timedelta(days=730)).strftime("%Y-%m-%d"),
|
|
"{{today}}": today.strftime("%Y-%m-%d"),
|
|
}
|
|
|
|
result = value
|
|
for placeholder, replacement in placeholders.items():
|
|
if placeholder in result:
|
|
result = result.replace(placeholder, replacement)
|
|
logger.debug(f"Resolved placeholder: {placeholder} -> {replacement}")
|
|
|
|
return result
|
|
|
|
def _validate_env_vars(self):
|
|
"""
|
|
Validate that required environment variables are set based on data source type.
|
|
|
|
Raises:
|
|
ValueError: If any required variable is missing
|
|
"""
|
|
# Keboola env vars are validated by connectors/keboola/adapter.py at init time.
|
|
# No source-specific validation needed here.
|
|
pass
|
|
|
|
def _parse_data_description(self) -> tuple[List[TableConfig], Dict[str, str]]:
|
|
"""
|
|
Parse docs/data_description.md and extract table definitions.
|
|
|
|
Looks for YAML blocks in markdown file and parses them.
|
|
|
|
Returns:
|
|
Tuple of (List of TableConfig objects, folder_mapping dict)
|
|
|
|
Raises:
|
|
FileNotFoundError: If docs/data_description.md doesn't exist
|
|
yaml.YAMLError: If YAML is invalid
|
|
"""
|
|
# Check CONFIG_DIR first, then project root
|
|
config_dir = Path(os.environ.get("CONFIG_DIR", ""))
|
|
if config_dir and (config_dir / "data_description.md").exists():
|
|
data_desc_path = config_dir / "data_description.md"
|
|
else:
|
|
data_desc_path = self.project_root / "docs" / "data_description.md"
|
|
|
|
if not data_desc_path.exists():
|
|
raise FileNotFoundError(
|
|
f"docs/data_description.md not found: {data_desc_path}"
|
|
)
|
|
|
|
# Collect all markdown files to parse: main + dataset files
|
|
md_files = [data_desc_path]
|
|
datasets_dir = self.project_root / "docs" / "datasets"
|
|
if datasets_dir.exists():
|
|
for md_file in sorted(datasets_dir.glob("*.md")):
|
|
md_files.append(md_file)
|
|
logger.info(f"Found dataset file: {md_file.name}")
|
|
|
|
# Find YAML blocks (between ```yaml and ```) from all files
|
|
yaml_pattern = r'```yaml\n(.*?)```'
|
|
yaml_matches = []
|
|
for md_file in md_files:
|
|
content = md_file.read_text()
|
|
yaml_matches.extend(re.findall(yaml_pattern, content, re.DOTALL))
|
|
|
|
if not yaml_matches:
|
|
raise ValueError(
|
|
"data_description.md contains no YAML blocks. "
|
|
"Make sure tables are defined in ```yaml blocks."
|
|
)
|
|
|
|
# Parse all YAML blocks and merge them
|
|
all_tables = []
|
|
folder_mapping = {}
|
|
for yaml_block in yaml_matches:
|
|
try:
|
|
data = yaml.safe_load(yaml_block)
|
|
if data:
|
|
if "tables" in data:
|
|
all_tables.extend(data["tables"])
|
|
if "folder_mapping" in data:
|
|
folder_mapping.update(data["folder_mapping"])
|
|
except yaml.YAMLError as e:
|
|
logger.error(f"Error parsing YAML: {e}")
|
|
raise
|
|
|
|
if not all_tables:
|
|
raise ValueError(
|
|
"data_description.md contains no tables. "
|
|
"Make sure YAML block contains 'tables:' key."
|
|
)
|
|
|
|
# Convert to TableConfig objects
|
|
table_configs = []
|
|
for table_data in all_tables:
|
|
# Parse foreign keys
|
|
fk_list = []
|
|
if "foreign_keys" in table_data:
|
|
for fk_data in table_data["foreign_keys"]:
|
|
fk = ForeignKey(
|
|
column=fk_data["column"],
|
|
references=fk_data["references"],
|
|
description=fk_data.get("description")
|
|
)
|
|
fk_list.append(fk)
|
|
|
|
# Parse where filters with placeholder resolution
|
|
wf_list = []
|
|
if "where_filters" in table_data:
|
|
for wf_data in table_data["where_filters"]:
|
|
# Resolve placeholders in values
|
|
resolved_values = [
|
|
self._resolve_placeholder(v) for v in wf_data.get("values", [])
|
|
]
|
|
wf = WhereFilter(
|
|
column=wf_data["column"],
|
|
operator=wf_data["operator"],
|
|
values=resolved_values
|
|
)
|
|
wf_list.append(wf)
|
|
|
|
# Create TableConfig
|
|
config = TableConfig(
|
|
id=table_data["id"],
|
|
name=table_data["name"],
|
|
description=table_data["description"],
|
|
primary_key=table_data["primary_key"],
|
|
sync_strategy=table_data["sync_strategy"],
|
|
incremental_window_days=table_data.get("incremental_window_days"),
|
|
partition_by=table_data.get("partition_by"),
|
|
partition_granularity=table_data.get("partition_granularity"),
|
|
foreign_keys=fk_list,
|
|
where_filters=wf_list,
|
|
folder=table_data.get("folder"),
|
|
max_history_days=table_data.get("max_history_days"),
|
|
dataset=table_data.get("dataset"),
|
|
initial_load_chunk_days=table_data.get("initial_load_chunk_days", 30),
|
|
incremental_column=table_data.get("incremental_column"),
|
|
columns=table_data.get("columns"),
|
|
row_filter=table_data.get("row_filter"),
|
|
query_mode=table_data.get("query_mode", "local"),
|
|
partition_column_type=table_data.get("partition_column_type", "TIMESTAMP"),
|
|
catalog_fqn=table_data.get("catalog_fqn"),
|
|
)
|
|
table_configs.append(config)
|
|
|
|
return table_configs, folder_mapping
|
|
|
|
def get_table_config(self, table_id: str) -> Optional[TableConfig]:
|
|
"""
|
|
Get configuration for specific table by ID.
|
|
|
|
Args:
|
|
table_id: Full table ID (e.g., "in.c-sfdc.company")
|
|
|
|
Returns:
|
|
TableConfig or None if table not in configuration
|
|
"""
|
|
for table in self.tables:
|
|
if table.id == table_id:
|
|
return table
|
|
return None
|
|
|
|
def get_parquet_path(self, table_config: TableConfig) -> Path:
|
|
"""
|
|
Get path to Parquet file for given table.
|
|
|
|
Format: data/parquet/{folder_name}/{table_name}.parquet
|
|
For partitioned tables: data/parquet/{folder_name}/{table_name}/ (directory)
|
|
|
|
Folder name is determined by folder_mapping in data_description.md.
|
|
Falls back to bucket name if no mapping exists.
|
|
|
|
Args:
|
|
table_config: Table configuration
|
|
|
|
Returns:
|
|
Path to Parquet file (or directory for partitioned tables)
|
|
"""
|
|
# Extract bucket name from table ID (e.g., "in.c-crm" from "in.c-crm.company")
|
|
bucket_name = ".".join(table_config.id.split(".")[:-1])
|
|
|
|
# Use folder mapping if available, otherwise fall back to bucket name
|
|
folder_name = self.folder_mapping.get(bucket_name, bucket_name)
|
|
|
|
# Table-level folder override (e.g., folder: kbc_telemetry_expert)
|
|
if table_config.folder:
|
|
folder_name = table_config.folder
|
|
|
|
parquet_dir = self.data_dir / "parquet" / folder_name
|
|
parquet_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
if table_config.is_partitioned():
|
|
# For partitioned tables, return directory path
|
|
partition_dir = parquet_dir / table_config.name
|
|
partition_dir.mkdir(parents=True, exist_ok=True)
|
|
return partition_dir
|
|
else:
|
|
return parquet_dir / f"{table_config.name}.parquet"
|
|
|
|
def get_partition_path(self, table_config: TableConfig, partition_key: str) -> Path:
|
|
"""
|
|
Get path to specific partition file.
|
|
|
|
Args:
|
|
table_config: Table configuration (must be partitioned)
|
|
partition_key: Partition key (e.g., "2026_01" for monthly)
|
|
|
|
Returns:
|
|
Path to partition Parquet file
|
|
"""
|
|
if not table_config.is_partitioned():
|
|
raise ValueError(f"Table {table_config.id} is not partitioned")
|
|
|
|
partition_dir = self.get_parquet_path(table_config)
|
|
return partition_dir / f"{partition_key}.parquet"
|
|
|
|
def get_metadata_path(self) -> Path:
|
|
"""
|
|
Get path to metadata folder.
|
|
|
|
Returns:
|
|
Path to metadata folder
|
|
"""
|
|
metadata_dir = self.data_dir / "metadata"
|
|
metadata_dir.mkdir(parents=True, exist_ok=True)
|
|
return metadata_dir
|
|
|
|
def get_staging_path(self) -> Path:
|
|
"""
|
|
Get path to staging folder for temporary files.
|
|
|
|
Uses /tmp/data_analyst_staging for faster I/O and to avoid filling /data disk.
|
|
Directory is created by deploy.sh on server startup.
|
|
|
|
Returns:
|
|
Path to staging folder
|
|
"""
|
|
staging_dir = Path("/tmp/data_analyst_staging")
|
|
staging_dir.mkdir(parents=True, exist_ok=True)
|
|
return staging_dir
|
|
|
|
def get_duckdb_path(self) -> Path:
|
|
"""
|
|
Get path to DuckDB database.
|
|
|
|
Returns:
|
|
Path to DuckDB file
|
|
"""
|
|
duckdb_dir = self.data_dir / "duckdb"
|
|
duckdb_dir.mkdir(parents=True, exist_ok=True)
|
|
return duckdb_dir / "analytics.duckdb"
|
|
|
|
|
|
# Singleton instance for easy access from entire application
|
|
_config_instance: Optional[Config] = None
|
|
|
|
|
|
def get_config() -> Config:
|
|
"""
|
|
Get singleton configuration instance.
|
|
|
|
On first call initializes configuration, then returns existing instance.
|
|
|
|
Returns:
|
|
Config instance
|
|
"""
|
|
global _config_instance
|
|
if _config_instance is None:
|
|
_config_instance = Config()
|
|
return _config_instance
|
|
|
|
|
|
# For testing - allows resetting config
|
|
def reset_config():
|
|
"""Reset singleton config instance. For testing only."""
|
|
global _config_instance
|
|
_config_instance = None
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Test configuration
|
|
print("🔧 Testing configuration...")
|
|
|
|
try:
|
|
config = get_config()
|
|
|
|
print(f"\n✅ Configuration loaded successfully!")
|
|
print(f" Project ID: {config.keboola_project_id}")
|
|
print(f" Stack URL: {config.keboola_stack_url}")
|
|
print(f" Data dir: {config.data_dir}")
|
|
print(f" Number of tables: {len(config.tables)}")
|
|
|
|
print(f"\n📊 Tables:")
|
|
for table in config.tables:
|
|
print(f" - {table.name} ({table.id})")
|
|
print(f" Strategy: {table.sync_strategy}")
|
|
if table.sync_strategy == "incremental":
|
|
print(f" Incremental window: {table.incremental_window_days} days")
|
|
if table.partition_by:
|
|
print(f" Partitioned by: {table.partition_by} ({table.partition_granularity})")
|
|
print(f" Parquet: {config.get_parquet_path(table)}")
|
|
|
|
except Exception as e:
|
|
print(f"\n❌ Error: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|