Add data freshness indicators and remote table visibility to UI

- Fix sync_state.json parsing: derive last_updated from the tables' last_sync
  timestamps when the root-level field is missing (flat format support)
- Parse ALL YAML blocks from data_description.md (previously only the first block was parsed)
- Show remote tables (daily_deal_traffic) in catalog with "Live" badge
- Show per-table sync timestamps and Local/Live query mode badges
- Add data freshness note to Business Metrics section
- Dashboard: fix "Not yet synced" bug, show local/live table breakdown
This commit is contained in:
Petr 2026-03-25 16:24:26 +01:00
parent a667b4e32f
commit eb7e5bdf8f
3 changed files with 172 additions and 23 deletions

View file

@ -375,13 +375,21 @@ def _load_data_stats() -> dict:
else:
rows_display = str(total_rows)
# Parse last_updated timestamp
# Parse last_updated: try root-level first, then derive from table last_sync
last_updated = state.get("last_updated")
if not last_updated:
# Derive from max of all tables' last_sync timestamps
sync_times = [t.get("last_sync") for t in tables_data.values() if t.get("last_sync")]
if sync_times:
last_updated = max(sync_times)
last_updated_display = None
last_updated_iso = None
if last_updated:
try:
dt = datetime.fromisoformat(last_updated)
last_updated_display = dt.strftime("%Y-%m-%d %H:%M") + " UTC"
last_updated_display = dt.strftime("%b %d, %H:%M") + " UTC"
last_updated_iso = dt.isoformat()
except (ValueError, TypeError):
last_updated_display = last_updated[:16] if last_updated else None
@ -392,8 +400,32 @@ def _load_data_stats() -> dict:
else:
size_display = f"{size_mb} MB"
# Count tables by query_mode from data_description.md
local_tables = total_tables
remote_tables = 0
try:
desc_path = Path(os.path.dirname(__file__)) / ".." / "docs" / "data_description.md"
if desc_path.exists():
import re
import yaml
with open(desc_path) as f:
dd_content = f.read()
yaml_blocks = re.findall(r'```yaml\s*\n(.*?)```', dd_content, re.DOTALL)
all_dd_tables = []
for block in yaml_blocks:
parsed = yaml.safe_load(block)
if parsed and "tables" in parsed:
all_dd_tables.extend(parsed["tables"])
remote_tables = sum(1 for t in all_dd_tables if t.get("query_mode") == "remote")
local_tables = len(all_dd_tables) - remote_tables
except Exception:
pass
return {
"tables": total_tables,
"total_tables": local_tables + remote_tables,
"local_tables": local_tables,
"remote_tables": remote_tables,
"columns": total_columns if total_columns > 0 else FALLBACK_DATA_STATS["columns"],
"rows": total_rows,
"rows_display": rows_display,
@ -403,6 +435,7 @@ def _load_data_stats() -> dict:
"unstructured_gb": FALLBACK_DATA_STATS["unstructured_gb"],
"unstructured_display": FALLBACK_DATA_STATS["unstructured_display"],
"last_updated": last_updated_display,
"last_updated_iso": last_updated_iso,
"highlights": FALLBACK_DATA_STATS["highlights"],
}
except Exception as e:
@ -467,16 +500,25 @@ def _load_catalog_data() -> list:
with open(desc_path) as f:
content = f.read()
# Extract YAML block between ```yaml and ```
yaml_match = re.search(r'```yaml\s*\n(.*?)```', content, re.DOTALL)
if not yaml_match:
# Extract ALL YAML blocks between ```yaml and ```
yaml_blocks = re.findall(r'```yaml\s*\n(.*?)```', content, re.DOTALL)
if not yaml_blocks:
return catalog
yaml_data = yaml.safe_load(yaml_match.group(1))
if not yaml_data or "tables" not in yaml_data:
# Merge tables and folder_mappings from all blocks
yaml_data = {"tables": [], "folder_mapping": {}}
for block in yaml_blocks:
parsed = yaml.safe_load(block)
if not parsed:
continue
if "tables" in parsed:
yaml_data["tables"].extend(parsed["tables"])
if "folder_mapping" in parsed:
yaml_data["folder_mapping"].update(parsed["folder_mapping"])
if not yaml_data["tables"]:
return catalog
# Load sync state for row counts
# Load sync state for row counts and timestamps
sync_data = {}
try:
sync_path = _resolve_metadata_path("sync_state.json")
@ -484,6 +526,9 @@ def _load_catalog_data() -> list:
with open(sync_path) as f:
state = json.load(f)
sync_data = state.get("tables", {})
# Support flat format (table_id at top level, no "tables" wrapper)
if not sync_data and any(isinstance(v, dict) and "rows" in v for v in state.values()):
sync_data = {k: v for k, v in state.items() if isinstance(v, dict) and "rows" in v}
except Exception:
pass
@ -518,20 +563,40 @@ def _load_catalog_data() -> list:
if folder not in categories:
categories[folder] = []
# Get sync info
# Get sync info and query mode
query_mode = table.get("query_mode", "local")
sync_info = sync_data.get(table_id, {})
rows = sync_info.get("rows", 0)
# Format rows
if rows >= 1_000_000:
rows_display = f"{rows / 1_000_000:.1f}M"
elif rows >= 1_000:
rows_display = f"{rows:,}"
# For remote tables, use volume estimate from config
if query_mode == "remote" and rows == 0:
volume = table.get("volume", {})
est_rows = volume.get("rows_per_day", 0)
if est_rows:
rows_display = f"~{est_rows / 1_000_000:.0f}M/day"
rows_large = True
else:
rows_display = "Live"
rows_large = False
else:
rows_display = str(rows) if rows > 0 else "-"
# Format rows for local/hybrid tables
if rows >= 1_000_000:
rows_display = f"{rows / 1_000_000:.1f}M"
elif rows >= 1_000:
rows_display = f"{rows:,}"
else:
rows_display = str(rows) if rows > 0 else "-"
rows_large = rows >= 1_000_000
# Determine if "large" badge
rows_large = rows >= 1_000_000
# Parse last_sync timestamp for display
last_sync = sync_info.get("last_sync")
last_sync_display = None
if last_sync:
try:
dt = datetime.fromisoformat(last_sync)
last_sync_display = dt.strftime("%b %d, %H:%M") + " UTC"
except (ValueError, TypeError):
last_sync_display = None
table_info = {
"name": table.get("name", ""),
@ -539,6 +604,8 @@ def _load_catalog_data() -> list:
"rows": rows,
"rows_display": rows_display,
"rows_large": rows_large,
"query_mode": query_mode,
"last_sync": last_sync_display,
}
# Enrich with catalog metadata (OpenMetadata)

View file

@ -418,6 +418,60 @@
color: #B45309;
}
/* ── Query Mode Badges ── */
/* Base style for the small uppercase badge shown next to a table name;
   colours come from the .local / .live modifier classes. */
.query-mode-badge {
font-size: 10px;
font-weight: 600;
padding: 2px 7px;
border-radius: 4px;
text-transform: uppercase;
letter-spacing: 0.3px;
/* keep the badge label on a single line */
white-space: nowrap;
}
/* "Local" badge: rendered for tables whose query_mode is not 'remote';
   uses the theme's primary colour variables. */
.query-mode-badge.local {
background: var(--primary-light);
color: var(--primary);
}
/* "Live" badge: rendered for tables whose query_mode is 'remote';
   dark green text on a 10%-opacity green tint. */
.query-mode-badge.live {
background: rgba(16, 183, 127, 0.1);
color: #047857;
}
/* Secondary line under a table's description: holds either the live dot plus
   "Queried directly from BigQuery" or the "Synced …" timestamp. */
.table-sync-info {
font-size: 11px;
color: var(--text-secondary);
margin-top: 2px;
/* flex row so the optional dot and the text are vertically centred */
display: flex;
align-items: center;
gap: 5px;
}
/* 6px circular green indicator shown for remote tables; pulses indefinitely
   using the pulse-live keyframes. */
.table-sync-info .live-dot {
width: 6px;
height: 6px;
border-radius: 50%;
background: #10B77F;
animation: pulse-live 2s ease-in-out infinite;
/* prevent the flex container from squeezing the dot */
flex-shrink: 0;
}
/* Pulse cycle: starts and ends fully opaque with a zero-spread, 40%-alpha ring;
   at the midpoint the dot dims to 70% and the ring has spread to 3px and
   faded to fully transparent. */
@keyframes pulse-live {
0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(16, 183, 127, 0.4); }
50% { opacity: 0.7; box-shadow: 0 0 0 3px rgba(16, 183, 127, 0); }
}
/* Row containing the "Calculated from data synced …" note: an icon and text
   laid out in a flex row, with a light top border. */
.data-freshness-note {
padding: 8px 24px;
font-size: 12px;
color: var(--text-secondary);
display: flex;
align-items: center;
gap: 6px;
border-top: 1px solid var(--border-light);
}
/* Icon inside the freshness note: dimmed to 50% opacity and kept at its
   intrinsic size (not shrunk by the flex layout). */
.data-freshness-note svg {
flex-shrink: 0;
opacity: 0.5;
}
.profile-link {
display: flex;
align-items: center;
@ -1332,7 +1386,12 @@
<div class="source-card-info">
<div class="source-card-name">Core Business Data</div>
<div class="source-card-desc">Core business data from internal systems</div>
<div class="source-card-meta">{{ data_stats.tables }} tables &middot; ~{{ data_stats.rows_display }} rows total</div>
<div class="source-card-meta">
{{ data_stats.total_tables or data_stats.tables }} tables &middot; ~{{ data_stats.rows_display }} rows total
{% if data_stats.last_updated %}
&middot; Synced {{ data_stats.last_updated }}
{% endif %}
</div>
</div>
</div>
<div class="source-card-right">
@ -1355,17 +1414,33 @@
</button>
<div class="accordion-content">
{% for table in category.tables %}
<div class="table-row" onclick="openProfiler('{{ table.name }}')">
<div class="table-row" {% if table.query_mode != 'remote' %}onclick="openProfiler('{{ table.name }}')"{% endif %}>
<div class="table-row-left">
<div class="table-row-name">{{ table.name }}</div>
<div class="table-row-name">
{{ table.name }}
{% if table.query_mode == 'remote' %}
<span class="query-mode-badge live">Live</span>
{% else %}
<span class="query-mode-badge local">Local</span>
{% endif %}
</div>
<div class="table-row-desc">{{ table.description }}</div>
<div class="table-sync-info">
{% if table.query_mode == 'remote' %}
<span class="live-dot"></span> Queried directly from BigQuery
{% elif table.last_sync %}
Synced {{ table.last_sync }}
{% endif %}
</div>
</div>
<div class="table-row-right">
<span class="rows-badge{{ ' large' if table.rows_large }}">{{ table.rows_display }}</span>
{% if table.query_mode != 'remote' %}
<span class="profile-link">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M2 12s3-7 10-7 10 7 10 7-3 7-10 7-10-7-10-7z"/><circle cx="12" cy="12" r="3"/></svg>
Profile
</span>
{% endif %}
</div>
</div>
{% endfor %}
@ -1402,6 +1477,13 @@
</div>
</div>
{% if data_stats and data_stats.last_updated %}
<div class="data-freshness-note">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><polyline points="12 6 12 12 16 14"/></svg>
Calculated from data synced {{ data_stats.last_updated }}
</div>
{% endif %}
{% for category in metrics_data %}
<div class="accordion-category">
<button class="accordion-trigger" onclick="toggleAccordion(this)">

View file

@ -1946,7 +1946,7 @@
<div>
<div class="data-source-name">Core Business Data</div>
<div class="data-source-status">
<span class="status-dot"></span>
<span class="status-dot{% if data_stats.last_updated %} status-dot--live{% endif %}"></span>
{% if data_stats.last_updated %}Synced {{ data_stats.last_updated }}{% else %}Not yet synced{% endif %}
</div>
</div>
@ -1954,7 +1954,7 @@
<span class="badge-included">Always included</span>
</div>
<div class="data-source-details">
{% if catalog_data %}{% for cat in catalog_data %}{{ cat.name }} ({{ cat.count }} tables){% if not loop.last %}, {% endif %}{% endfor %} -- {{ data_stats.tables }} tables total{% else %}Finance, HR, Sales, KBC Telemetry -- {{ data_stats.tables }} tables total{% endif %}
{% if catalog_data %}{% for cat in catalog_data %}{{ cat.name }} ({{ cat.count }} tables){% if not loop.last %}, {% endif %}{% endfor %}{% if data_stats.remote_tables %} &middot; {{ data_stats.local_tables }} local, {{ data_stats.remote_tables }} live{% endif %}{% else %}{{ data_stats.total_tables or data_stats.tables }} tables total{% endif %}
</div>
</div>
@ -1973,7 +1973,7 @@
<div>
<div class="data-source-name">Business Metrics</div>
<div class="data-source-status">
{{ metrics_total.n }} metrics across {{ metrics_data|length }} categories
{{ metrics_total.n }} metrics across {{ metrics_data|length }} categories{% if data_stats.last_updated %} &middot; data from {{ data_stats.last_updated }}{% endif %}
</div>
</div>
</div>