#!/usr/bin/env python3
"""
Jira SLA Polling - Refresh SLA data and self-heal stale status for open tickets.

Periodic job that finds open issues with SLA data in Parquet, fetches fresh SLA
elapsed_millis + status fields from the Jira API, and updates raw JSON + Parquet
files. This keeps SLA breach tracking accurate for idle tickets where no webhook
fires to refresh the snapshot.

Self-healing: also fetches status/resolution fields so tickets resolved in Jira
(but stale in local data due to missed webhooks) get corrected automatically on
the next poll cycle.

Designed to run as a systemd timer (every 15 min) via jira-sla-poll.timer.

Usage:
    # On server:
    python -m connectors.jira.scripts.poll_sla

    # Dry run (count open issues, don't fetch):
    python -m connectors.jira.scripts.poll_sla --dry-run

    # Verbose logging:
    python -m connectors.jira.scripts.poll_sla --verbose

Environment variables (loaded from .env):
    JIRA_SLA_EMAIL      - Email for JSM service account authentication
    JIRA_SLA_API_TOKEN  - API token for JSM service account
    JIRA_CLOUD_ID       - Atlassian Cloud site ID (for cloud API base URL)
    JIRA_DATA_DIR       - Directory for raw Jira data
                          (default: /data/src_data/raw/jira)
    JIRA_PARQUET_DIR    - Directory for Jira Parquet data
                          (default: /data/src_data/parquet/jira)
"""

import argparse
import json
import logging
import os
import sys
import tempfile
import time
from pathlib import Path

import httpx
import pandas as pd
from dotenv import load_dotenv

# Add project root to sys.path for imports
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from connectors.jira.scripts.backfill_sla import (
    SLA_FIELDS,
    has_valid_sla_data,
    load_config,
)
from connectors.jira.incremental_transform import transform_single_issue
from connectors.jira.file_lock import issue_json_lock

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Additional fields to fetch for self-healing stale status
STATUS_FIELDS = ["status", "resolution", "resolutiondate", "updated"]

# Upper bound on how often a single issue fetch is retried after HTTP 429,
# so a persistently rate-limited API cannot stall the poll cycle forever.
MAX_RATE_LIMIT_RETRIES = 5

# Wait used when the server sends no (or an unparseable) Retry-After header.
DEFAULT_RETRY_AFTER_SECONDS = 60


def _retry_after_seconds(header_value: str | None) -> int:
    """Convert a Retry-After header value into a non-negative wait in seconds.

    Retry-After may carry either delay-seconds or an HTTP-date. Only the
    integer form is interpreted; a missing header or any other format falls
    back to DEFAULT_RETRY_AFTER_SECONDS instead of raising.
    """
    if header_value is None:
        return DEFAULT_RETRY_AFTER_SECONDS
    try:
        return max(0, int(header_value))
    except ValueError:
        return DEFAULT_RETRY_AFTER_SECONDS


def fetch_sla_and_status(
    base_url: str,
    auth: tuple[str, str],
    issue_key: str,
    *,
    max_rate_limit_retries: int = MAX_RATE_LIMIT_RETRIES,
) -> dict | None:
    """
    Fetch SLA fields AND status/resolution fields for a single issue.

    Extends the SLA-only fetch to also request status, resolution,
    resolutiondate, and updated - enabling self-healing of stale data.

    Args:
        base_url: API base URL; the request goes to ``{base_url}/issue/{key}``.
        auth: (email, api_token) pair passed to httpx as basic auth.
        issue_key: Key of the issue to fetch.
        max_rate_limit_retries: How many times to retry after an HTTP 429
            before giving up. Negative values are treated as 0.

    Returns:
        The "fields" object of the response ({} if the key is absent), or
        None on any failure (404, other non-200 status, rate limit retries
        exhausted, invalid response body, or transport error).
    """
    all_fields = SLA_FIELDS + STATUS_FIELDS
    url = f"{base_url}/issue/{issue_key}"
    params = {"fields": ",".join(all_fields)}
    headers = {"Accept": "application/json"}
    attempts = max(0, max_rate_limit_retries) + 1

    try:
        # One client is reused across rate-limit retries.
        with httpx.Client(timeout=30) as client:
            for attempt in range(1, attempts + 1):
                response = client.get(
                    url,
                    auth=auth,
                    params=params,
                    headers=headers,
                )

                if response.status_code == 200:
                    try:
                        payload = response.json()
                    except ValueError as e:
                        logger.warning(
                            f"Invalid JSON in SLA+status response for "
                            f"{issue_key}: {e}"
                        )
                        return None
                    if not isinstance(payload, dict):
                        logger.warning(
                            f"Unexpected SLA+status response shape for "
                            f"{issue_key}: {type(payload).__name__}"
                        )
                        return None
                    return payload.get("fields", {})

                if response.status_code == 404:
                    logger.debug(f"Issue {issue_key} not found")
                    return None

                if response.status_code != 429:
                    logger.warning(
                        f"Failed to fetch SLA+status for {issue_key}: "
                        f"{response.status_code} {response.text[:200]}"
                    )
                    return None

                # HTTP 429: wait as instructed, unless this was the last try.
                if attempt < attempts:
                    retry_after = _retry_after_seconds(
                        response.headers.get("Retry-After")
                    )
                    logger.warning(f"Rate limited, waiting {retry_after}s...")
                    time.sleep(retry_after)
    except httpx.RequestError as e:
        logger.error(f"Request error fetching SLA+status for {issue_key}: {e}")
        return None

    logger.warning(
        f"Still rate limited for {issue_key} after {attempts} attempts, "
        f"giving up"
    )
    return None


def find_open_issues_with_sla(parquet_dir: Path) -> list[str]:
    """
    Read Parquet issues and return keys of open tickets that have SLA data.

    An issue qualifies if:
    - status_category != 'Done' (still open)
    - Has non-null first_response_elapsed_millis OR
      time_to_resolution_elapsed_millis

    Args:
        parquet_dir: Directory containing an "issues" subdirectory of
            ``*.parquet`` files.

    Returns:
        Unique, non-null issue keys in first-seen order. Empty if the
        directory or files are missing or none could be read.
    """
    issues_dir = parquet_dir / "issues"
    if not issues_dir.exists():
        logger.error(f"Issues Parquet directory not found: {issues_dir}")
        return []

    parquet_files = sorted(issues_dir.glob("*.parquet"))
    if not parquet_files:
        logger.error(f"No Parquet files found in {issues_dir}")
        return []

    logger.info(f"Reading {len(parquet_files)} Parquet files from {issues_dir}")

    # Read only needed columns for efficiency
    columns = [
        "issue_key",
        "status_category",
        "first_response_elapsed_millis",
        "time_to_resolution_elapsed_millis",
    ]

    dfs = []
    for pf in parquet_files:
        # Best effort: one unreadable file must not hide the others.
        try:
            dfs.append(pd.read_parquet(pf, columns=columns))
        except Exception as e:
            logger.warning(f"Failed to read {pf}: {e}")

    if not dfs:
        return []

    all_issues = pd.concat(dfs, ignore_index=True)
    logger.info(f"Total issues in Parquet: {len(all_issues)}")

    # Filter: open issues with SLA data
    is_open = all_issues["status_category"] != "Done"
    has_sla = (
        all_issues["first_response_elapsed_millis"].notna()
        | all_issues["time_to_resolution_elapsed_millis"].notna()
    )
    open_with_sla = all_issues[is_open & has_sla]

    # Null keys cannot be polled (and would break sorting in the caller);
    # duplicate keys would cost a redundant API call each.
    issue_keys = open_with_sla["issue_key"].dropna().drop_duplicates().tolist()
    logger.info(f"Open issues with SLA data: {len(issue_keys)}")

    return issue_keys


def _atomic_write_json(json_path: Path, data: dict) -> None:
    """Write data as JSON to json_path via a temp file + os.replace.

    The temp file is created in the same directory as the target. On any
    failure the temp file is removed and the exception is re-raised.
    """
    fd, tmp_path = tempfile.mkstemp(dir=str(json_path.parent), suffix=".tmp")
    try:
        # fchmod runs inside the with-block so the descriptor is closed and
        # the temp file cleaned up even if changing the mode fails.
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            # Restore group rw so www-data/deploy can access via ACL
            os.fchmod(f.fileno(), 0o660)
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, str(json_path))
    except Exception:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def update_issue_sla(
    issue_key: str,
    raw_dir: Path,
    base_url: str,
    auth: tuple[str, str],
) -> str:
    """
    Fetch fresh SLA + status data for a single issue, update raw JSON,
    and re-transform to Parquet.

    Self-healing: if the API returns a resolved status for an issue that was
    "open" in Parquet, the status fields in JSON are updated so the next
    Parquet transform reflects the correct state.

    The entire read-modify-write + transform is wrapped in an advisory file
    lock to prevent races with the webhook handler.

    Args:
        issue_key: Key of the issue to refresh.
        raw_dir: Raw data directory; the JSON is read from
            ``raw_dir/issues/{issue_key}.json``.
        base_url: API base URL passed through to the fetch.
        auth: (email, api_token) pair passed through to the fetch.

    Returns:
        "updated", "skipped", "healed", or "failed"

    Raises:
        Exception: Whatever the atomic JSON write raises (e.g. OSError);
            the temp file is cleaned up first.
    """
    issues_dir = raw_dir / "issues"
    json_path = issues_dir / f"{issue_key}.json"

    if not json_path.exists():
        logger.warning(f"Raw JSON not found for {issue_key}, skipping")
        return "skipped"

    # Fetch fresh SLA + status fields from API
    api_data = fetch_sla_and_status(base_url, auth, issue_key)
    if api_data is None:
        logger.warning(f"Failed to fetch SLA+status for {issue_key}")
        return "failed"

    # Check if any SLA field has valid data
    has_sla_data = any(has_valid_sla_data(api_data.get(f)) for f in SLA_FIELDS)

    # Check if status indicates resolution (self-healing)
    api_status = api_data.get("status")
    api_status_category = None
    if isinstance(api_status, dict):
        status_cat = api_status.get("statusCategory")
        if isinstance(status_cat, dict):
            api_status_category = status_cat.get("name")

    is_healed = api_status_category == "Done"

    if not has_sla_data and not is_healed:
        logger.debug(f"No SLA data and not resolved for {issue_key}")
        return "skipped"

    # Lock, read-modify-write, and transform atomically
    with issue_json_lock(issues_dir, issue_key):
        # Load existing JSON
        try:
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            logger.error(f"Failed to read {json_path}: {e}")
            return "failed"

        if not isinstance(data, dict):
            logger.error(
                f"Failed to read {json_path}: expected a JSON object, "
                f"got {type(data).__name__}"
            )
            return "failed"

        # A missing or null "fields" entry is replaced so updates can land.
        if not isinstance(data.get("fields"), dict):
            data["fields"] = {}
        fields = data["fields"]

        # Update SLA fields
        for sla_field in SLA_FIELDS:
            if sla_field in api_data:
                fields[sla_field] = api_data[sla_field]

        # Update status fields (self-healing). Only non-null API values are
        # copied, so this job never clears a locally stored value.
        if api_status is not None:
            fields["status"] = api_status
        for name in ("resolution", "resolutiondate", "updated"):
            value = api_data.get(name)
            if value is not None:
                fields[name] = value

        if is_healed:
            logger.info(f"Self-healing: {issue_key} is resolved in Jira")

        # Atomic write: temp file + replace
        _atomic_write_json(json_path, data)

        # Re-transform to Parquet (inside lock to prevent stale reads)
        success = transform_single_issue(issue_key=issue_key)
        if not success:
            logger.error(f"Failed to transform {issue_key} after SLA update")
            return "failed"

    return "healed" if is_healed else "updated"


def main():
    """Parse CLI args, poll every open issue with SLA data, and log a summary.

    Exits with status 1 if at least one issue failed.
    """
    parser = argparse.ArgumentParser(
        description="Poll open Jira tickets for fresh SLA data",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only count open issues with SLA data, don't fetch or modify",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    config = load_config()
    # Coerce to Path so the "/" joins below work even if config holds a str.
    raw_dir = Path(config["data_dir"])
    parquet_dir = Path(os.environ.get(
        "JIRA_PARQUET_DIR", "/data/src_data/parquet/jira"
    ))
    base_url = config["base_url"]
    auth = (config["email"], config["api_token"])

    # Find open issues with SLA data
    open_issues = find_open_issues_with_sla(parquet_dir)

    if not open_issues:
        logger.info("No open issues with SLA data found")
        return

    if args.dry_run:
        logger.info(f"Dry run: would poll {len(open_issues)} open issues:")
        for key in sorted(open_issues):
            logger.info(f" {key}")
        return

    # Process each open issue
    stats = {"updated": 0, "skipped": 0, "failed": 0, "healed": 0}
    start_time = time.time()

    for i, issue_key in enumerate(sorted(open_issues), 1):
        logger.info(f"[{i}/{len(open_issues)}] Polling {issue_key}...")
        try:
            result = update_issue_sla(issue_key, raw_dir, base_url, auth)
        except Exception:
            # One broken issue must not abort the rest of the poll cycle;
            # it is counted as failed so the run still exits non-zero.
            logger.exception(f"Unexpected error while polling {issue_key}")
            result = "failed"
        stats[result] += 1

        # Brief pause between API calls to be respectful
        time.sleep(0.5)

    elapsed = time.time() - start_time

    logger.info("=" * 60)
    logger.info("SLA polling completed!")
    logger.info(f"Open issues polled: {len(open_issues)}")
    logger.info(f"Updated (SLA only): {stats['updated']}")
    logger.info(f"Healed (status corrected): {stats['healed']}")
    logger.info(f"Skipped: {stats['skipped']}")
    logger.info(f"Failed: {stats['failed']}")
    logger.info(f"Time: {elapsed:.1f}s")
    logger.info("=" * 60)

    if stats["failed"] > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()