agnes-the-ai-analyst/connectors/jira/scripts/poll_sla.py
Petr 86edd27655 Extract Jira into connectors/jira module
Move all Jira-specific code into a self-contained connector module:
- 22 files moved via git mv (transform, service, webhook, scripts,
  systemd units, tests, docs, bin helper)
- All imports updated to use connectors.jira.* paths
- Jira is now conditional: auto-detected via JIRA_DOMAIN env var
- Webapp registers Jira blueprint only when available
- Health service monitors Jira timers only when enabled
- Profiler loads Jira tables dynamically from filesystem
- Sync settings uses config-driven dependency validation
- Renamed keboola_platform_url -> custom_url in transform
- Updated deploy.sh, sudoers-deploy, backfill_gap.sh paths
- Fixed pytest.ini to skip live tests by default
2026-03-09 11:17:50 +01:00

344 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Jira SLA Polling - Refresh SLA data and self-heal stale status for open tickets.
Periodic job that finds open issues with SLA data in Parquet, fetches
fresh SLA elapsed_millis + status fields from the Jira API, and updates
raw JSON + Parquet files. This keeps SLA breach tracking accurate for
idle tickets where no webhook fires to refresh the snapshot.
Self-healing: also fetches status/resolution fields so tickets resolved
in Jira (but stale in local data due to missed webhooks) get corrected
automatically on the next poll cycle.
Designed to run as a systemd timer (every 15 min) via jira-sla-poll.timer.
Usage:
# On server:
python -m connectors.jira.scripts.poll_sla
# Dry run (count open issues, don't fetch):
python -m connectors.jira.scripts.poll_sla --dry-run
# Verbose logging:
python -m connectors.jira.scripts.poll_sla --verbose
Environment variables (loaded from .env):
JIRA_SLA_EMAIL - Email for JSM service account authentication
JIRA_SLA_API_TOKEN - API token for JSM service account
JIRA_CLOUD_ID - Atlassian Cloud site ID (for cloud API base URL)
JIRA_DATA_DIR - Directory for raw Jira data (default: /data/src_data/raw/jira)
JIRA_PARQUET_DIR - Directory containing Jira Parquet data (default: /data/src_data/parquet/jira)
"""
import argparse
import json
import logging
import os
import sys
import tempfile
import time
from pathlib import Path
import httpx
import pandas as pd
from dotenv import load_dotenv
# Add project root to sys.path for imports
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from connectors.jira.scripts.backfill_sla import (
SLA_FIELDS,
has_valid_sla_data,
load_config,
)
from connectors.jira.incremental_transform import transform_single_issue
from connectors.jira.file_lock import issue_json_lock
# Configure the root logger at import time: INFO level, timestamped
# "<date> [<LEVEL>] <message>" lines. main() raises the level to DEBUG
# when --verbose is passed.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

# Additional fields to fetch for self-healing stale status.
# They are requested together with SLA_FIELDS in fetch_sla_and_status() and
# copied into the raw issue JSON by update_issue_sla().
STATUS_FIELDS = ["status", "resolution", "resolutiondate", "updated"]
# Upper bound on how many times a single issue fetch is retried after
# HTTP 429. The value is a pragmatic cap so that one throttled issue cannot
# stall the whole poll cycle indefinitely.
_MAX_RATE_LIMIT_RETRIES = 5

# Fallback wait (seconds) when the Retry-After header is missing or unusable.
_DEFAULT_RETRY_AFTER_SECONDS = 60


def _parse_retry_after(raw_value, default: int = _DEFAULT_RETRY_AFTER_SECONDS) -> int:
    """Convert a Retry-After header value into a non-negative number of seconds.

    Falls back to *default* when the header is absent (None) or is not a
    plain integer (for example an HTTP-date), and clamps negative values to
    zero so the result is always safe to pass to ``time.sleep``.
    """
    try:
        seconds = int(raw_value)
    except (TypeError, ValueError):
        return default
    return max(seconds, 0)


def fetch_sla_and_status(
    base_url: str, auth: tuple[str, str], issue_key: str
) -> dict | None:
    """
    Fetch SLA fields AND status/resolution fields for a single issue.

    Requests ``SLA_FIELDS + STATUS_FIELDS`` from ``{base_url}/issue/{issue_key}``
    so the caller can both refresh SLA data and self-heal stale status.

    HTTP 429 responses are retried on the same client, honouring the
    Retry-After header, at most ``_MAX_RATE_LIMIT_RETRIES`` times. The
    retry is a bounded loop rather than recursion so sustained throttling
    terminates instead of growing the call stack.

    Args:
        base_url: API base URL; ``/issue/<key>`` is appended to it.
        auth: ``(username, password)`` pair handed to httpx as basic auth.
        issue_key: Key of the issue to fetch.

    Returns:
        The ``fields`` mapping of the JSON response (``{}`` if the key is
        absent), or None on any failure: 404, non-200 status, exhausted
        rate-limit retries, an undecodable or non-object body, or a
        transport error.
    """
    url = f"{base_url}/issue/{issue_key}"
    params = {"fields": ",".join(SLA_FIELDS + STATUS_FIELDS)}

    try:
        with httpx.Client(timeout=30) as client:
            for attempt in range(_MAX_RATE_LIMIT_RETRIES + 1):
                response = client.get(
                    url,
                    auth=auth,
                    params=params,
                    headers={"Accept": "application/json"},
                )
                status = response.status_code

                if status == 200:
                    # A 200 with a malformed body raises ValueError, which
                    # is not an httpx.RequestError; handle it here so one
                    # bad response does not crash the poll run.
                    try:
                        payload = response.json()
                    except ValueError as e:
                        logger.warning(
                            f"Invalid JSON in SLA+status response for {issue_key}: {e}"
                        )
                        return None
                    if not isinstance(payload, dict):
                        logger.warning(
                            f"Unexpected SLA+status payload for {issue_key}: "
                            f"{type(payload).__name__}"
                        )
                        return None
                    return payload.get("fields", {})

                if status == 404:
                    logger.debug(f"Issue {issue_key} not found")
                    return None

                if status == 429:
                    if attempt == _MAX_RATE_LIMIT_RETRIES:
                        break  # retries exhausted; report below
                    retry_after = _parse_retry_after(
                        response.headers.get("Retry-After")
                    )
                    logger.warning(f"Rate limited, waiting {retry_after}s...")
                    time.sleep(retry_after)
                    continue

                logger.warning(
                    f"Failed to fetch SLA+status for {issue_key}: "
                    f"{response.status_code} {response.text[:200]}"
                )
                return None

            logger.warning(
                f"Rate limited fetching SLA+status for {issue_key}; "
                f"giving up after {_MAX_RATE_LIMIT_RETRIES} retries"
            )
            return None
    except httpx.RequestError as e:
        logger.error(f"Request error fetching SLA+status for {issue_key}: {e}")
        return None
def find_open_issues_with_sla(parquet_dir: Path) -> list[str]:
    """
    Collect the keys of open issues that carry SLA data in Parquet.

    Every ``*.parquet`` file under ``<parquet_dir>/issues`` is read (only the
    four columns needed here) and concatenated. An issue is selected when:
    - its status_category is not 'Done', and
    - at least one of first_response_elapsed_millis /
      time_to_resolution_elapsed_millis is non-null.

    Files that cannot be read are logged and skipped. Returns an empty list
    when the directory is missing, holds no Parquet files, or none of the
    files could be read.
    """
    issues_dir = parquet_dir / "issues"
    if not issues_dir.exists():
        logger.error(f"Issues Parquet directory not found: {issues_dir}")
        return []

    parquet_files = sorted(issues_dir.glob("*.parquet"))
    if not parquet_files:
        logger.error(f"No Parquet files found in {issues_dir}")
        return []

    logger.info(f"Reading {len(parquet_files)} Parquet files from {issues_dir}")

    # Restrict the read to the columns the filter below actually uses.
    wanted_columns = [
        "issue_key",
        "status_category",
        "first_response_elapsed_millis",
        "time_to_resolution_elapsed_millis",
    ]

    frames = []
    for parquet_file in parquet_files:
        try:
            frame = pd.read_parquet(parquet_file, columns=wanted_columns)
        except Exception as e:
            logger.warning(f"Failed to read {parquet_file}: {e}")
        else:
            frames.append(frame)

    if not frames:
        return []

    all_issues = pd.concat(frames, ignore_index=True)
    logger.info(f"Total issues in Parquet: {len(all_issues)}")

    # Open = anything whose status category is not "Done".
    is_open = all_issues["status_category"] != "Done"
    # Has SLA data = NOT (both elapsed-millis columns are null).
    lacks_sla = (
        all_issues["first_response_elapsed_millis"].isna()
        & all_issues["time_to_resolution_elapsed_millis"].isna()
    )
    selected = all_issues[is_open & ~lacks_sla]

    issue_keys = selected["issue_key"].tolist()
    logger.info(f"Open issues with SLA data: {len(issue_keys)}")
    return issue_keys
def _status_category_name(api_status) -> str | None:
    """Return ``status["statusCategory"]["name"]`` or None if any level is missing or not a dict."""
    if not isinstance(api_status, dict):
        return None
    category = api_status.get("statusCategory")
    if not isinstance(category, dict):
        return None
    return category.get("name")


def _merge_api_fields(data: dict, api_data: dict) -> None:
    """Copy fresh SLA and status values from *api_data* into ``data["fields"]`` in place."""
    # A missing or non-dict "fields" (e.g. JSON null) is replaced so the
    # assignments below cannot fail with TypeError.
    if not isinstance(data.get("fields"), dict):
        data["fields"] = {}
    fields = data["fields"]

    # SLA fields are copied whenever the API returned the key, even when the
    # value is null, so the local snapshot mirrors the API response.
    for sla_field in SLA_FIELDS:
        if sla_field in api_data:
            fields[sla_field] = api_data[sla_field]

    # Status fields (self-healing) are only overwritten with non-null values.
    for status_field in ("status", "resolution", "resolutiondate", "updated"):
        value = api_data.get(status_field)
        if value is not None:
            fields[status_field] = value


def _write_json_atomic(json_path: Path, data: dict) -> None:
    """Write *data* as JSON to *json_path* via a temp file in the same directory plus ``os.replace``.

    On any failure the temp file is removed and the exception is re-raised.
    """
    fd, tmp_path = tempfile.mkstemp(dir=str(json_path.parent), suffix=".tmp")
    try:
        try:
            # Restore group rw so www-data/deploy can access via ACL
            os.fchmod(fd, 0o660)
        except OSError:
            # os.fdopen has not taken ownership of fd yet, so close it here
            # to avoid leaking the descriptor.
            os.close(fd)
            raise
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, str(json_path))
    except Exception:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def update_issue_sla(
    issue_key: str,
    raw_dir: Path,
    base_url: str,
    auth: tuple[str, str],
) -> str:
    """
    Fetch fresh SLA + status data for a single issue, update raw JSON,
    and re-transform to Parquet.

    Self-healing: if the API reports a status whose statusCategory name is
    "Done", the status fields in the raw JSON are updated so the Parquet
    transform reflects the resolved state.

    The read-modify-write of ``<raw_dir>/issues/<issue_key>.json`` and the
    call to ``transform_single_issue`` both run inside ``issue_json_lock``
    (per the original author, an advisory file lock that prevents races
    with the webhook handler).

    Args:
        issue_key: Key of the issue to refresh.
        raw_dir: Raw Jira data directory; JSON lives in its ``issues`` subdir.
        base_url: API base URL passed through to ``fetch_sla_and_status``.
        auth: ``(username, password)`` pair passed through to the fetch.

    Returns:
        "updated" - JSON rewritten and transformed, issue still open.
        "healed"  - JSON rewritten and transformed, issue is Done in Jira.
        "skipped" - no raw JSON on disk, or the API returned neither valid
                    SLA data nor a Done status.
        "failed"  - API fetch failed, the JSON could not be read or is not
                    an object, or the transform reported failure.

    Raises:
        Exception: whatever the atomic write raises (e.g. OSError) is
            propagated after the temp file has been cleaned up.
    """
    issues_dir = raw_dir / "issues"
    json_path = issues_dir / f"{issue_key}.json"

    if not json_path.exists():
        logger.warning(f"Raw JSON not found for {issue_key}, skipping")
        return "skipped"

    # Fetch fresh SLA + status fields from API
    api_data = fetch_sla_and_status(base_url, auth, issue_key)
    if api_data is None:
        logger.warning(f"Failed to fetch SLA+status for {issue_key}")
        return "failed"

    # Check if any SLA field has valid data
    has_sla_data = any(has_valid_sla_data(api_data.get(f)) for f in SLA_FIELDS)

    # Check if status indicates resolution (self-healing)
    is_healed = _status_category_name(api_data.get("status")) == "Done"

    if not has_sla_data and not is_healed:
        logger.debug(f"No SLA data and not resolved for {issue_key}")
        return "skipped"

    # Lock, read-modify-write, and transform atomically
    with issue_json_lock(issues_dir, issue_key):
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.error(f"Failed to read {json_path}: {e}")
            return "failed"

        if not isinstance(data, dict):
            logger.error(
                f"Failed to read {json_path}: expected a JSON object, "
                f"got {type(data).__name__}"
            )
            return "failed"

        _merge_api_fields(data, api_data)

        if is_healed:
            logger.info(f"Self-healing: {issue_key} is resolved in Jira")

        _write_json_atomic(json_path, data)

        # Re-transform to Parquet (inside lock to prevent stale reads)
        success = transform_single_issue(issue_key=issue_key)
        if not success:
            logger.error(f"Failed to transform {issue_key} after SLA update")
            return "failed"

    return "healed" if is_healed else "updated"
def main(argv: list[str] | None = None) -> None:
    """
    Command-line entry point: poll every open issue with SLA data once.

    Steps: parse arguments, load configuration via ``load_config()``, find
    open issues with SLA data in Parquet, then (unless ``--dry-run``) call
    ``update_issue_sla`` for each key in sorted order and log a summary.

    Args:
        argv: Argument list to parse; None (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Exits with status 1 if at least one issue ended up as "failed".
    An unexpected exception while processing one issue is logged with its
    traceback and counted as "failed" so the remaining issues are still
    polled and the summary is still printed.
    """
    parser = argparse.ArgumentParser(
        description="Poll open Jira tickets for fresh SLA data",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only count open issues with SLA data, don't fetch or modify",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )
    args = parser.parse_args(argv)

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    config = load_config()
    # update_issue_sla joins paths with "/", so make sure this is a Path
    # even if the config provides a plain string (Path(Path) is a no-op).
    raw_dir = Path(config["data_dir"])
    parquet_dir = Path(os.environ.get(
        "JIRA_PARQUET_DIR", "/data/src_data/parquet/jira"
    ))
    base_url = config["base_url"]
    auth = (config["email"], config["api_token"])

    # Find open issues with SLA data
    open_issues = find_open_issues_with_sla(parquet_dir)

    if not open_issues:
        logger.info("No open issues with SLA data found")
        return

    ordered_keys = sorted(open_issues)
    total = len(ordered_keys)

    if args.dry_run:
        logger.info(f"Dry run: would poll {total} open issues:")
        for key in ordered_keys:
            logger.info(f" {key}")
        return

    # Process each open issue
    stats = {"updated": 0, "skipped": 0, "failed": 0, "healed": 0}
    start_time = time.time()

    for i, issue_key in enumerate(ordered_keys, 1):
        logger.info(f"[{i}/{total}] Polling {issue_key}...")
        try:
            result = update_issue_sla(issue_key, raw_dir, base_url, auth)
        except Exception:
            # Top-level boundary: one broken issue must not abort the cycle.
            logger.exception(f"Unexpected error while polling {issue_key}")
            result = "failed"
        stats[result] += 1

        # Brief pause between API calls to be respectful (not after the last)
        if i < total:
            time.sleep(0.5)

    elapsed = time.time() - start_time

    logger.info("=" * 60)
    logger.info("SLA polling completed!")
    logger.info(f"Open issues polled: {total}")
    logger.info(f"Updated (SLA only): {stats['updated']}")
    logger.info(f"Healed (status corrected): {stats['healed']}")
    logger.info(f"Skipped: {stats['skipped']}")
    logger.info(f"Failed: {stats['failed']}")
    logger.info(f"Time: {elapsed:.1f}s")
    logger.info("=" * 60)

    if stats["failed"] > 0:
        sys.exit(1)
if __name__ == "__main__":
main()