Move all Jira-specific code into a self-contained connector module:

- 22 files moved via git mv (transform, service, webhook, scripts, systemd units, tests, docs, bin helper)
- All imports updated to use connectors.jira.* paths
- Jira is now conditional: auto-detected via JIRA_DOMAIN env var
- Webapp registers Jira blueprint only when available
- Health service monitors Jira timers only when enabled
- Profiler loads Jira tables dynamically from filesystem
- Sync settings uses config-driven dependency validation
- Renamed keboola_platform_url -> custom_url in transform
- Updated deploy.sh, sudoers-deploy, backfill_gap.sh paths
- Fixed pytest.ini to skip live tests by default
344 lines
11 KiB
Python
344 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Jira SLA Polling - Refresh SLA data and self-heal stale status for open tickets.
|
|
|
|
Periodic job that finds open issues with SLA data in Parquet, fetches
|
|
fresh SLA elapsed_millis + status fields from the Jira API, and updates
|
|
raw JSON + Parquet files. This keeps SLA breach tracking accurate for
|
|
idle tickets where no webhook fires to refresh the snapshot.
|
|
|
|
Self-healing: also fetches status/resolution fields so tickets resolved
|
|
in Jira (but stale in local data due to missed webhooks) get corrected
|
|
automatically on the next poll cycle.
|
|
|
|
Designed to run as a systemd timer (every 15 min) via jira-sla-poll.timer.
|
|
|
|
Usage:
|
|
# On server:
|
|
python -m connectors.jira.scripts.poll_sla
|
|
|
|
# Dry run (count open issues, don't fetch):
|
|
python -m connectors.jira.scripts.poll_sla --dry-run
|
|
|
|
# Verbose logging:
|
|
python -m connectors.jira.scripts.poll_sla --verbose
|
|
|
|
Environment variables (loaded from .env):
|
|
JIRA_SLA_EMAIL - Email for JSM service account authentication
|
|
JIRA_SLA_API_TOKEN - API token for JSM service account
|
|
JIRA_CLOUD_ID - Atlassian Cloud site ID (for cloud API base URL)
|
|
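JIRA_DATA_DIR - Directory for raw Jira data (default: /data/src_data/raw/jira)
JIRA_PARQUET_DIR - Directory holding the Jira Parquet data that is scanned for open issues (default: /data/src_data/parquet/jira)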
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
import pandas as pd
|
|
from dotenv import load_dotenv
|
|
|
|
# Put the repository root (four directory levels above this file) at the front
# of sys.path so the absolute `connectors.*` imports resolve even when this
# file is executed directly rather than via `python -m`.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path[:0] = [str(PROJECT_ROOT)]
|
|
|
|
from connectors.jira.scripts.backfill_sla import (
|
|
SLA_FIELDS,
|
|
has_valid_sla_data,
|
|
load_config,
|
|
)
|
|
from connectors.jira.incremental_transform import transform_single_issue
|
|
from connectors.jira.file_lock import issue_json_lock
|
|
|
|
# Configure the root logger at import time: INFO level, timestamped records.
# main() raises the root level to DEBUG when --verbose is passed.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
|
# Non-SLA issue fields requested alongside SLA_FIELDS on every poll. They let
# the poller overwrite stale status/resolution values in the local raw JSON.
STATUS_FIELDS = [
    "status",
    "resolution",
    "resolutiondate",
    "updated",
]
|
|
|
|
|
|
def fetch_sla_and_status(
    base_url: str,
    auth: tuple[str, str],
    issue_key: str,
    max_retries: int = 5,
) -> dict | None:
    """
    Fetch SLA fields AND status/resolution fields for a single issue.

    Requests SLA_FIELDS plus STATUS_FIELDS (status, resolution,
    resolutiondate, updated) in one GET to ``{base_url}/issue/{issue_key}``,
    which is what enables self-healing of stale local data.

    Args:
        base_url: API base URL; ``/issue/<key>`` is appended to it.
        auth: ``(email, api_token)`` pair passed to httpx as basic auth.
        issue_key: Key of the issue to fetch.
        max_retries: Maximum number of times to wait and retry after an
            HTTP 429 response. Negative values are treated as 0.

    Returns:
        The ``fields`` mapping from the response on HTTP 200, or None when
        the issue is missing (404), the rate-limit retries are exhausted,
        the server answers with any other status, the body is not valid
        JSON, or the request itself fails.
    """
    url = f"{base_url}/issue/{issue_key}"
    params = {"fields": ",".join(SLA_FIELDS + STATUS_FIELDS)}
    retries_left = max(0, max_retries)

    try:
        # One client is reused across rate-limit retries instead of opening a
        # new connection pool for every attempt.
        with httpx.Client(timeout=30) as client:
            while True:
                response = client.get(
                    url,
                    auth=auth,
                    params=params,
                    headers={"Accept": "application/json"},
                )
                status = response.status_code

                if status == 200:
                    try:
                        return response.json().get("fields", {})
                    except ValueError as e:
                        # json.JSONDecodeError is a ValueError subclass.
                        logger.warning(
                            f"Invalid JSON in SLA+status response for {issue_key}: {e}"
                        )
                        return None

                if status == 404:
                    logger.debug(f"Issue {issue_key} not found")
                    return None

                if status != 429:
                    logger.warning(
                        f"Failed to fetch SLA+status for {issue_key}: "
                        f"{response.status_code} {response.text[:200]}"
                    )
                    return None

                # Rate limited: retry a bounded number of times so a
                # persistent 429 cannot loop (or recurse) forever.
                if retries_left == 0:
                    logger.warning(
                        f"Rate limit retries exhausted for {issue_key}, giving up"
                    )
                    return None
                retries_left -= 1

                # Retry-After may be delay-seconds or an HTTP-date; fall back
                # to 60s for anything that is not a plain integer.
                try:
                    retry_after = max(0, int(response.headers.get("Retry-After", 60)))
                except (TypeError, ValueError):
                    retry_after = 60

                logger.warning(f"Rate limited, waiting {retry_after}s...")
                time.sleep(retry_after)

    except httpx.RequestError as e:
        logger.error(f"Request error fetching SLA+status for {issue_key}: {e}")
        return None
|
|
|
|
|
|
def find_open_issues_with_sla(parquet_dir: Path) -> list[str]:
    """
    Collect the keys of open issues that carry SLA data in Parquet.

    Reads every ``*.parquet`` file under ``<parquet_dir>/issues`` (only the
    columns needed for filtering) and keeps the rows where:

      - status_category is not 'Done', and
      - first_response_elapsed_millis or time_to_resolution_elapsed_millis
        is non-null.

    Files that cannot be read are logged and skipped.

    Returns:
        List of matching issue keys; empty when the directory is missing,
        holds no Parquet files, or none of the files could be read.
    """
    issues_dir = parquet_dir / "issues"
    if not issues_dir.exists():
        logger.error(f"Issues Parquet directory not found: {issues_dir}")
        return []

    parquet_files = sorted(issues_dir.glob("*.parquet"))
    if not parquet_files:
        logger.error(f"No Parquet files found in {issues_dir}")
        return []

    logger.info(f"Reading {len(parquet_files)} Parquet files from {issues_dir}")

    # Restrict the read to the columns used by the filter below.
    wanted_columns = [
        "issue_key",
        "status_category",
        "first_response_elapsed_millis",
        "time_to_resolution_elapsed_millis",
    ]

    frames = []
    for parquet_path in parquet_files:
        try:
            frame = pd.read_parquet(parquet_path, columns=wanted_columns)
        except Exception as exc:
            logger.warning(f"Failed to read {parquet_path}: {exc}")
        else:
            frames.append(frame)

    if not frames:
        return []

    combined = pd.concat(frames, ignore_index=True)
    logger.info(f"Total issues in Parquet: {len(combined)}")

    # Build the row mask step by step: still open AND at least one SLA metric.
    still_open = combined["status_category"] != "Done"
    has_first_response = combined["first_response_elapsed_millis"].notna()
    has_time_to_resolution = combined["time_to_resolution_elapsed_millis"].notna()
    selected = combined[still_open & (has_first_response | has_time_to_resolution)]

    issue_keys = selected["issue_key"].tolist()
    logger.info(f"Open issues with SLA data: {len(issue_keys)}")
    return issue_keys
|
|
|
|
|
|
def _write_json_atomically(json_path: Path, data: dict) -> None:
    """Write *data* as JSON to *json_path* via a sibling temp file + os.replace."""
    fd, tmp_path = tempfile.mkstemp(dir=str(json_path.parent), suffix=".tmp")
    try:
        # os.fdopen takes ownership of fd right away, so every failure from
        # here on (including fchmod) closes the descriptor via the `with`.
        with os.fdopen(fd, "w") as f:
            # Restore group rw so www-data/deploy can access via ACL
            # (mkstemp creates the file readable/writable by the owner only).
            os.fchmod(f.fileno(), 0o660)
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, str(json_path))
    except Exception:
        # Never leave a stray temp file next to the real JSON.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def update_issue_sla(
    issue_key: str,
    raw_dir: Path,
    base_url: str,
    auth: tuple[str, str],
) -> str:
    """
    Fetch fresh SLA + status data for a single issue, update raw JSON,
    and re-transform to Parquet.

    Self-healing: if the API reports a status whose statusCategory name is
    "Done", the status fields in the raw JSON are overwritten so the
    Parquet transform reflects the resolved state.

    The read-modify-write of ``<raw_dir>/issues/<issue_key>.json`` and the
    transform both run while holding ``issue_json_lock`` for the issue.

    Args:
        issue_key: Key of the issue to refresh.
        raw_dir: Directory containing the ``issues/`` folder of raw JSON.
        base_url: API base URL forwarded to fetch_sla_and_status.
        auth: ``(email, api_token)`` pair forwarded to fetch_sla_and_status.

    Returns:
        "updated" - SLA data refreshed, issue not resolved.
        "healed"  - API reports the issue as Done; status fields corrected.
        "skipped" - raw JSON missing, or no SLA data and not resolved.
        "failed"  - fetch, JSON read, or Parquet transform failed.

    Raises:
        Exception: errors while writing the updated JSON are re-raised after
            the temp file has been removed.
    """
    issues_dir = raw_dir / "issues"
    json_path = issues_dir / f"{issue_key}.json"
    if not json_path.exists():
        logger.warning(f"Raw JSON not found for {issue_key}, skipping")
        return "skipped"

    # Fetch fresh SLA + status fields from API
    api_data = fetch_sla_and_status(base_url, auth, issue_key)
    if api_data is None:
        logger.warning(f"Failed to fetch SLA+status for {issue_key}")
        return "failed"

    # Check if any SLA field has valid data
    has_sla_data = any(has_valid_sla_data(api_data.get(f)) for f in SLA_FIELDS)

    # Check if status indicates resolution (self-healing)
    api_status = api_data.get("status")
    api_status_category = None
    if isinstance(api_status, dict):
        status_cat = api_status.get("statusCategory")
        if isinstance(status_cat, dict):
            api_status_category = status_cat.get("name")

    is_healed = api_status_category == "Done"

    if not has_sla_data and not is_healed:
        logger.debug(f"No SLA data and not resolved for {issue_key}")
        return "skipped"

    # Lock, read-modify-write, and transform atomically
    with issue_json_lock(issues_dir, issue_key):
        # Load existing JSON
        try:
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            logger.error(f"Failed to read {json_path}: {e}")
            return "failed"

        # A non-object document cannot hold issue fields; refuse to touch it.
        if not isinstance(data, dict):
            logger.error(f"Failed to read {json_path}: top-level JSON is not an object")
            return "failed"

        # Covers both a missing "fields" key and an explicit null value.
        if not isinstance(data.get("fields"), dict):
            data["fields"] = {}
        fields = data["fields"]

        # Update SLA fields
        for sla_field in SLA_FIELDS:
            if sla_field in api_data:
                fields[sla_field] = api_data[sla_field]

        # Update status fields (self-healing); null API values never
        # overwrite what is already stored locally.
        if api_status is not None:
            fields["status"] = api_data["status"]
        for name in ("resolution", "resolutiondate", "updated"):
            if api_data.get(name) is not None:
                fields[name] = api_data[name]

        if is_healed:
            logger.info(f"Self-healing: {issue_key} is resolved in Jira")

        # Atomic write: temp file + replace
        _write_json_atomically(json_path, data)

        # Re-transform to Parquet (inside lock to prevent stale reads)
        success = transform_single_issue(issue_key=issue_key)
        if not success:
            logger.error(f"Failed to transform {issue_key} after SLA update")
            return "failed"

    return "healed" if is_healed else "updated"
|
|
|
|
|
|
def main():
    """
    Command-line entry point: poll every open issue with SLA data.

    Flags:
        --dry-run  Only list the open issues that would be polled.
        --verbose  Raise the root logger to DEBUG.

    Reads connection settings and the raw data directory from load_config()
    and the Parquet directory from JIRA_PARQUET_DIR. Each issue is refreshed
    through update_issue_sla with a 0.5s pause between calls, and a summary
    is logged at the end.

    Exits with status 1 when at least one issue failed; otherwise returns
    normally.
    """
    parser = argparse.ArgumentParser(
        description="Poll open Jira tickets for fresh SLA data",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only count open issues with SLA data, don't fetch or modify",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    config = load_config()
    # update_issue_sla joins paths with `/`, so make sure this is a Path.
    # NOTE(review): load_config may already return a Path; coercing is a no-op then.
    raw_dir = Path(config["data_dir"])
    parquet_dir = Path(os.environ.get(
        "JIRA_PARQUET_DIR", "/data/src_data/parquet/jira"
    ))
    base_url = config["base_url"]
    auth = (config["email"], config["api_token"])

    # Find open issues with SLA data
    open_issues = find_open_issues_with_sla(parquet_dir)

    if not open_issues:
        logger.info("No open issues with SLA data found")
        return

    ordered_keys = sorted(open_issues)

    if args.dry_run:
        logger.info(f"Dry run: would poll {len(open_issues)} open issues:")
        for key in ordered_keys:
            logger.info(f" {key}")
        return

    # Process each open issue
    stats = {"updated": 0, "skipped": 0, "failed": 0, "healed": 0}
    start_time = time.time()

    for i, issue_key in enumerate(ordered_keys, 1):
        logger.info(f"[{i}/{len(open_issues)}] Polling {issue_key}...")

        try:
            result = update_issue_sla(issue_key, raw_dir, base_url, auth)
        except Exception:
            # update_issue_sla re-raises write errors; one broken issue must
            # not stop the remaining tickets from being refreshed.
            logger.exception(f"Unexpected error while polling {issue_key}")
            result = "failed"
        stats[result] += 1

        # Brief pause between API calls to be respectful
        time.sleep(0.5)

    elapsed = time.time() - start_time

    logger.info("=" * 60)
    logger.info("SLA polling completed!")
    logger.info(f"Open issues polled: {len(open_issues)}")
    logger.info(f"Updated (SLA only): {stats['updated']}")
    logger.info(f"Healed (status corrected): {stats['healed']}")
    logger.info(f"Skipped: {stats['skipped']}")
    logger.info(f"Failed: {stats['failed']}")
    logger.info(f"Time: {elapsed:.1f}s")
    logger.info("=" * 60)

    # Non-zero exit lets the caller (e.g. the systemd unit) see the failure.
    if stats["failed"] > 0:
        sys.exit(1)
|
|
|
|
|
|
# Run the poller only when executed as a script or with `python -m`;
# importing this module must not start a poll cycle.
if __name__ == "__main__":
    main()
|