From 86edd276558ef5684958c2ef4752411e61fc33a8 Mon Sep 17 00:00:00 2001 From: Petr Date: Mon, 9 Mar 2026 11:17:50 +0100 Subject: [PATCH] Extract Jira into connectors/jira module Move all Jira-specific code into a self-contained connector module: - 22 files moved via git mv (transform, service, webhook, scripts, systemd units, tests, docs, bin helper) - All imports updated to use connectors.jira.* paths - Jira is now conditional: auto-detected via JIRA_DOMAIN env var - Webapp registers Jira blueprint only when available - Health service monitors Jira timers only when enabled - Profiler loads Jira tables dynamically from filesystem - Sync settings uses config-driven dependency validation - Renamed keboola_platform_url -> custom_url in transform - Updated deploy.sh, sudoers-deploy, backfill_gap.sh paths - Fixed pytest.ini to skip live tests by default --- CLAUDE.md | 4 +- connectors/__init__.py | 1 + dev_docs/jira.md => connectors/jira/README.md | 36 +++--- connectors/jira/__init__.py | 9 ++ .../jira}/bin/update-jira-symlinks | 0 .../jira/file_lock.py | 2 +- .../jira/incremental_transform.py | 4 +- connectors/jira/scripts/__init__.py | 0 .../jira/scripts/backfill.py | 22 ++-- .../jira/scripts/backfill_remote_links.py | 6 +- .../jira/scripts/backfill_sla.py | 8 +- .../jira/scripts/consistency_check.py | 10 +- .../jira/scripts/poll_sla.py | 14 +-- .../jira/scripts}/sync_jira.sh | 0 .../jira/service.py | 8 +- .../jira/systemd}/jira-consistency-deep.timer | 0 .../jira/systemd}/jira-consistency.service | 2 +- .../jira/systemd}/jira-consistency.timer | 0 .../jira/systemd}/jira-sla-poll.service | 2 +- .../jira/systemd}/jira-sla-poll.timer | 0 connectors/jira/tests/__init__.py | 0 .../jira/tests/test_file_lock.py | 4 +- .../jira/tests/test_parquet_lock.py | 8 +- .../jira/tests/test_sla_poll.py | 20 ++-- .../jira/transform.py | 6 +- .../jira/webhook.py | 4 +- dev_docs/server.md | 22 ++-- pytest.ini | 3 +- scripts/backfill_gap.sh | 4 +- server/deploy.sh | 14 +-- 
server/sudoers-deploy | 10 +- src/profiler.py | 106 ++++++++++-------- tests/test_sync_data.py | 2 +- webapp/app.py | 12 +- webapp/config.py | 11 +- webapp/health_service.py | 21 +++- webapp/sync_settings_service.py | 8 +- 37 files changed, 211 insertions(+), 172 deletions(-) create mode 100644 connectors/__init__.py rename dev_docs/jira.md => connectors/jira/README.md (96%) create mode 100644 connectors/jira/__init__.py rename {server => connectors/jira}/bin/update-jira-symlinks (100%) rename src/jira_file_lock.py => connectors/jira/file_lock.py (97%) rename src/incremental_jira_transform.py => connectors/jira/incremental_transform.py (99%) create mode 100644 connectors/jira/scripts/__init__.py rename scripts/jira_backfill.py => connectors/jira/scripts/backfill.py (97%) rename scripts/jira_backfill_remote_links.py => connectors/jira/scripts/backfill_remote_links.py (97%) rename scripts/jira_backfill_sla.py => connectors/jira/scripts/backfill_sla.py (97%) rename scripts/jira_consistency_check.py => connectors/jira/scripts/consistency_check.py (98%) rename scripts/jira_poll_sla.py => connectors/jira/scripts/poll_sla.py (96%) rename {scripts => connectors/jira/scripts}/sync_jira.sh (100%) rename webapp/jira_service.py => connectors/jira/service.py (98%) rename {server => connectors/jira/systemd}/jira-consistency-deep.timer (100%) rename {server => connectors/jira/systemd}/jira-consistency.service (82%) rename {server => connectors/jira/systemd}/jira-consistency.timer (100%) rename {server => connectors/jira/systemd}/jira-sla-poll.service (83%) rename {server => connectors/jira/systemd}/jira-sla-poll.timer (100%) create mode 100644 connectors/jira/tests/__init__.py rename tests/test_jira_file_lock.py => connectors/jira/tests/test_file_lock.py (98%) rename tests/test_jira_parquet_lock.py => connectors/jira/tests/test_parquet_lock.py (97%) rename tests/test_jira_sla_poll.py => connectors/jira/tests/test_sla_poll.py (93%) rename src/jira_transform.py => 
connectors/jira/transform.py (99%) rename webapp/jira_webhook.py => connectors/jira/webhook.py (98%) diff --git a/CLAUDE.md b/CLAUDE.md index c6c2692..fef725c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -150,8 +150,8 @@ When reopening the project in Claude Code: ### Files NOT to modify (stable infrastructure) - `src/parquet_manager.py` - Parquet conversion engine -- `src/jira_file_lock.py` - Advisory file locking -- `src/incremental_jira_transform.py` - Jira monthly Parquet transform +- `connectors/jira/file_lock.py` - Advisory file locking +- `connectors/jira/incremental_transform.py` - Jira monthly Parquet transform - `server/ws_gateway/` - WebSocket notification gateway ## Git Commits & Pull Requests diff --git a/connectors/__init__.py b/connectors/__init__.py new file mode 100644 index 0000000..781c590 --- /dev/null +++ b/connectors/__init__.py @@ -0,0 +1 @@ +"""Connectors package - pluggable data source integrations.""" diff --git a/dev_docs/jira.md b/connectors/jira/README.md similarity index 96% rename from dev_docs/jira.md rename to connectors/jira/README.md index 005504f..6e604d8 100644 --- a/dev_docs/jira.md +++ b/connectors/jira/README.md @@ -108,7 +108,7 @@ Real-time sync of Jira support tickets for AI-powered analysis. ### 2. Webhook Receiver -**File:** `webapp/jira_webhook.py` +**File:** `connectors/jira/webhook.py` Flask blueprint that handles incoming webhooks: @@ -131,7 +131,7 @@ def receive_jira_webhook(): ### 3. Jira Service -**File:** `webapp/jira_service.py` +**File:** `connectors/jira/service.py` Handles Jira API communication and data persistence: @@ -171,13 +171,13 @@ Two transformation modes are available: #### 4a. Incremental Transform (Real-Time) -**File:** `src/incremental_jira_transform.py` +**File:** `connectors/jira/incremental_transform.py` Called automatically by webhook handler after saving issue JSON and attachments. Updates only the affected monthly Parquet file. 
```python # Called from jira_service.py after save_issue() -from src.incremental_jira_transform import transform_single_issue +from connectors.jira.incremental_transform import transform_single_issue transform_single_issue( issue_key="SUPPORT-1234", @@ -200,12 +200,12 @@ transform_single_issue( #### 4b. Batch Transform (Initial Load / Recovery) -**File:** `src/jira_transform.py` +**File:** `connectors/jira/transform.py` Used for initial historical load or to rebuild all Parquet from raw JSON. ```bash -python src/jira_transform.py \ +python -m connectors.jira.transform \ --raw-dir /data/src_data/raw/jira \ --output-dir /data/src_data/parquet/jira \ --attachments-dir /data/src_data/raw/jira/attachments @@ -422,7 +422,7 @@ if not hmac.compare_digest(signature, expected): 1. Run transformation manually: ```bash - python src/jira_transform.py \ + python -m connectors.jira.transform \ --raw-dir /data/src_data/raw/jira \ --output-dir /data/src_data/parquet/jira \ --attachments-dir /data/src_data/raw/jira/attachments @@ -439,11 +439,11 @@ See [docs/jira_schema.md](jira_schema.md) for detailed table schemas and example For initial setup or recovery, use the backfill script to download all historical issues. 
-**File:** `scripts/jira_backfill.py` +**File:** `connectors/jira/scripts/backfill.py` ```bash # Download all SUPPORT tickets (idempotent, skips existing) -python scripts/jira_backfill.py --parallel 4 +python -m connectors.jira.scripts.backfill --parallel 4 # Environment variables required: JIRA_DOMAIN=your-org.atlassian.net @@ -461,14 +461,14 @@ JIRA_DATA_DIR=/data/src_data/raw/jira # optional, default path **SLA backfill** (separate script, uses JSM service account): -**File:** `scripts/jira_backfill_sla.py` +**File:** `connectors/jira/scripts/backfill_sla.py` ```bash # Fetch SLA fields for all issues (uses JIRA_SLA_* env vars) -python scripts/jira_backfill_sla.py --parallel 8 +python -m connectors.jira.scripts.backfill_sla --parallel 8 # Dry run (count files needing update): -python scripts/jira_backfill_sla.py --dry-run +python -m connectors.jira.scripts.backfill_sla --dry-run ``` The personal API token lacks JSM Agent licence needed for SLA fields. @@ -478,7 +478,7 @@ into existing raw JSON files. **After backfill, run batch transform:** ```bash -python src/jira_transform.py \ +python -m connectors.jira.transform \ --raw-dir /data/src_data/raw/jira \ --output-dir /data/src_data/parquet/jira \ --attachments-dir /data/src_data/raw/jira/attachments @@ -491,7 +491,7 @@ cp -r /data/src_data/parquet/jira/* ~/server/parquet/jira/ SLA elapsed values (`first_response_elapsed_millis`, `time_to_resolution_elapsed_millis`) only update when a webhook fires. For idle open tickets (~49 tickets, ~0.3% of dataset), these values go stale and no longer reflect the actual current elapsed time. 
-**File:** `scripts/jira_poll_sla.py` +**File:** `connectors/jira/scripts/poll_sla.py` The SLA polling job runs every 15 minutes via systemd timer (`jira-sla-poll.timer`) as `root:data-ops` and: @@ -502,19 +502,19 @@ The SLA polling job runs every 15 minutes via systemd timer (`jira-sla-poll.time **Self-healing:** The poll fetches `status`, `resolution`, `resolutiondate`, and `updated` alongside the SLA fields. If a ticket is resolved in Jira but still appears "open" in Parquet (e.g. due to a missed webhook), the poll automatically corrects the status in JSON and re-transforms to Parquet. Log output: `Self-healing: SUPPORT-XXXX is resolved in Jira`. This was added in response to [#203](https://github.com/your-org/ai-data-analyst/issues/203) where 12 tickets were permanently stale after a permission bug prevented webhooks from updating JSON files. -**File locking:** The entire read-modify-write + Parquet transform is wrapped in a per-issue advisory file lock (`src/jira_file_lock.py`) to prevent races with the webhook handler. The webhook handler (`webapp/jira_service.py`) uses the same lock. Different issue keys don't block each other. +**File locking:** The entire read-modify-write + Parquet transform is wrapped in a per-issue advisory file lock (`connectors/jira/file_lock.py`) to prevent races with the webhook handler. The webhook handler (`connectors/jira/service.py`) uses the same lock. Different issue keys don't block each other. **Important — `mkstemp` and ACL:** The `issues/` directory uses POSIX ACLs with `default:mask::rwx`. `tempfile.mkstemp()` creates files with mode `0600`, which overrides the ACL mask to `---` and breaks group access for www-data (webhook handler) and deploy (batch transform). The `os.fchmod(fd, 0o660)` call immediately after `mkstemp()` restores the mask to `rw-`, preserving ACL-based access. See [#203](https://github.com/your-org/ai-data-analyst/issues/203) for the full incident report. 
```bash # Manual run -python scripts/jira_poll_sla.py +python -m connectors.jira.scripts.poll_sla # Dry run (count open issues) -python scripts/jira_poll_sla.py --dry-run +python -m connectors.jira.scripts.poll_sla --dry-run # Verbose logging -python scripts/jira_poll_sla.py --verbose +python -m connectors.jira.scripts.poll_sla --verbose ``` **Return states:** diff --git a/connectors/jira/__init__.py b/connectors/jira/__init__.py new file mode 100644 index 0000000..35a16ca --- /dev/null +++ b/connectors/jira/__init__.py @@ -0,0 +1,9 @@ +""" +Jira connector - optional push-based data integration. + +Provides real-time webhook ingestion, batch backfill, SLA polling, +and incremental Parquet transforms for Jira Cloud issues. + +Enabled automatically when the JIRA_DOMAIN environment variable is set; +the remaining JIRA_* environment variables must be provided as well. +""" diff --git a/server/bin/update-jira-symlinks b/connectors/jira/bin/update-jira-symlinks similarity index 100% rename from server/bin/update-jira-symlinks rename to connectors/jira/bin/update-jira-symlinks diff --git a/src/jira_file_lock.py b/connectors/jira/file_lock.py similarity index 97% rename from src/jira_file_lock.py rename to connectors/jira/file_lock.py index 471b7f9..755cf1d 100644 --- a/src/jira_file_lock.py +++ b/connectors/jira/file_lock.py @@ -12,7 +12,7 @@ Lock nesting order (always outer → inner to prevent deadlocks): Uses fcntl.flock() for POSIX advisory locking (works across processes). 
Usage: - from src.jira_file_lock import issue_json_lock, parquet_month_lock + from connectors.jira.file_lock import issue_json_lock, parquet_month_lock with issue_json_lock(issues_dir, "SUPPORT-1234"): # read JSON, modify, write diff --git a/src/incremental_jira_transform.py b/connectors/jira/incremental_transform.py similarity index 99% rename from src/incremental_jira_transform.py rename to connectors/jira/incremental_transform.py index 660c9ad..426368d 100644 --- a/src/incremental_jira_transform.py +++ b/connectors/jira/incremental_transform.py @@ -16,8 +16,8 @@ import pyarrow as pa import pyarrow.parquet as pq # Import transform functions from batch transform -from .jira_file_lock import parquet_month_lock -from .jira_transform import ( +from .file_lock import parquet_month_lock +from .transform import ( ATTACHMENTS_SCHEMA, CHANGELOG_SCHEMA, COMMENTS_SCHEMA, diff --git a/connectors/jira/scripts/__init__.py b/connectors/jira/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/jira_backfill.py b/connectors/jira/scripts/backfill.py similarity index 97% rename from scripts/jira_backfill.py rename to connectors/jira/scripts/backfill.py index 1e62c8c..01cf2a3 100755 --- a/scripts/jira_backfill.py +++ b/connectors/jira/scripts/backfill.py @@ -1,22 +1,22 @@ #!/usr/bin/env python3 """ -Jira Backfill Script - Download all historical SUPPORT tickets. +Jira Backfill Script - Download all historical Jira issues. -Downloads all issues from Jira SUPPORT project using JQL search with pagination. +Downloads all issues from Jira using JQL search with pagination. Reuses the webapp's JiraService for consistent data handling. 
Usage: # On server (uses /opt/data-analyst/.env): - python scripts/jira_backfill.py + python -m connectors.jira.scripts.backfill # With custom settings: - python scripts/jira_backfill.py --jql "project = SUPPORT AND created >= 2025-01-01" + python -m connectors.jira.scripts.backfill --jql "project = MY_PROJECT AND created >= 2025-01-01" # Skip already downloaded issues: - python scripts/jira_backfill.py --skip-existing + python -m connectors.jira.scripts.backfill --skip-existing # Dry run (show what would be downloaded): - python scripts/jira_backfill.py --dry-run + python -m connectors.jira.scripts.backfill --dry-run Environment variables (loaded from .env or set manually): JIRA_DOMAIN - Jira Cloud domain (e.g., your-org.atlassian.net) @@ -158,7 +158,7 @@ class JiraBackfill: jql: JQL query string Yields: - Issue keys (e.g., "SUPPORT-15190") + Issue keys (e.g., "PROJ-15190") """ next_page_token = None total_fetched = 0 @@ -201,7 +201,7 @@ class JiraBackfill: Fetch complete issue data from Jira. Args: - issue_key: Issue key (e.g., "SUPPORT-123") + issue_key: Issue key (e.g., "PROJ-123") Returns: Issue data dict or None if fetch failed @@ -245,7 +245,7 @@ class JiraBackfill: Fetch remote links for an issue from Jira. 
Args: - issue_key: Issue key (e.g., "SUPPORT-123") + issue_key: Issue key (e.g., "PROJ-123") Returns: List of remote link dicts, empty list on failure @@ -504,7 +504,7 @@ class JiraBackfill: def main(): parser = argparse.ArgumentParser( - description="Download all SUPPORT tickets from Jira", + description="Download all Jira issues", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -543,7 +543,7 @@ def main(): ) parser.add_argument( "--issue-keys", - help="Comma-separated list of specific issue keys to backfill (e.g., SUPPORT-15307,SUPPORT-15308)", + help="Comma-separated list of specific issue keys to backfill (e.g., PROJ-123,PROJ-456)", ) args = parser.parse_args() diff --git a/scripts/jira_backfill_remote_links.py b/connectors/jira/scripts/backfill_remote_links.py similarity index 97% rename from scripts/jira_backfill_remote_links.py rename to connectors/jira/scripts/backfill_remote_links.py index ad24854..df8543a 100644 --- a/scripts/jira_backfill_remote_links.py +++ b/connectors/jira/scripts/backfill_remote_links.py @@ -8,13 +8,13 @@ Parquet transform to extract remote_links table data. 
Usage: # On server (uses /opt/data-analyst/.env): - python scripts/jira_backfill_remote_links.py + python -m connectors.jira.scripts.backfill_remote_links # With parallel workers: - python scripts/jira_backfill_remote_links.py --parallel 4 + python -m connectors.jira.scripts.backfill_remote_links --parallel 4 # Dry run: - python scripts/jira_backfill_remote_links.py --dry-run + python -m connectors.jira.scripts.backfill_remote_links --dry-run Environment variables (loaded from .env): JIRA_DOMAIN - Jira Cloud domain diff --git a/scripts/jira_backfill_sla.py b/connectors/jira/scripts/backfill_sla.py similarity index 97% rename from scripts/jira_backfill_sla.py rename to connectors/jira/scripts/backfill_sla.py index 8e4050a..8090164 100644 --- a/scripts/jira_backfill_sla.py +++ b/connectors/jira/scripts/backfill_sla.py @@ -13,16 +13,16 @@ the domain-based URL (https://your-org.atlassian.net/rest/api/3/...). Usage: # On server: - python scripts/jira_backfill_sla.py + python -m connectors.jira.scripts.backfill_sla # With parallel workers: - python scripts/jira_backfill_sla.py --parallel 8 + python -m connectors.jira.scripts.backfill_sla --parallel 8 # Dry run (count files needing update): - python scripts/jira_backfill_sla.py --dry-run + python -m connectors.jira.scripts.backfill_sla --dry-run # Force re-fetch even if SLA data already present: - python scripts/jira_backfill_sla.py --force + python -m connectors.jira.scripts.backfill_sla --force Environment variables (loaded from .env): JIRA_SLA_EMAIL - Email for JSM service account authentication diff --git a/scripts/jira_consistency_check.py b/connectors/jira/scripts/consistency_check.py similarity index 98% rename from scripts/jira_consistency_check.py rename to connectors/jira/scripts/consistency_check.py index 979773f..d1fa4db 100644 --- a/scripts/jira_consistency_check.py +++ b/connectors/jira/scripts/consistency_check.py @@ -12,13 +12,13 @@ Runs every 30 minutes via systemd timer to detect webhook losses and 
transform f Usage: # Dry run (check only, no fixes) - python scripts/jira_consistency_check.py --dry-run --max-age-days 7 + python -m connectors.jira.scripts.consistency_check --dry-run --max-age-days 7 # Auto-fix mode (default) - python scripts/jira_consistency_check.py --auto-fix --max-age-days 30 + python -m connectors.jira.scripts.consistency_check --auto-fix --max-age-days 30 # Weekly deep check (full history) - python scripts/jira_consistency_check.py --auto-fix --max-age-days 365 + python -m connectors.jira.scripts.consistency_check --auto-fix --max-age-days 365 Environment variables (loaded from .env): JIRA_DOMAIN - Jira Cloud domain (e.g., your-org.atlassian.net) @@ -353,7 +353,7 @@ class JiraConsistencyChecker: # Build command for targeted backfill (force re-download to fix corrupted files) cmd = [ str(self.config.venv_python), - str(self.config.repo_dir / "scripts" / "jira_backfill.py"), + str(self.config.repo_dir / "connectors" / "jira" / "scripts" / "backfill.py"), "--issue-keys", ",".join(issue_keys), "--no-skip-existing", # Force re-download even if files exist @@ -406,7 +406,7 @@ class JiraConsistencyChecker: cmd = [ str(self.config.venv_python), "-m", - "src.incremental_jira_transform", + "connectors.jira.incremental_transform", issue_key, "--raw-dir", str(self.config.raw_dir), "--output-dir", str(self.config.parquet_dir), diff --git a/scripts/jira_poll_sla.py b/connectors/jira/scripts/poll_sla.py similarity index 96% rename from scripts/jira_poll_sla.py rename to connectors/jira/scripts/poll_sla.py index edec825..ec4e368 100644 --- a/scripts/jira_poll_sla.py +++ b/connectors/jira/scripts/poll_sla.py @@ -15,13 +15,13 @@ Designed to run as a systemd timer (every 15 min) via jira-sla-poll.timer. 
Usage: # On server: - python scripts/jira_poll_sla.py + python -m connectors.jira.scripts.poll_sla # Dry run (count open issues, don't fetch): - python scripts/jira_poll_sla.py --dry-run + python -m connectors.jira.scripts.poll_sla --dry-run # Verbose logging: - python scripts/jira_poll_sla.py --verbose + python -m connectors.jira.scripts.poll_sla --verbose Environment variables (loaded from .env): JIRA_SLA_EMAIL - Email for JSM service account authentication @@ -44,16 +44,16 @@ import pandas as pd from dotenv import load_dotenv # Add project root to sys.path for imports -PROJECT_ROOT = Path(__file__).resolve().parent.parent +PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent sys.path.insert(0, str(PROJECT_ROOT)) -from scripts.jira_backfill_sla import ( +from connectors.jira.scripts.backfill_sla import ( SLA_FIELDS, has_valid_sla_data, load_config, ) -from src.incremental_jira_transform import transform_single_issue -from src.jira_file_lock import issue_json_lock +from connectors.jira.incremental_transform import transform_single_issue +from connectors.jira.file_lock import issue_json_lock logging.basicConfig( level=logging.INFO, diff --git a/scripts/sync_jira.sh b/connectors/jira/scripts/sync_jira.sh similarity index 100% rename from scripts/sync_jira.sh rename to connectors/jira/scripts/sync_jira.sh diff --git a/webapp/jira_service.py b/connectors/jira/service.py similarity index 98% rename from webapp/jira_service.py rename to connectors/jira/service.py index d526ab7..7fc7a20 100644 --- a/webapp/jira_service.py +++ b/connectors/jira/service.py @@ -18,7 +18,7 @@ from typing import Any import httpx -from .config import Config +from webapp.config import Config logger = logging.getLogger(__name__) @@ -38,7 +38,7 @@ def trigger_incremental_transform(issue_key: str, deleted: bool = False) -> bool True if transform succeeded, False otherwise """ try: - from src.incremental_jira_transform import transform_single_issue + from 
connectors.jira.incremental_transform import transform_single_issue success = transform_single_issue( issue_key=issue_key, @@ -262,7 +262,7 @@ class JiraService: file_path.parent.mkdir(parents=True, exist_ok=True) try: - from src.jira_file_lock import issue_json_lock + from connectors.jira.file_lock import issue_json_lock # Lock protects the JSON write + Parquet transform from concurrent # SLA poll writes. Attachment download stays outside the lock. @@ -499,7 +499,7 @@ class JiraService: if file_path.exists(): # Mark as deleted rather than removing try: - from src.jira_file_lock import issue_json_lock + from connectors.jira.file_lock import issue_json_lock issues_dir = self.data_dir / "issues" with issue_json_lock(issues_dir, issue_key): diff --git a/server/jira-consistency-deep.timer b/connectors/jira/systemd/jira-consistency-deep.timer similarity index 100% rename from server/jira-consistency-deep.timer rename to connectors/jira/systemd/jira-consistency-deep.timer diff --git a/server/jira-consistency.service b/connectors/jira/systemd/jira-consistency.service similarity index 82% rename from server/jira-consistency.service rename to connectors/jira/systemd/jira-consistency.service index dacf690..504bf4f 100644 --- a/server/jira-consistency.service +++ b/connectors/jira/systemd/jira-consistency.service @@ -8,7 +8,7 @@ Type=oneshot User=root Group=data-ops WorkingDirectory=/opt/data-analyst/repo -ExecStart=/opt/data-analyst/.venv/bin/python scripts/jira_consistency_check.py --auto-fix --max-age-days 30 +ExecStart=/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.consistency_check --auto-fix --max-age-days 30 EnvironmentFile=/opt/data-analyst/.env EnvironmentFile=/opt/data-analyst/repo/.env ProtectSystem=strict diff --git a/server/jira-consistency.timer b/connectors/jira/systemd/jira-consistency.timer similarity index 100% rename from server/jira-consistency.timer rename to connectors/jira/systemd/jira-consistency.timer diff --git 
a/server/jira-sla-poll.service b/connectors/jira/systemd/jira-sla-poll.service similarity index 83% rename from server/jira-sla-poll.service rename to connectors/jira/systemd/jira-sla-poll.service index 34e0d8c..bf340f7 100644 --- a/server/jira-sla-poll.service +++ b/connectors/jira/systemd/jira-sla-poll.service @@ -8,7 +8,7 @@ Type=oneshot User=root Group=data-ops WorkingDirectory=/opt/data-analyst/repo -ExecStart=/opt/data-analyst/.venv/bin/python scripts/jira_poll_sla.py +ExecStart=/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.poll_sla EnvironmentFile=/opt/data-analyst/.env EnvironmentFile=/opt/data-analyst/repo/.env ProtectSystem=strict diff --git a/server/jira-sla-poll.timer b/connectors/jira/systemd/jira-sla-poll.timer similarity index 100% rename from server/jira-sla-poll.timer rename to connectors/jira/systemd/jira-sla-poll.timer diff --git a/connectors/jira/tests/__init__.py b/connectors/jira/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_jira_file_lock.py b/connectors/jira/tests/test_file_lock.py similarity index 98% rename from tests/test_jira_file_lock.py rename to connectors/jira/tests/test_file_lock.py index 28c8832..6dc4651 100644 --- a/tests/test_jira_file_lock.py +++ b/connectors/jira/tests/test_file_lock.py @@ -1,4 +1,4 @@ -"""Tests for per-issue advisory file locking (src/jira_file_lock.py). +"""Tests for per-issue advisory file locking (connectors/jira/file_lock.py). 
Verifies that issue_json_lock correctly: - Acquires and releases locks via context manager @@ -13,7 +13,7 @@ from pathlib import Path import pytest -from src.jira_file_lock import issue_json_lock +from connectors.jira.file_lock import issue_json_lock class TestBasicLockUnlock: diff --git a/tests/test_jira_parquet_lock.py b/connectors/jira/tests/test_parquet_lock.py similarity index 97% rename from tests/test_jira_parquet_lock.py rename to connectors/jira/tests/test_parquet_lock.py index 7985137..e3d34ee 100644 --- a/tests/test_jira_parquet_lock.py +++ b/connectors/jira/tests/test_parquet_lock.py @@ -1,4 +1,4 @@ -"""Tests for per-month Parquet advisory file locking (src/jira_file_lock.py). +"""Tests for per-month Parquet advisory file locking (connectors/jira/file_lock.py). Verifies that parquet_month_lock correctly: - Acquires and releases locks via context manager @@ -17,7 +17,7 @@ from pathlib import Path import pandas as pd import pytest -from src.jira_file_lock import parquet_month_lock +from connectors.jira.file_lock import parquet_month_lock # --------------------------------------------------------------------------- @@ -269,7 +269,7 @@ class TestParquetLockIntegration: def test_concurrent_transforms_no_data_loss(self, tmp_path: Path) -> None: """Simulate concurrent webhook transforms for same month.""" - from src.incremental_jira_transform import transform_single_issue + from connectors.jira.incremental_transform import transform_single_issue raw_dir = tmp_path / "raw" issues_dir = raw_dir / "issues" @@ -331,7 +331,7 @@ class TestParquetLockIntegration: def test_concurrent_transforms_different_months_independent(self, tmp_path: Path) -> None: """Issues in different months should not interfere with each other.""" - from src.incremental_jira_transform import transform_single_issue + from connectors.jira.incremental_transform import transform_single_issue raw_dir = tmp_path / "raw" issues_dir = raw_dir / "issues" diff --git a/tests/test_jira_sla_poll.py 
b/connectors/jira/tests/test_sla_poll.py similarity index 93% rename from tests/test_jira_sla_poll.py rename to connectors/jira/tests/test_sla_poll.py index 2d85ad2..cb273fd 100644 --- a/tests/test_jira_sla_poll.py +++ b/connectors/jira/tests/test_sla_poll.py @@ -1,5 +1,5 @@ """ -Tests for scripts/jira_poll_sla.py - SLA polling and self-healing logic. +Tests for connectors/jira/scripts/poll_sla.py - SLA polling and self-healing logic. Covers: - fetch_sla_and_status: API response parsing for SLA + status fields @@ -14,9 +14,9 @@ from unittest.mock import MagicMock, patch import pytest # Ensure project root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent)) -from scripts.jira_poll_sla import ( +from connectors.jira.scripts.poll_sla import ( SLA_FIELDS, STATUS_FIELDS, fetch_sla_and_status, @@ -68,7 +68,7 @@ def fake_issue_json_in_progress(tmp_path: Path) -> Path: class TestFetchSlaAndStatus: """Tests for the fetch_sla_and_status function.""" - @patch("scripts.jira_poll_sla.httpx.Client") + @patch("connectors.jira.scripts.poll_sla.httpx.Client") def test_returns_all_sla_and_status_fields(self, mock_client_cls: MagicMock) -> None: """ When the Jira API returns 200 with all requested fields, @@ -145,8 +145,8 @@ class TestFetchSlaAndStatus: class TestUpdateIssueSlaHealing: """Tests for self-healing when API reports an issue as resolved.""" - @patch("scripts.jira_poll_sla.transform_single_issue") - @patch("scripts.jira_poll_sla.fetch_sla_and_status") + @patch("connectors.jira.scripts.poll_sla.transform_single_issue") + @patch("connectors.jira.scripts.poll_sla.fetch_sla_and_status") def test_self_healing_returns_healed_and_updates_json( self, mock_fetch: MagicMock, @@ -224,8 +224,8 @@ class TestUpdateIssueSlaHealing: class TestUpdateIssueSlaSkip: """Tests for the skip logic when SLA data is empty and status is not Done.""" - 
@patch("scripts.jira_poll_sla.transform_single_issue") - @patch("scripts.jira_poll_sla.fetch_sla_and_status") + @patch("connectors.jira.scripts.poll_sla.transform_single_issue") + @patch("connectors.jira.scripts.poll_sla.fetch_sla_and_status") def test_skips_when_no_sla_data_and_not_resolved( self, mock_fetch: MagicMock, @@ -272,8 +272,8 @@ class TestUpdateIssueSlaSkip: class TestUpdateIssueSlaJsonMissing: """Tests for missing JSON file handling.""" - @patch("scripts.jira_poll_sla.transform_single_issue") - @patch("scripts.jira_poll_sla.fetch_sla_and_status") + @patch("connectors.jira.scripts.poll_sla.transform_single_issue") + @patch("connectors.jira.scripts.poll_sla.fetch_sla_and_status") def test_returns_skipped_when_json_file_missing( self, mock_fetch: MagicMock, diff --git a/src/jira_transform.py b/connectors/jira/transform.py similarity index 99% rename from src/jira_transform.py rename to connectors/jira/transform.py index 8c44c0f..9f01cbf 100644 --- a/src/jira_transform.py +++ b/connectors/jira/transform.py @@ -32,7 +32,7 @@ CUSTOM_FIELD_NAMES = { "customfield_10157": "satisfaction", # Customer satisfaction (was: sla_info) "customfield_10323": "triage", # Triage multi-select (was: team_tier) "customfield_10330": "context", # Context field (was: root_cause) - "customfield_10325": "keboola_platform_url", # Keboola platform URL (was: resolution_summary) + "customfield_10325": "custom_url", # Custom URL (was: resolution_summary) "customfield_10350": "slack_link", # Slack link (was: customer_type) "customfield_10475": "email_address", # Email address (was: context) "customfield_10511": "configuration_item", # Configuration item (was: categories) @@ -80,7 +80,7 @@ ISSUES_SCHEMA = { "organizations": "string", "spam": "string", "context": "string", - "keboola_platform_url": "string", + "custom_url": "string", "slack_link": "string", "technical_issue_category": "string", "email_address": "string", @@ -380,7 +380,7 @@ def transform_issue(raw_issue: dict) -> dict: 
"organizations": json.dumps(extract_option_list(fields.get("customfield_10002"))), "spam": extract_option_value(fields.get("customfield_10365")), "context": extract_text_from_adf(fields.get("customfield_10330")) or None, - "keboola_platform_url": fields.get("customfield_10325"), + "custom_url": fields.get("customfield_10325"), "slack_link": extract_option_value(fields.get("customfield_10350")), "technical_issue_category": extract_option_value(fields.get("customfield_10676")), "email_address": extract_option_value(fields.get("customfield_10475")), diff --git a/webapp/jira_webhook.py b/connectors/jira/webhook.py similarity index 98% rename from webapp/jira_webhook.py rename to connectors/jira/webhook.py index 9243f0f..ffa42ec 100644 --- a/webapp/jira_webhook.py +++ b/connectors/jira/webhook.py @@ -13,8 +13,8 @@ from datetime import datetime from flask import Blueprint, abort, jsonify, request -from .config import Config -from .jira_service import get_jira_service +from webapp.config import Config +from .service import get_jira_service logger = logging.getLogger(__name__) diff --git a/dev_docs/server.md b/dev_docs/server.md index 3035a5e..9e75302 100644 --- a/dev_docs/server.md +++ b/dev_docs/server.md @@ -152,7 +152,7 @@ Use `0o660` for files accessed by services via data-ops group ACL, `0o644` for w When multiple services write to the same JSON file (e.g., SLA poll and webhook handler both updating `/data/src_data/raw/jira/issues/SUPPORT-1234.json`), use advisory file locking to prevent races: ```python -from src.jira_file_lock import issue_json_lock +from connectors.jira.file_lock import issue_json_lock with issue_json_lock(issues_dir, issue_key): # read JSON, modify, atomic write, transform to Parquet @@ -165,8 +165,8 @@ with issue_json_lock(issues_dir, issue_key): - The lock must cover the entire read-modify-write **and** the Parquet transform — otherwise another writer could overwrite the JSON between write and transform, causing the transform to read stale data 
Currently used by: -- `scripts/jira_poll_sla.py` — wraps SLA+status update + `transform_single_issue()` -- `webapp/jira_service.py` — wraps `save_issue()` JSON write + `trigger_incremental_transform()`, and `_handle_deletion()` read-modify-write + transform +- `connectors/jira/scripts/poll_sla.py` — wraps SLA+status update + `transform_single_issue()` +- `connectors/jira/service.py` — wraps `save_issue()` JSON write + `trigger_incremental_transform()`, and `_handle_deletion()` read-modify-write + transform Attachment downloads in `save_issue()` intentionally run **outside** the lock (can take tens of seconds and don't modify JSON). @@ -1405,8 +1405,8 @@ SLA elapsed values (`first_response_elapsed_millis`, `time_to_resolution_elapsed |-----------|-------------| | `jira-sla-poll.service` | Oneshot service that polls open tickets for fresh SLA + status data | | `jira-sla-poll.timer` | Runs every 15 minutes (10min after boot, then every 15min) | -| `scripts/jira_poll_sla.py` | Reads Parquet to find open issues, fetches SLA + status via cloud API | -| `src/jira_file_lock.py` | Per-issue advisory file locking (shared with webhook handler) | +| `connectors/jira/scripts/poll_sla.py` | Reads Parquet to find open issues, fetches SLA + status via cloud API | +| `connectors/jira/file_lock.py` | Per-issue advisory file locking (shared with webhook handler) | **How it works:** 1. Reads Parquet issues to find open tickets with SLA data (~49 tickets) @@ -1428,7 +1428,7 @@ journalctl -u jira-sla-poll.service --since "1 hour ago" # Manual dry run (count open issues) cd /opt/data-analyst/repo -/opt/data-analyst/.venv/bin/python scripts/jira_poll_sla.py --dry-run +/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.poll_sla --dry-run ``` **Requires:** `JIRA_SLA_EMAIL`, `JIRA_SLA_API_TOKEN`, `JIRA_CLOUD_ID` in `.env`. Timer is auto-enabled by `deploy.sh` when `JIRA_SLA_API_TOKEN` is set. 
@@ -1442,7 +1442,7 @@ Automated check every 30 minutes to detect missing Jira issues caused by webhook | `jira-consistency.service` | Oneshot service that validates data consistency across all sources | | `jira-consistency.timer` | Runs every 30 minutes (10min after boot) | | `jira-consistency-deep.timer` | Weekly full history check (Sunday 3 AM) | -| `scripts/jira_consistency_check.py` | Validation script with auto-backfill capability | +| `connectors/jira/scripts/consistency_check.py` | Validation script with auto-backfill capability | **How it works:** 1. Queries Jira API for all issue keys (last 30 days by default) @@ -1470,10 +1470,10 @@ journalctl -u jira-consistency.service --since "1 hour ago" # Manual check (dry run) cd /opt/data-analyst/repo -/opt/data-analyst/.venv/bin/python scripts/jira_consistency_check.py --dry-run --max-age-days 7 +/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.consistency_check --dry-run --max-age-days 7 # Manual check with auto-fix -/opt/data-analyst/.venv/bin/python scripts/jira_consistency_check.py --auto-fix --max-age-days 30 +/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.consistency_check --auto-fix --max-age-days 30 # View consistency report cat /data/src_data/raw/jira/_consistency_report.json | python3 -m json.tool @@ -1486,7 +1486,7 @@ jq -r '.discrepancies.missing_in_json[]' /data/src_data/raw/jira/_consistency_re # Backfill specific issues cd /opt/data-analyst/repo -/opt/data-analyst/.venv/bin/python scripts/jira_backfill.py --issue-keys SUPPORT-15307,SUPPORT-15308 +/opt/data-analyst/.venv/bin/python -m connectors.jira.scripts.backfill --issue-keys SUPPORT-15307,SUPPORT-15308 # Verify in Parquet /opt/data-analyst/.venv/bin/python -c " @@ -1510,7 +1510,7 @@ for row in result: - API token has read-only access to Jira (no write permissions needed) - Webhook events are logged for audit purposes - Multiple services write to `/data/src_data/raw/jira/`: webapp (www-data), SLA poll (root), 
consistency check (root), backfill scripts (admin users) -- Concurrent writes to the same issue JSON are serialized via per-issue advisory file locking (`src/jira_file_lock.py`, `fcntl.flock`). Lock files in `issues/.locks/`. See [#203](https://github.com/your-org/ai-data-analyst/issues/203). +- Concurrent writes to the same issue JSON are serialized via per-issue advisory file locking (`connectors/jira/file_lock.py`, `fcntl.flock`). Lock files in `issues/.locks/`. See [#203](https://github.com/your-org/ai-data-analyst/issues/203). ## Data Profiler diff --git a/pytest.ini b/pytest.ini index f54fa70..fbcda6f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,4 @@ [pytest] +addopts = -m "not live" markers = - live: tests requiring server access (deselect with '-m "not live"') + live: tests requiring server access (run with '-m live') diff --git a/scripts/backfill_gap.sh b/scripts/backfill_gap.sh index 91e3048..cd910a0 100755 --- a/scripts/backfill_gap.sh +++ b/scripts/backfill_gap.sh @@ -44,12 +44,12 @@ cd "$REPO_DIR" echo "" echo "--- Phase 1: Download raw JSON ---" if $DRY_RUN; then - python scripts/jira_backfill.py --jql "$JQL" --dry-run + python -m connectors.jira.scripts.backfill --jql "$JQL" --dry-run echo "Dry run complete. Exiting." exit 0 fi -python scripts/jira_backfill.py --jql "$JQL" --skip-existing --parallel 4 +python -m connectors.jira.scripts.backfill --jql "$JQL" --skip-existing --parallel 4 # --- Phase 2: Incremental Parquet transform --- echo "" diff --git a/server/deploy.sh b/server/deploy.sh index 618a474..65b8843 100755 --- a/server/deploy.sh +++ b/server/deploy.sh @@ -219,18 +219,18 @@ fi # Deploy Jira SLA polling systemd service and timer log "Deploying jira-sla-poll service and timer..." 
-if [[ -f "${REPO_DIR}/server/jira-sla-poll.service" ]]; then - sudo /usr/bin/cp "${REPO_DIR}/server/jira-sla-poll.service" /etc/systemd/system/jira-sla-poll.service - sudo /usr/bin/cp "${REPO_DIR}/server/jira-sla-poll.timer" /etc/systemd/system/jira-sla-poll.timer +if [[ -f "${REPO_DIR}/connectors/jira/systemd/jira-sla-poll.service" ]]; then + sudo /usr/bin/cp "${REPO_DIR}/connectors/jira/systemd/jira-sla-poll.service" /etc/systemd/system/jira-sla-poll.service + sudo /usr/bin/cp "${REPO_DIR}/connectors/jira/systemd/jira-sla-poll.timer" /etc/systemd/system/jira-sla-poll.timer sudo /usr/bin/systemctl daemon-reload fi # Deploy Jira consistency monitoring systemd service and timers log "Deploying jira-consistency service and timers..." -if [[ -f "${REPO_DIR}/server/jira-consistency.service" ]]; then - sudo /usr/bin/cp "${REPO_DIR}/server/jira-consistency.service" /etc/systemd/system/jira-consistency.service - sudo /usr/bin/cp "${REPO_DIR}/server/jira-consistency.timer" /etc/systemd/system/jira-consistency.timer - sudo /usr/bin/cp "${REPO_DIR}/server/jira-consistency-deep.timer" /etc/systemd/system/jira-consistency-deep.timer +if [[ -f "${REPO_DIR}/connectors/jira/systemd/jira-consistency.service" ]]; then + sudo /usr/bin/cp "${REPO_DIR}/connectors/jira/systemd/jira-consistency.service" /etc/systemd/system/jira-consistency.service + sudo /usr/bin/cp "${REPO_DIR}/connectors/jira/systemd/jira-consistency.timer" /etc/systemd/system/jira-consistency.timer + sudo /usr/bin/cp "${REPO_DIR}/connectors/jira/systemd/jira-consistency-deep.timer" /etc/systemd/system/jira-consistency-deep.timer sudo /usr/bin/systemctl daemon-reload # Create log file with correct permissions diff --git a/server/sudoers-deploy b/server/sudoers-deploy index 8ece3d8..f94e56e 100644 --- a/server/sudoers-deploy +++ b/server/sudoers-deploy @@ -113,8 +113,8 @@ deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl stop corporate-memory.timer deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl is-enabled 
corporate-memory.timer # Allow deploy user to manage jira-sla-poll service and timer -deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/server/jira-sla-poll.service /etc/systemd/system/jira-sla-poll.service -deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/server/jira-sla-poll.timer /etc/systemd/system/jira-sla-poll.timer +deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/connectors/jira/systemd/jira-sla-poll.service /etc/systemd/system/jira-sla-poll.service +deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/connectors/jira/systemd/jira-sla-poll.timer /etc/systemd/system/jira-sla-poll.timer deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl enable jira-sla-poll.timer deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl start jira-sla-poll.timer deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl stop jira-sla-poll.timer @@ -132,9 +132,9 @@ deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl stop session-collector.timer deploy ALL=(ALL) NOPASSWD: /usr/bin/systemctl is-enabled session-collector.timer # Allow deploy user to manage jira-consistency service and timers -deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/server/jira-consistency.service /etc/systemd/system/jira-consistency.service -deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/server/jira-consistency.timer /etc/systemd/system/jira-consistency.timer -deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/server/jira-consistency-deep.timer /etc/systemd/system/jira-consistency-deep.timer +deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/connectors/jira/systemd/jira-consistency.service /etc/systemd/system/jira-consistency.service +deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/connectors/jira/systemd/jira-consistency.timer /etc/systemd/system/jira-consistency.timer +deploy ALL=(ALL) NOPASSWD: /usr/bin/cp /opt/data-analyst/repo/connectors/jira/systemd/jira-consistency-deep.timer /etc/systemd/system/jira-consistency-deep.timer 
deploy ALL=(ALL) NOPASSWD: /usr/bin/touch /opt/data-analyst/logs/jira-consistency.log deploy ALL=(ALL) NOPASSWD: /usr/bin/chown root\:data-ops /opt/data-analyst/logs/jira-consistency.log deploy ALL=(ALL) NOPASSWD: /usr/bin/chmod 664 /opt/data-analyst/logs/jira-consistency.log diff --git a/src/profiler.py b/src/profiler.py index 9f9f4ff..56c0c88 100644 --- a/src/profiler.py +++ b/src/profiler.py @@ -60,55 +60,63 @@ METRICS_YML_PATH = DOCS_DIR / "metrics.yml" METRICS_DIR = DOCS_DIR / "metrics" DATA_DESCRIPTION_PATH = DOCS_DIR / "data_description.md" -# Jira / Support tables - not in data_description.md but stored as partitioned parquet -JIRA_PARQUET_DIR = PARQUET_DIR / "jira" -JIRA_TABLES = [ - { - "name": "jira_issues", - "subdir": "issues", - "description": "Support tickets from Jira SUPPORT project. Key fields: issue_key, summary, description, status, priority, assignee, created_at, resolved_at, severity, triage.", - "primary_key": "issue_key", - "foreign_keys": [], - }, - { - "name": "jira_comments", - "subdir": "comments", - "description": "Comments on support tickets. Key fields: comment_id, issue_key, author_email, body, created_at.", - "primary_key": "comment_id", - "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent support ticket"}], - }, - { - "name": "jira_attachments", - "subdir": "attachments", - "description": "Attachment metadata with local file paths. Key fields: attachment_id, issue_key, filename, local_path, size_bytes, mime_type.", - "primary_key": "attachment_id", - "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent support ticket"}], - }, - { - "name": "jira_changelog", - "subdir": "changelog", - "description": "History of all field changes on issues. 
Key fields: change_id, issue_key, field_name, from_value, to_value, changed_at.", - "primary_key": "change_id", - "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent support ticket"}], - }, - { - "name": "jira_issuelinks", - "subdir": "issuelinks", - "description": "Links between Jira issues (blocks, duplicates, relates to). Key fields: issue_key, link_id, link_type, direction, linked_issue_key.", - "primary_key": "link_id", - "foreign_keys": [ - {"column": "issue_key", "references": "jira_issues.issue_key", "description": "Source support ticket"}, - {"column": "linked_issue_key", "references": "jira_issues.issue_key", "description": "Target linked ticket"}, - ], - }, - { - "name": "jira_remote_links", - "subdir": "remote_links", - "description": "External links attached to issues (Confluence pages, Slack threads, etc.). Key fields: issue_key, remote_link_id, url, title.", - "primary_key": "remote_link_id", - "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent support ticket"}], - }, -] +# Jira tables - loaded dynamically if Jira connector is enabled +# The Jira connector stores partitioned parquet files in PARQUET_DIR/jira/ +def _load_jira_tables() -> tuple: + """Load Jira table definitions if the connector directory exists.""" + jira_dir = PARQUET_DIR / "jira" + if not jira_dir.exists(): + return jira_dir, [] + return jira_dir, [ + { + "name": "jira_issues", + "subdir": "issues", + "description": "Jira issues. Key fields: issue_key, summary, description, status, priority, assignee, created_at, resolved_at.", + "primary_key": "issue_key", + "foreign_keys": [], + }, + { + "name": "jira_comments", + "subdir": "comments", + "description": "Comments on Jira issues. 
Key fields: comment_id, issue_key, author_email, body, created_at.", + "primary_key": "comment_id", + "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent issue"}], + }, + { + "name": "jira_attachments", + "subdir": "attachments", + "description": "Attachment metadata with local file paths. Key fields: attachment_id, issue_key, filename, local_path, size_bytes, mime_type.", + "primary_key": "attachment_id", + "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent issue"}], + }, + { + "name": "jira_changelog", + "subdir": "changelog", + "description": "History of all field changes on issues. Key fields: change_id, issue_key, field_name, from_value, to_value, changed_at.", + "primary_key": "change_id", + "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent issue"}], + }, + { + "name": "jira_issuelinks", + "subdir": "issuelinks", + "description": "Links between Jira issues (blocks, duplicates, relates to). Key fields: issue_key, link_id, link_type, direction, linked_issue_key.", + "primary_key": "link_id", + "foreign_keys": [ + {"column": "issue_key", "references": "jira_issues.issue_key", "description": "Source issue"}, + {"column": "linked_issue_key", "references": "jira_issues.issue_key", "description": "Target linked issue"}, + ], + }, + { + "name": "jira_remote_links", + "subdir": "remote_links", + "description": "External links attached to issues (Confluence pages, Slack threads, etc.). 
Key fields: issue_key, remote_link_id, url, title.", + "primary_key": "remote_link_id", + "foreign_keys": [{"column": "issue_key", "references": "jira_issues.issue_key", "description": "Parent issue"}], + }, + ] + + +JIRA_PARQUET_DIR, JIRA_TABLES = _load_jira_tables() # --------------------------------------------------------------------------- diff --git a/tests/test_sync_data.py b/tests/test_sync_data.py index 9ee02d3..3619bc5 100644 --- a/tests/test_sync_data.py +++ b/tests/test_sync_data.py @@ -21,7 +21,7 @@ import pytest REPO_ROOT = Path(__file__).resolve().parent.parent SCRIPTS_DIR = REPO_ROOT / "scripts" SYNC_DATA_SH = SCRIPTS_DIR / "sync_data.sh" -SYNC_JIRA_SH = SCRIPTS_DIR / "sync_jira.sh" +SYNC_JIRA_SH = REPO_ROOT / "connectors" / "jira" / "scripts" / "sync_jira.sh" SYNC_SCRIPTS = [SYNC_DATA_SH, SYNC_JIRA_SH] DIAG_DIR = REPO_ROOT / "data" / "sync_diagnostics" diff --git a/webapp/app.py b/webapp/app.py index 893d291..e65f3bf 100644 --- a/webapp/app.py +++ b/webapp/app.py @@ -18,11 +18,18 @@ from flask import Flask, flash, jsonify, redirect, render_template, request, ses from .auth import auth_bp, init_oauth, login_required from .config import Config from .desktop_auth import desktop_bp, require_desktop_auth -from .jira_webhook import jira_bp from .notification_images import images_bp from .account_service import get_account_details from .sync_settings_service import get_sync_settings, update_sync_settings +# Jira connector is optional - only loaded if configured +try: + from connectors.jira.webhook import jira_bp + JIRA_AVAILABLE = True +except ImportError: + JIRA_AVAILABLE = False + jira_bp = None + # Password auth is optional - requires SENDGRID_API_KEY try: from .password_auth import password_auth_bp @@ -73,7 +80,8 @@ def create_app() -> Flask: app.register_blueprint(auth_bp) app.register_blueprint(desktop_bp) app.register_blueprint(images_bp) - app.register_blueprint(jira_bp) + if JIRA_AVAILABLE and jira_bp: + app.register_blueprint(jira_bp) if 
PASSWORD_AUTH_AVAILABLE and password_auth_bp: app.register_blueprint(password_auth_bp) diff --git a/webapp/config.py b/webapp/config.py index 02c4795..47d1a26 100644 --- a/webapp/config.py +++ b/webapp/config.py @@ -97,18 +97,17 @@ class Config: # Notification images directory NOTIFICATION_IMAGES_DIR = "/tmp" - # Jira webhook integration + # Jira connector (optional - loaded from connectors/jira/) + # These remain here for backward compatibility; the Jira connector + # reads them from this Config class. + JIRA_ENABLED = os.environ.get("JIRA_DOMAIN", "") != "" JIRA_WEBHOOK_SECRET = os.environ.get("JIRA_WEBHOOK_SECRET", "") - JIRA_DOMAIN = os.environ.get("JIRA_DOMAIN", "") # e.g., "yourorg.atlassian.net" + JIRA_DOMAIN = os.environ.get("JIRA_DOMAIN", "") JIRA_EMAIL = os.environ.get("JIRA_EMAIL", "") JIRA_API_TOKEN = os.environ.get("JIRA_API_TOKEN", "") - - # Jira SLA service account (JSM Agent licence required for SLA fields) JIRA_SLA_EMAIL = os.environ.get("JIRA_SLA_EMAIL", "") JIRA_SLA_API_TOKEN = os.environ.get("JIRA_SLA_API_TOKEN", "") JIRA_CLOUD_ID = os.environ.get("JIRA_CLOUD_ID", "") - - # Jira data storage (raw data, will be processed to parquet later) JIRA_DATA_DIR = Path(os.environ.get("JIRA_DATA_DIR", "/data/src_data/raw/jira")) @classmethod diff --git a/webapp/health_service.py b/webapp/health_service.py index 37972eb..fdeb691 100644 --- a/webapp/health_service.py +++ b/webapp/health_service.py @@ -5,7 +5,7 @@ Returns detailed system status including: - Systemd services (webapp, telegram-bot, timers) - Disk space - System load -- Last Jira webhook timestamp +- Optional: Jira webhook timestamp (if Jira connector enabled) """ import logging @@ -14,6 +14,8 @@ import subprocess from datetime import datetime from pathlib import Path +from .config import Config + logger = logging.getLogger(__name__) # Services to monitor @@ -22,12 +24,19 @@ CRITICAL_SERVICES = [ "notify-bot.service", ] -TIMERS_TO_MONITOR = [ - "jira-consistency.timer", +# Base timers (always 
monitored) +_BASE_TIMERS = [ "corporate-memory.timer", +] + +# Jira timers (only if Jira connector is enabled) +_JIRA_TIMERS = [ + "jira-consistency.timer", "jira-sla-poll.timer", ] +TIMERS_TO_MONITOR = _BASE_TIMERS + (_JIRA_TIMERS if Config.JIRA_ENABLED else []) + def get_service_status(service_name: str) -> dict: """Get systemd service status.""" @@ -139,7 +148,6 @@ def health_check() -> tuple[dict, int]: timers = [get_service_status(t) for t in TIMERS_TO_MONITOR] disk = get_disk_usage() load = get_load_average() - jira = get_last_jira_webhook() # Overall health: all critical checks must pass all_healthy = ( @@ -155,9 +163,12 @@ def health_check() -> tuple[dict, int]: "timers": timers, "disk": disk, "load": load, - "jira_webhook": jira, } + # Include Jira webhook status only if connector is enabled + if Config.JIRA_ENABLED: + response["jira_webhook"] = get_last_jira_webhook() + # Return 200 if healthy, 503 if degraded status_code = 200 if all_healthy else 503 diff --git a/webapp/sync_settings_service.py b/webapp/sync_settings_service.py index 17ff70f..a4f4f8f 100644 --- a/webapp/sync_settings_service.py +++ b/webapp/sync_settings_service.py @@ -105,9 +105,11 @@ def update_sync_settings(username: str, settings: dict) -> tuple[bool, str]: existing = all_settings.get(username, {}).get("datasets", dict(DEFAULT_SETTINGS)) existing.update(settings) - # Validate dependencies on merged state - if existing.get("jira_attachments") and not existing.get("jira"): - return False, "Jira attachments require Jira to be enabled" + # Validate dependencies on merged state (from instance config) + for key, info in DATASET_INFO.items(): + requires = info.get("requires") if isinstance(info, dict) else None + if requires and existing.get(key) and not existing.get(requires): + return False, f"{key} requires {requires} to be enabled" # Update user's settings all_settings[username] = {