mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
1944 lines
67 KiB
Python
Executable File
1944 lines
67 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Query GitHub Actions job status for specific jobs or generate runner fleet reports.
|
|
|
|
Usage:
|
|
# Per-job reports (original mode)
|
|
python scripts/ci/utils/query_job_status.py --job "stage-c-test-large-8-gpu-amd-mi35x"
|
|
python scripts/ci/utils/query_job_status.py --job "stage-c-test-large-8-gpu-amd-mi35x" --hours 48
|
|
python scripts/ci/utils/query_job_status.py --job "stage-c-test-large-8-gpu-amd-mi35x" --workflow "pr-test-amd.yml" --input-data-file actions-job-snapshot.json --summary
|
|
|
|
# Runner fleet report (cross-workflow runner analytics)
|
|
python scripts/ci/utils/query_job_status.py --runner-report --workflow "pr-test-amd.yml,nightly-test-amd.yml" --hours 24
|
|
python scripts/ci/utils/query_job_status.py --runner-report --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" --summary
|
|
python scripts/ci/utils/query_job_status.py --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" --dump-data-file actions-job-snapshot.json
|
|
|
|
Requirements:
|
|
pip install tabulate
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Optional
|
|
|
|
# tabulate is the only third-party dependency of this script; fail fast with
# an actionable hint when it is missing.
try:
    from tabulate import tabulate
except ImportError:
    # Diagnostics go to stderr like every other error in this file, and
    # sys.exit is used because the bare ``exit`` helper is injected by the
    # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
    print("Please install tabulate: pip install tabulate", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
def check_gh_cli_available() -> bool:
    """Report whether the ``gh`` CLI can be executed and is logged in.

    Runs ``gh --version`` followed by ``gh auth status``. An explanatory
    message is written to stderr when the binary cannot be found or when the
    authentication check fails.

    Returns:
        True only when both commands exit with status 0.
    """

    def _gh(*cli_args: str) -> subprocess.CompletedProcess:
        # Output is captured so the probe never pollutes the report output.
        return subprocess.run(["gh", *cli_args], capture_output=True, text=True)

    try:
        if _gh("--version").returncode != 0:
            return False
        auth = _gh("auth", "status")
    except FileNotFoundError:
        # subprocess raises this when the ``gh`` executable is not on PATH.
        print(
            "Error: gh CLI is not installed. Please install it from https://cli.github.com/",
            file=sys.stderr,
        )
        return False

    if auth.returncode == 0:
        return True

    print(
        "Error: gh CLI is not authenticated. Please run 'gh auth login' first.",
        file=sys.stderr,
    )
    print(f"Details: {auth.stderr}", file=sys.stderr)
    return False
|
|
|
|
|
|
def run_gh_command(args: list[str]) -> dict:
    """Invoke ``gh api`` with *args* and return the decoded JSON response.

    Raises:
        Exception: if the ``gh`` executable is missing or the command exits
            with a non-zero status (the captured stderr is included).
    """
    command = ["gh", "api", *args]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        raise Exception("gh CLI not found. Please install from https://cli.github.com/")

    if completed.returncode == 0:
        return json.loads(completed.stdout)
    raise Exception(f"gh api failed: {completed.stderr}")
|
|
|
|
|
|
def is_rate_limit_error(error: str) -> bool:
    """Check whether an API error was caused by GitHub rate limiting.

    The match is case-insensitive and covers the primary limit message
    ("API rate limit exceeded ..."), the secondary limit message ("You have
    exceeded a secondary rate limit ...") and the older abuse-detection
    wording, so throttled requests are not misreported as generic API errors.

    Args:
        error: Error text, typically the stderr captured from ``gh api``.

    Returns:
        True when the text contains one of the known rate-limit phrases.
    """
    markers = (
        "rate limit exceeded",
        "secondary rate limit",
        "abuse detection",
    )
    message = (error or "").lower()
    return any(marker in message for marker in markers)
|
|
|
|
|
|
def _new_workflow_fetch_stats(workflow: str) -> dict[str, Any]:
|
|
"""Create an empty metadata bucket for a workflow snapshot."""
|
|
return {
|
|
"workflow": workflow,
|
|
"total_runs_seen": 0,
|
|
"runs_with_jobs": 0,
|
|
"skipped_runs": 0,
|
|
"skipped_runs_rate_limit": 0,
|
|
"jobs_collected": 0,
|
|
}
|
|
|
|
|
|
def _new_fetch_metadata(repo: str, workflows: list[str], hours: int) -> dict[str, Any]:
    """Build the empty bookkeeping record stored next to a snapshot's jobs.

    The record carries the request parameters, global counters, lists for
    skipped runs and workflow-level failures, and one zeroed stats entry per
    requested workflow.
    """
    per_workflow: dict[str, Any] = {}
    for name in workflows:
        per_workflow[name] = _new_workflow_fetch_stats(name)

    metadata: dict[str, Any] = {"repo": repo, "hours": hours}
    metadata["requested_workflows"] = workflows
    metadata["total_runs_seen"] = 0
    metadata["runs_with_jobs"] = 0
    metadata["jobs_collected"] = 0
    metadata["skipped_runs"] = []
    metadata["workflow_fetch_failures"] = []
    metadata["workflow_stats"] = per_workflow
    return metadata
|
|
|
|
|
|
def _record_workflow_fetch_failure(
    fetch_metadata: dict[str, Any], workflow: str, error: str
) -> None:
    """Append a workflow-level listing failure to ``workflow_fetch_failures``.

    The entry stores the workflow, the whitespace-stripped error text and a
    ``reason`` of ``"rate_limit"`` or ``"api_error"``.
    """
    entry = {"workflow": workflow, "error": error.strip()}
    entry["reason"] = "rate_limit" if is_rate_limit_error(error) else "api_error"
    fetch_metadata["workflow_fetch_failures"].append(entry)
|
|
|
|
|
|
def _record_skipped_run(
    fetch_metadata: dict[str, Any], workflow: str, run: dict, error: str
) -> None:
    """Register a run whose job list could not be retrieved.

    Bumps the per-workflow skip counters (creating the stats entry when the
    workflow has none yet) and appends a descriptive record to
    ``fetch_metadata["skipped_runs"]``.
    """
    rate_limited = is_rate_limit_error(error)

    stats = fetch_metadata["workflow_stats"].setdefault(
        workflow, _new_workflow_fetch_stats(workflow)
    )
    stats["skipped_runs"] += 1
    if rate_limited:
        stats["skipped_runs_rate_limit"] += 1

    record = {
        "workflow": workflow,
        "run_id": run["id"],
        "created_at": run.get("created_at", ""),
        "status": run.get("status", "unknown"),
        # A missing or null conclusion is shown as a dash.
        "conclusion": run.get("conclusion") or "-",
        "reason": "rate_limit" if rate_limited else "api_error",
        "error": error.strip(),
    }
    fetch_metadata["skipped_runs"].append(record)
|
|
|
|
|
|
def parse_time(time_str: str) -> Optional[datetime]:
    """Convert an ISO-8601 timestamp into a ``datetime``.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. Empty or
    missing input yields ``None``.
    """
    if time_str:
        normalized = time_str.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    return None
|
|
|
|
|
|
def format_time(time_str: str) -> str:
    """Render an ISO timestamp as ``MM-DD HH:MM`` in UTC, or ``"-"`` if absent."""
    parsed = parse_time(time_str) if time_str else None
    if not parsed:
        return "-"
    # Convert to UTC before formatting so all report times share one zone.
    return parsed.astimezone(timezone.utc).strftime("%m-%d %H:%M")
|
|
|
|
|
|
def get_workflow_runs(
    repo: str, workflow: str, hours: int = 24, max_pages: int = 20
) -> list[dict]:
    """Get workflow runs created within the last *hours* hours.

    Pages through the runs listing (100 per page) and stops at the first run
    older than the cutoff, which assumes the listing is ordered newest-first.

    Args:
        repo: Repository in ``owner/name`` form.
        workflow: Workflow file name or id; a falsy value lists runs of every
            workflow in the repository.
        hours: Size of the look-back window.
        max_pages: Upper bound on the number of pages requested. When the
            bound is reached while pages are still full, a warning is printed
            to stderr because the result may be truncated.

    Returns:
        The run dictionaries inside the window, in listing order.

    Raises:
        Exception: propagated from ``run_gh_command`` when the API call fails.
    """
    per_page = 100
    since = datetime.now(timezone.utc) - timedelta(hours=hours)
    endpoint = (
        f"repos/{repo}/actions/workflows/{workflow}/runs"
        if workflow
        else f"repos/{repo}/actions/runs"
    )

    runs: list[dict] = []
    for page in range(1, max_pages + 1):
        data = run_gh_command([f"{endpoint}?per_page={per_page}&page={page}"])
        page_runs = data.get("workflow_runs", [])

        for run in page_runs:
            created_at = parse_time(run.get("created_at"))
            if not created_at:
                # Runs without a timestamp can be neither kept nor used as a stop signal.
                continue
            if created_at < since:
                return runs
            runs.append(run)

        if len(page_runs) < per_page:
            # A short page means the listing is exhausted.
            return runs

    print(
        f"Warning: stopped listing runs for {workflow or repo} after "
        f"{max_pages} pages; older runs in the window may be missing",
        file=sys.stderr,
    )
    return runs
|
|
|
|
|
|
def get_jobs_for_run(repo: str, run_id: int, max_pages: int = 5) -> list[dict]:
    """Get all jobs for a workflow run.

    Args:
        repo: Repository in ``owner/name`` form.
        run_id: Identifier of the workflow run.
        max_pages: Upper bound on the number of 100-job pages requested. When
            the bound is reached while pages are still full, a warning is
            printed to stderr because the job list may be truncated.

    Returns:
        The job dictionaries of the run, in listing order.

    Raises:
        Exception: propagated from ``run_gh_command`` when the API call fails.
    """
    per_page = 100
    jobs: list[dict] = []
    for page in range(1, max_pages + 1):
        data = run_gh_command(
            [f"repos/{repo}/actions/runs/{run_id}/jobs?per_page={per_page}&page={page}"]
        )
        page_jobs = data.get("jobs", [])
        jobs.extend(page_jobs)
        if len(page_jobs) < per_page:
            # A short page means the listing is exhausted.
            return jobs

    print(
        f"Warning: stopped listing jobs for run {run_id} after {max_pages} pages; "
        "some jobs may be missing",
        file=sys.stderr,
    )
    return jobs
|
|
|
|
|
|
def get_pr_number_from_run(run: dict) -> Optional[int]:
    """Return the number of the first pull request attached to *run*.

    ``None`` is returned when the run's ``pull_requests`` list is missing or
    empty.
    """
    linked_prs = run.get("pull_requests", [])
    return linked_prs[0].get("number") if linked_prs else None
|
|
|
|
|
|
def _job_name_matches_filter(job_name: str, job_filter: str) -> bool:
|
|
"""Check whether a job name matches the report filter prefix."""
|
|
job_name_lower = job_name.lower()
|
|
filter_lower = job_filter.lower()
|
|
if not job_name_lower.startswith(filter_lower):
|
|
return False
|
|
if len(job_name_lower) > len(filter_lower):
|
|
next_char = job_name_lower[len(filter_lower)]
|
|
if next_char not in (" ", "("):
|
|
return False
|
|
return True
|
|
|
|
|
|
def filter_jobs(
    jobs: list[dict],
    job_filter: str,
    workflow: str = None,
    status_filter: str = None,
) -> list[dict]:
    """Select the prefetched jobs that belong to one report target.

    A job is kept when its name matches *job_filter* and, if given, its
    ``workflow`` equals *workflow* and its ``status`` equals *status_filter*.
    Input order is preserved.
    """

    def _wanted(job: dict) -> bool:
        if workflow and job.get("workflow") != workflow:
            return False
        if not _job_name_matches_filter(job.get("job_name", ""), job_filter):
            return False
        return not (status_filter and job.get("status") != status_filter)

    return [job for job in jobs if _wanted(job)]
|
|
|
|
|
|
def save_snapshot(path: str, snapshot: dict[str, Any]) -> None:
    """Persist a prefetched Actions snapshot to disk as indented JSON.

    The snapshot is serialized before the file is opened, so a snapshot that
    cannot be encoded raises without truncating a file that already exists at
    *path*.

    Args:
        path: Destination file; overwritten when it exists.
        snapshot: JSON-serializable snapshot dictionary.

    Raises:
        TypeError: if *snapshot* contains values ``json`` cannot serialize.
        OSError: if the file cannot be written.
    """
    payload = json.dumps(snapshot, indent=2)
    # Explicit encoding keeps the file independent of the platform locale.
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
|
|
|
|
def load_snapshot(path: str) -> dict[str, Any]:
    """Load a previously saved Actions snapshot from disk.

    Args:
        path: JSON file to read.

    Returns:
        The decoded snapshot dictionary.

    Raises:
        ValueError: if the file does not contain a JSON object with a
            ``jobs`` field (this also covers malformed JSON, since
            ``json.JSONDecodeError`` is a ``ValueError``).
        OSError: if the file cannot be read.
    """
    # Explicit encoding keeps reading independent of the platform locale.
    with open(path, encoding="utf-8") as f:
        snapshot = json.load(f)
    # Checking the type first stops arrays or scalars from passing validation
    # or failing with an unrelated TypeError.
    if not isinstance(snapshot, dict) or "jobs" not in snapshot:
        raise ValueError(f"Snapshot file {path} is missing the 'jobs' field")
    return snapshot
|
|
|
|
|
|
def fetch_all_jobs_snapshot(
    repo: str,
    workflows: list[str],
    hours: int = 24,
) -> dict[str, Any]:
    """Collect jobs for every requested workflow in a single pass.

    Runs are listed per workflow, de-duplicated by run id, and their jobs are
    fetched one run at a time. Failures while listing a workflow or fetching
    a run's jobs are recorded in the fetch metadata instead of aborting, so
    consumers can tell when the snapshot is incomplete. Jobs whose only label
    is ``ubuntu-latest`` are dropped, and jobs without labels are dropped and
    counted under ``jobs_excluded_no_label``.

    Returns:
        A snapshot dictionary with the request parameters, the generation
        time, the flattened job records and the fetch metadata.
    """
    fetch_metadata = _new_fetch_metadata(repo, workflows, hours)

    # Phase 1: list runs per workflow, tagging each run with its workflow.
    collected_runs = []
    for workflow in workflows:
        print(f"Fetching runs for {workflow}...", file=sys.stderr)
        try:
            workflow_runs = get_workflow_runs(repo, workflow, hours)
        except Exception as exc:
            message = str(exc)
            print(
                f"Warning: Failed to list runs for workflow {workflow}: {message}",
                file=sys.stderr,
            )
            _record_workflow_fetch_failure(fetch_metadata, workflow, message)
            continue

        print(f" Found {len(workflow_runs)} runs for {workflow}", file=sys.stderr)
        fetch_metadata["workflow_stats"][workflow]["total_runs_seen"] = len(
            workflow_runs
        )
        for run in workflow_runs:
            run["_workflow"] = workflow
            collected_runs.append(run)

    # Keep the first occurrence of each run id, preserving listing order.
    runs_by_id = {}
    for run in collected_runs:
        runs_by_id.setdefault(run["id"], run)
    unique_runs = list(runs_by_id.values())

    total_runs = len(unique_runs)
    fetch_metadata["total_runs_seen"] = total_runs
    print(f"Total unique workflow runs: {total_runs}", file=sys.stderr)

    # Phase 2: fetch the jobs of every run and flatten them into records.
    results = []
    jobs_excluded_no_label = 0
    for position, run in enumerate(unique_runs, start=1):
        if position % 20 == 0:
            print(f"Processing run {position}/{total_runs}...", file=sys.stderr)

        workflow_name = run.get("_workflow", "-")
        try:
            jobs = get_jobs_for_run(repo, run["id"])
        except Exception as exc:
            message = str(exc)
            print(
                f"Warning: Failed to get jobs for run {run['id']}: {message}",
                file=sys.stderr,
            )
            _record_skipped_run(fetch_metadata, workflow_name, run, message)
            continue

        workflow_stats = fetch_metadata["workflow_stats"].setdefault(
            workflow_name, _new_workflow_fetch_stats(workflow_name)
        )
        workflow_stats["runs_with_jobs"] += 1
        fetch_metadata["runs_with_jobs"] += 1

        pr_number = get_pr_number_from_run(run)
        branch = run.get("head_branch", "")
        run_status = run.get("status", "unknown")
        run_conclusion = run.get("conclusion") or "-"
        run_ended_badly = run_status == "completed" and run_conclusion in (
            "cancelled",
            "failure",
        )

        jobs_before = len(results)
        for job in jobs:
            job_name = job.get("name", "")
            job_status = job.get("status", "unknown")
            runner_name = job.get("runner_name") or "-"
            labels = job.get("labels", [])

            if len(labels) == 1 and labels[0] == "ubuntu-latest":
                continue
            if not labels:
                jobs_excluded_no_label += 1
                continue

            # An in-progress job is flagged as stuck when it has no runner or
            # when its run already completed as cancelled/failed.
            is_stuck = job_status == "in_progress" and (
                runner_name == "-" or run_ended_badly
            )

            results.append(
                {
                    "job_name": job_name,
                    "status": job_status,
                    "conclusion": job.get("conclusion") or "-",
                    "created_at": job.get("created_at", ""),
                    "started_at": job.get("started_at", ""),
                    "completed_at": job.get("completed_at", ""),
                    "runner_name": runner_name,
                    "labels": labels,
                    "runner_group_name": job.get("runner_group_name") or "-",
                    "run_id": run["id"],
                    "run_status": run_status,
                    "run_conclusion": run_conclusion,
                    "pr_number": pr_number,
                    "branch": branch,
                    "html_url": job.get("html_url", ""),
                    "is_stuck": is_stuck,
                    "workflow": workflow_name,
                }
            )

        workflow_stats["jobs_collected"] += len(results) - jobs_before

    fetch_metadata["jobs_collected"] = len(results)
    fetch_metadata["jobs_excluded_no_label"] = jobs_excluded_no_label

    snapshot: dict[str, Any] = {
        "snapshot_version": 1,
        "repo": repo,
        "hours": hours,
        "workflows": workflows,
    }
    snapshot["generated_at"] = datetime.now(timezone.utc).isoformat()
    snapshot["jobs"] = results
    snapshot["fetch_metadata"] = fetch_metadata
    return snapshot
|
|
|
|
|
|
def query_jobs(
    repo: str,
    job_filter: str,
    workflow: str = None,
    hours: int = 24,
    status_filter: str = None,
) -> list[dict]:
    """Fetch the jobs of one workflow and return those matching the filters."""
    all_jobs = fetch_all_jobs_snapshot(repo, [workflow], hours)["jobs"]
    return filter_jobs(all_jobs, job_filter, workflow, status_filter)
|
|
|
|
|
|
def query_all_jobs(
    repo: str,
    workflows: list[str],
    hours: int = 24,
) -> list[dict]:
    """Return every collected job across *workflows* for fleet-level analysis.

    In contrast to ``query_jobs()`` no job-name filtering is applied; the
    job list of a single snapshot fetch is returned as is. The snapshot
    already leaves out jobs labelled only ``ubuntu-latest``.
    """
    snapshot = fetch_all_jobs_snapshot(repo, workflows, hours)
    return snapshot["jobs"]
|
|
|
|
|
|
def calculate_duration(started_at: str, completed_at: str) -> str:
    """Format the time between two ISO timestamps as ``XmYs`` or ``XhYm``.

    Returns ``"-"`` when either timestamp is missing or when the end precedes
    the start.
    """
    if not (started_at and completed_at):
        return "-"

    begin, finish = parse_time(started_at), parse_time(completed_at)
    if not (begin and finish):
        return "-"

    elapsed = (finish - begin).total_seconds()
    if elapsed < 0:
        return "-"  # Invalid data, skip

    total_minutes, seconds = divmod(int(elapsed), 60)
    if total_minutes < 60:
        return f"{total_minutes}m{seconds}s"
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours}h{minutes}m"
|
|
|
|
|
|
def calculate_queue_time(
    job: dict,
    report_time: datetime = None,
) -> str:
    """Format how long a job waited (or is still waiting) for a runner.

    A non-empty ``runner_name`` is treated as the signal that the job was
    picked up:

    * runner assigned: ``started_at - created_at``;
    * no runner and status ``queued``/``waiting``: ``report_time -
      created_at`` with a " (queuing)" suffix;
    * no runner and any other status: ``"-"``.

    ``"-"`` is also returned when a required timestamp is missing or the
    computed wait is negative.
    """
    created = parse_time(job.get("created_at", ""))
    if not created:
        return "-"

    runner = job.get("runner_name") or ""
    picked_up = bool(runner) and runner != "-"

    if picked_up:
        started = parse_time(job.get("started_at", ""))
        if not started:
            return "-"
        waited = (started - created).total_seconds()
        suffix = ""
    else:
        if job.get("status", "") not in ("queued", "waiting"):
            return "-"
        reference = report_time or datetime.now(timezone.utc)
        waited = (reference - created).total_seconds()
        suffix = " (queuing)"

    if waited < 0:
        # Negative waits (e.g. re-runs) mean the timestamps are unreliable.
        return "-"

    total_minutes, seconds = divmod(int(waited), 60)
    if total_minutes < 60:
        return f"{total_minutes}m{seconds}s{suffix}"
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours}h{minutes}m{suffix}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Runner fleet analytics functions
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _format_duration_seconds(seconds: Optional[float]) -> str:
|
|
"""Format seconds into human-readable duration string."""
|
|
if seconds is None or seconds < 0:
|
|
return "-"
|
|
total_seconds = int(seconds)
|
|
minutes = total_seconds // 60
|
|
secs = total_seconds % 60
|
|
if minutes >= 60:
|
|
hours = minutes // 60
|
|
minutes = minutes % 60
|
|
return f"{hours}h{minutes}m"
|
|
return f"{minutes}m{secs}s"
|
|
|
|
|
|
def _get_runner_label(job: dict) -> str:
|
|
"""Extract the primary runner label from a job's labels list."""
|
|
labels = job.get("labels", [])
|
|
if not labels:
|
|
return "unknown"
|
|
for label in labels:
|
|
if label.startswith("linux-mi"):
|
|
return label
|
|
return labels[0]
|
|
|
|
|
|
_RUNNER_LABEL_RE = re.compile(r"linux-(mi\w+?)-(\d+)gpu")
|
|
_RUNNER_LABEL_ALT_RE = re.compile(r"linux-(mi\w+?)-gpu-(\d+)")
|
|
|
|
|
|
def _runner_label_sort_key(label: str) -> tuple:
|
|
"""Sort key for natural ordering: GPU type first, then GPU count.
|
|
|
|
linux-mi325-1gpu-sglang -> ('mi325', 1, 'linux-mi325-1gpu-sglang')
|
|
linux-mi35x-8gpu-sglang -> ('mi35x', 8, 'linux-mi35x-8gpu-sglang')
|
|
linux-mi35x-gpu-8.fabric -> ('mi35x', 8, 'linux-mi35x-gpu-8.fabric')
|
|
"""
|
|
m = _RUNNER_LABEL_RE.search(label)
|
|
if m:
|
|
return (m.group(1), int(m.group(2)), label)
|
|
m2 = _RUNNER_LABEL_ALT_RE.search(label)
|
|
if m2:
|
|
return (m2.group(1), int(m2.group(2)), label)
|
|
return ("zzz", 0, label)
|
|
|
|
|
|
def _percentile(data: list[float], p: int) -> Optional[float]:
|
|
"""Return a percentile from an already sorted or unsorted numeric list."""
|
|
if not data:
|
|
return None
|
|
sorted_data = sorted(data)
|
|
idx = min(int(len(sorted_data) * p / 100), len(sorted_data) - 1)
|
|
return sorted_data[idx]
|
|
|
|
|
|
def _average(data: list[float]) -> Optional[float]:
|
|
"""Return the average of a numeric list when samples exist."""
|
|
if not data:
|
|
return None
|
|
return sum(data) / len(data)
|
|
|
|
|
|
def _queue_time_seconds(job: dict, report_time: datetime = None) -> Optional[float]:
    """Compute the queue time of *job* in seconds.

    * ``runner_name`` set (job was picked up): ``started_at - created_at``.
    * No ``runner_name`` and status ``queued``/``waiting`` (still queued):
      ``report_time - created_at``, with ``report_time`` defaulting to now.
    * No ``runner_name`` and any other status: ``None``.

    ``None`` is also returned when a required timestamp is missing or the
    difference is negative.

    A non-empty ``runner_name`` is used as the "dequeued" signal because,
    per the original notes for this function, GitHub sets ``started_at`` when
    a job enters the queue, so ``started_at - created_at`` would be ~0 for
    jobs that are still waiting.
    """
    created = parse_time(job.get("created_at", ""))
    if not created:
        return None

    runner = job.get("runner_name") or ""
    if runner and runner != "-":
        started = parse_time(job.get("started_at", ""))
        if not started:
            return None
        waited = (started - created).total_seconds()
    else:
        if job.get("status", "") not in ("queued", "waiting"):
            return None
        reference = datetime.now(timezone.utc) if report_time is None else report_time
        waited = (reference - created).total_seconds()

    return waited if waited >= 0 else None
|
|
|
|
|
|
def _build_queue_distribution(queue_times: list[float]) -> dict[str, Any]:
    """Summarise one set of queue times as histogram buckets plus percentiles.

    Returns a dictionary with ``buckets`` (range label, count and percentage
    rounded to one decimal), ``p50``/``p90``/``p99`` and ``total``. An empty
    input yields empty buckets, ``None`` percentiles and a total of 0.
    """
    if not queue_times:
        return {"buckets": [], "p50": None, "p90": None, "p99": None, "total": 0}

    ordered = sorted(queue_times)
    total = len(ordered)

    # (label, inclusive lower bound, exclusive upper bound) in seconds.
    edges = (
        ("< 1 min", 0, 60),
        ("1-5 min", 60, 300),
        ("5-15 min", 300, 900),
        ("15-30 min", 900, 1800),
        ("30-60 min", 1800, 3600),
        ("> 60 min", 3600, float("inf")),
    )

    buckets = []
    for label, lower, upper in edges:
        hits = len([qt for qt in ordered if lower <= qt < upper])
        share = hits / total * 100
        buckets.append({"range": label, "count": hits, "percentage": round(share, 1)})

    summary: dict[str, Any] = {"buckets": buckets}
    for p in (50, 90, 99):
        summary[f"p{p}"] = _percentile(ordered, p)
    summary["total"] = total
    return summary
|
|
|
|
|
|
def analyze_concurrency(jobs: list[dict], report_time: datetime = None) -> dict:
    """Measure concurrent runner usage for every runner label.

    For each job that was picked up by a runner, a +1 event is placed at
    ``started_at`` and a -1 event at ``completed_at`` (or at *report_time*
    when the job has not completed). Sweeping the sorted events yields the
    peak concurrency and a time-weighted average over the periods in which at
    least one job was running. Queue-time and duration statistics are
    reported alongside.

    Returns:
        Mapping of runner label to its statistics dictionary.
    """
    if report_time is None:
        report_time = datetime.now(timezone.utc)

    jobs_by_label: dict[str, list[dict]] = {}
    for job in jobs:
        jobs_by_label.setdefault(_get_runner_label(job), []).append(job)

    results = {}
    for label in sorted(jobs_by_label):
        pool = jobs_by_label[label]
        events: list[tuple[datetime, int]] = []
        queue_times: list[float] = []
        durations: list[float] = []

        for job in pool:
            runner = job.get("runner_name") or ""
            if runner and runner != "-":
                started = parse_time(job.get("started_at", ""))
                completed = parse_time(job.get("completed_at", ""))
                if started:
                    # Jobs still running are closed at the report time.
                    finished = completed if completed else report_time
                    events.append((started, +1))
                    events.append((finished, -1))
                    durations.append((finished - started).total_seconds())

            waited = _queue_time_seconds(job, report_time=report_time)
            if waited is not None:
                queue_times.append(waited)

        peak = 0
        avg_concurrent = 0.0
        if events:
            # Ties sort -1 before +1, so back-to-back jobs do not overlap.
            events.sort(key=lambda event: (event[0], event[1]))
            active = 0
            weighted_seconds = 0.0
            busy_seconds = 0.0
            previous = events[0][0]
            for moment, delta in events:
                if previous and active > 0:
                    span = (moment - previous).total_seconds()
                    weighted_seconds += active * span
                    busy_seconds += span
                active += delta
                peak = max(peak, active)
                previous = moment
            avg_concurrent = round(
                weighted_seconds / busy_seconds if busy_seconds > 0 else 0, 1
            )

        results[label] = {
            "peak": peak,
            "avg_concurrent": avg_concurrent,
            "total_jobs": len(pool),
            "avg_queue_seconds": _average(queue_times),
            "p50_queue_seconds": _percentile(queue_times, 50),
            "p99_queue_seconds": _percentile(queue_times, 99),
            "avg_duration_seconds": _average(durations),
        }

    return results
|
|
|
|
|
|
def analyze_busy_periods(jobs: list[dict], report_time: datetime = None) -> list[dict]:
    """Summarise job activity for each UTC hour of the day.

    A job that was picked up by a runner is counted in the hour of its
    ``started_at``; a job still queued or waiting is counted in the hour of
    its ``created_at``. Each hour reports the job count, the average queue
    time, and a load class (Quiet / Moderate / Busy / Peak) relative to the
    busiest hour.

    Returns:
        A list of 24 dictionaries, one per hour starting at 00:00.
    """
    if report_time is None:
        report_time = datetime.now(timezone.utc)

    started_counts = [0] * 24
    waits_by_hour: list[list[float]] = [[] for _ in range(24)]

    for job in jobs:
        started = parse_time(job.get("started_at", ""))
        created = parse_time(job.get("created_at", ""))
        runner = job.get("runner_name") or ""
        picked_up = bool(runner and runner != "-")

        if picked_up and started:
            anchor = started
            waited = (started - created).total_seconds() if created else None
        elif job.get("status") in ("queued", "waiting") and created:
            anchor = created
            waited = (report_time - created).total_seconds()
        else:
            continue

        hour = anchor.astimezone(timezone.utc).hour
        started_counts[hour] += 1
        if waited is not None and waited >= 0:
            waits_by_hour[hour].append(waited)

    # Avoid dividing by zero when no job was counted at all.
    busiest = max(started_counts) or 1
    load_levels = ((0.75, "Peak"), (0.5, "Busy"), (0.25, "Moderate"))

    results = []
    for hour, (count, waits) in enumerate(zip(started_counts, waits_by_hour)):
        share = count / busiest
        load = next((name for floor, name in load_levels if share >= floor), "Quiet")
        results.append(
            {
                "hour": hour,
                "hour_label": f"{hour:02d}:00-{(hour + 1) % 24:02d}:00",
                "jobs_started": count,
                "avg_queue_seconds": sum(waits) / len(waits) if waits else 0,
                "load": load,
            }
        )

    return results
|
|
|
|
|
|
def analyze_queue_distribution(jobs: list[dict], report_time: datetime = None) -> dict:
    """Build a queue-time distribution for every runner label.

    Jobs without a usable queue time are ignored. The result maps each label
    (in sorted order) to its bucket/percentile summary.
    """
    waits_by_label: dict[str, list[float]] = {}
    for job in jobs:
        waited = _queue_time_seconds(job, report_time=report_time)
        if waited is not None:
            waits_by_label.setdefault(_get_runner_label(job), []).append(waited)

    distribution = {}
    for label in sorted(waits_by_label):
        distribution[label] = _build_queue_distribution(waits_by_label[label])
    return distribution
|
|
|
|
|
|
def analyze_utilization_snapshots(
    jobs: list[dict],
    report_time: datetime = None,
    interval_minutes: int = 15,
    hours: int = 24,
) -> dict[str, list[dict]]:
    """Point-in-time snapshot at regular intervals per runner label.

    At each interval mark over the last *hours* hours, counts:

    - running: jobs that have a runner assigned (``runner_name`` set) and are
      between ``started_at`` and ``completed_at`` (or *report_time* when not
      completed);
    - queued: the ``created_at`` to ``started_at`` span of jobs that were
      picked up, plus jobs without a runner whose status is
      ``queued``/``waiting``, counted from ``created_at`` to *report_time*.

    ``runner_name`` is used to tell running from queued because, per the
    original notes for this function, GitHub's ``started_at`` is set when a
    job enters the queue rather than when a runner picks it up.

    Args:
        jobs: Job records carrying labels, timestamps, status and runner name.
        report_time: End of the window; defaults to the current UTC time.
        interval_minutes: Spacing of the snapshot marks; must be positive.
        hours: Length of the window.

    Returns:
        Mapping of runner label to the marks at which at least one job was
        running or queued. Labels with no such mark are omitted.

    Raises:
        ValueError: if *interval_minutes* is not positive (it would otherwise
            divide by zero or never advance the time cursor).
    """
    if interval_minutes <= 0:
        raise ValueError(f"interval_minutes must be positive, got {interval_minutes}")

    if report_time is None:
        report_time = datetime.now(timezone.utc)

    label_jobs: dict[str, list[dict]] = {}
    for job in jobs:
        label_jobs.setdefault(_get_runner_label(job), []).append(job)

    # The tick grid is the same for every label, so build it once. The window
    # start is snapped down to a multiple of the interval within its hour.
    window_start = report_time - timedelta(hours=hours)
    window_start = window_start.replace(
        minute=(window_start.minute // interval_minutes) * interval_minutes,
        second=0,
        microsecond=0,
    )
    step = timedelta(minutes=interval_minutes)
    ticks: list[datetime] = []
    cursor = window_start
    while cursor <= report_time:
        ticks.append(cursor)
        cursor += step

    results: dict[str, list[dict]] = {}
    for label in sorted(label_jobs, key=_runner_label_sort_key):
        running_events: list[tuple[datetime, int]] = []
        queued_events: list[tuple[datetime, int]] = []

        for job in label_jobs[label]:
            created = parse_time(job.get("created_at", ""))
            started = parse_time(job.get("started_at", ""))
            completed = parse_time(job.get("completed_at", ""))
            runner = job.get("runner_name") or ""
            has_runner = bool(runner and runner != "-")

            if has_runner and started:
                end = completed if completed else report_time
                running_events.append((started, +1))
                running_events.append((end, -1))
                if created and created < started:
                    queued_events.append((created, +1))
                    queued_events.append((started, -1))
            elif created and job.get("status") in ("queued", "waiting"):
                queued_events.append((created, +1))
                queued_events.append((report_time, -1))

        # Ties sort -1 before +1 so a span ending at a tick is not counted.
        running_events.sort(key=lambda event: (event[0], event[1]))
        queued_events.sort(key=lambda event: (event[0], event[1]))

        snapshot_data: list[dict] = []
        for tick in ticks:
            running = _count_at_time(running_events, tick)
            queued = _count_at_time(queued_events, tick)
            if running > 0 or queued > 0:
                snapshot_data.append(
                    {
                        "time": tick.strftime("%m-%d %H:%M"),
                        "running": running,
                        "queued": queued,
                    }
                )

        if snapshot_data:
            results[label] = snapshot_data

    return results
|
|
|
|
|
|
def _count_at_time(
|
|
sorted_events: list[tuple[datetime, int]],
|
|
t: datetime,
|
|
) -> int:
|
|
"""Count concurrent items at an exact point in time using event sweep."""
|
|
count = 0
|
|
for ts, delta in sorted_events:
|
|
if ts > t:
|
|
break
|
|
count += delta
|
|
return max(count, 0)
|
|
|
|
|
|
def process_results(
    results: list[dict], repo: str, report_time: datetime = None
) -> dict:
    """Turn raw job records into the structures the report renderers need.

    Returns a dictionary with:

    - status_summary: job name -> counts per status/conclusion category
    - sorted_results: the records ordered by ``created_at``, newest first
    - active_jobs: in_progress/queued/waiting records that are not stuck
    - stuck_jobs: records flagged ``is_stuck``
    - failed_jobs: records with conclusion "failure" that are not stuck
    - processed_jobs: sorted records enriched with display fields
      (formatted times, queue time, duration, URL, PR info, status display)
    """
    if report_time is None:
        report_time = datetime.now(timezone.utc)

    if not results:
        return {
            "status_summary": {},
            "sorted_results": [],
            "active_jobs": [],
            "stuck_jobs": [],
            "failed_jobs": [],
            "processed_jobs": [],
        }

    categories = (
        "in_progress",
        "queued",
        "waiting",
        "stuck",
        "success",
        "failure",
        "cancelled",
        "skipped",
    )
    # Conclusions that are reported together under "cancelled".
    cancelled_like = ("cancelled", "timed_out", "action_required", "neutral", "stale")

    status_summary = {}
    for record in results:
        job_name = record["job_name"]
        status = record["status"]
        conclusion = record.get("conclusion", "-")
        if job_name not in status_summary:
            status_summary[job_name] = dict.fromkeys(categories, 0)
        counts = status_summary[job_name]

        if record.get("is_stuck", False):
            counts["stuck"] += 1
        elif status == "completed":
            if conclusion in ("success", "failure", "skipped"):
                counts[conclusion] += 1
            elif conclusion in cancelled_like:
                counts["cancelled"] += 1
        elif status in counts:
            counts[status] += 1

    sorted_results = sorted(
        results, key=lambda record: record["created_at"], reverse=True
    )

    # The three categories below are mutually exclusive: stuck jobs are kept
    # out of both the active and the failed lists.
    active_jobs = []
    stuck_jobs = []
    failed_jobs = []
    for record in results:
        stuck = record.get("is_stuck", False)
        if record.get("status") in ("in_progress", "queued", "waiting") and not stuck:
            active_jobs.append(record)
        if stuck:
            stuck_jobs.append(record)
        if record.get("conclusion", "-") == "failure" and not stuck:
            failed_jobs.append(record)

    processed_jobs = []
    for record in sorted_results:
        enriched = record.copy()
        enriched["created_formatted"] = format_time(record["created_at"])
        enriched["started_formatted"] = format_time(record["started_at"])
        enriched["queue_time"] = calculate_queue_time(record, report_time)
        enriched["duration"] = calculate_duration(
            record["started_at"], record["completed_at"]
        )
        # Prefer the job's own page; fall back to the run page.
        enriched["url"] = (
            record.get("html_url")
            or f"https://github.com/{repo}/actions/runs/{record['run_id']}"
        )
        enriched["pr_info"] = (
            f"PR#{record['pr_number']}"
            if record["pr_number"]
            else (record["branch"] or "-")
        )
        enriched["status_display"] = (
            f"STUCK ({record['status']})"
            if record.get("is_stuck", False)
            else record["status"]
        )
        processed_jobs.append(enriched)

    return {
        "status_summary": status_summary,
        "sorted_results": sorted_results,
        "active_jobs": active_jobs,
        "stuck_jobs": stuck_jobs,
        "failed_jobs": failed_jobs,
        "processed_jobs": processed_jobs,
    }
|
|
|
|
|
|
def summarize_fetch_metadata(
    fetch_metadata: Optional[dict[str, Any]], workflows: list[str] = None
) -> Optional[dict[str, Any]]:
    """Condense snapshot fetch metadata into completeness figures for a report.

    The workflows in scope are, in order of preference: the ``workflows``
    argument, the snapshot's ``requested_workflows`` entry, and finally every
    workflow that has an entry in ``workflow_stats``.

    Args:
        fetch_metadata: The ``fetch_metadata`` mapping of a snapshot, or a
            falsy value when none is available.
        workflows: Optional workflow names restricting the summary.

    Returns:
        ``None`` when ``fetch_metadata`` is falsy. Otherwise a dict with the
        summed run/job counters, the in-scope skipped runs and workflow
        listing failures, how many of each were caused by rate limiting, and
        an ``incomplete`` flag that is true when anything in scope is missing.
    """
    if not fetch_metadata:
        return None

    stats_by_workflow = fetch_metadata.get("workflow_stats", {})

    scope = set(workflows) if workflows else None
    if not scope:
        scope = set(fetch_metadata.get("requested_workflows", []))
    if not scope:
        scope = set(stats_by_workflow)

    def scoped(entries):
        # Keep only the entries that belong to a workflow in scope.
        return [entry for entry in entries if entry.get("workflow") in scope]

    def rate_limited(entries):
        # Count the entries whose recorded reason is a rate limit.
        return sum(1 for entry in entries if entry.get("reason") == "rate_limit")

    def total(counter):
        # Sum one counter over the stats of every in-scope workflow.
        return sum(
            stats_by_workflow[name].get(counter, 0)
            for name in scope
            if name in stats_by_workflow
        )

    missing_runs = scoped(fetch_metadata.get("skipped_runs", []))
    failed_listings = scoped(fetch_metadata.get("workflow_fetch_failures", []))

    return {
        "known_runs": total("total_runs_seen"),
        "runs_with_jobs": total("runs_with_jobs"),
        "jobs_collected": total("jobs_collected"),
        "skipped_runs": missing_runs,
        "workflow_failures": failed_listings,
        "skipped_run_rate_limit": rate_limited(missing_runs),
        "workflow_failure_rate_limit": rate_limited(failed_listings),
        "incomplete": bool(missing_runs or failed_listings),
    }
|
|
|
|
|
|
def append_fetch_metadata_notice(
    lines: list[str],
    fetch_metadata: Optional[dict[str, Any]],
    workflows: list[str] = None,
) -> None:
    """Add a blockquote warning to ``lines`` when the fetched data has gaps.

    Nothing is appended when ``summarize_fetch_metadata`` returns no summary
    or reports the in-scope data as complete. Otherwise the notice lists how
    many runs were fetched, which workflows could not be listed, up to ten
    missing run IDs, and ends with a blank line.

    Args:
        lines: Markdown lines of the report being built; mutated in place.
        fetch_metadata: Snapshot fetch metadata, or ``None``.
        workflows: Workflow names used to scope the completeness summary.
    """
    completeness = summarize_fetch_metadata(fetch_metadata, workflows)
    if not (completeness and completeness["incomplete"]):
        return

    missing_runs = completeness["skipped_runs"]
    failed_listings = completeness["workflow_failures"]
    runs_rate_limited = completeness["skipped_run_rate_limit"]
    listings_rate_limited = completeness["workflow_failure_rate_limit"]
    # Whatever was not a rate limit is reported as "other API errors".
    runs_other_errors = len(missing_runs) - runs_rate_limited
    listings_other_errors = len(failed_listings) - listings_rate_limited

    emit = lines.append
    emit(
        "> **Data completeness:** Incomplete. GitHub API rate limit and/or fetch errors prevented a full dataset."
    )

    known_runs = completeness["known_runs"]
    if known_runs > 0:
        fetched_runs = completeness["runs_with_jobs"]
        emit(
            f"> Successfully fetched jobs for **{fetched_runs}/{known_runs}** known runs in scope. Missing runs: **{len(missing_runs)}** (rate limit: {runs_rate_limited}, other API errors: {runs_other_errors})."
        )

    if failed_listings:
        workflow_names = ", ".join(
            f"`{entry['workflow']}`" for entry in failed_listings
        )
        emit(
            f"> Could not list workflow runs for {workflow_names}. Missing run count is unknown for those workflows (rate limit: {listings_rate_limited}, other API errors: {listings_other_errors})."
        )

    if missing_runs:
        # Show at most ten run IDs and summarize the rest as a count.
        shown_ids = ", ".join(f"`{run['run_id']}`" for run in missing_runs[:10])
        hidden_count = len(missing_runs) - 10
        tail = ""
        if hidden_count > 0:
            tail = f", and {hidden_count} more"
        emit(f"> Missing run IDs: {shown_ids}{tail}.")

    emit(
        "> Missing job counts inside skipped runs are unknown because GitHub did not return those run job lists."
    )
    emit("")
|
|
|
|
|
|
def print_table(
    results: list[dict], repo: str, generated_time: str, report_time: datetime = None
):
    """Print the per-job report to stdout as ``tabulate`` tables.

    Output consists of a short header, a per-job-name status summary, the
    detailed job list, and, when present, link tables for active jobs and
    for stuck/ghost jobs. When ``results`` is empty only the header and a
    "no jobs" message are printed.

    Args:
        results: Job records to report on.
        repo: ``owner/name`` of the repository; passed to ``process_results``.
        generated_time: Pre-formatted UTC timestamp shown in the header.
        report_time: Reference time passed on to ``process_results``.
    """
    print("")
    print(f"Report generated: {generated_time} UTC")
    print("Note: All times are in UTC")
    print("")

    if not results:
        print("No jobs found matching the filter.")
        return

    data = process_results(results, repo, report_time)
    status_summary = data["status_summary"]
    processed_jobs = data["processed_jobs"]
    active_jobs = data["active_jobs"]
    stuck_jobs = data["stuck_jobs"]

    # Index the processed rows once instead of scanning the whole list for
    # every active/stuck job. ``setdefault`` keeps the first row for a key,
    # which is the row a front-to-back scan would have returned.
    processed_by_key: dict = {}
    for processed in processed_jobs:
        processed_by_key.setdefault(
            (processed["run_id"], processed["job_name"]), processed
        )

    def find_processed(job: dict) -> Optional[dict]:
        # Processed counterpart of a raw job record, or None if absent.
        return processed_by_key.get((job["run_id"], job["job_name"]))

    def banner(title: str) -> None:
        # Section heading framed by 100-character rules.
        print("\n" + "=" * 100)
        print(title)
        print("=" * 100)

    # Summary table
    banner("SUMMARY BY JOB NAME")
    status_columns = (
        "in_progress",
        "queued",
        "waiting",
        "stuck",
        "success",
        "failure",
        "cancelled",
        "skipped",
    )
    summary_data = [
        [job_name, *(counts[column] for column in status_columns)]
        for job_name, counts in sorted(status_summary.items())
    ]
    print(
        tabulate(
            summary_data,
            headers=[
                "Job Name",
                "Running",
                "Queued",
                "Waiting",
                "Stuck",
                "Success",
                "Failure",
                "Cancelled",
                "Skipped",
            ],
            tablefmt="grid",
        )
    )

    # Detailed table
    banner("DETAILED JOB LIST")
    detail_data = [
        [
            p["job_name"],
            p["status_display"],
            p["conclusion"],
            p["created_formatted"],
            p["started_formatted"],
            p["queue_time"],
            p["duration"],
            p["runner_name"] or "-",
            p["pr_info"],
            p["run_id"],
        ]
        for p in processed_jobs
    ]
    print(
        tabulate(
            detail_data,
            headers=[
                "Job Name",
                "Status",
                "Conclusion",
                "Created",
                "Started",
                "Queue",
                "Duration",
                "Runner",
                "PR/Branch",
                "Run ID",
            ],
            tablefmt="grid",
        )
    )

    # Links for active jobs (processed rows carry the computed queue time)
    if active_jobs:
        banner("ACTIVE JOB LINKS")
        link_data = []
        for job in active_jobs:
            p = find_processed(job)
            if p:
                link_data.append(
                    [
                        p["job_name"],
                        p["status"],
                        p["queue_time"],
                        p["pr_info"],
                        p["runner_name"] or "-",
                        p["url"],
                    ]
                )
        print(
            tabulate(
                link_data,
                headers=["Job Name", "Status", "Queue", "PR/Branch", "Runner", "URL"],
                tablefmt="simple",
            )
        )

    # Stuck jobs
    if stuck_jobs:
        banner("STUCK/GHOST JOBS (in_progress but no runner or workflow cancelled)")
        stuck_data = []
        for job in stuck_jobs:
            p = find_processed(job)
            if p:
                run_info = (
                    f"{job.get('run_status', '-')}/{job.get('run_conclusion', '-')}"
                )
                stuck_data.append(
                    [
                        p["job_name"],
                        p["status"],
                        run_info,
                        p["pr_info"],
                        p["runner_name"] or "-",
                        p["url"],
                    ]
                )
        print(
            tabulate(
                stuck_data,
                headers=[
                    "Job Name",
                    "Job Status",
                    "Run Status/Conclusion",
                    "PR/Branch",
                    "Runner",
                    "URL",
                ],
                tablefmt="simple",
            )
        )
|
|
|
|
|
|
def format_markdown(
    results: list[dict],
    repo: str,
    job_filter: str,
    hours: int,
    generated_time: str,
    report_time: datetime = None,
    fetch_metadata: dict[str, Any] = None,
    workflow: str = None,
) -> str:
    """Render the per-job report as markdown for a GitHub Actions summary.

    The document has a header, an optional data-completeness notice, a
    per-job-name summary table, optional "Active", "Stuck/Ghost" and "Failed"
    job tables, and a collapsible table listing every job.

    Args:
        results: Job records to report on.
        repo: ``owner/name`` of the repository; passed to ``process_results``.
        job_filter: Job-name filter shown in the report title.
        hours: Size of the time window shown in the header.
        generated_time: Pre-formatted UTC timestamp shown in the header.
        report_time: Reference time passed on to ``process_results``.
        fetch_metadata: Snapshot fetch metadata used for the completeness
            notice, if any.
        workflow: Workflow name used to scope the completeness notice.

    Returns:
        The markdown document as a single newline-joined string.
    """
    lines = []

    # Header
    lines.append(f"# Job Status Report: `{job_filter}`")
    lines.append("")
    lines.append(f"**Time window:** Last {hours} hours")
    lines.append(f"**Generated:** {generated_time} UTC")
    lines.append(f"**Total jobs found:** {len(results)}")
    lines.append("")
    lines.append("> **Note:** All times are displayed in UTC")
    lines.append("")
    append_fetch_metadata_notice(
        lines, fetch_metadata, [workflow] if workflow else None
    )

    if not results:
        lines.append("> No jobs found matching the filter.")
        return "\n".join(lines)

    data = process_results(results, repo, report_time)
    status_summary = data["status_summary"]
    processed_jobs = data["processed_jobs"]
    active_jobs = data["active_jobs"]
    stuck_jobs = data["stuck_jobs"]
    failed_jobs = data["failed_jobs"]

    # Index the processed rows once instead of scanning the whole list for
    # every active/stuck/failed job. ``setdefault`` keeps the first row for a
    # key, which is the row a front-to-back scan would have returned.
    processed_by_key: dict = {}
    for processed in processed_jobs:
        processed_by_key.setdefault(
            (processed["run_id"], processed["job_name"]), processed
        )

    def find_processed(job: dict) -> Optional[dict]:
        # Processed counterpart of a raw job record, or None if absent.
        return processed_by_key.get((job["run_id"], job["job_name"]))

    def emphasize(count: int) -> str:
        # Bold non-zero counts so they stand out in the summary table.
        return f"**{count}**" if count > 0 else "0"

    def newest_first(job: dict):
        return job["created_at"]

    # Summary table
    lines.append("## Summary by Job Name")
    lines.append("")
    lines.append(
        "> **Status meanings:** Running = executing, Queued = waiting for runner, Waiting = waiting for dependent jobs, Stuck = ghost job, Cancelled = cancelled/timed_out, Skipped = skipped by workflow conditions"
    )
    lines.append("")
    lines.append(
        "| Job Name | Running | Queued | Waiting | Stuck | Success | Failure | Cancelled | Skipped |"
    )
    lines.append(
        "|----------|---------|--------|---------|-------|---------|---------|-----------|---------|"
    )

    for job_name, counts in sorted(status_summary.items()):
        running = emphasize(counts["in_progress"])
        queued = emphasize(counts["queued"])
        waiting = emphasize(counts["waiting"])
        stuck = emphasize(counts["stuck"])
        success = str(counts["success"])
        failure = emphasize(counts["failure"])
        cancelled = str(counts["cancelled"])
        skipped = str(counts["skipped"])
        lines.append(
            f"| `{job_name}` | {running} | {queued} | {waiting} | {stuck} | {success} | {failure} | {cancelled} | {skipped} |"
        )

    lines.append("")

    # Active jobs section
    if active_jobs:
        lines.append("## Active Jobs")
        lines.append("")
        lines.append(
            "| Status | Job Name | Created | Started | Queue | PR/Branch | Runner | Link |"
        )
        lines.append(
            "|--------|----------|---------|---------|-------|-----------|--------|------|"
        )

        for job in sorted(
            active_jobs, key=lambda x: (x["status"], x["created_at"]), reverse=True
        ):
            p = find_processed(job)
            if p:
                lines.append(
                    f"| {p['status']} | `{p['job_name']}` | {p['created_formatted']} | {p['started_formatted']} | {p['queue_time']} | {p['pr_info']} | `{p['runner_name'] or '-'}` | [View]({p['url']}) |"
                )

        lines.append("")

    # Stuck/Ghost jobs section
    if stuck_jobs:
        lines.append("## Stuck/Ghost Jobs")
        lines.append("")
        lines.append(
            "> Jobs showing `in_progress` but have no runner assigned or workflow run is cancelled"
        )
        lines.append("")
        lines.append(
            "| Job Status | Run Status | Job Name | PR/Branch | Runner | Link |"
        )
        lines.append(
            "|------------|------------|----------|-----------|--------|------|"
        )

        for job in sorted(stuck_jobs, key=newest_first, reverse=True):
            p = find_processed(job)
            if p:
                run_info = (
                    f"{job.get('run_status', '-')}/{job.get('run_conclusion', '-')}"
                )
                lines.append(
                    f"| {p['status']} | {run_info} | `{p['job_name']}` | {p['pr_info']} | `{p['runner_name'] or '-'}` | [View]({p['url']}) |"
                )

        lines.append("")

    # Failed jobs section (before All Jobs)
    if failed_jobs:
        lines.append(f"## Failed Jobs ({len(failed_jobs)} total)")
        lines.append("")
        lines.append(
            "| Conclusion | Job Name | Created | Started | Queue | Duration | Runner | PR/Branch | Link |"
        )
        lines.append(
            "|------------|----------|---------|---------|-------|----------|--------|-----------|------|"
        )

        for job in sorted(failed_jobs, key=newest_first, reverse=True):
            p = find_processed(job)
            if p:
                lines.append(
                    f"| {p['conclusion']} | `{p['job_name']}` | {p['created_formatted']} | {p['started_formatted']} | {p['queue_time']} | {p['duration']} | `{p['runner_name'] or '-'}` | {p['pr_info']} | [View]({p['url']}) |"
                )

        lines.append("")

    # Detailed table (all jobs) - collapsible
    lines.append("<details>")
    lines.append(
        f"<summary><strong>All Jobs ({len(results)} total)</strong> - Click to expand</summary>"
    )
    lines.append("")
    lines.append(
        "| Job Name | Status | Conclusion | Created | Started | Queue | Duration | Runner | PR/Branch | Link |"
    )
    lines.append(
        "|----------|--------|------------|---------|---------|-------|----------|--------|-----------|------|"
    )

    for p in processed_jobs:
        # Stuck jobs get a bold marker in front of their raw status.
        if p.get("is_stuck", False):
            status_display = f"**STUCK** ({p['status']})"
        else:
            status_display = p["status"]

        lines.append(
            f"| `{p['job_name']}` | {status_display} | {p['conclusion']} | {p['created_formatted']} | {p['started_formatted']} | {p['queue_time']} | {p['duration']} | `{p['runner_name'] or '-'}` | {p['pr_info']} | [View]({p['url']}) |"
        )

    lines.append("")
    lines.append("</details>")
    lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def format_runner_report_markdown(
    jobs: list[dict],
    workflows: list[str],
    hours: int,
    generated_time: str,
    report_time: datetime = None,
    fetch_metadata: dict[str, Any] = None,
) -> str:
    """Render the cross-workflow runner fleet report as markdown.

    Sections, each emitted only when its analysis returns data: fleet
    overview, concurrency by runner label, busy periods, utilization
    snapshots, queue-time distribution, a collapsible failed-jobs table and
    a stuck/ghost-jobs table.

    Args:
        jobs: Job records to analyze.
        workflows: Workflow names shown in the header and used to scope the
            data-completeness notice.
        hours: Size of the time window shown in the report.
        generated_time: Pre-formatted UTC timestamp shown in the header.
        report_time: Reference time for the analyses; defaults to the
            current UTC time.
        fetch_metadata: Snapshot fetch metadata; its
            ``jobs_excluded_no_label`` count is shown in the overview when
            non-zero.

    Returns:
        The markdown document as a single newline-joined string.
    """
    if report_time is None:
        report_time = datetime.now(timezone.utc)

    lines: list[str] = []

    # Header
    workflow_list = ", ".join(f"`{w}`" for w in workflows)
    lines.append("# CI Runner Fleet Report")
    lines.append("")
    lines.append(f"**Workflows:** {workflow_list}")
    lines.append(f"**Time window:** Last {hours} hours")
    lines.append(f"**Generated:** {generated_time} UTC")
    excluded_no_label = (
        fetch_metadata.get("jobs_excluded_no_label", 0) if fetch_metadata else 0
    )
    lines.append(f"**Total jobs analyzed:** {len(jobs)}")
    lines.append("")
    lines.append(
        "> All times are in UTC. Jobs on `ubuntu-latest` and jobs with no runner label "
        "(waiting/unassigned) are excluded."
    )
    lines.append("")
    append_fetch_metadata_notice(lines, fetch_metadata, workflows)

    if not jobs:
        lines.append("> No self-hosted runner jobs found in the time window.")
        return "\n".join(lines)

    # --- Fleet Overview ---
    unique_labels = {_get_runner_label(j) for j in jobs}
    completed_jobs = [j for j in jobs if j.get("status") == "completed"]
    lines.append("## Fleet Overview")
    lines.append("")
    lines.append("| Metric | Value |")
    lines.append("|--------|-------|")
    lines.append(f"| Total runner labels seen | {len(unique_labels)} |")
    lines.append(f"| Total jobs analyzed | {len(jobs)} |")
    lines.append(f"| Completed jobs | {len(completed_jobs)} |")
    if excluded_no_label:
        lines.append(f"| Excluded (no runner label) | {excluded_no_label} |")
    lines.append(f"| Time window | {hours}h |")
    lines.append("")

    # --- Concurrency by Runner Label ---
    concurrency = analyze_concurrency(jobs, report_time)
    if concurrency:
        lines.append("## Concurrency by Runner Label")
        lines.append("")
        lines.append(
            "| Runner Label | Peak Concurrent | Avg Concurrent | Total Jobs | Avg Queue | P50 Queue | P99 Queue | Avg Duration |"
        )
        lines.append(
            "|-------------|----------------|---------------|-----------|-----------|-----------|-----------|-------------|"
        )
        for label in sorted(concurrency, key=_runner_label_sort_key):
            c = concurrency[label]
            lines.append(
                f"| `{label}` | **{c['peak']}** | {c['avg_concurrent']} "
                f"| {c['total_jobs']} "
                f"| {_format_duration_seconds(c['avg_queue_seconds'])} "
                f"| {_format_duration_seconds(c['p50_queue_seconds'])} "
                f"| {_format_duration_seconds(c['p99_queue_seconds'])} "
                f"| {_format_duration_seconds(c['avg_duration_seconds'])} |"
            )
        lines.append("")

    # --- Busy Periods ---
    busy_periods = analyze_busy_periods(jobs, report_time=report_time)
    if busy_periods:
        lines.append("## Busy Periods (UTC)")
        lines.append("")
        lines.append("| Hour (UTC) | Jobs Started | Avg Queue Time | Load |")
        lines.append("|-----------|-------------|---------------|------|")
        for bp in busy_periods:
            # Hours in which nothing started are left out of the table.
            if bp["jobs_started"] == 0:
                continue
            load_display = (
                f"**{bp['load']}**" if bp["load"] in ("Peak", "Busy") else bp["load"]
            )
            lines.append(
                f"| {bp['hour_label']} | {bp['jobs_started']} "
                f"| {_format_duration_seconds(bp['avg_queue_seconds'])} "
                f"| {load_display} |"
            )
        lines.append("")

        peak_hours = [bp for bp in busy_periods if bp["load"] == "Peak"]
        quiet_hours = [
            bp
            for bp in busy_periods
            if bp["load"] == "Quiet" and bp["jobs_started"] > 0
        ]
        if peak_hours:
            labels = ", ".join(bp["hour_label"] for bp in peak_hours)
            lines.append(f"> **Peak hours:** {labels}")
            lines.append("")
        if quiet_hours:
            labels = ", ".join(bp["hour_label"] for bp in quiet_hours)
            lines.append(f"> **Quiet hours:** {labels}")
            lines.append("")

    # --- Runner Utilization Snapshots ---
    util_snapshots = analyze_utilization_snapshots(jobs, report_time, hours=hours)
    if util_snapshots:
        lines.append("## Runner Utilization (15-min snapshots)")
        lines.append("")
        lines.append(
            "> Point-in-time snapshot every 15 minutes (UTC). "
            "**Running** = jobs with a runner assigned and executing. "
            "**Queued** = jobs waiting for a runner."
        )
        lines.append("")

        for label in sorted(util_snapshots, key=_runner_label_sort_key):
            snapshots = util_snapshots[label]
            lines.append(f"### `{label}`")
            lines.append("")
            lines.append("| Time (UTC) | Running | Queued |")
            lines.append("|-----------|---------|--------|")
            for s in snapshots:
                lines.append(f"| {s['time']} | **{s['running']}** | {s['queued']} |")
            lines.append("")

    # --- Queue Time Distribution ---
    queue_dist = analyze_queue_distribution(jobs, report_time=report_time)
    if queue_dist:
        lines.append("## Queue Time Distribution by Runner Label")
        lines.append("")
        for label in sorted(queue_dist, key=_runner_label_sort_key):
            dist = queue_dist[label]
            lines.append(f"### `{label}`")
            lines.append("")
            lines.append(
                f"> **Samples:** {dist['total']} | **P50:** {_format_duration_seconds(dist['p50'])} | **P90:** {_format_duration_seconds(dist['p90'])} | **P99:** {_format_duration_seconds(dist['p99'])}"
            )
            lines.append("")
            lines.append("| Queue Time Range | Count | Percentage |")
            lines.append("|-----------------|-------|------------|")
            for b in dist["buckets"]:
                # Text bar chart: one '#' per full three percentage points.
                bar = "#" * int(b["percentage"] / 3)
                lines.append(
                    f"| {b['range']} | {b['count']} | {b['percentage']}% {bar} |"
                )
            lines.append("")

    # --- Failed Jobs Detail (collapsible) ---
    # Stuck jobs are reported in their own section, so they are left out here.
    failed_jobs = [
        j
        for j in jobs
        if j.get("conclusion") == "failure" and not j.get("is_stuck", False)
    ]
    if failed_jobs:
        lines.append("<details>")
        lines.append(
            f"<summary><strong>Failed Jobs ({len(failed_jobs)} total)</strong> - Click to expand</summary>"
        )
        lines.append("")
        lines.append(
            "| Job Name | Runner | Workflow | Queue | Duration | PR/Branch | Link |"
        )
        lines.append(
            "|----------|--------|---------|-------|----------|-----------|------|"
        )
        for j in sorted(failed_jobs, key=lambda x: x["created_at"], reverse=True):
            queue = calculate_queue_time(j, report_time)
            dur = calculate_duration(j["started_at"], j["completed_at"])
            # Fall back to "-" for a missing *or empty* branch/runner so the
            # table never shows a literal ``None``.
            if j.get("pr_number"):
                pr_info = f"PR#{j['pr_number']}"
            else:
                pr_info = j.get("branch") or "-"
            runner = j.get("runner_name") or "-"
            url = j.get("html_url", "")
            wf = j.get("workflow", "-")
            lines.append(
                f"| `{j['job_name']}` | `{runner}` | `{wf}` "
                f"| {queue} | {dur} | {pr_info} | [View]({url}) |"
            )
        lines.append("")
        lines.append("</details>")
        lines.append("")

    # --- Stuck Jobs ---
    stuck_jobs = [j for j in jobs if j.get("is_stuck", False)]
    if stuck_jobs:
        lines.append("## Stuck/Ghost Jobs")
        lines.append("")
        lines.append(
            "> Jobs showing `in_progress` but have no runner assigned or workflow run is cancelled"
        )
        lines.append("")
        lines.append(
            "| Job Name | Job Status | Run Status | Runner | Workflow | Link |"
        )
        lines.append("|----------|-----------|-----------|--------|---------|------|")
        for j in sorted(stuck_jobs, key=lambda x: x["created_at"], reverse=True):
            run_info = f"{j.get('run_status', '-')}/{j.get('run_conclusion', '-')}"
            # Ghost jobs may have no runner at all; show "-" instead of
            # ``None``.
            runner = j.get("runner_name") or "-"
            url = j.get("html_url", "")
            wf = j.get("workflow", "-")
            lines.append(
                f"| `{j['job_name']}` | {j['status']} | {run_info} "
                f"| `{runner}` | `{wf}` | [View]({url}) |"
            )
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def _render_jobs_csv(results: list[dict], report_time: datetime) -> str:
    """Render job records as CSV text, newest first, without a trailing newline.

    Rows are written with the ``csv`` module so that commas, quotes or
    newlines inside a value (for example in a branch or job name) are quoted
    and escaped instead of corrupting the row. Values are stringified first,
    so simple values read the same as with plain string formatting.
    """
    import csv
    import io

    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(
        [
            "job_name",
            "status",
            "is_stuck",
            "conclusion",
            "created_at",
            "started_at",
            "queue_time",
            "duration",
            "runner",
            "run_status",
            "run_conclusion",
            "pr_number",
            "branch",
            "url",
        ]
    )
    for r in sorted(results, key=lambda x: x["created_at"], reverse=True):
        row = [
            r["job_name"],
            r["status"],
            "true" if r.get("is_stuck", False) else "false",
            r["conclusion"],
            r["created_at"],
            r["started_at"],
            calculate_queue_time(r, report_time),
            calculate_duration(r["started_at"], r["completed_at"]),
            r["runner_name"],
            r.get("run_status", "-"),
            r.get("run_conclusion", "-"),
            r["pr_number"] or "",
            r["branch"],
            r["html_url"],
        ]
        writer.writerow([str(value) for value in row])
    return buffer.getvalue().rstrip("\n")


def _append_to_step_summary(md_content: str) -> bool:
    """Append markdown to the file named by ``GITHUB_STEP_SUMMARY``.

    Returns ``False`` without writing anything when the variable is unset or
    empty, ``True`` after a successful write.
    """
    summary_file = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_file:
        return False
    with open(summary_file, "a", encoding="utf-8") as f:
        f.write(md_content)
        f.write("\n")
    print("Summary written to GITHUB_STEP_SUMMARY", file=sys.stderr)
    return True


def main():
    """Command-line entry point.

    Runs in one of three modes, chosen from the arguments:

    * ``--dump-data-file``: fetch a snapshot, save it, report on stderr
      whether it is complete, and exit.
    * ``--runner-report``: print the runner fleet markdown report for the
      given workflows.
    * otherwise: print the per-job report for ``--job`` in the selected
      ``--output`` format.

    Job data comes from ``--input-data-file`` when given, otherwise it is
    fetched (which requires the ``gh`` CLI check to pass). ``--output-file``
    and ``--summary`` additionally write the report to a file and to
    ``GITHUB_STEP_SUMMARY``.
    """
    # Capture the time when the command is run (both datetime and formatted string)
    report_time = datetime.now(timezone.utc)
    report_generated_time = report_time.strftime("%Y-%m-%d %H:%M:%S")

    parser = argparse.ArgumentParser(description="Query GitHub Actions job status")
    parser.add_argument(
        "--repo",
        default="sgl-project/sglang",
        help="GitHub repo (default: sgl-project/sglang)",
    )
    parser.add_argument(
        "--job",
        required=False,
        default=None,
        help="Job name filter (required unless --runner-report is used)",
    )
    parser.add_argument(
        "--workflow",
        default="pr-test-amd.yml",
        help="Workflow file name, or comma-separated list for --runner-report (default: pr-test-amd.yml)",
    )
    parser.add_argument(
        "--hours",
        type=int,
        default=24,
        help="Time window in hours (default: 24)",
    )
    parser.add_argument(
        "--status",
        choices=["in_progress", "queued", "completed", "waiting"],
        help="Filter by job status",
    )
    parser.add_argument(
        "--output",
        choices=["table", "csv", "json", "markdown"],
        default="table",
        help="Output format (default: table)",
    )
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Write markdown output to GITHUB_STEP_SUMMARY",
    )
    parser.add_argument(
        "--output-file",
        type=str,
        help="Write output to file",
    )
    parser.add_argument(
        "--runner-report",
        action="store_true",
        help="Generate runner fleet analytics report across all jobs (no --job filter needed)",
    )
    parser.add_argument(
        "--input-data-file",
        type=str,
        help="Load a prefetched Actions snapshot JSON instead of calling gh api",
    )
    parser.add_argument(
        "--dump-data-file",
        type=str,
        help="Fetch Actions data once and save it as a snapshot JSON file",
    )
    args = parser.parse_args()

    if args.input_data_file and args.dump_data_file:
        parser.error("--input-data-file and --dump-data-file cannot be used together")

    if not args.runner_report and not args.job and not args.dump_data_file:
        parser.error(
            "--job is required unless --runner-report or --dump-data-file is specified"
        )

    workflows = [w.strip() for w in args.workflow.split(",") if w.strip()]

    # The gh CLI is only needed when data has to be fetched.
    if not args.input_data_file and not check_gh_cli_available():
        sys.exit(1)

    snapshot = None
    repo = args.repo
    fetch_metadata = None

    if args.input_data_file:
        snapshot = load_snapshot(args.input_data_file)
        repo = snapshot.get("repo", args.repo)
        fetch_metadata = snapshot.get("fetch_metadata")
        generated_at = snapshot.get("generated_at")
        if generated_at:
            # Report relative to when the snapshot was taken, not to now.
            report_time = parse_time(generated_at) or report_time
            report_generated_time = report_time.strftime("%Y-%m-%d %H:%M:%S")

    # --- Snapshot dump mode ---
    if args.dump_data_file:
        snapshot = fetch_all_jobs_snapshot(repo, workflows, args.hours)
        save_snapshot(args.dump_data_file, snapshot)
        summary = summarize_fetch_metadata(snapshot.get("fetch_metadata"), workflows)
        print(f"Snapshot written to {args.dump_data_file}", file=sys.stderr)
        if summary and summary["incomplete"]:
            print(
                "Warning: Snapshot is incomplete due to rate limit/API fetch failures.",
                file=sys.stderr,
            )
            if summary["known_runs"] > 0:
                print(
                    f"Known runs fetched successfully: {summary['runs_with_jobs']}/{summary['known_runs']}",
                    file=sys.stderr,
                )
            print(
                f"Skipped runs with unknown job counts: {len(summary['skipped_runs'])}",
                file=sys.stderr,
            )
        return

    # --- Runner fleet report mode ---
    if args.runner_report:
        if snapshot is None:
            snapshot = fetch_all_jobs_snapshot(repo, workflows, args.hours)
            fetch_metadata = snapshot.get("fetch_metadata")

        workflow_set = set(workflows)
        all_snapshot_jobs = [
            job for job in snapshot["jobs"] if job.get("workflow") in workflow_set
        ]
        # Only jobs that carry runner labels take part in the fleet report.
        jobs = [job for job in all_snapshot_jobs if job.get("labels")]
        if fetch_metadata is None:
            fetch_metadata = {}
        if "jobs_excluded_no_label" not in fetch_metadata:
            fetch_metadata["jobs_excluded_no_label"] = len(all_snapshot_jobs) - len(
                jobs
            )

        md_content = format_runner_report_markdown(
            jobs,
            workflows,
            args.hours,
            report_generated_time,
            report_time,
            fetch_metadata,
        )

        print(md_content)

        if args.output_file:
            with open(args.output_file, "w", encoding="utf-8") as f:
                f.write(md_content)
            print(f"\nOutput written to {args.output_file}", file=sys.stderr)

        if args.summary and not _append_to_step_summary(md_content):
            print(
                "Warning: GITHUB_STEP_SUMMARY not set, markdown printed above.",
                file=sys.stderr,
            )
        return

    # --- Original per-job report mode ---
    if snapshot is None:
        snapshot = fetch_all_jobs_snapshot(repo, [args.workflow], args.hours)
        fetch_metadata = snapshot.get("fetch_metadata")

    results = filter_jobs(snapshot["jobs"], args.job, args.workflow, args.status)

    def render_markdown() -> str:
        # Single place that knows the arguments of the per-job markdown report.
        return format_markdown(
            results,
            repo,
            args.job,
            args.hours,
            report_generated_time,
            report_time,
            fetch_metadata,
            args.workflow,
        )

    output_content = None

    if args.output == "table":
        print_table(results, repo, report_generated_time, report_time)
    elif args.output == "csv":
        output_content = _render_jobs_csv(results, report_time)
        print(output_content)
    elif args.output == "json":
        json_results = []
        for r in sorted(results, key=lambda x: x["created_at"], reverse=True):
            r_copy = r.copy()
            r_copy["queue_time"] = calculate_queue_time(r, report_time)
            r_copy["duration"] = calculate_duration(r["started_at"], r["completed_at"])
            r_copy["created_at_formatted"] = format_time(r["created_at"])
            r_copy["started_at_formatted"] = format_time(r["started_at"])
            json_results.append(r_copy)
        output_content = json.dumps(json_results, indent=2)
        print(output_content)
    elif args.output == "markdown":
        output_content = render_markdown()
        print(output_content)

    if args.output_file:
        if output_content:
            with open(args.output_file, "w", encoding="utf-8") as f:
                f.write(output_content)
            print(f"\nOutput written to {args.output_file}", file=sys.stderr)
        else:
            # Table output is printed directly and never captured, so there
            # is nothing to write; say so instead of silently skipping.
            print(
                "Warning: --output-file is ignored for table output; use --output csv, json, or markdown.",
                file=sys.stderr,
            )

    if args.summary:
        # Reuse the markdown that was just rendered instead of building it twice.
        md_content = output_content if args.output == "markdown" else render_markdown()
        if not _append_to_step_summary(md_content):
            print(
                "Warning: GITHUB_STEP_SUMMARY not set, printing markdown instead:",
                file=sys.stderr,
            )
            print(md_content)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|