mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
2751 lines
122 KiB
Python
2751 lines
122 KiB
Python
"""
|
|
SGLang CI Consecutive Failures Analyzer
|
|
|
|
Monitors GitHub Actions workflows for consecutive test failures and runner issues.
|
|
Detects failure streaks, tracks job health, identifies problematic runners, and generates alerts.
|
|
|
|
Features:
|
|
- Analyzes all jobs in PR Test workflow (excluding administrative jobs)
|
|
- Tracks consecutive failure streaks for each job
|
|
- Monitors runner health and failure rates
|
|
- Identifies whether failures are code-related or infrastructure-related
|
|
- Generates detailed reports with actionable recommendations
|
|
|
|
Usage:
|
|
python ci_failures_analysis.py --token <GITHUB_TOKEN> --limit 100
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
import requests
|
|
|
|
|
|
class SGLangFailuresAnalyzer:
|
|
"""Analyzes consecutive failures in GitHub Actions workflows."""
|
|
|
|
def __init__(self, token: str):
    """Create an analyzer bound to the ``sgl-project/sglang`` repository.

    Args:
        token: GitHub API token. It is stored on the instance and sent as
            ``Authorization: token <token>`` by ``self.session``.
    """
    self.token = token
    self.repo = "sgl-project/sglang"
    self.base_url = "https://api.github.com"

    # Default headers for every request issued through the shared session.
    self.headers = dict(
        Authorization=f"token {token}",
        Accept="application/vnd.github.v3+json",
    )
    self.headers["User-Agent"] = "SGLang-Failures-Analyzer/1.0"

    self.session = requests.Session()
    self.session.headers.update(self.headers)

    # Jobs to EXCLUDE from analysis (administrative/setup jobs, not actual
    # tests). Entries are matched as job-name prefixes.
    self.excluded_jobs = [
        "check-changes",
        "pr-test-finish",  # Nvidia workflow teardown
        "pr-test-amd-finish",  # AMD workflow teardown
        "call-gate",
        "pr-gate",
        "check-all-jobs",
    ]

    # job_id -> parsed test summary (or None), filled while analyzing logs.
    self.test_summaries = {}
|
|
|
|
def get_recent_runs(
    self,
    limit: int = 500,
    workflow_filter: List[str] = None,
    filters: Optional[Dict[str, str]] = None,
) -> List[Dict]:
    """
    Fetch recent completed workflow runs from GitHub using workflow file names.

    The API returns at most 100 runs per page, so results are paged through
    with the ``page`` parameter until ``limit`` runs have been collected for
    a workflow or the API runs out of results.

    Args:
        limit: Maximum number of runs to fetch per workflow
        workflow_filter: List of workflow filenames; ``None`` is treated as
            an empty list
        filters: Optional dict of API filters (e.g., {"event": "schedule"}, {"branch": "main"})

    Returns:
        All collected runs, grouped in ``workflow_filter`` order. If a
        request fails, the error is printed and whatever was fetched for
        that workflow before the failure is kept.
    """
    workflow_files = list(workflow_filter or [])

    filter_desc = f"workflows: {', '.join(workflow_files)}"
    if filters:
        filter_desc += f", filters: {filters}"

    print(f"Fetching {limit} runs per workflow ({filter_desc})...")

    all_runs = []
    per_page = min(limit, 100)  # GitHub caps per_page at 100

    for workflow_file in workflow_files:
        print(f"Fetching runs for {workflow_file}...")

        # Use workflow filename directly - much simpler!
        url = f"{self.base_url}/repos/{self.repo}/actions/workflows/{workflow_file}/runs"

        workflow_runs: List[Dict] = []
        page = 1
        try:
            while len(workflow_runs) < limit:
                params = {"per_page": per_page, "status": "completed"}
                # Apply any additional filters
                if filters:
                    params.update(filters)
                params["page"] = page

                response = self.session.get(url, params=params, timeout=30)
                response.raise_for_status()
                batch = response.json().get("workflow_runs", [])
                workflow_runs.extend(batch)

                # A short page means there is nothing left to fetch.
                if len(batch) < per_page:
                    break
                page += 1
        except requests.exceptions.RequestException as e:
            print(f"Error fetching runs for {workflow_file}: {e}")
            if not workflow_runs:
                continue

        workflow_runs = workflow_runs[:limit]
        print(f" Found {len(workflow_runs)} runs for {workflow_file}")
        all_runs.extend(workflow_runs)

    print(f"Collected {len(all_runs)} total runs")
    return all_runs
|
|
|
|
def get_jobs_for_run(self, run_id: int) -> List[Dict]:
    """Return every job of one workflow run, following ``Link`` pagination.

    Args:
        run_id: Identifier of the workflow run.

    Returns:
        The concatenated ``jobs`` arrays of all pages, or an empty list if
        any request fails (the error is printed).
    """
    collected: List[Dict] = []
    page_url = f"{self.base_url}/repos/{self.repo}/actions/runs/{run_id}/jobs"
    query = {"per_page": 100}  # Max per page

    try:
        while page_url:
            reply = self.session.get(page_url, params=query, timeout=30)
            reply.raise_for_status()
            collected += reply.json().get("jobs", [])

            # The next page, if any, is the Link entry tagged rel="next".
            following = None
            for entry in (reply.headers.get("Link", "") or "").split(", "):
                if 'rel="next"' not in entry:
                    continue
                try:
                    following = entry.split(";")[0].strip("<>")
                except Exception as e:
                    print(f"Error parsing Link header: {entry}, error: {e}")
                    following = None
                break

            # The follow-up URL already carries its own query string.
            page_url, query = following, {}
    except requests.exceptions.RequestException as e:
        print(f"Error fetching jobs for run {run_id}: {e}")
        return []

    return collected
|
|
|
|
def get_job_logs(self, job_id: int) -> str:
    """Download the log text of one job.

    Args:
        job_id: Identifier of the job whose logs are wanted.

    Returns:
        The response body when the API answers 200; otherwise an empty
        string. Request errors are printed and also yield an empty string.
    """
    logs_url = f"{self.base_url}/repos/{self.repo}/actions/jobs/{job_id}/logs"
    try:
        reply = self.session.get(logs_url, timeout=60, allow_redirects=True)
        return reply.text if reply.status_code == 200 else ""
    except requests.exceptions.RequestException as e:
        print(f"Error fetching logs for job {job_id}: {e}")
        return ""
|
|
|
|
def get_online_runners(self) -> Dict[str, Dict]:
    """
    Fetch the repository's self-hosted runners and count them per label.

    The token comes from the ``GH_PAT_FOR_RUNNER_ADMIN`` environment variable
    when set, otherwise from ``self.token``. Requests are made with
    ``requests.get`` directly rather than through ``self.session``.

    Returns:
        Dict mapping each runner label to its counts, e.g.
        {"8-gpu-h200-runner": {"online": 2, "total": 3, "busy": 1}}.
        A runner with several labels is counted once under each of them.
        Returns an empty dict on a non-200 response or a request error.
    """
    print("Fetching self-hosted runner status...")

    # Common labels to exclude (not useful for grouping)
    generic_labels = {"self-hosted", "Linux", "X64", "ARM64"}

    try:
        # Use separate admin token if available (needs repo admin scope)
        admin_token = os.environ.get("GH_PAT_FOR_RUNNER_ADMIN") or self.token
        auth_headers = {
            "Authorization": f"token {admin_token}",
            "Accept": "application/vnd.github.v3+json",
        }

        fetched = []
        page_url = f"{self.base_url}/repos/{self.repo}/actions/runners"
        query = {"per_page": 100}

        while page_url:
            reply = requests.get(
                page_url, headers=auth_headers, params=query, timeout=30
            )
            if reply.status_code != 200:
                print(
                    f" Warning: Runner API returned {reply.status_code}: {reply.text[:200]}"
                )
                return {}
            fetched.extend(reply.json().get("runners", []))

            # The next page, if any, is the Link entry tagged rel="next".
            following = None
            for entry in (reply.headers.get("Link", "") or "").split(", "):
                if 'rel="next"' not in entry:
                    continue
                try:
                    following = entry.split(";")[0].strip("<>")
                except Exception as e:
                    print(f"Error parsing Link header: {entry}, error: {e}")
                    following = None
                break

            # The follow-up URL already carries its own query string.
            page_url, query = following, {}

        print(f" Found {len(fetched)} self-hosted runners")

        per_label = defaultdict(lambda: {"online": 0, "total": 0, "busy": 0})
        for runner in fetched:
            is_online = runner.get("status") == "online"
            is_busy = runner.get("busy", False)
            for label in runner.get("labels", []):
                label_name = label.get("name", "")
                if label_name in generic_labels:
                    continue
                counts = per_label[label_name]
                counts["total"] += 1
                if is_online:
                    counts["online"] += 1
                if is_busy:
                    counts["busy"] += 1

        return dict(per_label)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching runners: {e}")
        return {}
|
|
|
|
def find_last_running_test(self, logs: str) -> Optional[Dict]:
    """
    Guess which test was running when the logs stopped (timeout/exit cases).

    The last line mentioning ``server_args:`` (any case) or ``server_args =``
    is taken as the marker. Up to ten lines directly above it are scanned,
    nearest first, for a test file in pytest form (``path.py::``) or command
    form (``python3 path.py``).

    Returns:
        {"test_file": <basename>, "full_path": <path as logged>,
        "context": "last_running"} for the first hit, or None if there is
        no marker or no test file near it.
    """
    import re

    # Strip ANSI escape codes
    cleaned = re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", logs)
    log_lines = cleaned.split("\n")

    # Index of the last marker line, searching from the bottom.
    marker = next(
        (
            idx
            for idx in reversed(range(len(log_lines)))
            if "server_args:" in log_lines[idx].lower()
            or "server_args =" in log_lines[idx]
        ),
        None,
    )
    if marker is None:
        return None

    matchers = (
        re.compile(r"(\S+\.py)::"),  # pytest format: something.py::
        re.compile(r"python3?\s+(\S+\.py)"),  # python3 /path/to/test.py
    )

    for candidate in reversed(log_lines[max(0, marker - 10) : marker]):
        for matcher in matchers:
            hit = matcher.search(candidate)
            if hit is None:
                continue
            full_path = hit.group(1)
            test_file = full_path.rsplit("/", 1)[-1]
            if test_file.endswith(".py"):
                return {
                    "test_file": test_file,
                    "full_path": full_path,
                    "context": "last_running",
                }

    return None
|
|
|
|
def parse_test_summary(self, logs: str) -> Optional[Dict]:
    """
    Extract the ``Test Summary: X/Y passed`` block from job logs.

    Returns:
        {"passed": X, "total": Y, "failed_tests": [...]} when a summary
        line is present. Each failed test is {"test_file", "full_path"},
        taken from the ``.py`` paths listed after ``FAILED:``.
        When there is no summary line, the last running test is looked up
        instead. If one is found, the result is {"passed": 0, "total": 0,
        "failed_tests": [that test], "incomplete": True}; otherwise None.
    """
    import re

    # Strip ANSI escape codes that GitHub Actions logs may contain
    logs = re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", logs)

    # Pattern matches: "Test Summary: 7/8 passed"
    counts = re.search(r"Test Summary:\s*(\d+)/(\d+)\s*passed", logs)
    if counts is None:
        # No summary found - try to find last running test
        inferred = self.find_last_running_test(logs)
        if not inferred:
            return None
        return {
            "passed": 0,
            "total": 0,
            "failed_tests": [inferred],
            "incomplete": True,  # Mark that this is incomplete/inferred
        }

    try:
        passed, total = (int(number) for number in counts.groups())
    except (ValueError, TypeError) as e:
        print(f"Error parsing test summary numbers: {e}")
        return None

    # The character before "FAILED:" (originally a cross mark) may be mangled
    # by encoding, so any single character is accepted there. The section
    # ends at a run of ten or more "=" or at the end of the log.
    section = re.search(r".?\s*FAILED:\s*\n(.*?)(?:={10,}|$)", logs, re.DOTALL)
    listed_paths = re.findall(r"(\S+\.py)", section.group(1)) if section else []

    failed_tests = [
        {"test_file": path.rsplit("/", 1)[-1], "full_path": path}
        for path in listed_paths
    ]

    return {
        "passed": passed,
        "total": total,
        "failed_tests": failed_tests,
    }
|
|
|
|
def analyze_test_failures_for_job(self, recent_runs: List[Dict]) -> Dict[str, Dict]:
    """
    Analyze test-level failures for a specific job across its recent runs.

    Logs are fetched and parsed for every failed run, and each parsed summary
    (or None) is stored in ``self.test_summaries`` under the job id. A test
    file that appears more than once in a single run's failed list is
    counted only once for that run.

    Args:
        recent_runs: List of recent run info dicts with job_id, job_url,
            conclusion, etc. Streaks are accumulated in list order, so the
            list is expected to be oldest first.

    Returns:
        ``{"_no_test_summary": True}`` if no failed run produced a summary
        with failed tests. Otherwise a dict mapping test_file -> {
            "total_failures": int,
            "current_streak": int,
            "recent_runs": last 10 of [{"run_number": ..., "job_url": ...,
                                        "status": ..., "failed": bool or None}, ...]
        }
    """
    test_failures: Dict[str, Dict] = defaultdict(
        lambda: {"total_failures": 0, "current_streak": 0, "recent_runs": []}
    )

    def _mark_tracked_tests(run_info, status, failed, skip=(), reset_streak=False):
        """Append one run record to every tracked test not listed in *skip*."""
        for tracked_file, tracked in test_failures.items():
            if tracked_file in skip:
                continue
            if reset_streak:
                tracked["current_streak"] = 0
            tracked["recent_runs"].append(
                {
                    "run_number": run_info.get("run_number"),
                    "job_url": run_info.get("job_url"),
                    "status": status,
                    "failed": failed,
                }
            )

    # Track whether we successfully parsed any test summaries
    parsed_any_test_summary = False

    for run_info in recent_runs:
        job_id = run_info.get("job_id")
        conclusion = run_info.get("conclusion")

        if conclusion == "failure" and job_id:
            # For failed jobs, fetch logs and parse test failures
            logs = self.get_job_logs(job_id)
            test_summary = self.parse_test_summary(logs) if logs else None
            self.test_summaries[job_id] = test_summary

            if not test_summary:
                # Debug logging for failed jobs without test summary
                job_name = run_info.get("job_name", "unknown")
                run_number = run_info.get("run_number", "unknown")
                job_url = run_info.get("job_url", "N/A")
                log_size = len(logs) if logs else 0
                print(
                    f" ⚠️ Job failed without test summary: {job_name} (Run #{run_number})"
                )
                print(f" URL: {job_url}")
                print(
                    f" Log size: {log_size} chars, Logs available: {bool(logs)}"
                )
                if logs:
                    # Show a snippet of the logs to help debug
                    log_snippet = logs[-500:]
                    print(f" Last 500 chars of logs: {log_snippet[:200]}...")
            elif test_summary.get("incomplete"):
                # Log when we inferred a test from timeout
                job_name = run_info.get("job_name", "unknown")
                run_number = run_info.get("run_number", "unknown")
                inferred_tests = [
                    t["test_file"] for t in test_summary.get("failed_tests", [])
                ]
                print(
                    f" ⏱️ Inferred timeout test for {job_name} (Run #{run_number}): {inferred_tests}"
                )

            if test_summary and test_summary["failed_tests"]:
                parsed_any_test_summary = True
                failed_test_files = set()

                for failed_test in test_summary["failed_tests"]:
                    test_file = failed_test["test_file"]
                    # The same file can be listed several times in one run
                    # (e.g. under different paths); one run is one failure.
                    if test_file in failed_test_files:
                        continue
                    failed_test_files.add(test_file)

                    entry = test_failures[test_file]
                    entry["total_failures"] += 1
                    entry["current_streak"] += 1

                    # Mark if this is a "last running" test (inferred from timeout)
                    is_last_running = failed_test.get("context") == "last_running"
                    entry["recent_runs"].append(
                        {
                            "run_number": run_info.get("run_number"),
                            "job_url": run_info.get("job_url"),
                            "status": "⏱️" if is_last_running else "❌",
                            "failed": True,
                            "last_running": is_last_running,
                        }
                    )

                # Tests seen before that did not fail this time get a pass
                # (the job failed, but for other reasons).
                _mark_tracked_tests(
                    run_info, "✅", False, skip=failed_test_files, reset_streak=True
                )
            else:
                # Job failed but no test summary found - don't reset streaks, mark as unknown
                _mark_tracked_tests(run_info, "⚪", None)
        elif conclusion == "success":
            # Job passed - all tests passed, reset streaks
            _mark_tracked_tests(run_info, "✅", False, reset_streak=True)
        else:
            # Other conclusion (cancelled, skipped, etc.) - don't reset streaks, mark as unknown
            _mark_tracked_tests(run_info, "⚪", None)

        time.sleep(0.1)  # Rate limiting for log fetches

    # If we couldn't parse any test summaries, return special marker
    if not parsed_any_test_summary:
        return {"_no_test_summary": True}

    result = {}
    for test_file, data in test_failures.items():
        current_streak = data["current_streak"]
        run_records = data["recent_runs"]

        if current_streak > 0:
            # Drop the test unless one of its last `current_streak` records
            # is an actual failure (not just cancelled/skipped/unknown).
            # NOTE(review): the window is sized by the failure count, but
            # run_records also holds unknown records, so trailing unknown
            # runs can push the real failures out of the window and hide the
            # test - confirm this is intended.
            streak_runs = run_records[-current_streak:]
            if not any(record.get("failed") is True for record in streak_runs):
                continue

        result[test_file] = {
            "total_failures": data["total_failures"],
            "current_streak": current_streak,
            "recent_runs": run_records[-10:],  # Keep last 10
        }

    return result
|
|
|
|
def analyze_runner_health(
    self, runs: List[Dict]
) -> Tuple[Dict[str, Dict], Dict[str, Dict], Dict[str, Dict], Dict[str, Dict]]:
    """
    Analyze runner health by tracking failures per runner and consecutive failure streaks.

    Runs are processed oldest first (by ``created_at``). For each run the
    jobs are fetched with ``self.get_jobs_for_run``. Jobs whose name starts
    with an entry of ``self.excluded_jobs`` and jobs without labels are
    ignored. A runner counts as failing in a run if at least one of its jobs
    failed, and as recovered if it had only successes.

    Runners are tracked at two levels: by label set (the job's labels joined
    with ", ") and by instance ("<labels>_<runner_id>", only for jobs that
    report a runner id). Queue time is ``started_at - created_at`` of a job
    and is sampled only for jobs with a runner id.

    Args:
        runs: Workflow run dicts as returned by the GitHub API.

    Returns:
        Tuple of (runner_stats, runner_instance_data, runner_streak_data, runner_instance_streak_data)
        - runner_stats: Overall stats per runner (failure rate, total jobs, etc.)
        - runner_instance_data: Per-instance breakdown of failures
        - runner_streak_data: Consecutive failure streaks per runner label
        - runner_instance_streak_data: Consecutive failure streaks per runner instance
    """
    print("\nAnalyzing runner health and consecutive failures...")

    def _queue_stats(samples):
        """Return (average, 90th-percentile sample); (0, 0) when empty."""
        if not samples:
            return 0, 0
        ordered = sorted(samples)
        p90_index = min(int(len(ordered) * 0.9), len(ordered) - 1)
        return sum(samples) / len(samples), ordered[p90_index]

    def _failure_rate(failed, total):
        """Return the failure percentage, or 0 when there were no jobs."""
        return (failed / total * 100) if total > 0 else 0

    def _parse_timestamp(value):
        """Parse an API timestamp such as ``2024-01-01T00:00:00Z``."""
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    # Sort runs by created_at (oldest first); a null timestamp sorts first.
    sorted_runs = sorted(runs, key=lambda x: x.get("created_at") or "")
    excluded_prefixes = tuple(self.excluded_jobs)

    # Overall statistics per runner label set
    runner_total_jobs: Dict[str, int] = defaultdict(int)
    runner_failed_jobs: Dict[str, int] = defaultdict(int)
    runner_job_failures: Dict[str, Dict[str, int]] = defaultdict(
        lambda: defaultdict(int)
    )
    runner_job_totals: Dict[str, Dict[str, int]] = defaultdict(
        lambda: defaultdict(int)
    )
    runner_queue_times: Dict[str, List[float]] = defaultdict(list)

    # Statistics per runner instance
    runner_instance_queue_times: Dict[str, List[float]] = defaultdict(list)
    runner_instance_stats: Dict[str, Dict] = defaultdict(
        lambda: {"total_jobs": 0, "failed_jobs": 0, "jobs_failed": defaultdict(int)}
    )

    # Consecutive failures per runner label set
    runner_current_streak: Dict[str, int] = defaultdict(int)
    runner_max_streak: Dict[str, int] = defaultdict(int)
    runner_first_failure_in_streak: Dict[str, Optional[Dict]] = {}
    runner_last_failure_in_streak: Dict[str, Optional[Dict]] = {}
    runner_recovery_info: Dict[str, Optional[Dict]] = {}

    # Consecutive failures per runner instance
    runner_instance_current_streak: Dict[str, int] = defaultdict(int)
    runner_instance_max_streak: Dict[str, int] = defaultdict(int)
    runner_instance_first_failure: Dict[str, Optional[Dict]] = {}
    runner_instance_last_failure: Dict[str, Optional[Dict]] = {}
    runner_instance_recovery: Dict[str, Optional[Dict]] = {}
    runner_instance_all_failures_in_streak: Dict[str, List[Dict]] = defaultdict(
        list
    )
    runner_instance_all_failures: Dict[str, List[Dict]] = defaultdict(list)

    total_runs_processed = len(sorted_runs)
    for i, run in enumerate(sorted_runs, 1):
        if i % 50 == 0 or i == total_runs_processed:
            print(
                f"Processing run {i}/{total_runs_processed} for runner analysis: #{run.get('run_number')}"
            )

        # The API may send explicit nulls, so fall back with `or`, not with
        # .get() defaults (which only cover missing keys).
        head_commit = run.get("head_commit") or {}
        author = head_commit.get("author") or {}
        run_info = {
            "run_number": run.get("run_number"),
            "run_id": run.get("id"),
            "created_at": run.get("created_at"),
            "head_sha": (run.get("head_sha") or "")[:8],
            "author": author.get("name", "Unknown"),
            "url": f"https://github.com/{self.repo}/actions/runs/{run.get('id')}",
        }

        pull_requests = run.get("pull_requests", [])
        if pull_requests:
            run_info["pr_number"] = pull_requests[0].get("number")

        # Get jobs for this run
        jobs = self.get_jobs_for_run(run.get("id"))

        # Per-run outcome. The first failed job of each runner is kept for
        # linking; presence in these dicts also means "failed in this run".
        runner_first_failed_job: Dict[str, Dict] = {}
        runner_instance_first_failed_job: Dict[str, Dict] = {}
        succeeded_runners = set()
        succeeded_instances = set()

        for job in jobs:
            job_name = job.get("name", "")

            # Skip excluded jobs (administrative/setup jobs)
            if job_name.startswith(excluded_prefixes):
                continue

            # GitHub API might use different fields for runner info
            runner_details = job.get("runner") or {}
            runner_name = (
                job.get("runner_name") or runner_details.get("name") or "unknown"
            )
            runner_id = job.get("runner_id") or runner_details.get("id")

            # Labels are the primary identifier; they are more informative
            # than runner_name.
            runner_labels = job.get("labels", [])
            runner_key = ", ".join(runner_labels) if runner_labels else "unknown"

            # Skip jobs without runner information (likely skipped/queued jobs)
            if not runner_key or runner_key == "unknown":
                continue

            runner_total_jobs[runner_key] += 1
            runner_job_totals[runner_key][job_name] += 1

            # Track by specific runner instance
            runner_instance_key = f"{runner_key}_{runner_id}" if runner_id else None
            if runner_instance_key:
                instance = runner_instance_stats[runner_instance_key]
                instance["total_jobs"] += 1
                instance["runner_name"] = runner_name  # kept for reference

                # Queue time: from job creation to job start
                created_at = job.get("created_at")
                started_at = job.get("started_at")
                if created_at and started_at:
                    try:
                        created_time = _parse_timestamp(created_at)
                        started_time = _parse_timestamp(started_at)
                        queue_time_seconds = (
                            started_time - created_time
                        ).total_seconds()
                    except (ValueError, AttributeError, TypeError) as e:
                        # Skip the sample if timestamp parsing fails
                        print(
                            f"Error parsing timestamps for job {job.get('id')}: {e}"
                        )
                    else:
                        if queue_time_seconds >= 0:  # Sanity check
                            runner_instance_queue_times[runner_instance_key].append(
                                queue_time_seconds
                            )
                            runner_queue_times[runner_key].append(
                                queue_time_seconds
                            )

            conclusion = job.get("conclusion")

            if conclusion == "failure":
                runner_failed_jobs[runner_key] += 1
                runner_job_failures[runner_key][job_name] += 1

                failed_job = {
                    "job_id": job.get("id"),
                    "job_url": job.get("html_url", run_info["url"]),
                    "job_name": job_name,
                }
                runner_first_failed_job.setdefault(runner_key, failed_job)

                if runner_instance_key:
                    instance = runner_instance_stats[runner_instance_key]
                    instance["failed_jobs"] += 1
                    instance["jobs_failed"][job_name] += 1
                    runner_instance_first_failed_job.setdefault(
                        runner_instance_key, dict(failed_job)
                    )

            elif conclusion == "success":
                succeeded_runners.add(runner_key)
                if runner_instance_key:
                    succeeded_instances.add(runner_instance_key)

        # Update consecutive failure streaks based on run-level results.
        # Runners with neither a failure nor a success in this run are left
        # untouched.
        for runner_key in set(runner_first_failed_job) | succeeded_runners:
            if runner_key in runner_first_failed_job:
                runner_current_streak[runner_key] += 1
                failure_info = {
                    **run_info,
                    "runner_key": runner_key,
                    **runner_first_failed_job[runner_key],
                }

                # Track if this is the first failure in a new streak
                if runner_current_streak[runner_key] == 1:
                    runner_first_failure_in_streak[runner_key] = failure_info
                # Always update last failure to the most recent one
                runner_last_failure_in_streak[runner_key] = failure_info

                runner_max_streak[runner_key] = max(
                    runner_max_streak[runner_key], runner_current_streak[runner_key]
                )
            else:
                # Success - streak broken
                if runner_current_streak[runner_key] > 0:
                    runner_recovery_info[runner_key] = {
                        **run_info,
                        "runner_key": runner_key,
                        "streak_length": runner_current_streak[runner_key],
                    }

                runner_current_streak[runner_key] = 0
                runner_first_failure_in_streak[runner_key] = None
                runner_last_failure_in_streak[runner_key] = None

        # Update instance streaks
        for runner_instance_key in (
            set(runner_instance_first_failed_job) | succeeded_instances
        ):
            if runner_instance_key in runner_instance_first_failed_job:
                runner_instance_current_streak[runner_instance_key] += 1
                failure_info = {
                    **run_info,
                    "runner_instance": runner_instance_key,
                    **runner_instance_first_failed_job[runner_instance_key],
                }

                if runner_instance_current_streak[runner_instance_key] == 1:
                    # Separate copy, so the first-failure record is never
                    # aliased with the last-failure record.
                    runner_instance_first_failure[runner_instance_key] = dict(
                        failure_info
                    )

                # Always update last failure to the most recent one
                runner_instance_last_failure[runner_instance_key] = failure_info
                runner_instance_all_failures_in_streak[runner_instance_key].append(
                    failure_info
                )
                runner_instance_all_failures[runner_instance_key].append(
                    failure_info
                )

                runner_instance_max_streak[runner_instance_key] = max(
                    runner_instance_max_streak[runner_instance_key],
                    runner_instance_current_streak[runner_instance_key],
                )
            else:
                if runner_instance_current_streak[runner_instance_key] > 0:
                    runner_instance_recovery[runner_instance_key] = {
                        **run_info,
                        "runner_instance": runner_instance_key,
                        "streak_length": runner_instance_current_streak[
                            runner_instance_key
                        ],
                    }

                runner_instance_current_streak[runner_instance_key] = 0
                runner_instance_first_failure[runner_instance_key] = None
                runner_instance_all_failures_in_streak[runner_instance_key] = []
                runner_instance_last_failure[runner_instance_key] = None

        time.sleep(0.05)

    # Build final runner stats
    runner_stats = {}
    for runner_key in runner_total_jobs.keys():
        total = runner_total_jobs[runner_key]
        failed = runner_failed_jobs[runner_key]
        label_queue_times = runner_queue_times[runner_key]
        avg_queue_time, p90_queue_time = _queue_stats(label_queue_times)

        runner_stats[runner_key] = {
            "total_jobs": total,
            "failed_jobs": failed,
            "failure_rate": _failure_rate(failed, total),
            "unique_jobs_with_failures": len(runner_job_failures[runner_key]),
            "jobs_failed": dict(runner_job_failures[runner_key]),
            "jobs_total": dict(runner_job_totals[runner_key]),
            "avg_queue_time_seconds": avg_queue_time,
            "p90_queue_time_seconds": p90_queue_time,
            "queue_time_samples": len(label_queue_times),
        }

    # Convert runner instance stats to regular dicts with queue time stats
    runner_instance_data = {}
    for instance_key, stats in runner_instance_stats.items():
        queue_times = runner_instance_queue_times[instance_key]
        avg_queue_time, p90_queue_time = _queue_stats(queue_times)

        runner_instance_data[instance_key] = {
            "total_jobs": stats["total_jobs"],
            "failed_jobs": stats["failed_jobs"],
            "failure_rate": _failure_rate(stats["failed_jobs"], stats["total_jobs"]),
            "jobs_failed": dict(stats["jobs_failed"]),
            "runner_name": stats.get("runner_name", "unknown"),
            "avg_queue_time_seconds": avg_queue_time,
            "p90_queue_time_seconds": p90_queue_time,
            "queue_time_samples": len(queue_times),
            "all_failures": list(
                runner_instance_all_failures.get(instance_key, [])
            ),
        }

    # Build runner streak data
    runner_streak_data = {}
    for runner_key in runner_total_jobs.keys():
        runner_streak_data[runner_key] = {
            "current_streak": runner_current_streak[runner_key],
            "max_streak": runner_max_streak[runner_key],
            "total_failures": runner_failed_jobs[runner_key],
            "total_jobs": runner_total_jobs[runner_key],
            "failure_rate": _failure_rate(
                runner_failed_jobs[runner_key], runner_total_jobs[runner_key]
            ),
            "jobs_failed": dict(runner_job_failures[runner_key]),
            "first_failure_in_streak": runner_first_failure_in_streak.get(
                runner_key
            ),
            "last_failure_in_streak": runner_last_failure_in_streak.get(runner_key),
            "recovery_info": runner_recovery_info.get(runner_key),
        }

    # Build runner instance streak data
    runner_instance_streak_data = {}
    for instance_key, stats in runner_instance_stats.items():
        runner_instance_streak_data[instance_key] = {
            "current_streak": runner_instance_current_streak[instance_key],
            "max_streak": runner_instance_max_streak[instance_key],
            "total_failures": stats["failed_jobs"],
            "total_jobs": stats["total_jobs"],
            "failure_rate": _failure_rate(stats["failed_jobs"], stats["total_jobs"]),
            "runner_name": stats.get("runner_name", "unknown"),
            "jobs_failed": dict(stats["jobs_failed"]),
            "first_failure_in_streak": runner_instance_first_failure.get(
                instance_key
            ),
            "last_failure_in_streak": runner_instance_last_failure.get(
                instance_key
            ),
            "all_failures_in_streak": list(
                runner_instance_all_failures_in_streak.get(instance_key, [])
            ),
            "recovery_info": runner_instance_recovery.get(instance_key),
        }

    return (
        runner_stats,
        runner_instance_data,
        runner_streak_data,
        runner_instance_streak_data,
    )
|
|
|
|
def analyze_consecutive_failures(
    self, runs: List[Dict]
) -> Tuple[Dict[str, Dict], Dict[str, int]]:
    """
    Analyze consecutive failures for each job.

    "Current Streak" = consecutive failures ending at the most recent run (NOW)
    If the most recent run succeeded, current streak = 0 (streak is broken)
    "Max Streak" = the longest consecutive failure streak seen in the analyzed period

    Job conclusions other than "failure" and "success" count towards a job's
    total runs but neither extend nor break its streak. Only jobs that
    recorded at least one failure or success appear in the result.

    Args:
        runs: Workflow run dicts. The jobs of each run are fetched through
            self.get_jobs_for_run(run["id"]).

    Returns:
        Tuple of (job_streak_data, job_current_streaks)
    """
    print("\nAnalyzing consecutive failures...")

    # Sort runs by created_at (oldest first) to track streaks chronologically.
    # A null created_at sorts as "" instead of breaking the comparison.
    sorted_runs = sorted(runs, key=lambda x: x.get("created_at") or "")

    # Suffixes appended to the status emoji for retried jobs (attempt 2+).
    # Built once here instead of once per retried job.
    superscript_map = {
        "2": "²",
        "3": "³",
        "4": "⁴",
        "5": "⁵",
        "6": "⁶",
        "7": "⁷",
        "8": "⁸",
        "9": "⁹",
    }

    # Track current streak for each job
    job_current_streak: Dict[str, int] = defaultdict(int)
    job_max_streak: Dict[str, int] = defaultdict(int)
    job_total_failures: Dict[str, int] = defaultdict(int)
    job_total_runs: Dict[str, int] = defaultdict(int)
    job_first_failure_in_streak: Dict[str, Optional[Dict]] = {}
    job_last_failure_in_streak: Dict[str, Optional[Dict]] = {}
    job_recovery_info: Dict[str, Optional[Dict]] = {}
    # Full per-job history; trimmed to the last 10 entries when results are built
    job_recent_runs: Dict[str, List[Dict]] = defaultdict(list)

    total_runs_processed = len(sorted_runs)
    for i, run in enumerate(sorted_runs, 1):
        if i % 50 == 0 or i == total_runs_processed:
            print(
                f"Processing run {i}/{total_runs_processed}: #{run.get('run_number')}"
            )

        # Any of these fields may be present but null in the payload, so
        # normalize them before indexing/slicing.
        head_commit = run.get("head_commit") or {}
        commit_author = head_commit.get("author") or {}
        run_info = {
            "run_number": run.get("run_number"),
            "run_id": run.get("id"),
            "created_at": run.get("created_at"),
            "head_sha": (run.get("head_sha") or "")[:8],
            "author": commit_author.get("name", "Unknown"),
            "url": f"https://github.com/{self.repo}/actions/runs/{run.get('id')}",
        }

        pull_requests = run.get("pull_requests") or []
        if pull_requests:
            run_info["pr_number"] = pull_requests[0].get("number")

        # Get jobs for this run
        jobs = self.get_jobs_for_run(run.get("id"))

        for job in jobs:
            job_name = job.get("name", "")

            # Skip excluded jobs (administrative/setup jobs)
            if any(job_name.startswith(excluded) for excluded in self.excluded_jobs):
                continue

            job_total_runs[job_name] += 1
            conclusion = job.get("conclusion")
            job_url = job.get("html_url", run_info["url"])

            if conclusion == "failure":
                job_total_failures[job_name] += 1
                job_current_streak[job_name] += 1

                failure_record = {
                    **run_info,
                    "job_name": job_name,
                    "job_id": job.get("id"),
                    "job_url": job_url,
                    "conclusion": conclusion,
                }

                # Track if this is the first failure in a new streak
                if job_current_streak[job_name] == 1:
                    job_first_failure_in_streak[job_name] = dict(failure_record)

                # Always update last failure to the most recent one
                job_last_failure_in_streak[job_name] = failure_record

                # Update max streak
                if job_current_streak[job_name] > job_max_streak[job_name]:
                    job_max_streak[job_name] = job_current_streak[job_name]

            elif conclusion == "success":
                # Success - streak broken
                if job_current_streak[job_name] > 0:
                    # Record recovery together with the length of the streak it ended
                    job_recovery_info[job_name] = {
                        **run_info,
                        "job_name": job_name,
                        "streak_length": job_current_streak[job_name],
                    }

                job_current_streak[job_name] = 0
                job_first_failure_in_streak[job_name] = None
                job_last_failure_in_streak[job_name] = None

            # Track recent runs (trimmed to the last 10 per job below)
            run_attempt = job.get("run_attempt")
            if run_attempt is None:
                # Missing/null attempt counts as the first attempt
                run_attempt = 1

            # Create status emoji with superscript if retry attempt > 1
            if conclusion == "success":
                status = "✅"
            elif conclusion == "failure":
                status = "❌"
            else:
                status = "⚪"

            # Add superscript for retry attempts (2+ only)
            if run_attempt > 1:
                status += superscript_map.get(str(run_attempt), f"^{run_attempt}")

            job_recent_runs[job_name].append(
                {
                    "run_number": run_info["run_number"],
                    "job_id": job.get("id"),  # Needed for fetching logs
                    "job_url": job_url,
                    "conclusion": conclusion,
                    "status": status,
                    "run_attempt": run_attempt,
                }
            )

        # Brief pause after each run's job fetch
        time.sleep(0.05)

    # Build final results
    job_streak_data = {}
    for job_name in job_current_streak:
        # Get last 10 runs (oldest to latest, chronological order)
        recent_runs = job_recent_runs.get(job_name, [])[-10:]
        total_runs = job_total_runs[job_name]

        job_streak_data[job_name] = {
            "current_streak": job_current_streak[job_name],
            "max_streak": job_max_streak[job_name],
            "total_failures": job_total_failures[job_name],
            "total_runs": total_runs,
            "failure_rate": (
                job_total_failures[job_name] / total_runs * 100
                if total_runs > 0
                else 0
            ),
            "first_failure_in_streak": job_first_failure_in_streak.get(job_name),
            "last_failure_in_streak": job_last_failure_in_streak.get(job_name),
            "recovery_info": job_recovery_info.get(job_name),
            "recent_runs": recent_runs,  # Last 10 runs with status emoji
        }

    return job_streak_data, job_current_streak
|
|
|
|
def analyze_test_failures_for_broken_jobs(
    self, job_streak_data: Dict[str, Dict]
) -> Dict[str, Dict[str, Dict]]:
    """
    Collect test-level failure stats for every job that looks unhealthy.

    A job is selected when its current_streak is at least 2 or its
    failure_rate is at least 50%. For each selected job that has recent runs,
    those runs are handed to self.analyze_test_failures_for_job; jobs for
    which that call returns nothing are left out of the result.

    Args:
        job_streak_data: Dict mapping job_name -> job stats including recent_runs

    Returns:
        Dict mapping job_name -> {test_file -> test failure stats}
    """
    # Select the broken / high-failure-rate jobs, keeping input order
    candidates = []
    for name, stats in job_streak_data.items():
        if stats["current_streak"] >= 2 or stats["failure_rate"] >= 50.0:
            candidates.append((name, stats))

    candidate_count = len(candidates)
    if candidate_count == 0:
        print("No broken or high-failure-rate jobs to analyze for test failures")
        return {}

    print(f"\nAnalyzing test-level failures for {candidate_count} jobs...")

    failures_by_job = {}
    for position, (name, stats) in enumerate(candidates, start=1):
        print(f" [{position}/{candidate_count}] Analyzing test failures for: {name}")

        history = stats.get("recent_runs", [])
        if not history:
            # Nothing to inspect for this job
            continue

        per_test = self.analyze_test_failures_for_job(history)
        if per_test:
            failures_by_job[name] = per_test

    print(f"Found test-level failures for {len(failures_by_job)} jobs")
    return failures_by_job
|
|
|
|
def analyze_runner_specific_test_failures(
    self, runs: List[Dict]
) -> Dict[str, Dict[str, Dict]]:
    """
    Group failed tests by runner instance to surface runner-specific issues.

    Only failed, non-excluded jobs that carry both a runner id and runner
    labels are inspected. Each job's test summary is taken from
    self.test_summaries when the job id is already there; otherwise the job
    log is fetched and parsed.

    Args:
        runs: List of workflow runs to analyze

    Returns:
        Dict mapping "<runner_name>_<runner_id>" -> {test_file -> entry},
        limited to tests that failed at least twice on that runner. Each
        entry holds "count", "jobs", "job_urls", "runner_name" and
        "runner_labels".
    """
    print("\nAnalyzing runner-specific test failures...")

    def _blank_entry() -> Dict:
        # Fresh accumulator for one (runner, test file) pair
        return {"count": 0, "jobs": [], "job_urls": []}

    failures_by_runner: Dict[str, Dict[str, Dict]] = defaultdict(
        lambda: defaultdict(_blank_entry)
    )

    for run in runs:
        run_id = run.get("id")

        for job in self.get_jobs_for_run(run_id):
            job_name = job.get("name", "")
            conclusion = job.get("conclusion")

            # Ignore administrative jobs and anything that did not fail
            excluded = any(
                job_name.startswith(prefix) for prefix in self.excluded_jobs
            )
            if excluded or conclusion != "failure":
                continue

            # Runner identity: flat fields first, nested "runner" object second
            runner_name = (
                job.get("runner_name")
                or job.get("runner", {}).get("name")
                or "unknown"
            )
            runner_id = job.get("runner_id") or job.get("runner", {}).get("id")
            labels = job.get("labels", [])
            labels_text = ", ".join(labels) if labels else "unknown"

            # Without runner info the failure cannot be attributed
            if not runner_id or labels_text == "unknown":
                continue

            instance_key = f"{runner_name}_{runner_id}"

            job_id = job.get("id")
            if not job_id:
                continue

            # NOTE(review): summaries parsed here are not written back to
            # self.test_summaries; confirm whether that is intentional.
            if job_id in self.test_summaries:
                summary = self.test_summaries[job_id]
            else:
                logs = self.get_job_logs(job_id)
                summary = self.parse_test_summary(logs) if logs else None

            if not summary or not summary.get("failed_tests"):
                continue

            # Record every failed test of this job against the runner
            for failed in summary["failed_tests"]:
                test_file = failed["test_file"]
                entry = failures_by_runner[instance_key][test_file]

                entry["count"] += 1
                entry["jobs"].append(job_name)
                entry["job_urls"].append(
                    job.get(
                        "html_url",
                        f"https://github.com/{self.repo}/actions/runs/{run_id}",
                    )
                )

                # Runner metadata is stored once, on the first failure seen
                if "runner_name" not in entry:
                    entry["runner_name"] = runner_name
                    entry["runner_labels"] = labels_text

        # Brief pause after each run (placement inferred: after the job loop)
        time.sleep(0.05)

    # Keep only tests that failed 2+ times on the same runner
    repeated_failures = {}
    for runner_key, tests in failures_by_runner.items():
        recurring = {name: info for name, info in tests.items() if info["count"] >= 2}
        if recurring:
            repeated_failures[runner_key] = recurring

    print(f"Found {len(repeated_failures)} runners with repeated test failures")
    return repeated_failures
|
|
|
|
# print statements here mainly for local testing
|
|
def generate_failure_report(
    self,
    # Scheduled runs (9 workflows)
    pr_test_nvidia_scheduled_data: Dict[str, Dict],
    pr_test_amd_scheduled_data: Dict[str, Dict],
    pr_test_xeon_scheduled_data: Dict[str, Dict],
    pr_test_xpu_scheduled_data: Dict[str, Dict],
    pr_test_npu_scheduled_data: Dict[str, Dict],
    nightly_nvidia_scheduled_data: Dict[str, Dict],
    nightly_amd_scheduled_data: Dict[str, Dict],
    nightly_intel_scheduled_data: Dict[str, Dict],
    nightly_npu_scheduled_data: Dict[str, Dict],
    # General runs (9 workflows)
    pr_test_nvidia_general_data: Dict[str, Dict],
    pr_test_amd_general_data: Dict[str, Dict],
    pr_test_xeon_general_data: Dict[str, Dict],
    pr_test_xpu_general_data: Dict[str, Dict],
    pr_test_npu_general_data: Dict[str, Dict],
    nightly_nvidia_general_data: Dict[str, Dict],
    nightly_amd_general_data: Dict[str, Dict],
    nightly_intel_general_data: Dict[str, Dict],
    nightly_npu_general_data: Dict[str, Dict],
    # Runners
    runner_stats: Optional[Dict[str, Dict]] = None,
    runner_instance_data: Optional[Dict[str, Dict]] = None,
    runner_streak_data: Optional[Dict[str, Dict]] = None,
    runner_instance_streak_data: Optional[Dict[str, Dict]] = None,
    online_runners: Optional[Dict[str, Dict]] = None,
    # Test failures (per job -> per test)
    job_test_failures: Optional[Dict[str, Dict[str, Dict]]] = None,
    # Test failures for general runs (per job -> per test)
    job_test_failures_general: Optional[Dict[str, Dict[str, Dict]]] = None,
    # Runner-specific test failures
    runner_test_failures: Optional[Dict[str, Dict[str, Dict]]] = None,
    # Config
    output_file: Optional[str] = None,
    pr_test_scheduled_limit: int = 12,
    nightly_scheduled_limit: int = 6,
    general_limit: int = 100,
):
    """
    Assemble the failure analysis report and optionally save it as JSON.

    The report carries a "summary" section (job/runner counts, queue-time
    aggregates, scheduled-run streak counts), the three limits, every
    per-workflow data dict under its parameter name, and the runner and
    test-failure dicts (empty dict when not supplied).

    Args:
        *_scheduled_data / *_general_data: Per-workflow mappings of
            job_name -> job stats; each stats dict must provide
            "current_streak" and "failure_rate".
        runner_stats: Per-runner stats; entries must provide
            "queue_time_samples", "avg_queue_time_seconds" and
            "p90_queue_time_seconds".
        output_file: When set, the report is also written there as JSON.

    Returns:
        The report dict.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("SGLang Consecutive Failures Analysis Report")
    print(banner)

    # Ordered tables of the per-workflow inputs, keyed by their report key
    pr_scheduled_sections = {
        "pr_test_nvidia_scheduled_data": pr_test_nvidia_scheduled_data,
        "pr_test_amd_scheduled_data": pr_test_amd_scheduled_data,
        "pr_test_xeon_scheduled_data": pr_test_xeon_scheduled_data,
        "pr_test_xpu_scheduled_data": pr_test_xpu_scheduled_data,
        "pr_test_npu_scheduled_data": pr_test_npu_scheduled_data,
    }
    nightly_scheduled_sections = {
        "nightly_nvidia_scheduled_data": nightly_nvidia_scheduled_data,
        "nightly_amd_scheduled_data": nightly_amd_scheduled_data,
        "nightly_intel_scheduled_data": nightly_intel_scheduled_data,
        "nightly_npu_scheduled_data": nightly_npu_scheduled_data,
    }
    general_sections = {
        "pr_test_nvidia_general_data": pr_test_nvidia_general_data,
        "pr_test_amd_general_data": pr_test_amd_general_data,
        "pr_test_xeon_general_data": pr_test_xeon_general_data,
        "pr_test_xpu_general_data": pr_test_xpu_general_data,
        "pr_test_npu_general_data": pr_test_npu_general_data,
        "nightly_nvidia_general_data": nightly_nvidia_general_data,
        "nightly_amd_general_data": nightly_amd_general_data,
        "nightly_intel_general_data": nightly_intel_general_data,
        "nightly_npu_general_data": nightly_npu_general_data,
    }

    def _merge(sections) -> Dict[str, Dict]:
        # Later sections win when the same job name appears more than once
        merged: Dict[str, Dict] = {}
        for section in sections:
            merged.update(section)
        return merged

    def _streak_then_rate(item):
        return (item[1]["current_streak"], item[1]["failure_rate"])

    def _count_broken(jobs: Dict[str, Dict]) -> int:
        # Jobs currently failing at least twice in a row
        return sum(1 for stats in jobs.values() if stats["current_streak"] >= 2)

    # All general data combined for the summary, worst jobs first
    combined_general_data = _merge(general_sections.values())
    sorted_jobs = sorted(
        combined_general_data.items(), key=_streak_then_rate, reverse=True
    )

    # Overall queue time: mean of the per-runner averages and of the
    # per-runner P90 values, over runners that have queue-time samples
    overall_avg_queue = 0
    overall_p90_queue = 0
    if runner_stats:
        sampled = [
            stats
            for stats in runner_stats.values()
            if stats["queue_time_samples"] > 0
        ]
        if sampled:
            overall_avg_queue = sum(
                stats["avg_queue_time_seconds"] for stats in sampled
            ) / len(sampled)
            overall_p90_queue = sum(
                stats["p90_queue_time_seconds"] for stats in sampled
            ) / len(sampled)

    # PR Test and Nightly Test job counts for scheduled runs (main branch)
    pr_scheduled_combined = _merge(pr_scheduled_sections.values())
    nightly_scheduled_combined = _merge(nightly_scheduled_sections.values())

    summary = {
        "total_jobs": len(sorted_jobs),
        "jobs_with_streaks": sum(
            1 for _, stats in sorted_jobs if stats["current_streak"] > 0
        ),
        "total_runners": len(runner_stats) if runner_stats else 0,
        "analysis_timestamp": datetime.now().isoformat(),
        "avg_queue_time_seconds": overall_avg_queue,
        "p90_queue_time_seconds": overall_p90_queue,
        "pr_main_count": len(pr_scheduled_combined),
        "pr_main_with_streaks": _count_broken(pr_scheduled_combined),
        "nightly_main_count": len(nightly_scheduled_combined),
        "nightly_main_with_streaks": _count_broken(nightly_scheduled_combined),
    }

    # Build report data (always needed for GitHub summary)
    report_data = {
        "summary": summary,
        "pr_test_scheduled_limit": pr_test_scheduled_limit,
        "nightly_scheduled_limit": nightly_scheduled_limit,
        "general_limit": general_limit,
        # Scheduled data
        **pr_scheduled_sections,
        **nightly_scheduled_sections,
        # General data
        **general_sections,
        "runner_stats": runner_stats or {},
        "runner_instance_data": runner_instance_data or {},
        "runner_streak_data": runner_streak_data or {},
        "runner_instance_streak_data": runner_instance_streak_data or {},
        "job_test_failures": job_test_failures or {},
        "job_test_failures_general": job_test_failures_general or {},
        "runner_test_failures": runner_test_failures or {},
        "online_runners": online_runners or {},
    }

    # Save to JSON only if output file is specified
    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(report_data, f, ensure_ascii=False, indent=2)
        print(f"\nDetailed report saved to: {output_file}")

    print(banner)

    return report_data
|
|
|
|
def generate_github_summary(self, report_data: Dict):
|
|
"""Generate GitHub Actions Step Summary."""
|
|
try:
|
|
github_step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
|
|
if not github_step_summary:
|
|
print("Not running in GitHub Actions, skipping summary generation")
|
|
return
|
|
|
|
print("Generating GitHub Actions summary...")
|
|
|
|
summary_lines = []
|
|
summary_lines.append("# SGLang Consecutive Failures Analysis")
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
f"**Analysis Timestamp:** {report_data['summary']['analysis_timestamp']}"
|
|
)
|
|
summary_lines.append(
|
|
"_Note: Recent runs are shown oldest → latest (left to right)_"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# Summary stats - COLLAPSIBLE
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
"<summary>📊 Summary Statistics (click to expand)</summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append("| Metric | Count |")
|
|
summary_lines.append("|--------|-------|")
|
|
summary_lines.append(
|
|
f"| Total (unique) jobs analyzed | {report_data['summary']['total_jobs']} |"
|
|
)
|
|
summary_lines.append(
|
|
f"| Jobs with Active Failure Streaks | {report_data['summary']['jobs_with_streaks']} |"
|
|
)
|
|
|
|
# Add main branch job counters
|
|
pr_main_count = report_data["summary"].get("pr_main_count", 0)
|
|
pr_main_with_streaks = report_data["summary"].get("pr_main_with_streaks", 0)
|
|
nightly_main_count = report_data["summary"].get("nightly_main_count", 0)
|
|
nightly_main_with_streaks = report_data["summary"].get(
|
|
"nightly_main_with_streaks", 0
|
|
)
|
|
|
|
summary_lines.append(
|
|
f"| PR Test Jobs on Main (scheduled) | {pr_main_count} ({pr_main_with_streaks} with streaks) |"
|
|
)
|
|
summary_lines.append(
|
|
f"| Nightly Test Jobs on Main (scheduled) | {nightly_main_count} ({nightly_main_with_streaks} with streaks) |"
|
|
)
|
|
|
|
summary_lines.append(
|
|
f"| Total Runners Analyzed | {report_data['summary']['total_runners']} |"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# Runner Statistics - COLLAPSIBLE
|
|
runner_stats = report_data.get("runner_stats", {})
|
|
online_runners = report_data.get("online_runners", {})
|
|
if runner_stats:
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
"<summary>📊 Runner Statistics (by type) (click to expand)</summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"_High queue times indicate that runner type may need more workers. Online column shows current runner availability._"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Runner Type | Online | Avg Queue | P90 Queue | # of Jobs Processed | Jobs Using This Runner |"
|
|
)
|
|
summary_lines.append(
|
|
"|-------------|--------|-----------|-----------|---------------------|------------------------|"
|
|
)
|
|
|
|
# Sort by P90 queue time descending (longest waits first)
|
|
sorted_runners = sorted(
|
|
runner_stats.items(),
|
|
key=lambda x: x[1].get("p90_queue_time_seconds", 0),
|
|
reverse=True,
|
|
)
|
|
|
|
for runner_key, stats in sorted_runners:
|
|
avg_queue = stats.get("avg_queue_time_seconds", 0)
|
|
p90_queue = stats.get("p90_queue_time_seconds", 0)
|
|
total_jobs = stats.get("total_jobs", 0)
|
|
|
|
# Get online runner count for this runner type
|
|
# First try exact match, then fall back to substring match
|
|
online_count = online_runners.get(runner_key)
|
|
if not online_count:
|
|
# Fall back to substring match (but prefer longer matches)
|
|
best_match = None
|
|
best_match_len = 0
|
|
for online_key, online_stats in online_runners.items():
|
|
if online_key in runner_key or runner_key in online_key:
|
|
# Prefer longer matching keys (more specific)
|
|
if len(online_key) > best_match_len:
|
|
best_match = online_stats
|
|
best_match_len = len(online_key)
|
|
online_count = best_match
|
|
if online_count:
|
|
online_str = f"{online_count['online']}/{online_count['total']}"
|
|
else:
|
|
online_str = "N/A"
|
|
|
|
# Get unique job names that run on this runner
|
|
jobs_total = stats.get("jobs_total", {})
|
|
unique_jobs = list(jobs_total.keys())
|
|
# Truncate job names and limit to first 3
|
|
job_names_short = [
|
|
(j if len(j) <= 25 else j[:22] + "...") for j in unique_jobs[:3]
|
|
]
|
|
jobs_str = ", ".join(f"`{j}`" for j in job_names_short)
|
|
if len(unique_jobs) > 3:
|
|
jobs_str += f" +{len(unique_jobs) - 3} more"
|
|
|
|
# Format queue times
|
|
avg_str = f"{avg_queue / 60:.1f}m" if avg_queue > 0 else "N/A"
|
|
p90_str = f"{p90_queue / 60:.1f}m" if p90_queue > 0 else "N/A"
|
|
|
|
# Truncate long runner labels
|
|
display_name = (
|
|
runner_key if len(runner_key) <= 35 else runner_key[:32] + "..."
|
|
)
|
|
|
|
# Highlight if P90 queue time > 10 minutes (potential bottleneck)
|
|
if p90_queue > 600:
|
|
summary_lines.append(
|
|
f"| <span style='color:orange'>`{display_name}`</span> | <span style='color:orange'>{online_str}</span> | <span style='color:orange'>{avg_str}</span> | <span style='color:orange'>{p90_str}</span> | <span style='color:orange'>{total_jobs}</span> | {jobs_str} |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{display_name}` | {online_str} | {avg_str} | {p90_str} | {total_jobs} | {jobs_str} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# Get test failures data
|
|
job_test_failures = report_data.get("job_test_failures", {})
|
|
job_test_failures_general = report_data.get("job_test_failures_general", {})
|
|
|
|
# Helper function to generate job section for GitHub markdown
|
|
def generate_job_section_md(
|
|
title: str,
|
|
data: Dict[str, Dict],
|
|
show_test_failures: bool = True,
|
|
test_failures_dict: Optional[Dict[str, Dict[str, Dict]]] = None,
|
|
):
|
|
sorted_data = sorted(
|
|
data.items(),
|
|
key=lambda x: (x[1]["current_streak"], x[1]["failure_rate"]),
|
|
reverse=True,
|
|
)
|
|
broken = [
|
|
(name, d) for name, d in sorted_data if d["current_streak"] >= 2
|
|
]
|
|
high_failure_rate = [
|
|
(name, d)
|
|
for name, d in sorted_data
|
|
if d["current_streak"] < 2
|
|
and d["failure_rate"] >= 50.0
|
|
and d["total_failures"] > 0
|
|
]
|
|
recently_failed = [
|
|
(name, d)
|
|
for name, d in sorted_data
|
|
if d["current_streak"] < 2
|
|
and d["failure_rate"] < 50.0
|
|
and d["total_failures"] > 0
|
|
]
|
|
|
|
# Always show section header
|
|
summary_lines.append(f"## {title}")
|
|
summary_lines.append("")
|
|
|
|
# ==== TEST-LEVEL FAILURES FIRST (if show_test_failures is enabled) ====
|
|
if show_test_failures:
|
|
# Use the provided test_failures_dict, or default to job_test_failures
|
|
active_test_failures = (
|
|
test_failures_dict
|
|
if test_failures_dict is not None
|
|
else job_test_failures
|
|
)
|
|
|
|
# Collect all test failures from broken and high_failure_rate jobs
|
|
all_test_failures = []
|
|
|
|
# Collect from broken jobs (current_streak >= 2)
|
|
for job_name, job_data in broken:
|
|
test_failures = active_test_failures.get(job_name, {})
|
|
if test_failures and not test_failures.get("_no_test_summary"):
|
|
for test_file, test_data in test_failures.items():
|
|
if not test_file.startswith("_"): # Skip marker keys
|
|
all_test_failures.append(
|
|
{
|
|
"job_name": job_name,
|
|
"test_file": test_file,
|
|
"test_data": test_data,
|
|
"job_data": job_data,
|
|
}
|
|
)
|
|
|
|
# Collect from high_failure_rate jobs
|
|
for job_name, job_data in high_failure_rate:
|
|
test_failures = active_test_failures.get(job_name, {})
|
|
if test_failures and not test_failures.get("_no_test_summary"):
|
|
for test_file, test_data in test_failures.items():
|
|
if not test_file.startswith("_"):
|
|
all_test_failures.append(
|
|
{
|
|
"job_name": job_name,
|
|
"test_file": test_file,
|
|
"test_data": test_data,
|
|
"job_data": job_data,
|
|
}
|
|
)
|
|
|
|
# Sort by current_streak descending, then total_failures descending
|
|
all_test_failures.sort(
|
|
key=lambda x: (
|
|
x["test_data"]["current_streak"],
|
|
x["test_data"]["total_failures"],
|
|
),
|
|
reverse=True,
|
|
)
|
|
|
|
# Split into streak tests and non-streak tests
|
|
streak_tests = [
|
|
t
|
|
for t in all_test_failures
|
|
if t["test_data"]["current_streak"] >= 2
|
|
]
|
|
|
|
# For non-streak tests, calculate failure rate and include all that have failed
|
|
non_streak_tests = []
|
|
for t in all_test_failures:
|
|
if t["test_data"]["current_streak"] < 2:
|
|
# Calculate test failure rate from recent_runs
|
|
recent_runs = t["test_data"].get("recent_runs", [])
|
|
if recent_runs:
|
|
# Count actual failures (failed=True) vs total runs
|
|
total_runs = len(recent_runs)
|
|
failed_runs = sum(
|
|
1 for r in recent_runs if r.get("failed") == True
|
|
)
|
|
failure_rate = (
|
|
(failed_runs / total_runs * 100)
|
|
if total_runs > 0
|
|
else 0
|
|
)
|
|
|
|
# Include all tests that have at least 1 failure
|
|
if failed_runs >= 1:
|
|
# Store failure rate for sorting
|
|
t["failure_rate"] = failure_rate
|
|
t["failed_runs"] = failed_runs
|
|
t["total_test_runs"] = total_runs
|
|
non_streak_tests.append(t)
|
|
|
|
# Sort by failure rate descending
|
|
non_streak_tests.sort(key=lambda x: x["failure_rate"], reverse=True)
|
|
|
|
# Show tests with consecutive failures
|
|
if streak_tests:
|
|
summary_lines.append(
|
|
"🔥 **Tests with consecutive failures (≥2) & currently failing**"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# Check if any test has timeout indicator
|
|
has_timeout = any(
|
|
any(
|
|
r.get("status") == "⏱️"
|
|
for r in t["test_data"].get("recent_runs", [])
|
|
)
|
|
for t in streak_tests
|
|
)
|
|
if has_timeout:
|
|
summary_lines.append(
|
|
"_Note: ⏱️ indicates test was last running when logs cut off (possible timeout)_"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Test File | Job | Failures | Streak | First | Last | Recent Runs (oldest → latest) |"
|
|
)
|
|
summary_lines.append(
|
|
"|-----------|-----|----------|--------|-------|------|-------------------------------|"
|
|
)
|
|
|
|
for test_info in streak_tests[:20]: # Show top 20 tests
|
|
test_file = test_info["test_file"]
|
|
job_name = test_info["job_name"]
|
|
test_data = test_info["test_data"]
|
|
job_data = test_info["job_data"]
|
|
|
|
test_display = test_file
|
|
job_display = job_name
|
|
|
|
# Get first and last failure from job level
|
|
first_failure = job_data.get("first_failure_in_streak")
|
|
first_str = (
|
|
f"[Run #{first_failure['run_number']}]({first_failure.get('job_url', first_failure['url'])})"
|
|
if first_failure
|
|
else "N/A"
|
|
)
|
|
|
|
last_failure = job_data.get("last_failure_in_streak")
|
|
last_str = (
|
|
f"[Run #{last_failure['run_number']}]({last_failure.get('job_url', last_failure['url'])})"
|
|
if last_failure
|
|
else "N/A"
|
|
)
|
|
|
|
# Format streak with fire emoji
|
|
streak_str = f"🔥 {test_data['current_streak']}"
|
|
|
|
# Build history links
|
|
recent_runs = test_data.get("recent_runs", [])
|
|
if recent_runs:
|
|
history_links = "… " + " ".join(
|
|
[
|
|
f"[{r['status']}]({r['job_url']})"
|
|
for r in recent_runs[-10:]
|
|
] # Last 10 runs
|
|
)
|
|
else:
|
|
history_links = "N/A"
|
|
|
|
# Highlight if streak >= 3
|
|
if test_data["current_streak"] >= 3:
|
|
summary_lines.append(
|
|
f"| <span style='color:red'>`{test_display}`</span> | <span style='color:red'>`{job_display}`</span> | "
|
|
f"<span style='color:red'>{test_data['total_failures']}</span> | <span style='color:red'>{streak_str}</span> | "
|
|
f"<span style='color:red'>{first_str}</span> | <span style='color:red'>{last_str}</span> | "
|
|
f"<span style='color:red'>{history_links}</span> |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{test_display}` | `{job_display}` | {test_data['total_failures']} | {streak_str} | "
|
|
f"{first_str} | {last_str} | {history_links} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
|
|
# Show all tests that have failed (no current streak), ranked by failure rate
|
|
if non_streak_tests:
|
|
summary_lines.append(
|
|
"📋 **Other tests with failures (ranked by failure rate)**"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# Check if any test has timeout indicator
|
|
has_timeout = any(
|
|
any(
|
|
r.get("status") == "⏱️"
|
|
for r in t["test_data"].get("recent_runs", [])
|
|
)
|
|
for t in non_streak_tests
|
|
)
|
|
if has_timeout:
|
|
summary_lines.append(
|
|
"_Note: ⏱️ indicates test was last running when logs cut off (possible timeout)_"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Test File | Job | Failed | Total | Fail Rate | Recent Runs (oldest → latest) |"
|
|
)
|
|
summary_lines.append(
|
|
"|-----------|-----|--------|-------|-----------|-------------------------------|"
|
|
)
|
|
|
|
for test_info in non_streak_tests[:20]: # Show top 20
|
|
test_file = test_info["test_file"]
|
|
job_name = test_info["job_name"]
|
|
test_data = test_info["test_data"]
|
|
failure_rate = test_info["failure_rate"]
|
|
failed_runs = test_info["failed_runs"]
|
|
total_test_runs = test_info["total_test_runs"]
|
|
|
|
test_display = test_file
|
|
job_display = job_name
|
|
|
|
# Build history links
|
|
recent_runs = test_data.get("recent_runs", [])
|
|
if recent_runs:
|
|
history_links = "… " + " ".join(
|
|
[
|
|
f"[{r['status']}]({r['job_url']})"
|
|
for r in recent_runs[-10:]
|
|
]
|
|
)
|
|
else:
|
|
history_links = "N/A"
|
|
|
|
# Highlight if failure rate >= 50%
|
|
if failure_rate >= 50.0:
|
|
summary_lines.append(
|
|
f"| <span style='color:orange'>`{test_display}`</span> | <span style='color:orange'>`{job_display}`</span> | "
|
|
f"<span style='color:orange'>{failed_runs}</span> | <span style='color:orange'>{total_test_runs}</span> | "
|
|
f"<span style='color:orange'>{failure_rate:.1f}%</span> | <span style='color:orange'>{history_links}</span> |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{test_display}` | `{job_display}` | {failed_runs} | {total_test_runs} | "
|
|
f"{failure_rate:.1f}% | {history_links} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
|
|
# If no test failures found but we have broken/high_failure_rate jobs
|
|
if (
|
|
not streak_tests
|
|
and not non_streak_tests
|
|
and (broken or high_failure_rate)
|
|
):
|
|
summary_lines.append(
|
|
"_No test-level failure data available for this workflow_"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# ==== JOB-LEVEL SUMMARY (COLLAPSIBLE) ====
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
"<summary><b>📊 Job-level summary (click to expand)</b></summary>"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# Broken jobs (with active streak)
|
|
if broken:
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
"<summary>🔥 <b>Consecutive failures (≥2) & currently failing</b></summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Job Name | Current | Max | Runs | First | Last | Recent Runs (oldest → latest) |"
|
|
)
|
|
summary_lines.append(
|
|
"|----------|---------|-----|------|-------|------|-------------------------------|"
|
|
)
|
|
for job_name, d in broken[:15]:
|
|
display_name = (
|
|
job_name if len(job_name) <= 35 else job_name[:32] + "..."
|
|
)
|
|
|
|
first_failure = d.get("first_failure_in_streak")
|
|
first_str = (
|
|
f"[Run #{first_failure['run_number']}]({first_failure.get('job_url', first_failure['url'])})"
|
|
if first_failure
|
|
else "N/A"
|
|
)
|
|
|
|
last_failure = d.get("last_failure_in_streak")
|
|
last_str = (
|
|
f"[Run #{last_failure['run_number']}]({last_failure.get('job_url', last_failure['url'])})"
|
|
if last_failure
|
|
else "N/A"
|
|
)
|
|
|
|
recent_runs = d.get("recent_runs", [])
|
|
if recent_runs:
|
|
history_links = "… " + " ".join(
|
|
[
|
|
f"[{r['status']}]({r['job_url']})"
|
|
for r in recent_runs
|
|
]
|
|
)
|
|
else:
|
|
history_links = "N/A"
|
|
|
|
if d["current_streak"] >= 3:
|
|
summary_lines.append(
|
|
f"| <span style='color:red'>`{display_name}`</span> | <span style='color:red'>{d['current_streak']}</span> | <span style='color:red'>{d['max_streak']}</span> | <span style='color:red'>{d['total_runs']}</span> | "
|
|
f"<span style='color:red'>{first_str}</span> | <span style='color:red'>{last_str}</span> | <span style='color:red'>{history_links}</span> |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{display_name}` | {d['current_streak']} | {d['max_streak']} | {d['total_runs']} | "
|
|
f"{first_str} | {last_str} | {history_links} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# High failure rate jobs (no active streak)
|
|
if high_failure_rate:
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
"<summary>⚠️ <b>No current failure streak but high intermittent failure rate (≥50%)</b></summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Job Name | Failures | Fail Rate | Total Runs | Recent Runs (oldest → latest) |"
|
|
)
|
|
summary_lines.append(
|
|
"|----------|----------|-----------|------------|-------------------------------|"
|
|
)
|
|
for job_name, d in high_failure_rate[:15]:
|
|
display_name = (
|
|
job_name if len(job_name) <= 35 else job_name[:32] + "..."
|
|
)
|
|
recent_runs = d.get("recent_runs", [])
|
|
if recent_runs:
|
|
history_links = "… " + " ".join(
|
|
[
|
|
f"[{r['status']}]({r['job_url']})"
|
|
for r in recent_runs
|
|
]
|
|
)
|
|
else:
|
|
history_links = "N/A"
|
|
|
|
summary_lines.append(
|
|
f"| <span style='color:orange'>`{display_name}`</span> | <span style='color:orange'>{d['total_failures']}</span> | <span style='color:orange'>{d['failure_rate']:.1f}%</span> | <span style='color:orange'>{d['total_runs']}</span> | <span style='color:orange'>{history_links}</span> |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# Recently failed jobs (collapsible)
|
|
if recently_failed:
|
|
max_total_runs = max(d["total_runs"] for _, d in recently_failed)
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
f"<summary>📋 <b>No current failure streak, but had failures in the past {max_total_runs} runs - {len(recently_failed)} jobs</b></summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Job Name | Failures | Fail Rate | Total Runs | Recent Runs (oldest → latest) |"
|
|
)
|
|
summary_lines.append(
|
|
"|----------|----------|-----------|------------|-------------------------------|"
|
|
)
|
|
for job_name, d in recently_failed[:15]:
|
|
display_name = (
|
|
job_name if len(job_name) <= 35 else job_name[:32] + "..."
|
|
)
|
|
recent_runs = d.get("recent_runs", [])
|
|
if recent_runs:
|
|
history_links = "… " + " ".join(
|
|
[
|
|
f"[{r['status']}]({r['job_url']})"
|
|
for r in recent_runs
|
|
]
|
|
)
|
|
else:
|
|
history_links = "N/A"
|
|
|
|
summary_lines.append(
|
|
f"| `{display_name}` | {d['total_failures']} | {d['failure_rate']:.1f}% | {d['total_runs']} | {history_links} |"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# Combined message when no broken/high_failure_rate jobs but has recently_failed
|
|
if not broken and not high_failure_rate and recently_failed:
|
|
max_total_runs = max(d["total_runs"] for _, d in recently_failed)
|
|
summary_lines.append(
|
|
f"✅ No jobs with active failure streaks, but **{len(recently_failed)} jobs** had failures in the past **{max_total_runs} runs**"
|
|
)
|
|
summary_lines.append("")
|
|
elif not broken and not high_failure_rate and not recently_failed:
|
|
summary_lines.append("✅ **No jobs with active failure streaks**")
|
|
summary_lines.append("")
|
|
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# ========== RUNNERS (at the top) ==========
|
|
summary_lines.append("---")
|
|
summary_lines.append("# 🖥️ RUNNER HEALTH")
|
|
summary_lines.append("")
|
|
|
|
# Workers section
|
|
if report_data.get("runner_instance_data") and report_data.get(
|
|
"runner_instance_streak_data"
|
|
):
|
|
# Combine instance stats with streak data
|
|
combined_data = []
|
|
for instance_key, stats in report_data["runner_instance_data"].items():
|
|
streak_data = report_data["runner_instance_streak_data"].get(
|
|
instance_key, {}
|
|
)
|
|
combined_data.append(
|
|
{
|
|
"runner_name": stats.get("runner_name", "unknown"),
|
|
"current_streak": streak_data.get("current_streak", 0),
|
|
"max_streak": streak_data.get("max_streak", 0),
|
|
"failure_rate": stats["failure_rate"],
|
|
"total_jobs": stats["total_jobs"],
|
|
"unique_jobs": len(stats.get("jobs_failed", {})),
|
|
"avg_queue": stats.get("avg_queue_time_seconds", 0),
|
|
"all_failures_in_streak": streak_data.get(
|
|
"all_failures_in_streak", []
|
|
),
|
|
"all_failures": stats.get("all_failures", []),
|
|
}
|
|
)
|
|
|
|
sorted_runners = sorted(
|
|
combined_data,
|
|
key=lambda x: (
|
|
x["current_streak"],
|
|
x["max_streak"],
|
|
x["failure_rate"],
|
|
),
|
|
reverse=True,
|
|
)
|
|
|
|
# Split runners into consecutive failures and high failure rate
|
|
runners_with_streak = [
|
|
r for r in sorted_runners if r["current_streak"] >= 2
|
|
]
|
|
runners_high_fail_rate = [
|
|
r
|
|
for r in sorted_runners
|
|
if r["current_streak"] < 2
|
|
and r["failure_rate"] >= 50.0
|
|
and r["total_jobs"] >= 2
|
|
]
|
|
|
|
# Always show section header
|
|
summary_lines.append("## Workers")
|
|
summary_lines.append("")
|
|
|
|
# Runners with consecutive failures
|
|
if runners_with_streak:
|
|
summary_lines.append(
|
|
"🔥 **Consecutive failures (≥2) & currently failing**"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Machine Name | Current Streak | Max | Fail Rate | Avg Queue | Total Jobs | Failed Jobs | Unique Jobs | Jobs |"
|
|
)
|
|
summary_lines.append(
|
|
"|--------------|----------------|-----|-----------|-----------|------------|-------------|-------------|------|"
|
|
)
|
|
|
|
for runner_data in runners_with_streak[:15]:
|
|
display_name = (
|
|
runner_data["runner_name"]
|
|
if len(runner_data["runner_name"]) <= 28
|
|
else runner_data["runner_name"][:25] + "..."
|
|
)
|
|
|
|
avg_queue_str = (
|
|
f"{runner_data['avg_queue'] / 60:.1f}m"
|
|
if runner_data["avg_queue"] > 0
|
|
else "N/A"
|
|
)
|
|
|
|
all_failures = runner_data.get("all_failures_in_streak", [])
|
|
failed_jobs_count = len(all_failures)
|
|
jobs_str = (
|
|
" ".join(
|
|
f"[#{f.get('run_number', '?')}]({f.get('job_url', f['url'])})"
|
|
for f in all_failures
|
|
)
|
|
if all_failures
|
|
else "N/A"
|
|
)
|
|
|
|
# Make entire row red if current streak >= 3
|
|
if runner_data["current_streak"] >= 3:
|
|
summary_lines.append(
|
|
f"| <span style='color:red'>`{display_name}`</span> | <span style='color:red'>{runner_data['current_streak']}</span> | <span style='color:red'>{runner_data['max_streak']}</span> | "
|
|
f"<span style='color:red'>{runner_data['failure_rate']:.1f}%</span> | <span style='color:red'>{avg_queue_str}</span> | <span style='color:red'>{runner_data['total_jobs']}</span> | <span style='color:red'>{failed_jobs_count}</span> | <span style='color:red'>{runner_data.get('unique_jobs', 0)}</span> | <span style='color:red'>{jobs_str}</span> |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{display_name}` | {runner_data['current_streak']} | {runner_data['max_streak']} | "
|
|
f"{runner_data['failure_rate']:.1f}% | {avg_queue_str} | {runner_data['total_jobs']} | {failed_jobs_count} | {runner_data.get('unique_jobs', 0)} | {jobs_str} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
|
|
# Runners with high failure rate (but no current streak)
|
|
if runners_high_fail_rate:
|
|
summary_lines.append(
|
|
"⚠️ **No current failure streak but high failure rate (≥50%)**"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"| Machine Name | Fail Rate | Avg Queue | Total Jobs | Failed Jobs | Unique Jobs | Jobs |"
|
|
)
|
|
summary_lines.append(
|
|
"|--------------|-----------|-----------|------------|-------------|-------------|------|"
|
|
)
|
|
|
|
for runner_data in runners_high_fail_rate[:15]:
|
|
display_name = (
|
|
runner_data["runner_name"]
|
|
if len(runner_data["runner_name"]) <= 28
|
|
else runner_data["runner_name"][:25] + "..."
|
|
)
|
|
|
|
avg_queue_str = (
|
|
f"{runner_data['avg_queue'] / 60:.1f}m"
|
|
if runner_data["avg_queue"] > 0
|
|
else "N/A"
|
|
)
|
|
|
|
all_failures = runner_data.get("all_failures", [])
|
|
failed_jobs_count = len(all_failures)
|
|
jobs_str = (
|
|
" ".join(
|
|
f"[#{f.get('run_number', '?')}]({f.get('job_url', f['url'])})"
|
|
for f in all_failures
|
|
)
|
|
if all_failures
|
|
else "N/A"
|
|
)
|
|
|
|
summary_lines.append(
|
|
f"| <span style='color:orange'>`{display_name}`</span> | <span style='color:orange'>{runner_data['failure_rate']:.1f}%</span> | "
|
|
f"<span style='color:orange'>{avg_queue_str}</span> | <span style='color:orange'>{runner_data['total_jobs']}</span> | "
|
|
f"<span style='color:orange'>{failed_jobs_count}</span> | <span style='color:orange'>{runner_data.get('unique_jobs', 0)}</span> | <span style='color:orange'>{jobs_str}</span> |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
|
|
# If no issues
|
|
if not runners_with_streak and not runners_high_fail_rate:
|
|
summary_lines.append(
|
|
"✅ **No runners with active failure streaks or high failure rates**"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# ========== RUNNER-SPECIFIC TEST FAILURES ==========
|
|
runner_test_failures = report_data.get("runner_test_failures", {})
|
|
if runner_test_failures:
|
|
summary_lines.append("## Runner-Specific Test Failures")
|
|
summary_lines.append("")
|
|
summary_lines.append(
|
|
"_Tests that fail multiple times on the same runner (possible runner-specific issues)_"
|
|
)
|
|
summary_lines.append("")
|
|
|
|
# Sort runners by number of multi-failure tests
|
|
sorted_runners = sorted(
|
|
runner_test_failures.items(),
|
|
key=lambda x: sum(test["count"] for test in x[1].values()),
|
|
reverse=True,
|
|
)
|
|
|
|
for runner_key, tests in sorted_runners[:10]: # Show top 10 runners
|
|
# Sort tests by failure count
|
|
sorted_tests = sorted(
|
|
tests.items(),
|
|
key=lambda x: x[1]["count"],
|
|
reverse=True,
|
|
)
|
|
|
|
# Get runner name from first test
|
|
runner_name = sorted_tests[0][1].get("runner_name", runner_key)
|
|
total_failures = sum(test["count"] for test in tests.values())
|
|
|
|
summary_lines.append("<details>")
|
|
summary_lines.append(
|
|
f"<summary>🤖 <b>Runner: {runner_name}</b> ({len(tests)} tests, {total_failures} total failures)</summary>"
|
|
)
|
|
summary_lines.append("")
|
|
summary_lines.append("| Test File | Failures | Jobs |")
|
|
summary_lines.append("|-----------|----------|------|")
|
|
|
|
for test_file, test_data in sorted_tests[
|
|
:15
|
|
]: # Show top 15 tests per runner
|
|
count = test_data["count"]
|
|
jobs = test_data["jobs"]
|
|
job_urls = test_data["job_urls"]
|
|
|
|
# Truncate test file name
|
|
test_display = (
|
|
test_file
|
|
if len(test_file) <= 35
|
|
else test_file[:32] + "..."
|
|
)
|
|
|
|
# Create job links (show first 3, then count)
|
|
job_links = []
|
|
for job_name, job_url in zip(jobs[:3], job_urls[:3]):
|
|
job_short = (
|
|
job_name
|
|
if len(job_name) <= 20
|
|
else job_name[:17] + "..."
|
|
)
|
|
job_links.append(f"[{job_short}]({job_url})")
|
|
|
|
jobs_str = ", ".join(job_links)
|
|
if len(jobs) > 3:
|
|
jobs_str += f" +{len(jobs) - 3} more"
|
|
|
|
# Highlight if many failures
|
|
if count >= 3:
|
|
summary_lines.append(
|
|
f"| <span style='color:red'>`{test_display}`</span> | <span style='color:red'>{count}</span> | <span style='color:red'>{jobs_str}</span> |"
|
|
)
|
|
else:
|
|
summary_lines.append(
|
|
f"| `{test_display}` | {count} | {jobs_str} |"
|
|
)
|
|
|
|
summary_lines.append("")
|
|
summary_lines.append("</details>")
|
|
summary_lines.append("")
|
|
|
|
# ========== SCHEDULED RUNS (9 sections) ==========
|
|
summary_lines.append("---")
|
|
summary_lines.append("# 📅 SCHEDULED RUNS (Main Branch)")
|
|
summary_lines.append("")
|
|
|
|
# Get limits
|
|
pr_sched_limit = report_data.get("pr_test_scheduled_limit", 12)
|
|
nightly_sched_limit = report_data.get("nightly_scheduled_limit", 6)
|
|
|
|
# PR Tests - Scheduled (5 workflows)
|
|
generate_job_section_md(
|
|
f"1. PR Test NVIDIA - Scheduled (latest {pr_sched_limit} runs)",
|
|
report_data.get("pr_test_nvidia_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"2. PR Test AMD - Scheduled (latest {pr_sched_limit} runs)",
|
|
report_data.get("pr_test_amd_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"3. PR Test Xeon - Scheduled (latest {pr_sched_limit} runs)",
|
|
report_data.get("pr_test_xeon_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"4. PR Test XPU - Scheduled (latest {pr_sched_limit} runs)",
|
|
report_data.get("pr_test_xpu_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"5. PR Test NPU - Scheduled (latest {pr_sched_limit} runs)",
|
|
report_data.get("pr_test_npu_scheduled_data", {}),
|
|
)
|
|
|
|
# Nightly Tests - Scheduled (4 workflows)
|
|
generate_job_section_md(
|
|
f"6. Nightly NVIDIA - Scheduled (latest {nightly_sched_limit} runs)",
|
|
report_data.get("nightly_nvidia_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"7. Nightly AMD - Scheduled (latest {nightly_sched_limit} runs)",
|
|
report_data.get("nightly_amd_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"8. Nightly Intel - Scheduled (latest {nightly_sched_limit} runs)",
|
|
report_data.get("nightly_intel_scheduled_data", {}),
|
|
)
|
|
generate_job_section_md(
|
|
f"9. Nightly NPU - Scheduled (latest {nightly_sched_limit} runs)",
|
|
report_data.get("nightly_npu_scheduled_data", {}),
|
|
)
|
|
|
|
# ========== GENERAL RUNS (9 sections) ==========
|
|
summary_lines.append("---")
|
|
summary_lines.append("# 🌍 GENERAL RUNS (All Branches)")
|
|
summary_lines.append("")
|
|
|
|
gen_limit = report_data.get("general_limit", 100)
|
|
|
|
# PR Tests - General (5 workflows) - with test failure analysis
|
|
generate_job_section_md(
|
|
f"10. PR Test NVIDIA - General (latest {gen_limit} runs)",
|
|
report_data.get("pr_test_nvidia_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"11. PR Test AMD - General (latest {gen_limit} runs)",
|
|
report_data.get("pr_test_amd_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"12. PR Test Xeon - General (latest {gen_limit} runs)",
|
|
report_data.get("pr_test_xeon_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"13. PR Test XPU - General (latest {gen_limit} runs)",
|
|
report_data.get("pr_test_xpu_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"14. PR Test NPU - General (latest {gen_limit} runs)",
|
|
report_data.get("pr_test_npu_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
|
|
# Nightly Tests - General (4 workflows) - with test failure analysis
|
|
generate_job_section_md(
|
|
f"15. Nightly NVIDIA - General (latest {gen_limit} runs)",
|
|
report_data.get("nightly_nvidia_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"16. Nightly AMD - General (latest {gen_limit} runs)",
|
|
report_data.get("nightly_amd_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"17. Nightly Intel - General (latest {gen_limit} runs)",
|
|
report_data.get("nightly_intel_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
generate_job_section_md(
|
|
f"18. Nightly NPU - General (latest {gen_limit} runs)",
|
|
report_data.get("nightly_npu_general_data", {}),
|
|
show_test_failures=True,
|
|
test_failures_dict=job_test_failures_general,
|
|
)
|
|
|
|
# Write summary
|
|
with open(github_step_summary, "a", encoding="utf-8") as f:
|
|
f.write("\n".join(summary_lines))
|
|
|
|
print("GitHub Actions summary generated successfully")
|
|
|
|
except Exception as e:
|
|
print(f"Failed to generate GitHub Actions summary: {e}")
|
|
import traceback
|
|
|
|
traceback.print_exc()
|
|
|
|
|
|
def main():
    """Command-line entry point: fetch CI runs, analyze them, emit the reports.

    Parses ``--token``, ``--limit`` and ``--output``. For each of the nine
    tracked workflows it fetches a "scheduled" window (fixed size) and a
    "general" window (``--limit`` runs), runs the consecutive-failure,
    runner-health and test-failure analyses, and hands every dataset to
    ``generate_failure_report`` followed by ``generate_github_summary``.

    Returns early (after printing a message) when no NVIDIA scheduled runs are
    found. Any exception raised while fetching or analyzing is printed with
    its traceback and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="SGLang Consecutive Failures Analyzer")
    parser.add_argument("--token", required=True, help="GitHub Personal Access Token")
    parser.add_argument(
        "--limit",
        type=int,
        default=100,
        help="Number of workflow runs to analyze per workflow for general analysis (default: 100)",
    )
    parser.add_argument(
        "--output",
        default=None,
        help="Output JSON file (optional, only writes if specified)",
    )

    args = parser.parse_args()

    analyzer = SGLangFailuresAnalyzer(args.token)

    # Fixed limits for scheduled runs
    pr_test_scheduled_limit = 12  # Past 12 scheduled PR Test runs
    nightly_scheduled_limit = 6  # Past 6 scheduled Nightly Test runs

    # One row per workflow: (workflow file, scheduled-window limit, filters
    # used for the scheduled window). The row order is significant: it is the
    # order in which the datasets are later merged and passed positionally to
    # generate_failure_report (5 PR-test workflows, then 4 nightly workflows).
    workflows = [
        ("pr-test.yml", pr_test_scheduled_limit, {"event": "schedule"}),
        # These 4 don't have scheduled events, so filter by main branch instead
        ("pr-test-amd.yml", pr_test_scheduled_limit, {"branch": "main"}),
        ("pr-test-xeon.yml", pr_test_scheduled_limit, {"branch": "main"}),
        ("pr-test-xpu.yml", pr_test_scheduled_limit, {"branch": "main"}),
        ("pr-test-npu.yml", pr_test_scheduled_limit, {"branch": "main"}),
        ("nightly-test-nvidia.yml", nightly_scheduled_limit, {"event": "schedule"}),
        ("nightly-test-amd.yml", nightly_scheduled_limit, {"event": "schedule"}),
        ("nightly-test-intel.yml", nightly_scheduled_limit, {"event": "schedule"}),
        ("nightly-test-npu.yml", nightly_scheduled_limit, {"event": "schedule"}),
    ]

    def print_banner(title):
        # Section header on stdout: blank line, 80-char rule, title, rule.
        print("\n" + "=" * 80)
        print(title)
        print("=" * 80)

    def job_data_or_empty(runs):
        # Skip the analysis entirely when nothing was fetched for a workflow;
        # otherwise keep only the first element of the returned pair.
        if not runs:
            return {}
        job_data, _ = analyzer.analyze_consecutive_failures(runs)
        return job_data

    try:
        # Fetch runs for each category separately
        print_banner("FETCHING WORKFLOW RUNS")

        # === SCHEDULED RUNS (9 workflows) ===
        # Dicts preserve insertion order, so .values() below follows `workflows`.
        scheduled_runs = {
            workflow: analyzer.get_recent_runs(
                limit=limit,
                workflow_filter=[workflow],
                filters=filters,
            )
            for workflow, limit, filters in workflows
        }

        # === GENERAL RUNS (9 workflows) ===
        # No filters here: the general window covers every branch and event.
        general_runs = {
            workflow: analyzer.get_recent_runs(
                limit=args.limit,
                workflow_filter=[workflow],
            )
            for workflow, _, _ in workflows
        }

        # Choosing nvidia pr test and nightly for runner health analysis.
        # Use scheduled runs (already limited to 12 PR + 6 nightly) to avoid
        # pulling months of history from the unfiltered general fetch.
        runner_runs = (
            scheduled_runs["pr-test.yml"] + scheduled_runs["nightly-test-nvidia.yml"]
        )

        # runner_runs already contains the NVIDIA PR-test runs, so an empty
        # concatenation implies that list is empty too.
        # NOTE(review): this bails out even when other workflows returned
        # runs; confirm that skipping the whole report is intended.
        if not runner_runs:
            print("No workflow runs found")
            return

        print_banner("ANALYZING CONSECUTIVE FAILURES")

        # Analyze SCHEDULED runs, then GENERAL runs (same workflow order)
        scheduled_data = [job_data_or_empty(runs) for runs in scheduled_runs.values()]
        general_data = [job_data_or_empty(runs) for runs in general_runs.values()]

        # Analyze runner health and consecutive failures on the NVIDIA
        # scheduled runs selected above
        (
            runner_stats,
            runner_instance_data,
            runner_streak_data,
            runner_instance_streak_data,
        ) = analyzer.analyze_runner_health(runner_runs)

        # Fetch online runner status
        online_runners = analyzer.get_online_runners()

        # Analyze test-level failures for broken/high-failure-rate jobs.
        # Combine all scheduled data for test failure analysis (main branch,
        # most important). On duplicate job names the later workflow wins,
        # exactly as with chained ** unpacking.
        all_scheduled_data = {}
        for dataset in scheduled_data:
            all_scheduled_data.update(dataset)
        job_test_failures = analyzer.analyze_test_failures_for_broken_jobs(
            all_scheduled_data
        )

        # Analyze test-level failures for general runs (all branches)
        all_general_data = {}
        for dataset in general_data:
            all_general_data.update(dataset)
        job_test_failures_general = analyzer.analyze_test_failures_for_broken_jobs(
            all_general_data
        )

        # Analyze runner-specific test failures
        runner_test_failures = analyzer.analyze_runner_specific_test_failures(
            runner_runs
        )

        # Generate report with all datasets
        report_data = analyzer.generate_failure_report(
            # Scheduled runs (9 workflows), in `workflows` order
            *scheduled_data,
            # General runs (9 workflows), in `workflows` order
            *general_data,
            # Runners
            runner_stats,
            runner_instance_data,
            runner_streak_data,
            runner_instance_streak_data,
            online_runners,
            # Test failures
            job_test_failures,
            job_test_failures_general,
            runner_test_failures,
            # Config
            args.output,
            pr_test_scheduled_limit,
            nightly_scheduled_limit,
            args.limit,
        )

        # Generate GitHub Actions summary
        analyzer.generate_github_summary(report_data)

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error during analysis: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
|
|
|
|
|
# Run the analyzer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|