Files
sglang/scripts/ci/slurm/process_result.py
2026-04-10 15:12:24 -07:00

118 lines
3.9 KiB
Python

"""Process a raw srt-slurm benchmark result JSON into an aggregated format.
Usage (called once per result file; every variable listed under
"Required env vars" below must be set, only a subset is shown here):
    RESULT_FILENAME=<path_without_.json> PREFILL_GPUS=<n> DECODE_GPUS=<n> \\
    RECIPE_FILE=<path_to_recipe.yaml> python3 process_result.py
Required env vars:
RESULT_FILENAME - path to the result file without the .json extension
FRAMEWORK - e.g. dynamo-sglang
PRECISION - e.g. fp8, fp4
MODEL_PREFIX - short model label, e.g. dsr1
ISL - input sequence length
OSL - output sequence length
PREFILL_GPUS - number of prefill GPUs (extracted from result filename)
DECODE_GPUS - number of decode GPUs (extracted from result filename)
Optional env vars:
RECIPE_FILE - path to the srt-slurm recipe YAML; if set and the file
    exists, topology fields (TP, EP, DP, workers) are parsed from it,
    otherwise they all default to 0
"""
import json
import os
import sys
from pathlib import Path
def require(var):
    """Return the value of the required environment variable *var*.

    Args:
        var: Name of the environment variable to read.

    Returns:
        The variable's (non-empty) string value.

    Exits:
        Prints an error to stderr and terminates the process with status 1
        when the variable is unset or set to an empty string.
    """
    val = os.environ.get(var)
    if val is None:
        print(f"ERROR: Missing required env var: {var}", file=sys.stderr)
        sys.exit(1)
    # A variable that is set but empty (e.g. an unexpanded CI template value)
    # is just as unusable as an unset one; fail here with a clear message
    # rather than later with an opaque int("") error or blank labels.
    if not val:
        print(f"ERROR: Empty required env var: {var}", file=sys.stderr)
        sys.exit(1)
    return val
# Keys of the topology section of the aggregated record, in output order.
TOPOLOGY_FIELDS = (
    "prefill_num_workers",
    "prefill_tp",
    "prefill_ep",
    "prefill_dp_attention",
    "decode_num_workers",
    "decode_tp",
    "decode_ep",
    "decode_dp_attention",
)

# Suffix marking a raw metric expressed in milliseconds.
MS_SUFFIX = "_ms"


def _section(mapping, key):
    """Return ``mapping[key]`` as a mapping, treating a missing/null entry as empty."""
    return mapping.get(key) or {}


def parse_topology(recipe):
    """Extract worker counts and parallelism sizes from a parsed recipe.

    Args:
        recipe: The object returned by ``yaml.safe_load`` for the recipe file.
            ``None`` (an empty YAML document) is treated as an empty mapping.

    Returns:
        A dict keyed by ``TOPOLOGY_FIELDS`` (in that order). Missing worker,
        tensor-parallel and expert-parallel entries default to 0; a missing
        data-parallel size defaults to "-".
    """
    recipe = recipe or {}
    resources = _section(recipe, "resources")
    sglang_config = _section(_section(recipe, "backend"), "sglang_config")
    prefill = _section(sglang_config, "prefill")
    decode = _section(sglang_config, "decode")
    return {
        "prefill_num_workers": resources.get("prefill_workers", 0),
        "prefill_tp": prefill.get("tensor-parallel-size", 0),
        "prefill_ep": prefill.get("expert-parallel-size", 0),
        "prefill_dp_attention": prefill.get("data-parallel-size", "-"),
        "decode_num_workers": resources.get("decode_workers", 0),
        "decode_tp": decode.get("tensor-parallel-size", 0),
        "decode_ep": decode.get("expert-parallel-size", 0),
        "decode_dp_attention": decode.get("data-parallel-size", "-"),
    }


def load_topology(recipe_file):
    """Return the topology fields for *recipe_file*.

    Args:
        recipe_file: Path to the recipe YAML, or a falsy value when no recipe
            was supplied.

    Returns:
        The result of ``parse_topology`` when the file exists; otherwise a
        dict with every ``TOPOLOGY_FIELDS`` key set to 0.
    """
    if not recipe_file or not Path(recipe_file).exists():
        return dict.fromkeys(TOPOLOGY_FIELDS, 0)
    # PyYAML is third-party; import lazily so runs without a recipe do not
    # require it to be installed.
    import yaml

    with open(recipe_file, encoding="utf-8") as f:
        return parse_topology(yaml.safe_load(f))


def convert_latency_metrics(raw):
    """Derive second-based latency and interactivity metrics from *raw*.

    For every key ending in ``_ms`` the value is divided by 1000 and stored
    under the key with that suffix removed. For every key containing ``tpot``
    an additional ``intvty`` entry holding ``1000 / value`` is stored under
    the same name with ``tpot`` replaced by ``intvty``.

    Args:
        raw: Mapping of raw benchmark result fields.

    Returns:
        A dict of the derived metrics, in the iteration order of *raw*.
    """
    metrics = {}
    for key, value in raw.items():
        has_ms_suffix = key.endswith(MS_SUFFIX)
        # Strip only the trailing suffix; str.replace would also eat an
        # "_ms" occurring in the middle of a key.
        base = key[: -len(MS_SUFFIX)] if has_ms_suffix else key
        if has_ms_suffix:
            metrics[base] = float(value) / 1000.0
        if "tpot" in key:
            tpot = float(value)
            # A zero TPOT (e.g. the std-dev of a single-request run) has no
            # finite reciprocal; emit 0.0 as an "undefined" sentinel rather
            # than crashing or writing a non-standard JSON Infinity.
            metrics[base.replace("tpot", "intvty")] = 1000.0 / tpot if tpot else 0.0
    return metrics


def build_record(
    raw,
    *,
    framework,
    precision,
    model_prefix,
    isl,
    osl,
    prefill_gpus,
    decode_gpus,
    topology,
):
    """Assemble the aggregated result record.

    Args:
        raw: Parsed raw benchmark result; must contain ``max_concurrency``,
            ``model_id``, ``total_token_throughput`` and ``output_throughput``.
        framework: Framework label copied into the record.
        precision: Precision label copied into the record.
        model_prefix: Short model label stored as ``infmax_model_prefix``.
        isl: Input sequence length.
        osl: Output sequence length.
        prefill_gpus: Number of prefill GPUs (must be positive).
        decode_gpus: Number of decode GPUs (must be positive).
        topology: Dict of topology fields merged into the record as-is.

    Returns:
        The aggregated record as a dict, including the derived latency
        metrics from ``convert_latency_metrics``.

    Raises:
        ValueError: If either GPU count is not positive, since the per-GPU
            throughputs would be undefined.
        KeyError: If a required field is missing from *raw*.
    """
    if prefill_gpus <= 0 or decode_gpus <= 0:
        raise ValueError(
            "PREFILL_GPUS and DECODE_GPUS must both be positive, got "
            f"{prefill_gpus} and {decode_gpus}"
        )

    total_tput = float(raw["total_token_throughput"])
    output_tput = float(raw["output_throughput"])

    record = {
        "hw": "gb200",
        "conc": int(raw["max_concurrency"]),
        "model": raw["model_id"],
        "infmax_model_prefix": model_prefix,
        "framework": framework,
        "precision": precision,
        "isl": isl,
        "osl": osl,
        "is_multinode": True,
        "disagg": True,
        "num_prefill_gpu": prefill_gpus,
        "num_decode_gpu": decode_gpus,
    }
    record.update(topology)
    record.update(
        {
            "tput_per_gpu": total_tput / (prefill_gpus + decode_gpus),
            "output_tput_per_gpu": output_tput / decode_gpus,
            # Input throughput is whatever part of the total is not output.
            "input_tput_per_gpu": (total_tput - output_tput) / prefill_gpus,
        }
    )
    record.update(convert_latency_metrics(raw))
    return record


def main():
    """Read the env-configured raw result and write ``agg_<name>.json`` beside it."""
    result_filename = require("RESULT_FILENAME")
    framework = require("FRAMEWORK")
    precision = require("PRECISION")
    model_prefix = require("MODEL_PREFIX")
    isl = int(require("ISL"))
    osl = int(require("OSL"))
    prefill_gpus = int(require("PREFILL_GPUS"))
    decode_gpus = int(require("DECODE_GPUS"))

    with open(f"{result_filename}.json", encoding="utf-8") as f:
        raw = json.load(f)

    # -----------------------------------------------------------------------
    # Topology — parsed from the recipe YAML if available, otherwise all 0
    # -----------------------------------------------------------------------
    topology = load_topology(os.environ.get("RECIPE_FILE"))

    data = build_record(
        raw,
        framework=framework,
        precision=precision,
        model_prefix=model_prefix,
        isl=isl,
        osl=osl,
        prefill_gpus=prefill_gpus,
        decode_gpus=decode_gpus,
        topology=topology,
    )

    result_path = Path(result_filename)
    out_path = result_path.parent / f"agg_{result_path.name}.json"
    with open(out_path, "w") as f:
        json.dump(data, f, indent=2)
    print(f"Written: {out_path}")


if __name__ == "__main__":
    main()