Add q1/q3 quartiles to the GpuTimingData class

The quartile values are not currently used, but they are plumbed through.
This commit is contained in:
Oleksandr Pavlyk
2026-06-03 06:35:24 -05:00
parent 0d1d9d2838
commit 71823e2f4f
2 changed files with 17 additions and 1 deletions

View File

@@ -37,7 +37,9 @@ GPU_TIME_MAX_TAG = "nv/cold/time/gpu/max"
GPU_TIME_MEAN_TAG = "nv/cold/time/gpu/mean"
GPU_TIME_STDEV_TAG = "nv/cold/time/gpu/stdev/absolute"
GPU_TIME_STDEV_RELATIVE_TAG = "nv/cold/time/gpu/stdev/relative"
GPU_TIME_Q1_TAG = "nv/cold/time/gpu/q1"
GPU_TIME_MEDIAN_TAG = "nv/cold/time/gpu/median"
GPU_TIME_Q3_TAG = "nv/cold/time/gpu/q3"
GPU_TIME_IR_TAG = "nv/cold/time/gpu/ir/absolute"
GPU_TIME_IR_RELATIVE_TAG = "nv/cold/time/gpu/ir/relative"
GPU_SM_CLOCK_RATE_MEAN_TAG = "nv/cold/sm_clock_rate/mean"
@@ -79,7 +81,9 @@ class GpuTimingData:
mean: float | None
stdev: float | None
stdev_relative: float | None
first_quartile: float | None
median: float | None
third_quartile: float | None
interquartile_range: float | None
interquartile_range_relative: float | None
sm_clock_rate_mean: float | None = None
@@ -461,7 +465,9 @@ def extract_gpu_timing_data(summaries, json_dir=None, float32_reader=read_float3
stdev_relative=extract_summary_float(
summaries, GPU_TIME_STDEV_RELATIVE_TAG, null_value=math.inf
),
first_quartile=extract_summary_float(summaries, GPU_TIME_Q1_TAG),
median=extract_summary_float(summaries, GPU_TIME_MEDIAN_TAG),
third_quartile=extract_summary_float(summaries, GPU_TIME_Q3_TAG),
interquartile_range=extract_summary_float(
summaries, GPU_TIME_IR_TAG, null_value=math.inf
),

View File

@@ -116,7 +116,9 @@ def make_gpu_timing_data(
mean=1.0,
stdev=None,
stdev_relative=0.01,
first_quartile=None,
median=None,
third_quartile=None,
interquartile_range=None,
interquartile_range_relative=None,
sm_clock_rate_mean=None,
@@ -127,7 +129,9 @@ def make_gpu_timing_data(
mean=mean,
stdev=stdev,
stdev_relative=stdev_relative,
first_quartile=first_quartile,
median=median,
third_quartile=third_quartile,
interquartile_range=interquartile_range,
interquartile_range_relative=interquartile_range_relative,
sm_clock_rate_mean=sm_clock_rate_mean,
@@ -387,14 +391,20 @@ def test_gpu_timing_data_loads_samples_and_frequencies_lazily(
assert reader_calls == [str(samples_file), str(freqs_file)]
def test_gpu_timing_data_parses_sm_clock_rate_mean(nvbench_compare):
def test_gpu_timing_data_parses_quartiles_and_sm_clock_rate_mean(nvbench_compare):
timing = nvbench_compare.extract_gpu_timing_data(
[
make_summary(nvbench_compare, "GPU_TIME_MEAN_TAG", "2.0"),
make_summary(nvbench_compare, "GPU_TIME_Q1_TAG", "1.5"),
make_summary(nvbench_compare, "GPU_TIME_MEDIAN_TAG", "2.0"),
make_summary(nvbench_compare, "GPU_TIME_Q3_TAG", "2.5"),
make_summary(nvbench_compare, "GPU_SM_CLOCK_RATE_MEAN_TAG", "1.5e9"),
],
)
assert timing.first_quartile == pytest.approx(1.5)
assert timing.median == pytest.approx(2.0)
assert timing.third_quartile == pytest.approx(2.5)
assert timing.sm_clock_rate_mean == pytest.approx(1.5e9)
assert timing.frequencies is None