Add bulk-cycle confirmation of clear timing gaps to nvbench_compare.

compare_timings_for_clear_gap() now first tries to confirm a clear gap with
intervals built from bulk cycle samples, and only falls back to the
SM-clock-adjusted summary check when no bulk decision is available. The
reason codes of the summary check are renamed to mention "summary" so the two
confirmation paths can be told apart.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, so context-line indentation is a best
guess. The post-image blob hash on the nvbench_compare.py "index" line predates
the docstring and quantile edits below; regenerate the patch before relying on
that hash.

diff --git a/python/scripts/nvbench_compare.py b/python/scripts/nvbench_compare.py
index 3b69360..7f77910 100644
--- a/python/scripts/nvbench_compare.py
+++ b/python/scripts/nvbench_compare.py
@@ -646,6 +646,25 @@ def compute_timing_interval(timing):
     return None
 
 
+def compute_timing_interval_from_samples(samples):
+    """Build a timing interval directly from raw per-sample values.
+
+    Returns None when positive_finite_array() rejects the samples; otherwise
+    the interval spans the sample minimum to the third quartile and is
+    centered on the median.
+    """
+    values = positive_finite_array(samples)
+    if values is None:
+        return None
+
+    median, third_quartile = np.quantile(values, [0.5, 0.75])
+    return make_timing_interval(
+        lower=np.min(values),
+        upper=third_quartile,
+        center=median,
+    )
+
+
 def make_decision(status, code, message, *, severity=0.0):
     return TimingDecision(
         status=status,
@@ -932,16 +951,52 @@ def confirm_clear_gap_with_clock_rate(
     if cycle_status == status:
         return make_decision(
             status,
-            "clear_gap_confirmed_by_cycles",
+            "clear_gap_confirmed_by_summary_cycles",
             "clear timing gap was confirmed by SM-clock-adjusted cycle intervals",
         )
     return make_decision(
         ComparisonStatus.UNDECIDED,
-        "cycle_gap_not_confirmed",
+        "summary_cycle_gap_not_confirmed",
         "clear timing gap was not confirmed by SM-clock-adjusted cycle intervals",
     )
 
 
+def confirm_clear_gap_with_bulk_cycles(status, ref_timing, cmp_timing, thresholds):
+    """Re-check a clear timing gap using intervals built from bulk cycle samples.
+
+    Returns None when bulk data is unavailable for either timing or a cycle
+    interval cannot be built, so the caller can fall back to another check.
+    Otherwise returns a decision carrying ``status`` if the cycle intervals
+    show the same clear gap, or an UNDECIDED decision if they do not.
+    """
+    ref_bulk = get_bulk_time_and_cycles(ref_timing)
+    cmp_bulk = get_bulk_time_and_cycles(cmp_timing)
+    if ref_bulk is None or cmp_bulk is None:
+        return None
+
+    _, ref_cycles = ref_bulk
+    _, cmp_cycles = cmp_bulk
+    ref_cycle_interval = compute_timing_interval_from_samples(ref_cycles)
+    cmp_cycle_interval = compute_timing_interval_from_samples(cmp_cycles)
+    if ref_cycle_interval is None or cmp_cycle_interval is None:
+        return None
+
+    cycle_status = compare_intervals_for_clear_gap(
+        ref_cycle_interval, cmp_cycle_interval, thresholds
+    )
+    if cycle_status == status:
+        return make_decision(
+            status,
+            "clear_gap_confirmed_by_bulk_cycles",
+            "clear timing gap was confirmed by bulk cycle intervals",
+        )
+    return make_decision(
+        ComparisonStatus.UNDECIDED,
+        "bulk_cycle_gap_not_confirmed",
+        "clear timing gap was not confirmed by bulk cycle intervals",
+    )
+
+
 def compare_timings_for_clear_gap(ref_timing, cmp_timing, thresholds):
     ref_interval = compute_timing_interval(ref_timing)
     cmp_interval = compute_timing_interval(cmp_timing)
@@ -960,6 +1015,12 @@ def compare_timings_for_clear_gap(ref_timing, cmp_timing, thresholds):
             "timing intervals do not have a sufficient clear gap",
         )
 
+    bulk_decision = confirm_clear_gap_with_bulk_cycles(
+        status, ref_timing, cmp_timing, thresholds
+    )
+    if bulk_decision is not None:
+        return bulk_decision
+
     return confirm_clear_gap_with_clock_rate(
         status, ref_timing, cmp_timing, ref_interval, cmp_interval, thresholds
     )
diff --git a/python/test/test_nvbench_compare.py b/python/test/test_nvbench_compare.py
index 70e4e8b..b75a0a8 100644
--- a/python/test/test_nvbench_compare.py
+++ b/python/test/test_nvbench_compare.py
@@ -510,7 +510,7 @@ def test_compare_gpu_timings_classifies_common_cases(nvbench_compare):
     )
     assert fast is not None
     assert fast.status == nvbench_compare.ComparisonStatus.FAST
-    assert fast.reason.code == "clear_gap_confirmed_by_cycles"
+    assert fast.reason.code == "clear_gap_confirmed_by_summary_cycles"
 
     slow = nvbench_compare.compare_gpu_timings(
         ref_interval_timing,
@@ -527,7 +527,7 @@ def test_compare_gpu_timings_classifies_common_cases(nvbench_compare):
     )
     assert slow is not None
     assert slow.status == nvbench_compare.ComparisonStatus.SLOW
-    assert slow.reason.code == "clear_gap_confirmed_by_cycles"
+    assert slow.reason.code == "clear_gap_confirmed_by_summary_cycles"
 
     same = nvbench_compare.compare_gpu_timings(
         ref_interval_timing,
@@ -650,7 +650,63 @@ def test_compare_gpu_timings_classifies_common_cases(nvbench_compare):
     )
     assert frequency_shift is not None
     assert frequency_shift.status == nvbench_compare.ComparisonStatus.UNDECIDED
-    assert frequency_shift.reason.code == "cycle_gap_not_confirmed"
+    assert frequency_shift.reason.code == "summary_cycle_gap_not_confirmed"
+
+    bulk_cycle_fast = nvbench_compare.compare_gpu_timings(
+        make_gpu_timing_data(
+            nvbench_compare,
+            minimum=1.0,
+            first_quartile=1.1,
+            median=1.2,
+            third_quartile=1.3,
+            mean=1.2,
+            stdev_relative=0.05,
+            sample_values=[1.0, 1.1, 1.2, 1.3],
+            frequency_values=[100.0] * 4,
+        ),
+        make_gpu_timing_data(
+            nvbench_compare,
+            minimum=0.8,
+            first_quartile=0.85,
+            median=0.9,
+            third_quartile=0.95,
+            mean=0.9,
+            stdev_relative=0.05,
+            sample_values=[0.8, 0.85, 0.9, 0.95],
+            frequency_values=[100.0] * 4,
+        ),
+    )
+    assert bulk_cycle_fast is not None
+    assert bulk_cycle_fast.status == nvbench_compare.ComparisonStatus.FAST
+    assert bulk_cycle_fast.reason.code == "clear_gap_confirmed_by_bulk_cycles"
+
+    bulk_cycle_shift = nvbench_compare.compare_gpu_timings(
+        make_gpu_timing_data(
+            nvbench_compare,
+            minimum=1.0,
+            first_quartile=1.1,
+            median=1.2,
+            third_quartile=1.3,
+            mean=1.2,
+            stdev_relative=0.05,
+            sample_values=[1.0, 1.1, 1.2, 1.3],
+            frequency_values=[100.0] * 4,
+        ),
+        make_gpu_timing_data(
+            nvbench_compare,
+            minimum=0.8,
+            first_quartile=0.85,
+            median=0.9,
+            third_quartile=0.95,
+            mean=0.9,
+            stdev_relative=0.05,
+            sample_values=[0.8, 0.85, 0.9, 0.95],
+            frequency_values=[200.0] * 4,
+        ),
+    )
+    assert bulk_cycle_shift is not None
+    assert bulk_cycle_shift.status == nvbench_compare.ComparisonStatus.UNDECIDED
+    assert bulk_cycle_shift.reason.code == "bulk_cycle_gap_not_confirmed"
 
     missing_noise = nvbench_compare.compare_gpu_timings(
         ref_timing,