From 8e56a7bd9449bd3b05f85032207c886e91bfe83f Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Tue, 21 Dec 2021 21:05:13 -0500 Subject: [PATCH] Add `noisy_bench` with some benchmarks that currently always time-out. --- CMakeLists.txt | 4 + nvbench/detail/transform_reduce.cuh | 2 + nvbench/state.cxx | 4 +- testing/device/CMakeLists.txt | 14 +++ testing/device/noisy_bench.cu | 146 ++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 testing/device/CMakeLists.txt create mode 100644 testing/device/noisy_bench.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 08e239d..369d631 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,10 @@ option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default}) option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) +option(NVBench_ENABLE_DEVICE_TESTING + "Include tests that require a GPU (with locked clocks)." + OFF +) option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF) include(cmake/NVBenchConfigTarget.cmake) diff --git a/nvbench/detail/transform_reduce.cuh b/nvbench/detail/transform_reduce.cuh index ae8ab6a..8bc5db6 100644 --- a/nvbench/detail/transform_reduce.cuh +++ b/nvbench/detail/transform_reduce.cuh @@ -18,6 +18,8 @@ #pragma once +#include + // Many compilers still don't ship transform_reduce with their STLs, so here's // a naive implementation that will work everywhere. This is never used in a // critical section, so perf isn't a concern. 
diff --git a/nvbench/state.cxx b/nvbench/state.cxx
index 931dbbc..505722a 100644
--- a/nvbench/state.cxx
+++ b/nvbench/state.cxx
@@ -128,7 +128,7 @@ const summary &state::get_summary(std::string_view name) const
                  [&name](const auto &s) { return s.get_name() == name; });
   if (iter == m_summaries.cend())
   {
-    NVBENCH_THROW(std::runtime_error, "No summary named '{}'.", name);
+    NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
   }
   return *iter;
 }
@@ -140,7 +140,7 @@ summary &state::get_summary(std::string_view name)
                  [&name](auto &s) { return s.get_name() == name; });
   if (iter == m_summaries.end())
   {
-    NVBENCH_THROW(std::runtime_error, "No summary named '{}'.", name);
+    NVBENCH_THROW(std::invalid_argument, "No summary named '{}'.", name);
   }
   return *iter;
 }
diff --git a/testing/device/CMakeLists.txt b/testing/device/CMakeLists.txt
new file mode 100644
index 0000000..2a5fdf9
--- /dev/null
+++ b/testing/device/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Test that we're converging to an accurate mean + stdev without timing out:
+set(test_name nvbench.test.device.noisy_bench)
+add_executable(${test_name} noisy_bench.cu)
+target_link_libraries(${test_name} PRIVATE nvbench::main fmt)
+nvbench_config_target(${test_name})
+add_dependencies(nvbench.test.all ${test_name})
+
+if (NVBench_ENABLE_DEVICE_TESTING)
+  add_test(NAME ${test_name} COMMAND "$<TARGET_FILE:${test_name}>")
+  set_tests_properties(${test_name} PROPERTIES
+    # Any timeouts/warnings are hard failures for this test.
+    FAIL_REGULAR_EXPRESSION "Warn;timed out"
+  )
+endif()
diff --git a/testing/device/noisy_bench.cu b/testing/device/noisy_bench.cu
new file mode 100644
index 0000000..686fbda
--- /dev/null
+++ b/testing/device/noisy_bench.cu
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 with the LLVM exception
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *     http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): nvbench/fmt header names reconstructed from usage; confirm.
+#include <nvbench/nvbench.cuh>
+#include <nvbench/test_kernels.cuh>
+
+#include <fmt/format.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <random>
+#include <stdexcept>
+
+// Launches sleep kernels whose durations are drawn from a normal distribution
+// defined by the "Mean" (ms) and "Noise" (relative stdev, %) axes, then prints
+// a table comparing those targets with the cold GPU-time summaries recorded
+// for the run. A "Warn:" line is printed for each statistic that misses its
+// threshold.
+void noisy_bench(nvbench::state &state)
+{
+  // Target mean in seconds (the axis value is in milliseconds).
+  const auto mean = static_cast<float>(state.get_float64("Mean")) / 1000.f;
+  // Target relative stdev, converted from percent to a fraction.
+  const auto noise_pct = static_cast<float>(state.get_float64("Noise"));
+  const auto noise = noise_pct / 100.f;
+  // Target absolute stdev in seconds.
+  const auto stdev = noise * mean;
+
+  // Default-seeded engine: each invocation draws the same random sequence.
+  std::minstd_rand rng{};
+  std::normal_distribution<float> dist(mean, stdev);
+
+  // cold tag will save time by disabling batch measurements
+  state.exec(nvbench::exec_tag::impl::cold, [&](nvbench::launch &launch) {
+    const auto seconds = dist(rng);
+    nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(seconds);
+  });
+
+  const auto measured_mean = static_cast<float>(
+    state.get_summary("Average GPU Time (Cold)").get_float64("value"));
+  const auto measured_noise = [&]() {
+    try
+    {
+      return static_cast<float>(
+        state.get_summary("GPU Relative Standard Deviation (Cold)")
+          .get_float64("value"));
+    }
+    catch (const std::invalid_argument &)
+    {
+      // get_summary throws when the summary is missing; infinity makes the
+      // stdev check below fail in that case.
+      return std::numeric_limits<float>::infinity();
+    }
+  }();
+  const auto measured_stdev = measured_noise * measured_mean;
+
+  const auto mean_error = std::fabs(measured_mean - mean);
+  const auto stdev_error = std::fabs(measured_stdev - stdev);
+  const auto noise_error = std::fabs(measured_noise - noise);
+
+  const auto mean_threshold = std::max(0.025f * mean, 8e-6f); // 2.5% or 8us
+  const auto stdev_threshold = std::max(0.05f * stdev, 5e-6f); // 5% or 5us
+
+  const auto mean_pass = mean_error < mean_threshold;
+  const auto stdev_pass = stdev_error < stdev_threshold;
+
+  fmt::print("| {:^5} "
+             "| {:^12} | {:^12} "
+             "| {:^12} | {:^12} | {:^4} |\n",
+             "",
+             "Expected",
+             "Measured",
+             "Error",
+             "Threshold",
+             "Flag");
+  fmt::print("|{:-^7}"
+             "|{:-^14}|{:-^14}"
+             "|{:-^14}|{:-^14}|{:-^6}|\n",
+             "",
+             "",
+             "",
+             "",
+             "",
+             "");
+  fmt::print("| Mean  "
+             "| {:>9.6f} ms | {:>9.6f} ms "
+             "| {:>9.6f} ms | {:>9.6f} ms | {:4} |\n"
+             "| Stdev "
+             "| {:>9.6f} ms | {:>9.6f} ms "
+             "| {:>9.6f} ms | {:>9.6f} ms | {:4} |\n"
+             "| Noise "
+             "| {:>9.6f}%   | {:>9.6f}%   "
+             "| {:>9.6f}%   | {:12} | {:4} |\n",
+             mean * 1000,
+             measured_mean * 1000,
+             mean_error * 1000,
+             mean_threshold * 1000,
+             mean_pass ? "" : "!!!!",
+
+             stdev * 1000,
+             measured_stdev * 1000,
+             stdev_error * 1000,
+             stdev_threshold * 1000,
+             stdev_pass ? "" : "!!!!",
+
+             noise * 100,
+             measured_noise * 100,
+             noise_error * 100,
+             "",
+             "");
+
+  if (!mean_pass)
+  {
+    // The "Warn" prefix matches this test's FAIL_REGULAR_EXPRESSION (see
+    // CMakeLists.txt), so ctest reports the run as failed.
+    fmt::print("Warn: Mean error exceeds threshold: ({:.3} ms > {:.3} ms)\n",
+               mean_error * 1000,
+               mean_threshold * 1000);
+  }
+
+  if (!stdev_pass)
+  {
+    // As above: the "Warn" prefix makes ctest report the run as failed.
+    fmt::print("Warn: Stdev error exceeds threshold: "
+               "({:.6} ms > {:.6} ms, noise: {:.3}%)\n",
+               stdev_error * 1000,
+               stdev_threshold * 1000,
+               measured_noise * 100);
+  }
+}
+NVBENCH_BENCH(noisy_bench)
+  .add_float64_axis("Mean", {0.05, 0.1, 0.5, 1.0, 10.0}) // ms
+  .add_float64_axis("Noise", {0.1, 5., 25.})             // %
+  // Use a max_noise too small to be reached; we want to test that the
+  // benchmarking loop will still exit when max_noise is never reached:
+  .set_max_noise(0.0000001);