Add NVML support for persistence mode, locking clocks.

Locking clocks is currently only implemented for Volta+ devices.

Example usage:

my_bench -d [0,1,3] --persistence-mode 1 --lock-gpu-clocks base

See the cli_help.md docs for more info.
This commit is contained in:
Allison Vacanti
2021-10-21 01:01:16 -04:00
parent d0c90ff920
commit b948e79cab
18 changed files with 656 additions and 19 deletions

View File

@@ -1,4 +1,5 @@
# 3.20.1 required for rapids-cmake
# 3.21.0 required for NVBench_ADD_DEPENDENT_DLLS_TO_* (MSVC only)
cmake_minimum_required(VERSION 3.20.1)
set(CMAKE_CXX_STANDARD 17)
@@ -15,17 +16,21 @@ project(NVBench
nvbench_init_rapids_cmake()
option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON)
option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF)
include(cmake/NVBenchConfigTarget.cmake)
include(cmake/NVBenchDependencies.cmake)
include(cmake/NVBenchDependentDlls.cmake)
include(cmake/NVBenchExports.cmake)
include(cmake/NVBenchWriteConfigHeader.cmake)
include(cmake/NVBenchDependencies.cmake)
include(cmake/NVBenchInstallRules.cmake)
include(cmake/NVBenchUtilities.cmake)
message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF)
add_subdirectory(nvbench)
if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING)

View File

@@ -10,7 +10,7 @@ features:
be dynamic numbers/strings or [static types](docs/benchmarks.md#type-axes).
* [Runtime customization](docs/cli_help.md): A rich command-line interface
allows [redefinition of parameter axes](docs/cli_help_axis.md), CUDA device
selection, changing output formats, and more.
selection, locking GPU clocks (Volta+), changing output formats, and more.
* [Throughput calculations](docs/benchmarks.md#throughput-measurements): Compute
and report:
* Item throughput (elements/second)

View File

@@ -57,3 +57,10 @@ rapids_find_package(CUDAToolkit REQUIRED
# Append CTK targets to this as we add optional deps (NVML, CUPTI, ...)
set(ctk_libraries CUDA::toolkit)
################################################################################
# CUDAToolkit -> NVML
if (NVBench_ENABLE_NVML)
include("${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake")
list(APPEND ctk_libraries nvbench::nvml)
endif()

View File

@@ -0,0 +1,36 @@
# By default, add dependent DLLs to the build dir on MSVC. This avoids
# a variety of runtime issues when using NVML, etc.
# This behavior can be disabled using the following options:
if (WIN32)
option(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD
"Copy dependent dlls to NVBench library build location (MSVC only)."
ON
)
else()
# These are forced off for non-MSVC builds, as $<TARGET_RUNTIME_DLLS:...>
# will always be empty on non-dll platforms.
set(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD OFF)
endif()
if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD)
message(STATUS
"CMake 3.21.0 is required when NVBench_ADD_DEPENDENT_DLLS_TO_BUILD "
"is enabled."
)
cmake_minimum_required(VERSION 3.21.0)
endif()
function(nvbench_setup_dep_dlls target_name)
# The custom command below fails when there aren't any runtime DLLs to copy,
# so only enable it when a relevant dependency is enabled:
if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD AND NVBench_ENABLE_NVML)
add_custom_command(TARGET ${target_name}
POST_BUILD
COMMAND
"${CMAKE_COMMAND}" -E copy
"$<TARGET_RUNTIME_DLLS:${target_name}>"
"$<TARGET_FILE_DIR:${target_name}>"
COMMAND_EXPAND_LISTS
)
endif()
endfunction()

View File

@@ -1,14 +1,28 @@
macro(nvbench_generate_exports)
set(nvbench_build_export_code_block "")
set(nvbench_install_export_code_block "")
if (NVBench_ENABLE_NVML)
string(APPEND nvbench_build_export_code_block
"include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake\")\n"
)
string(APPEND nvbench_install_export_code_block
"include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake\")\n"
)
endif()
rapids_export(BUILD NVBench
EXPORT_SET nvbench-targets
NAMESPACE "nvbench::"
GLOBAL_TARGETS nvbench main
LANGUAGES CUDA CXX
FINAL_CODE_BLOCK nvbench_build_export_code_block
)
rapids_export(INSTALL NVBench
EXPORT_SET nvbench-targets
NAMESPACE "nvbench::"
GLOBAL_TARGETS nvbench main
LANGUAGES CUDA CXX
FINAL_CODE_BLOCK nvbench_install_export_code_block
)
endmacro()

View File

@@ -10,13 +10,35 @@ install(DIRECTORY "${NVBench_SOURCE_DIR}/nvbench"
)
# generated headers from build dir:
install(FILES
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh"
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
install(
FILES
"${NVBench_BINARY_DIR}/nvbench/config.cuh"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench"
)
install(
FILES
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh"
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail"
)
#
# Install CMake files needed by consumers to locate dependencies:
#
# Borrowing this logic from rapids_cmake's export logic to make sure these end
# up in the same location as nvbench-config.cmake:
rapids_cmake_install_lib_dir(config_install_location)
set(config_install_location "${config_install_location}/cmake/nvbench")
if (NVBench_ENABLE_NVML)
install(
FILES
"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake"
DESTINATION "${config_install_location}"
)
endif()
# Call with a list of library targets to generate install rules:
function(nvbench_install_libraries)
install(TARGETS ${ARGN}

37
cmake/NVBenchNVML.cmake Normal file
View File

@@ -0,0 +1,37 @@
# Defines the imported/alias target `nvbench::nvml` used to link NVML.
#
# This file is installed alongside the package config and is also included
# from the generated export code blocks, so it may be pulled in more than
# once per configure. Guard against redefining the target:
if (TARGET nvbench::nvml)
  return()
endif()

# Since this file is installed, we need to make sure that the CUDAToolkit has
# been found by consumers:
if (NOT TARGET CUDA::toolkit)
  find_package(CUDAToolkit REQUIRED)
endif()

if (WIN32)
  # The CUDA:: targets currently don't provide dll locations through the
  # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries
  # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls`
  # CMake function from copying the dlls to the build / install directories.
  # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845
  # and the other CMake issues it links to.
  #
  # We create a nvbench-specific target that configures the nvml interface as
  # described here:
  # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538
  #
  # Use find_file instead of find_library, which would search for a .lib file.
  # This is also nice because find_file searches recursively (find_library
  # does not) and some versions of CTK nest nvml.dll several directories deep
  # under C:\Windows\System32.
  find_file(NVBench_NVML_DLL nvml.dll REQUIRED
    DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32."
    PATHS "C:/Windows/System32"
  )
  mark_as_advanced(NVBench_NVML_DLL)

  add_library(nvbench::nvml SHARED IMPORTED)
  target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit)
  set_target_properties(nvbench::nvml PROPERTIES
    IMPORTED_LOCATION "${NVBench_NVML_DLL}"
    IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}"
  )
else()
  # Linux is much easier...
  add_library(nvbench::nvml ALIAS CUDA::nvml)
endif()

View File

@@ -0,0 +1,7 @@
# nvbench_write_config_header(<filepath>)
#
# Generates NVBench's configuration header at <filepath> from
# cmake/config.cuh.in, defining NVBENCH_HAS_NVML when NVML support is on.
function(nvbench_write_config_header filepath)
  if (NVBench_ENABLE_NVML)
    set(NVBENCH_HAS_NVML 1)
  endif()
  # @ONLY restricts substitution to @VAR@ references, so any literal `${...}`
  # text in the template survives; the template only uses #cmakedefine.
  configure_file("${NVBench_SOURCE_DIR}/cmake/config.cuh.in" "${filepath}" @ONLY)
endfunction()

22
cmake/config.cuh.in Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// Defined if NVBench has been built with NVML support.
#cmakedefine NVBENCH_HAS_NVML

View File

@@ -12,6 +12,33 @@
* `--version`
* Print information about the version of NVBench used to build the executable.
# Device Modification
* `--persistence-mode <state>`, `--pm <state>`
* Sets persistence mode for one or more GPU devices.
* Applies to the devices described by the most recent `--devices` option,
or all devices if `--devices` is not specified.
* This option requires root / admin permissions.
* This option is only supported on Linux.
* This call must precede all other device modification options, if any.
* Note that persistence mode is deprecated and will be removed at some point
in favor of the new persistence daemon. See the following link for more
details: https://docs.nvidia.com/deploy/driver-persistence/index.html
* Valid values for `state` are:
* `0`: Disable persistence mode.
* `1`: Enable persistence mode.
* `--lock-gpu-clocks <rate>`, `--lgc <rate>`
* Lock GPU clocks for one or more devices to a particular rate.
* Applies to the devices described by the most recent `--devices` option,
or all devices if `--devices` is not specified.
* This option requires root / admin permissions.
* This option is only supported on Volta+ (sm_70+) devices.
* Valid values for `rate` are:
* `reset`, `unlock`, `none`: Unlock the GPU clocks.
* `base`, `tdp`: Lock clocks to base frequency (best for stable results).
* `max`, `maximum`: Lock clocks to max frequency (best for fastest results).
# Output
* `--csv <filename/stream>`
@@ -51,7 +78,7 @@
* `--devices <device ids>`, `--device <device ids>`, `-d <device ids>`
* Limit execution to one or more devices.
* `<device ids>` is a single id, or a comma separated list.
* `<device ids>` is a single id, a comma separated list, or the string "all".
* Device ids can be obtained from `--list`.
* Applies to the most recent `--benchmark`, or all benchmarks if specified
before any `--benchmark` arguments.

View File

@@ -26,6 +26,10 @@ set(srcs
detail/state_generator.cxx
)
if (NVBench_ENABLE_NVML)
list(APPEND srcs internal/nvml.cxx)
endif()
# CUDA 11.0 can't compile json_printer without crashing
# So for that version fall back to C++ with degraded
# output ( no PTX version info )
@@ -49,6 +53,8 @@ file_to_string("../docs/cli_help_axis.md"
cli_help_axis_text
)
nvbench_write_config_header("${NVBench_BINARY_DIR}/nvbench/config.cuh")
# nvbench (nvbench::nvbench)
add_library(nvbench SHARED ${srcs})
target_include_directories(nvbench PUBLIC
@@ -77,4 +83,5 @@ add_dependencies(nvbench.all nvbench_main)
add_library(nvbench::nvbench ALIAS nvbench)
add_library(nvbench::main ALIAS nvbench_main)
nvbench_setup_dep_dlls(nvbench)
nvbench_install_libraries(nvbench nvbench_main)

View File

@@ -18,8 +18,10 @@
#include <nvbench/device_info.cuh>
#include <nvbench/config.cuh>
#include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh>
#include <nvbench/internal/nvml.cuh>
#include <cuda_runtime_api.h>
@@ -38,8 +40,108 @@ device_info::memory_info device_info::get_global_memory_usage() const
device_info::device_info(int id)
: m_id{id}
, m_prop{}
, m_nvml_device(nullptr)
{
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
#ifdef NVBENCH_HAS_NVML
// Retrieve the current device's pci_id as a null-terminated string.
// Docs say 13 chars should always be sufficient.
constexpr int pci_id_len = 13;
char pci_id[pci_id_len];
NVBENCH_CUDA_CALL(cudaDeviceGetPCIBusId(pci_id, pci_id_len, m_id));
NVBENCH_NVML_CALL(nvmlDeviceGetHandleByPciBusId(pci_id, &m_nvml_device));
#endif // NVBENCH_HAS_NVML
}
void device_info::set_persistence_mode(bool state)
#ifndef NVBENCH_HAS_NVML
{
throw nvbench::nvml::not_enabled{};
}
#else // NVBENCH_HAS_NVML
try
{
NVBENCH_NVML_CALL(nvmlDeviceSetPersistenceMode(
m_nvml_device,
state ? NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED));
}
catch (nvml::call_failed &e)
{
if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED)
{
NVBENCH_THROW(std::runtime_error,
"{}",
"Persistence mode is only supported on Linux.");
}
else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION)
{
NVBENCH_THROW(std::runtime_error,
"{}",
"Root/Admin permissions required to set persistence mode.");
}
throw;
}
#endif // NVBENCH_HAS_NVML
void device_info::lock_gpu_clocks(device_info::clock_rate rate)
#ifndef NVBENCH_HAS_NVML
{
throw nvbench::nvml::not_enabled{};
}
#else // NVBENCH_HAS_NVML
try
{
switch (rate)
{
case clock_rate::none:
NVBENCH_NVML_CALL(nvmlDeviceResetGpuLockedClocks(m_nvml_device));
break;
case clock_rate::base:
NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks(
m_nvml_device,
static_cast<unsigned int>(NVML_CLOCK_LIMIT_ID_TDP),
static_cast<unsigned int>(NVML_CLOCK_LIMIT_ID_TDP)));
break;
case clock_rate::maximum: {
const auto max_mhz = static_cast<unsigned int>(
this->get_sm_default_clock_rate() / (1000 * 1000));
NVBENCH_NVML_CALL(
nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz));
break;
}
default:
NVBENCH_THROW(std::runtime_error,
"Unrecognized clock rate: {}",
static_cast<int>(rate));
}
}
catch (nvml::call_failed &e)
{
if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED &&
this->get_ptx_version() < 700)
{
NVBENCH_THROW(std::runtime_error,
"GPU clock rates can only be modified for Volta and later. "
"Device: {} ({}) SM: {} < {}",
this->get_name(),
this->get_id(),
this->get_ptx_version(),
700);
}
else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION)
{
NVBENCH_THROW(std::runtime_error,
"{}",
"Root/Admin permissions required to change GPU clock rates.");
}
throw;
}
#endif // NVBENCH_HAS_NVML
} // namespace nvbench

View File

@@ -27,6 +27,9 @@
#include <string_view>
#include <utility>
// forward declare this for internal storage
struct nvmlDevice_st;
namespace nvbench
{
@@ -66,13 +69,35 @@ struct device_info
NVBENCH_CUDA_CALL(cudaSetDevice(m_id));
}
/// Enable or disable persistence mode.
/// @note Only supported on Linux.
/// @note Requires root / admin privileges.
void set_persistence_mode(bool state);
/// Symbolic values for special clock rates
enum class clock_rate
{
/// Unlock clocks
none,
/// Base TDP clock; Preferred for stable benchmarking
base,
/// Maximum clock rate
maximum
};
/// Lock GPU clocks to the specified rate.
/// @note Only supported on Volta+ (sm_70+) devices.
/// @note Requires root / admin privileges.
void lock_gpu_clocks(clock_rate rate);
/// @return The SM version of the current device as (major*100) + (minor*10).
[[nodiscard]] int get_sm_version() const
{
return m_prop.major * 100 + m_prop.minor * 10;
}
/// @return The PTX version of the current device
/// @return The PTX version of the current device, e.g. sm_80 returns 800.
[[nodiscard]] __forceinline__ int get_ptx_version() const
{
return detail::get_ptx_version(m_id);
@@ -197,6 +222,7 @@ struct device_info
private:
int m_id;
cudaDeviceProp m_prop;
nvmlDevice_st *m_nvml_device;
};
// get_ptx_version implementation; this needs to stay in the header so it will

119
nvbench/internal/nvml.cuh Normal file
View File

@@ -0,0 +1,119 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <nvbench/config.cuh>
#include <nvbench/detail/throw.cuh>
#include <fmt/format.h>
#ifdef NVBENCH_HAS_NVML
#include <nvml.h>
#endif // NVBENCH_HAS_NVML
#include <stdexcept>
namespace nvbench::nvml
{
/// Base class for NVML-specific exceptions
///
/// Inherits std::runtime_error's constructors, so it is constructed directly
/// from a message string. Catch this type to handle any NVML-related failure.
struct error : std::runtime_error
{
using runtime_error::runtime_error;
};
/// Thrown when NVML support is disabled.
struct not_enabled : error
{
not_enabled()
: error{"NVML not available. Reconfigure NVBench with the CMake option "
"`-DNVBench_ENABLE_NVML=ON`."}
{}
};
// Only `error` and `not_enabled` are defined when NVML is disabled.
// Other exceptions may hold types defined by NVML.
#ifdef NVBENCH_HAS_NVML
/// Thrown when a generic NVML call inside NVBENCH_NVML_CALL fails
struct call_failed : error
{
call_failed(const std::string &filename,
std::size_t lineno,
const std::string &call,
nvmlReturn_t error_code,
std::string error_string)
: error(fmt::format("{}:{}:\n"
"\tNVML call failed:\n"
"\t\tCall: {}\n"
"\t\tError: ({}) {}",
filename,
lineno,
call,
static_cast<int>(error_code),
error_string))
, m_error_code(error_code)
, m_error_string(error_string)
{}
[[nodiscard]] nvmlReturn_t get_error_code() const { return m_error_code; }
[[nodiscard]] const std::string &get_error_string() const
{
return m_error_string;
}
private:
nvmlReturn_t m_error_code;
std::string m_error_string;
};
#endif // NVBENCH_HAS_NVML
} // namespace nvbench::nvml
#ifdef NVBENCH_HAS_NVML
#define NVBENCH_NVML_CALL(call) \
do \
{ \
const auto _rr = call; \
if (_rr != NVML_SUCCESS) \
{ \
throw nvbench::nvml::call_failed(__FILE__, \
__LINE__, \
#call, \
_rr, \
nvmlErrorString(_rr)); \
} \
} while (false)
// Same as above, but used for nvmlInit(), where a failure means that
// nvmlErrorString is not available.
#define NVBENCH_NVML_CALL_NO_API(call) \
do \
{ \
const auto _rr = call; \
if (_rr != NVML_SUCCESS) \
{ \
throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \
} \
} while (false)
#endif // NVBENCH_HAS_NVML

71
nvbench/internal/nvml.cxx Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nvbench/internal/nvml.cuh>
#include <nvbench/config.cuh>
#include <fmt/format.h>
#include <nvml.h>
#include <stdexcept>
namespace
{
// RAII struct that initializes and shuts down NVML
//
// The constructor calls nvmlInit() and records success in m_inited; the
// destructor calls nvmlShutdown() only when init succeeded. Failures in
// either direction are reported to stdout rather than thrown, so a missing
// or broken NVML installation never aborts the process (and the destructor
// never throws during static teardown).
struct NVMLLifetimeManager
{
NVMLLifetimeManager()
{
try
{
// NO_API variant: if init fails, nvmlErrorString is not usable yet.
NVBENCH_NVML_CALL_NO_API(nvmlInit());
m_inited = true;
}
catch (std::exception &e)
{
// Deliberately swallow: NVML features will later throw when used.
fmt::print("NVML initialization failed:\n {}", e.what());
}
}

~NVMLLifetimeManager()
{
// Only shut down if init actually succeeded.
if (m_inited)
{
try
{
NVBENCH_NVML_CALL_NO_API(nvmlShutdown());
}
catch (std::exception &e)
{
fmt::print("NVML shutdown failed:\n {}", e.what());
}
}
}

private:
// True once nvmlInit() has succeeded; guards the shutdown call.
bool m_inited{false};
};
// NVML's lifetime should extend for the entirety of the process, so store in a
// global.
auto nvml_lifetime = NVMLLifetimeManager{};
} // namespace

View File

@@ -22,6 +22,7 @@
#include <nvbench/benchmark_base.cuh>
#include <nvbench/benchmark_manager.cuh>
#include <nvbench/callable.cuh>
#include <nvbench/config.cuh>
#include <nvbench/cpu_timer.cuh>
#include <nvbench/create.cuh>
#include <nvbench/cuda_call.cuh>

View File

@@ -252,6 +252,24 @@ std::vector<T> parse_values(std::string_view value_spec)
}
}
std::vector<nvbench::device_info> parse_devices(std::string_view devices)
{
auto &dev_mgr = nvbench::device_manager::get();
if (devices == "all")
{
return dev_mgr.get_devices();
}
std::vector<nvbench::device_info> result;
auto dev_ids = parse_values<nvbench::int32_t>(devices);
for (nvbench::int32_t dev_id : dev_ids)
{
result.push_back(dev_mgr.get_device(dev_id));
}
return result;
}
// Parse an axis specification into a 3-tuple of string_views containing the
// axis name, flags, and values.
auto parse_axis_key_flag_value_spec(const std::string &spec)
@@ -322,6 +340,9 @@ void option_parser::parse_impl()
{
m_global_benchmark_args.clear();
// Initialize to all devices:
m_recent_devices = nvbench::device_manager::get().get_devices();
// Initialize color variable based on env var:
{
const char *var = std::getenv("NVBENCH_COLOR");
@@ -330,6 +351,11 @@ void option_parser::parse_impl()
this->parse_range(m_args.cbegin(), m_args.cend());
if (m_exit_after_parsing)
{
std::exit(0);
}
if (m_benchmarks.empty())
{
// If no benchmarks were specified, add all:
@@ -397,6 +423,18 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
this->print_list();
std::exit(0);
}
else if (arg == "--persistence-mode" || arg == "--pm")
{
check_params(1);
this->set_persistence_mode(first[1]);
first += 2;
}
else if (arg == "--lock-gpu-clocks" || arg == "--lgc")
{
check_params(1);
this->lock_gpu_clocks(first[1]);
first += 2;
}
else if (arg == "--run-once")
{
this->enable_run_once();
@@ -569,6 +607,85 @@ void option_parser::print_help_axis() const
fmt::print("{}\n", ::cli_help_axis_text);
}
void option_parser::set_persistence_mode(const std::string &state)
try
{
m_exit_after_parsing = true;
nvbench::int32_t state_val{};
::parse(state, state_val);
for (nvbench::device_info &device : m_recent_devices)
{
fmt::print("Turning persistence mode {} for device '{}' ({}).\n",
static_cast<bool>(state_val) ? "ON" : "OFF",
device.get_name(),
device.get_id());
device.set_persistence_mode(static_cast<bool>(state_val));
}
}
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error handling option `--persistence-mode {}`:\n{}",
state,
e.what());
}
void option_parser::lock_gpu_clocks(const std::string &rate)
try
{
m_exit_after_parsing = true;
nvbench::device_info::clock_rate rate_val;
if (rate == "reset" || rate == "unlock" || rate == "none")
{
rate_val = nvbench::device_info::clock_rate::none;
}
else if (rate == "base" || rate == "tdp")
{
rate_val = nvbench::device_info::clock_rate::base;
}
else if (rate == "max" || rate == "maximum")
{
rate_val = nvbench::device_info::clock_rate::maximum;
}
else
{
NVBENCH_THROW(std::runtime_error,
"Unsupported argument: '{}'. Valid values are {}",
rate,
"{reset, base, max}");
}
for (nvbench::device_info &device : m_recent_devices)
{
if (rate_val == nvbench::device_info::clock_rate::none)
{
fmt::print("Unlocking clocks for device '{}' ({}).\n",
device.get_name(),
device.get_id());
}
else
{
fmt::print("Locking clocks to '{}' for device '{}' ({}).\n",
rate,
device.get_name(),
device.get_id());
}
device.lock_gpu_clocks(rate_val);
}
}
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error handling option `--lock-gpu-clocks {}`:\n{}",
rate,
e.what());
}
void option_parser::enable_run_once()
{
// If no active benchmark, save args as global.
@@ -606,7 +723,7 @@ try
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error parsing --benchmark `{}`:\n{}",
"Error handling option --benchmark `{}`:\n{}",
name,
e.what());
}
@@ -620,21 +737,26 @@ void option_parser::replay_global_args()
void option_parser::update_devices(const std::string &devices)
try
{
auto device_vec = ::parse_devices(devices);
// If no active benchmark, save args as global.
if (m_benchmarks.empty())
{
m_global_benchmark_args.push_back("--devices");
m_global_benchmark_args.push_back(devices);
return;
}
else
{
benchmark_base &bench = *m_benchmarks.back();
bench.set_devices(device_vec);
}
benchmark_base &bench = *m_benchmarks.back();
bench.set_devices(parse_values<nvbench::int32_t>(devices));
m_recent_devices = std::move(device_vec);
}
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error parsing --devices `{}`:\n{}",
"Error handling option --devices `{}`:\n{}",
devices,
e.what());
}
@@ -710,7 +832,7 @@ try
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error parsing --axis `{}`:\n{}",
"Error handling option --axis `{}`:\n{}",
spec,
e.what());
}
@@ -820,7 +942,7 @@ try
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error parsing `{} {}`:\n{}",
"Error handling option `{} {}`:\n{}",
prop_arg,
prop_val,
e.what());
@@ -866,7 +988,7 @@ try
catch (std::exception &e)
{
NVBENCH_THROW(std::runtime_error,
"Error parsing `{} {}`:\n{}",
"Error handling option `{} {}`:\n{}",
prop_arg,
prop_val,
e.what());

View File

@@ -18,6 +18,7 @@
#pragma once
#include <nvbench/device_info.cuh>
#include <nvbench/printer_multiplex.cuh>
#include <iosfwd>
@@ -89,6 +90,9 @@ private:
void print_help() const;
void print_help_axis() const;
void set_persistence_mode(const std::string &state);
void lock_gpu_clocks(const std::string &rate);
void enable_run_once();
void add_benchmark(const std::string &name);
@@ -123,6 +127,11 @@ private:
// Store benchmark modifiers passed in before any benchmarks are requested as
// "global args". Replay them after every benchmark.
std::vector<std::string> m_global_benchmark_args;
// List of devices specified by the most recent --devices option, or all
// devices if --devices has not been used.
std::vector<nvbench::device_info> m_recent_devices;
benchmark_vector m_benchmarks;
// Manages lifetimes of any ofstreams opened for m_printer.
@@ -136,6 +145,9 @@ private:
// True if any stdout printers have been added to m_printer.
bool m_have_stdout_printer{false};
// Used for device modification commands like --lock-gpu-clocks
bool m_exit_after_parsing{false};
};
} // namespace nvbench