diff --git a/CMakeLists.txt b/CMakeLists.txt index e9bcf0a..1d0268a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ # 3.20.1 required for rapids-cmake +# 3.21.0 required for NVBench_ADD_DEPENDENT_DLLS_TO_* (MSVC only) cmake_minimum_required(VERSION 3.20.1) set(CMAKE_CXX_STANDARD 17) @@ -15,17 +16,21 @@ project(NVBench nvbench_init_rapids_cmake() +option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) + +option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) +option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF) + include(cmake/NVBenchConfigTarget.cmake) -include(cmake/NVBenchDependencies.cmake) +include(cmake/NVBenchDependentDlls.cmake) include(cmake/NVBenchExports.cmake) +include(cmake/NVBenchWriteConfigHeader.cmake) +include(cmake/NVBenchDependencies.cmake) include(cmake/NVBenchInstallRules.cmake) include(cmake/NVBenchUtilities.cmake) message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") -option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) -option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF) - add_subdirectory(nvbench) if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING) diff --git a/README.md b/README.md index 52b2b94..a7240ec 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ features: be dynamic numbers/strings or [static types](docs/benchmarks.md#type-axes). * [Runtime customization](docs/cli_help.md): A rich command-line interface allows [redefinition of parameter axes](docs/cli_help_axis.md), CUDA device - selection, changing output formats, and more. + selection, locking GPU clocks (Volta+), changing output formats, and more. 
* [Throughput calculations](docs/benchmarks.md#throughput-measurements): Compute and report: * Item throughput (elements/second) diff --git a/cmake/NVBenchDependencies.cmake b/cmake/NVBenchDependencies.cmake index f85b67b..78e63a6 100644 --- a/cmake/NVBenchDependencies.cmake +++ b/cmake/NVBenchDependencies.cmake @@ -57,3 +57,10 @@ rapids_find_package(CUDAToolkit REQUIRED # Append CTK targets to this as we add optional deps (NMVL, CUPTI, ...) set(ctk_libraries CUDA::toolkit) + +################################################################################ +# CUDAToolkit -> NVML +if (NVBench_ENABLE_NVML) + include("${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake") + list(APPEND ctk_libraries nvbench::nvml) +endif() diff --git a/cmake/NVBenchDependentDlls.cmake b/cmake/NVBenchDependentDlls.cmake new file mode 100644 index 0000000..4ebf784 --- /dev/null +++ b/cmake/NVBenchDependentDlls.cmake @@ -0,0 +1,36 @@ +# By default, add dependent DLLs to the build dir on MSVC. This avoids +# a variety of runtime issues when using NVML, etc. +# This behavior can be disabled using the following options: +if (WIN32) + option(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD + "Copy dependent dlls to NVBench library build location (MSVC only)." + ON + ) +else() + # These are forced off for non-MSVC builds, as $ + # will always be empty on non-dll platforms. + set(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD OFF) +endif() + +if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD) + message(STATUS + "CMake 3.21.0 is required when NVBench_ADD_DEPENDENT_DLLS_TO_BUILD " + "is enabled." 
# Adds a POST_BUILD step to `target_name` that copies the runtime DLLs the
# target depends on into the directory that holds the built target.
#
# Arguments:
#   target_name - Name of an existing target to attach the copy step to.
#
# The step is only added when both NVBench_ADD_DEPENDENT_DLLS_TO_BUILD and
# NVBench_ENABLE_NVML are enabled; otherwise this function does nothing.
# $<TARGET_RUNTIME_DLLS:...> requires CMake 3.21 or newer.
function(nvbench_setup_dep_dlls target_name)
  # The custom command below fails when there aren't any runtime DLLs to copy,
  # so only enable it when a relevant dependency is enabled:
  if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD AND NVBench_ENABLE_NVML)
    # TARGET_RUNTIME_DLLS expands to a list, hence COMMAND_EXPAND_LISTS so that
    # each DLL becomes its own argument to `cmake -E copy`.
    add_custom_command(TARGET ${target_name}
      POST_BUILD
      COMMAND
        "${CMAKE_COMMAND}" -E copy
        "$<TARGET_RUNTIME_DLLS:${target_name}>"
        "$<TARGET_FILE_DIR:${target_name}>"
      COMMAND_EXPAND_LISTS
    )
  endif()
endfunction()
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh" + "${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail" ) +# +# Install CMake files needed by consumers to locate dependencies: +# + +# Borrowing this logic from rapids_cmake's export logic to make sure these end +# up in the same location as nvbench-config.cmake: +rapids_cmake_install_lib_dir(config_install_location) +set(config_install_location "${config_install_location}/cmake/nvbench") + +if (NVBench_ENABLE_NVML) + install( + FILES + "${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake" + DESTINATION "${config_install_location}" + ) +endif() + # Call with a list of library targets to generate install rules: function(nvbench_install_libraries) install(TARGETS ${ARGN} diff --git a/cmake/NVBenchNVML.cmake b/cmake/NVBenchNVML.cmake new file mode 100644 index 0000000..f2aadbb --- /dev/null +++ b/cmake/NVBenchNVML.cmake @@ -0,0 +1,37 @@ +# Since this file is installed, we need to make sure that the CUDAToolkit has +# been found by consumers: +if (NOT TARGET CUDA::toolkit) + find_package(CUDAToolkit REQUIRED) +endif() + +if (WIN32) + # The CUDA:: targets currently don't provide dll locations through the + # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries + # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls` + # CMake function from copying the dlls to the build / install directories. + # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845 + # and the other CMake issues it links to. + # + # We create a nvbench-specific target that configures the nvml interface as + # described here: + # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538 + # + # Use find_file instead of find_library, which would search for a .lib file. 
# Generates the NVBench configuration header at `filepath` from the
# `cmake/config.cuh.in` template.
#
# Arguments:
#   filepath - Destination path of the generated header.
#
# NVBENCH_HAS_NVML is set (function-local) when NVBench_ENABLE_NVML is on, so
# that the template's `#cmakedefine NVBENCH_HAS_NVML` line becomes a define.
function(nvbench_write_config_header filepath)
  if (NVBench_ENABLE_NVML)
    set(NVBENCH_HAS_NVML 1)
  endif()

  configure_file(
    "${NVBench_SOURCE_DIR}/cmake/config.cuh.in"
    "${filepath}"
  )
endfunction()
+#cmakedefine NVBENCH_HAS_NVML diff --git a/docs/cli_help.md b/docs/cli_help.md index d4dbf85..66072c4 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -12,6 +12,33 @@ * `--version` * Print information about the version of NVBench used to build the executable. +# Device Modification + +* `--persistence-mode <state>`, `--pm <state>` + * Sets persistence mode for one or more GPU devices. + * Applies to the devices described by the most recent `--devices` option, + or all devices if `--devices` is not specified. + * This option requires root / admin permissions. + * This option is only supported on Linux. + * This call must precede all other device modification options, if any. + * Note that persistence mode is deprecated and will be removed at some point + in favor of the new persistence daemon. See the following link for more + details: https://docs.nvidia.com/deploy/driver-persistence/index.html + * Valid values for `state` are: + * `0`: Disable persistence mode. + * `1`: Enable persistence mode. + +* `--lock-gpu-clocks <rate>`, `--lgc <rate>` + * Lock GPU clocks for one or more devices to a particular rate. + * Applies to the devices described by the most recent `--devices` option, + or all devices if `--devices` is not specified. + * This option requires root / admin permissions. + * This option is only supported on Volta+ (sm_70+) devices. + * Valid values for `rate` are: + * `reset`, `unlock`, `none`: Unlock the GPU clocks. + * `base`, `tdp`: Lock clocks to base frequency (best for stable results). + * `max`, `maximum`: Lock clocks to max frequency (best for fastest results). + # Output * `--csv ` @@ -51,7 +78,7 @@ * `--devices <device ids>`, `--device <device ids>`, `-d <device ids>` * Limit execution to one or more devices. - * `<device ids>` is a single id, or a comma separated list. + * `<device ids>` is a single id, a comma separated list, or the string "all". * Device ids can be obtained from `--list`. * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. 
diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 248c20f..30b2b96 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -26,6 +26,10 @@ set(srcs detail/state_generator.cxx ) +if (NVBench_ENABLE_NVML) + list(APPEND srcs internal/nvml.cxx) +endif() + # CUDA 11.0 can't compile json_printer without crashing # So for that version fall back to C++ with degraded # output ( no PTX version info ) @@ -49,6 +53,8 @@ file_to_string("../docs/cli_help_axis.md" cli_help_axis_text ) +nvbench_write_config_header("${NVBench_BINARY_DIR}/nvbench/config.cuh") + # nvbench (nvbench::nvbench) add_library(nvbench SHARED ${srcs}) target_include_directories(nvbench PUBLIC @@ -77,4 +83,5 @@ add_dependencies(nvbench.all nvbench_main) add_library(nvbench::nvbench ALIAS nvbench) add_library(nvbench::main ALIAS nvbench_main) +nvbench_setup_dep_dlls(nvbench) nvbench_install_libraries(nvbench nvbench_main) diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu index 51260f4..1550604 100644 --- a/nvbench/device_info.cu +++ b/nvbench/device_info.cu @@ -18,8 +18,10 @@ #include +#include #include #include +#include #include @@ -38,8 +40,108 @@ device_info::memory_info device_info::get_global_memory_usage() const device_info::device_info(int id) : m_id{id} , m_prop{} + , m_nvml_device(nullptr) { NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id)); + +#ifdef NVBENCH_HAS_NVML + // Retrieve the current device's pci_id as a null-terminated string. + // Docs say 13 chars should always be sufficient. + constexpr int pci_id_len = 13; + char pci_id[pci_id_len]; + NVBENCH_CUDA_CALL(cudaDeviceGetPCIBusId(pci_id, pci_id_len, m_id)); + NVBENCH_NVML_CALL(nvmlDeviceGetHandleByPciBusId(pci_id, &m_nvml_device)); +#endif // NVBENCH_HAS_NVML } +void device_info::set_persistence_mode(bool state) +#ifndef NVBENCH_HAS_NVML +{ + throw nvbench::nvml::not_enabled{}; +} +#else // NVBENCH_HAS_NVML +try +{ + NVBENCH_NVML_CALL(nvmlDeviceSetPersistenceMode( + m_nvml_device, + state ? 
NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED)); +} +catch (nvml::call_failed &e) +{ + if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED) + { + NVBENCH_THROW(std::runtime_error, + "{}", + "Persistence mode is only supported on Linux."); + } + else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION) + { + NVBENCH_THROW(std::runtime_error, + "{}", + "Root/Admin permissions required to set persistence mode."); + } + + throw; +} +#endif // NVBENCH_HAS_NVML + +void device_info::lock_gpu_clocks(device_info::clock_rate rate) +#ifndef NVBENCH_HAS_NVML +{ + throw nvbench::nvml::not_enabled{}; +} +#else // NVBENCH_HAS_NVML +try +{ + switch (rate) + { + case clock_rate::none: + NVBENCH_NVML_CALL(nvmlDeviceResetGpuLockedClocks(m_nvml_device)); + break; + + case clock_rate::base: + NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks( + m_nvml_device, + static_cast(NVML_CLOCK_LIMIT_ID_TDP), + static_cast(NVML_CLOCK_LIMIT_ID_TDP))); + break; + + case clock_rate::maximum: { + const auto max_mhz = static_cast( + this->get_sm_default_clock_rate() / (1000 * 1000)); + NVBENCH_NVML_CALL( + nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz)); + break; + } + + default: + NVBENCH_THROW(std::runtime_error, + "Unrecognized clock rate: {}", + static_cast(rate)); + } +} +catch (nvml::call_failed &e) +{ + if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED && + this->get_ptx_version() < 700) + { + NVBENCH_THROW(std::runtime_error, + "GPU clock rates can only be modified for Volta and later. 
" + "Device: {} ({}) SM: {} < {}", + this->get_name(), + this->get_id(), + this->get_ptx_version(), + 700); + } + else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION) + { + NVBENCH_THROW(std::runtime_error, + "{}", + "Root/Admin permissions required to change GPU clock rates."); + } + + throw; +} +#endif // NVBENCH_HAS_NVML + } // namespace nvbench diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh index 4a5ca1d..c960138 100644 --- a/nvbench/device_info.cuh +++ b/nvbench/device_info.cuh @@ -27,6 +27,9 @@ #include #include +// forward declare this for internal storage +struct nvmlDevice_st; + namespace nvbench { @@ -66,13 +69,35 @@ struct device_info NVBENCH_CUDA_CALL(cudaSetDevice(m_id)); } + /// Enable or disable persistence mode. + /// @note Only supported on Linux. + /// @note Requires root / admin privileges. + void set_persistence_mode(bool state); + + + /// Symbolic values for special clock rates + enum class clock_rate + { + /// Unlock clocks + none, + /// Base TDP clock; Preferred for stable benchmarking + base, + /// Maximum clock rate + maximum + }; + + /// Lock GPU clocks to the specified rate. + /// @note Only supported on Volta+ (sm_70+) devices. + /// @note Requires root / admin privileges. + void lock_gpu_clocks(clock_rate rate); + /// @return The SM version of the current device as (major*100) + (minor*10). [[nodiscard]] int get_sm_version() const { return m_prop.major * 100 + m_prop.minor * 10; } - /// @return The PTX version of the current device + /// @return The PTX version of the current device, e.g. sm_80 returns 800. 
/// Common base of every NVML-related exception; construct it with the same
/// arguments as std::runtime_error.
struct error : std::runtime_error
{
  using std::runtime_error::runtime_error;
};

/// Raised by NVML-dependent functionality when NVML support is disabled.
/// The message tells the user which CMake option to turn on.
struct not_enabled : error
{
  not_enabled()
      : error("NVML not available. Reconfigure NVBench with the CMake option "
              "`-DNVBench_ENABLE_NVML=ON`.")
  {}
};
+#ifdef NVBENCH_HAS_NVML + +/// Thrown when a generic NVML call inside NVBENCH_NVML_CALL fails +struct call_failed : error +{ + call_failed(const std::string &filename, + std::size_t lineno, + const std::string &call, + nvmlReturn_t error_code, + std::string error_string) + : error(fmt::format("{}:{}:\n" + "\tNVML call failed:\n" + "\t\tCall: {}\n" + "\t\tError: ({}) {}", + filename, + lineno, + call, + static_cast(error_code), + error_string)) + , m_error_code(error_code) + , m_error_string(error_string) + {} + + [[nodiscard]] nvmlReturn_t get_error_code() const { return m_error_code; } + + [[nodiscard]] const std::string &get_error_string() const + { + return m_error_string; + } + +private: + nvmlReturn_t m_error_code; + std::string m_error_string; +}; + +#endif // NVBENCH_HAS_NVML + +} // namespace nvbench::nvml + +#ifdef NVBENCH_HAS_NVML + +#define NVBENCH_NVML_CALL(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, \ + __LINE__, \ + #call, \ + _rr, \ + nvmlErrorString(_rr)); \ + } \ + } while (false) + +// Same as above, but used for nvmlInit(), where a failure means that +// nvmlErrorString is not available. +#define NVBENCH_NVML_CALL_NO_API(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \ + } \ + } while (false) + +#endif // NVBENCH_HAS_NVML diff --git a/nvbench/internal/nvml.cxx b/nvbench/internal/nvml.cxx new file mode 100644 index 0000000..4f750bc --- /dev/null +++ b/nvbench/internal/nvml.cxx @@ -0,0 +1,71 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. 
+ * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +#include + +#include + +namespace +{ + +// RAII struct that initializes and shuts down NVML +struct NVMLLifetimeManager +{ + NVMLLifetimeManager() + { + try + { + NVBENCH_NVML_CALL_NO_API(nvmlInit()); + m_inited = true; + } + catch (std::exception &e) + { + fmt::print("NVML initialization failed:\n {}", e.what()); + } + } + + ~NVMLLifetimeManager() + { + if (m_inited) + { + try + { + NVBENCH_NVML_CALL_NO_API(nvmlShutdown()); + } + catch (std::exception &e) + { + fmt::print("NVML shutdown failed:\n {}", e.what()); + } + } + } + +private: + bool m_inited{false}; +}; + +// NVML's lifetime should extend for the entirety of the process, so store in a +// global. 
+auto nvml_lifetime = NVMLLifetimeManager{}; + +} // namespace diff --git a/nvbench/nvbench.cuh b/nvbench/nvbench.cuh index 269575a..75bf1c1 100644 --- a/nvbench/nvbench.cuh +++ b/nvbench/nvbench.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index 4f5c195..4eee401 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -252,6 +252,24 @@ std::vector parse_values(std::string_view value_spec) } } +std::vector parse_devices(std::string_view devices) +{ + auto &dev_mgr = nvbench::device_manager::get(); + + if (devices == "all") + { + return dev_mgr.get_devices(); + } + + std::vector result; + auto dev_ids = parse_values(devices); + for (nvbench::int32_t dev_id : dev_ids) + { + result.push_back(dev_mgr.get_device(dev_id)); + } + return result; +} + // Parse an axis specification into a 3-tuple of string_views containing the // axis name, flags, and values. auto parse_axis_key_flag_value_spec(const std::string &spec) @@ -322,6 +340,9 @@ void option_parser::parse_impl() { m_global_benchmark_args.clear(); + // Initialize to all devices: + m_recent_devices = nvbench::device_manager::get().get_devices(); + // Initialize color variable based on env var: { const char *var = std::getenv("NVBENCH_COLOR"); @@ -330,6 +351,11 @@ void option_parser::parse_impl() this->parse_range(m_args.cbegin(), m_args.cend()); + if (m_exit_after_parsing) + { + std::exit(0); + } + if (m_benchmarks.empty()) { // If no benchmarks were specified, add all: @@ -397,6 +423,18 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->print_list(); std::exit(0); } + else if (arg == "--persistence-mode" || arg == "--pm") + { + check_params(1); + this->set_persistence_mode(first[1]); + first += 2; + } + else if (arg == "--lock-gpu-clocks" || arg == "--lgc") + { + check_params(1); + this->lock_gpu_clocks(first[1]); + first += 2; + } else if (arg == "--run-once") { 
this->enable_run_once(); @@ -569,6 +607,85 @@ void option_parser::print_help_axis() const fmt::print("{}\n", ::cli_help_axis_text); } +void option_parser::set_persistence_mode(const std::string &state) +try +{ + m_exit_after_parsing = true; + + nvbench::int32_t state_val{}; + ::parse(state, state_val); + + for (nvbench::device_info &device : m_recent_devices) + { + fmt::print("Turning persistence mode {} for device '{}' ({}).\n", + static_cast(state_val) ? "ON" : "OFF", + device.get_name(), + device.get_id()); + device.set_persistence_mode(static_cast(state_val)); + } +} +catch (std::exception &e) +{ + NVBENCH_THROW(std::runtime_error, + "Error handling option `--persistence-mode {}`:\n{}", + state, + e.what()); +} + +void option_parser::lock_gpu_clocks(const std::string &rate) +try +{ + m_exit_after_parsing = true; + + nvbench::device_info::clock_rate rate_val; + + if (rate == "reset" || rate == "unlock" || rate == "none") + { + rate_val = nvbench::device_info::clock_rate::none; + } + else if (rate == "base" || rate == "tdp") + { + rate_val = nvbench::device_info::clock_rate::base; + } + else if (rate == "max" || rate == "maximum") + { + rate_val = nvbench::device_info::clock_rate::maximum; + } + else + { + NVBENCH_THROW(std::runtime_error, + "Unsupported argument: '{}'. Valid values are {}", + rate, + "{reset, base, max}"); + } + + for (nvbench::device_info &device : m_recent_devices) + { + if (rate_val == nvbench::device_info::clock_rate::none) + { + fmt::print("Unlocking clocks for device '{}' ({}).\n", + device.get_name(), + device.get_id()); + } + else + { + fmt::print("Locking clocks to '{}' for device '{}' ({}).\n", + rate, + device.get_name(), + device.get_id()); + } + + device.lock_gpu_clocks(rate_val); + } +} +catch (std::exception &e) +{ + NVBENCH_THROW(std::runtime_error, + "Error handling option `--lock-gpu-clocks {}`:\n{}", + rate, + e.what()); +} + void option_parser::enable_run_once() { // If no active benchmark, save args as global. 
@@ -606,7 +723,7 @@ try catch (std::exception &e) { NVBENCH_THROW(std::runtime_error, - "Error parsing --benchmark `{}`:\n{}", + "Error handling option --benchmark `{}`:\n{}", name, e.what()); } @@ -620,21 +737,26 @@ void option_parser::replay_global_args() void option_parser::update_devices(const std::string &devices) try { + auto device_vec = ::parse_devices(devices); + // If no active benchmark, save args as global. if (m_benchmarks.empty()) { m_global_benchmark_args.push_back("--devices"); m_global_benchmark_args.push_back(devices); - return; + } + else + { + benchmark_base &bench = *m_benchmarks.back(); + bench.set_devices(device_vec); } - benchmark_base &bench = *m_benchmarks.back(); - bench.set_devices(parse_values(devices)); + m_recent_devices = std::move(device_vec); } catch (std::exception &e) { NVBENCH_THROW(std::runtime_error, - "Error parsing --devices `{}`:\n{}", + "Error handling option --devices `{}`:\n{}", devices, e.what()); } @@ -710,7 +832,7 @@ try catch (std::exception &e) { NVBENCH_THROW(std::runtime_error, - "Error parsing --axis `{}`:\n{}", + "Error handling option --axis `{}`:\n{}", spec, e.what()); } @@ -820,7 +942,7 @@ try catch (std::exception &e) { NVBENCH_THROW(std::runtime_error, - "Error parsing `{} {}`:\n{}", + "Error handling option `{} {}`:\n{}", prop_arg, prop_val, e.what()); @@ -866,7 +988,7 @@ try catch (std::exception &e) { NVBENCH_THROW(std::runtime_error, - "Error parsing `{} {}`:\n{}", + "Error handling option `{} {}`:\n{}", prop_arg, prop_val, e.what()); diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh index da23834..19d2984 100644 --- a/nvbench/option_parser.cuh +++ b/nvbench/option_parser.cuh @@ -18,6 +18,7 @@ #pragma once +#include #include #include @@ -89,6 +90,9 @@ private: void print_help() const; void print_help_axis() const; + void set_persistence_mode(const std::string &state); + void lock_gpu_clocks(const std::string &rate); + void enable_run_once(); void add_benchmark(const std::string &name); 
@@ -123,6 +127,11 @@ private: // Store benchmark modifiers passed in before any benchmarks are requested as // "global args". Replay them after every benchmark. std::vector m_global_benchmark_args; + + // List of devices specified by the most recent --devices option, or all + // devices if --devices has not been used. + std::vector m_recent_devices; + benchmark_vector m_benchmarks; // Manages lifetimes of any ofstreams opened for m_printer. @@ -136,6 +145,9 @@ private: // True if any stdout printers have been added to m_printer. bool m_have_stdout_printer{false}; + + // Used for device modification commands like --lock-gpu-clocks + bool m_exit_after_parsing{false}; }; } // namespace nvbench