mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Enable more warning flags.
- /W4 on MSVC
- -Wall -Wextra + others on gcc/clang
- New NVBench_ENABLE_WERROR option to toggle "warnings as errors"
- Mark the nlohmann_json library as IMPORTED to switch to system includes
- Rename nvbench_main -> nvbench.main to follow target name conventions
- Explicitly suppress some cudafe warnings when compiling templates in nlohmann_json headers.
- Explicitly suppress some warnings from Thrust headers.
- Various fixes for warnings exposed by new flags.
- Disable CUPTI on CTK < 11.3 (See #52).
This commit is contained in:
@@ -6,18 +6,31 @@ set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
# NVBench_TOPLEVEL_PROJECT is ON only when this listfile's directory is the
# root of the whole source tree; otherwise it is OFF.
set(NVBench_TOPLEVEL_PROJECT OFF)
if ("${CMAKE_CURRENT_LIST_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
  set(NVBench_TOPLEVEL_PROJECT ON)
endif()
|
||||
|
||||
# Load the rapids-cmake helpers before project() is called.
include(cmake/NVBenchRapidsCMake.cmake)
nvbench_load_rapids_cmake()

# LANGUAGES is given exactly once; repeating the keyword inside a single
# project() call is malformed.
project(NVBench
  LANGUAGES CUDA CXX # CXX to work around issues with CUDA-only CMake projects.
  VERSION 0.1.0
)

nvbench_init_rapids_cmake()
|
||||
|
||||
# See NVIDIA/NVBench#52
# CUPTI support defaults to OFF when the CUDA Toolkit is older than 11.3.
find_package(CUDAToolkit REQUIRED)
set(cupti_default ON)
# Pass the variable name so if() dereferences it itself; an unquoted
# ${CUDAToolkit_VERSION} that expands to nothing would leave a malformed
# condition.
if (CUDAToolkit_VERSION VERSION_LESS 11.3)
  set(cupti_default OFF)
endif()
|
||||
|
||||
# User-facing build switches.
option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON)
# Declared once only: option() does nothing when the cache entry already
# exists, so an earlier declaration with a fixed ON default would make the
# toolkit-dependent ${cupti_default} ineffective.
option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default})

option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF)
|
||||
|
||||
@@ -1,10 +1,81 @@
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
# Warnings-as-errors switch. The default follows NVBench_TOPLEVEL_PROJECT, and
# the entry is marked advanced.
option(NVBench_ENABLE_WERROR "Treat warnings as errors while compiling NVBench." ${NVBench_TOPLEVEL_PROJECT})
mark_as_advanced(NVBench_ENABLE_WERROR)
|
||||
|
||||
# Aggregate target for all NVBench targets (libs, tests, examples, etc).
add_custom_target(nvbench.all)

# Library and executable output locations under the top-level build tree.
set(NVBench_EXECUTABLE_OUTPUT_DIR "${CMAKE_BINARY_DIR}/bin")
set(NVBench_LIBRARY_OUTPUT_DIR "${CMAKE_BINARY_DIR}/lib")

# Interface target that collects the compile options added below.
add_library(nvbench.build_interface INTERFACE)

# TODO Why must this be installed/exported if it's just a private interface?
# CMake complains about it missing from the export set unless we export it.
# Is there a way to avoid this?
set_property(TARGET nvbench.build_interface PROPERTY
  EXPORT_NAME internal_build_interface
)
|
||||
|
||||
# Adds a compiler flag to a target, but only if the C++ compiler accepts it.
#
# Arguments:
#   target_name - target that receives the flag.
#   type        - visibility keyword forwarded to target_compile_options().
#   flag        - a single compiler flag, e.g. "-Wall" or "/W4".
#
# The flag is probed with check_cxx_compiler_flag(); the name of the result
# variable is derived from the flag via string(MAKE_C_IDENTIFIER). An accepted
# flag is passed verbatim to CXX sources and wrapped in -Xcompiler= for CUDA
# sources compiled with the NVIDIA compiler.
function(nvbench_add_cxx_flag target_name type flag)
  string(MAKE_C_IDENTIFIER "NVBench_CXX_FLAG_${flag}" var)
  check_cxx_compiler_flag("${flag}" ${var})

  # ${var} expands to the result variable's *name*, which if() dereferences.
  # This avoids the argument-less if() a double expansion produces when the
  # probe result is empty.
  if (${var})
    target_compile_options(${target_name} ${type}
      "$<$<COMPILE_LANGUAGE:CXX>:${flag}>"
      "$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcompiler=${flag}>"
      # FIXME nvc++ case
    )
  endif()
endfunction()
|
||||
|
||||
# Choose the warning flags for the host compiler, then hand each one to
# nvbench_add_cxx_flag() in order.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
  set(nvbench_warning_flags "/W4")

  if (NVBench_ENABLE_WERROR)
    list(APPEND nvbench_warning_flags "/WX")
  endif()

  # Suppress overly-pedantic/unavoidable warnings brought in with /W4:
  # C4505: unreferenced local function has been removed
  #        The CUDA `host_runtime.h` header emits this for
  #        `__cudaUnregisterBinaryUtil`.
  list(APPEND nvbench_warning_flags "/wd4505")
else()
  set(nvbench_warning_flags
    "-Wall"
    "-Wextra"
    "-Wconversion"
    "-Woverloaded-virtual"
    "-Wcast-qual"
    "-Wpointer-arith"
    "-Wunused-local-typedef"
    "-Wunused-parameter"
    "-Wvla"
    "-Wgnu"
  )

  if (NVBench_ENABLE_WERROR)
    list(APPEND nvbench_warning_flags "-Werror")
  endif()
endif()

foreach (nvbench_warning_flag IN LISTS nvbench_warning_flags)
  nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "${nvbench_warning_flag}")
endforeach()
# Do not leave the scratch list behind in directory scope.
unset(nvbench_warning_flags)
|
||||
|
||||
# CUDA-specific flags. Each one is applied only to CUDA sources compiled with
# the NVIDIA compiler; --promote_warnings is added only when
# NVBench_ENABLE_WERROR is set.
set(nvbench_nvcc_flags
  -Xcudafe=--display_error_number
  -Wno-deprecated-gpu-targets
)
if (NVBench_ENABLE_WERROR)
  list(APPEND nvbench_nvcc_flags -Xcudafe=--promote_warnings)
endif()

foreach (nvbench_nvcc_flag IN LISTS nvbench_nvcc_flags)
  target_compile_options(nvbench.build_interface INTERFACE
    "$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:${nvbench_nvcc_flag}>"
  )
endforeach()
# Do not leave the scratch list behind in directory scope.
unset(nvbench_nvcc_flags)
|
||||
|
||||
function(nvbench_config_target target_name)
|
||||
target_link_libraries(${target_name} PRIVATE nvbench.build_interface)
|
||||
set_target_properties(${target_name} PROPERTIES
|
||||
ARCHIVE_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}"
|
||||
|
||||
@@ -39,7 +39,7 @@ rapids_cpm_find(nlohmann_json 3.9.1
|
||||
)
|
||||
|
||||
# nlohmann_json release headers
# Declared once, as an IMPORTED interface library, with its include directory
# registered as SYSTEM. Declaring the same target name twice is an error.
add_library(nvbench_json INTERFACE IMPORTED)
target_include_directories(nvbench_json SYSTEM INTERFACE
  "${nlohmann_json_SOURCE_DIR}/include"
)
|
||||
|
||||
@@ -23,14 +23,14 @@ macro(nvbench_generate_exports)
|
||||
rapids_export(BUILD NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main
|
||||
GLOBAL_TARGETS nvbench main internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_build_export_code_block
|
||||
)
|
||||
rapids_export(INSTALL NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main
|
||||
GLOBAL_TARGETS nvbench main internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_install_export_code_block
|
||||
)
|
||||
|
||||
@@ -21,8 +21,30 @@ foreach(example_src IN LISTS example_srcs)
|
||||
target_link_libraries(${example_name} PRIVATE nvbench::main)
|
||||
set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_17)
|
||||
add_test(NAME ${example_name}
|
||||
COMMAND "$<TARGET_FILE:${example_name}>" --timeout 1
|
||||
COMMAND "$<TARGET_FILE:${example_name}>" --timeout 0.1
|
||||
)
|
||||
|
||||
add_dependencies(nvbench.example.all ${example_name})
|
||||
endforeach()
|
||||
|
||||
# Silence some warnings from old thrust headers:
set(thrust_examples
  auto_throughput
  axes
  exec_tag_sync
  exec_tag_timer
  skip
  throughput
)
# The compiler check does not depend on the example, so it is tested once
# around the loop rather than on every iteration.
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  foreach (example IN LISTS thrust_examples)
    # C4324: structure was padded due to alignment specifier
    nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4324")

    # warning C4201: nonstandard extension used: nameless struct/union:
    # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3)
    # The variable name is passed so if() dereferences it itself.
    if (CUDAToolkit_VERSION VERSION_LESS 11.4)
      nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4201")
    endif()
  endforeach()
endif()
|
||||
|
||||
@@ -68,8 +68,8 @@ void throughput_bench(nvbench::state &state,
|
||||
state.collect_stores_efficiency();
|
||||
|
||||
const auto threads_in_block = 256;
|
||||
const auto blocks_in_grid = (elements + threads_in_block - 1) /
|
||||
threads_in_block;
|
||||
const auto blocks_in_grid =
|
||||
static_cast<int>((elements + threads_in_block - 1) / threads_in_block);
|
||||
|
||||
state.exec([&](nvbench::launch &launch) {
|
||||
kernel<ItemsPerThread>
|
||||
|
||||
@@ -39,10 +39,13 @@ endif()
|
||||
# output ( no PTX version info )
|
||||
# Pick the JSON printer source: the .cxx file for NVIDIA CUDA compilers older
# than 11.1, the .cu file otherwise. json_is_cu records which one was chosen.
# The file is appended to srcs exactly once, after the choice is made.
if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA AND
   CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.1)
  set(json_printer_impl json_printer.cxx)
  set(json_is_cu FALSE)
else()
  set(json_printer_impl json_printer.cu)
  set(json_is_cu TRUE)
endif()
list(APPEND srcs ${json_printer_impl})
|
||||
|
||||
# Generate doc strings from md files:
|
||||
include("../cmake/FileToString.cmake")
|
||||
@@ -61,31 +64,44 @@ nvbench_write_config_header("${NVBench_BINARY_DIR}/nvbench/config.cuh")
|
||||
|
||||
# nvbench (nvbench::nvbench)
add_library(nvbench SHARED ${srcs})
target_include_directories(nvbench PUBLIC
  "$<BUILD_INTERFACE:${NVBench_SOURCE_DIR}>"
  "$<BUILD_INTERFACE:${NVBench_BINARY_DIR}>"
  "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
# One target_link_libraries() call carries both the public and the private
# dependencies.
target_link_libraries(nvbench
  PUBLIC
    ${ctk_libraries}
  PRIVATE
    fmt::fmt
    nvbench_json
    nvbench_git_revision
)
# Applied once, after the dependencies above have been linked.
nvbench_config_target(nvbench)
target_compile_features(nvbench PUBLIC cuda_std_17 PRIVATE cxx_std_17)
add_dependencies(nvbench.all nvbench)
|
||||
|
||||
# nvbench.main (nvbench::main)
# Object library built from main.cu. It is exported under the name `main`.
add_library(nvbench.main OBJECT main.cu)
nvbench_config_target(nvbench.main)
target_link_libraries(nvbench.main PUBLIC nvbench)
set_target_properties(nvbench.main PROPERTIES EXPORT_NAME main)
add_dependencies(nvbench.all nvbench.main)

# Support add_subdirectory:
# Each alias is declared exactly once; a repeated alias name is an error.
add_library(nvbench::nvbench ALIAS nvbench)
add_library(nvbench::main ALIAS nvbench.main)

nvbench_setup_dep_dlls(nvbench)
nvbench_install_libraries(nvbench nvbench.main nvbench.build_interface)
|
||||
|
||||
# nvcc emits several unavoidable warnings while compiling nlohmann_json, so
# when the JSON printer is built as a .cu file, extra per-source options
# suppress them:
if (json_is_cu)
  set_property(
    SOURCE ${json_printer_impl}
    APPEND PROPERTY COMPILE_OPTIONS
      # error #186-D: pointless comparison of unsigned integer with zero
      "$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcudafe=--diag_suppress=186>"
      # error #940-D: missing return statement at end of non-void function
      # (the end of the function in hash.hpp(114) is unreachable)
      "$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcudafe=--diag_suppress=940>"
  )
endif()
|
||||
|
||||
@@ -95,16 +95,16 @@ private:
|
||||
axes_type m_axes;
|
||||
};
|
||||
|
||||
template <typename ...TypeAxes>
|
||||
template <typename... TypeAxes>
|
||||
axes_metadata::axes_metadata(nvbench::type_list<TypeAxes...>)
|
||||
: axes_metadata{}
|
||||
{
|
||||
using type_axes = nvbench::type_list<TypeAxes...>;
|
||||
constexpr auto num_type_axes = nvbench::tl::size<type_axes>::value;
|
||||
using type_axes_list = nvbench::type_list<TypeAxes...>;
|
||||
constexpr auto num_type_axes = nvbench::tl::size<type_axes_list>::value;
|
||||
auto names = axes_metadata::generate_default_type_axis_names(num_type_axes);
|
||||
|
||||
auto names_iter = names.begin(); // contents will be moved from
|
||||
nvbench::tl::foreach<type_axes>(
|
||||
nvbench::tl::foreach<type_axes_list>(
|
||||
[&axes = m_axes, &names_iter]([[maybe_unused]] auto wrapped_type) {
|
||||
// This is always called before other axes are added, so the length of the
|
||||
// axes vector will be the type axis index:
|
||||
|
||||
@@ -39,7 +39,7 @@ std::unique_ptr<benchmark_base> benchmark_base::clone() const
|
||||
result->m_skip_time = m_skip_time;
|
||||
result->m_timeout = m_timeout;
|
||||
|
||||
return std::move(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
benchmark_base &benchmark_base::set_devices(std::vector<int> device_ids)
|
||||
|
||||
@@ -51,7 +51,7 @@ struct cpu_timer
|
||||
const auto duration = m_stop - m_start;
|
||||
const auto ns =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
|
||||
return ns * (1e-9);
|
||||
return static_cast<nvbench::float64_t>(ns) * (1e-9);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -108,16 +108,16 @@ void cupti_profiler::initialize_profiler()
|
||||
m_device.get_sm_version());
|
||||
}
|
||||
|
||||
CUpti_Profiler_Initialize_Params params = {
|
||||
CUpti_Profiler_Initialize_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_Initialize_Params params{};
|
||||
params.structSize = CUpti_Profiler_Initialize_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerInitialize(¶ms));
|
||||
}
|
||||
|
||||
void cupti_profiler::initialize_chip_name()
|
||||
{
|
||||
CUpti_Device_GetChipName_Params params = {
|
||||
CUpti_Device_GetChipName_Params_STRUCT_SIZE};
|
||||
params.deviceIndex = m_device.get_id();
|
||||
CUpti_Device_GetChipName_Params params{};
|
||||
params.structSize = CUpti_Device_GetChipName_Params_STRUCT_SIZE;
|
||||
params.deviceIndex = static_cast<size_t>(m_device.get_id());
|
||||
cupti_call(cuptiDeviceGetChipName(¶ms));
|
||||
|
||||
m_chip_name = std::string(params.pChipName);
|
||||
@@ -125,10 +125,10 @@ void cupti_profiler::initialize_chip_name()
|
||||
|
||||
void cupti_profiler::initialize_availability_image()
|
||||
{
|
||||
CUpti_Profiler_GetCounterAvailability_Params params = {
|
||||
CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_GetCounterAvailability_Params params{};
|
||||
|
||||
params.ctx = m_device.get_context();
|
||||
params.structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE;
|
||||
params.ctx = m_device.get_context();
|
||||
|
||||
cupti_call(cuptiProfilerGetCounterAvailability(¶ms));
|
||||
|
||||
@@ -141,8 +141,8 @@ void cupti_profiler::initialize_availability_image()
|
||||
|
||||
void cupti_profiler::initialize_nvpw()
|
||||
{
|
||||
NVPW_InitializeHost_Params params = {NVPW_InitializeHost_Params_STRUCT_SIZE};
|
||||
|
||||
NVPW_InitializeHost_Params params{};
|
||||
params.structSize = NVPW_InitializeHost_Params_STRUCT_SIZE;
|
||||
nvpw_call(NVPW_InitializeHost(¶ms));
|
||||
}
|
||||
|
||||
@@ -158,9 +158,11 @@ public:
|
||||
const std::string &metric_name)
|
||||
: evaluator_ptr(evaluator_ptr)
|
||||
{
|
||||
NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = {
|
||||
NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE};
|
||||
NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params =
|
||||
{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE;
|
||||
params.pMetricsEvaluator = evaluator_ptr;
|
||||
params.pMetricName = metric_name.c_str();
|
||||
params.pMetricEvalRequest = &request;
|
||||
@@ -174,9 +176,10 @@ public:
|
||||
{
|
||||
std::vector<const char *> raw_dependencies;
|
||||
|
||||
NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params = {
|
||||
NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE};
|
||||
NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE;
|
||||
params.pMetricsEvaluator = evaluator_ptr;
|
||||
params.pMetricEvalRequests = &request;
|
||||
params.numMetricEvalRequests = 1;
|
||||
@@ -209,9 +212,10 @@ public:
|
||||
const std::size_t counter_data_image_size = 0)
|
||||
{
|
||||
NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params
|
||||
scratch_buffer_param = {
|
||||
NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
|
||||
scratch_buffer_param{};
|
||||
|
||||
scratch_buffer_param.structSize =
|
||||
NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE;
|
||||
scratch_buffer_param.pChipName = chip_name.c_str();
|
||||
scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image;
|
||||
|
||||
@@ -220,9 +224,10 @@ public:
|
||||
|
||||
scratch_buffer.resize(scratch_buffer_param.scratchBufferSize);
|
||||
|
||||
NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params = {
|
||||
NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
|
||||
NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params{};
|
||||
|
||||
evaluator_params.structSize =
|
||||
NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE;
|
||||
evaluator_params.scratchBufferSize = scratch_buffer.size();
|
||||
evaluator_params.pScratchBuffer = scratch_buffer.data();
|
||||
evaluator_params.pChipName = chip_name.c_str();
|
||||
@@ -240,9 +245,9 @@ public:
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
NVPW_MetricsEvaluator_Destroy_Params params = {
|
||||
NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE};
|
||||
NVPW_MetricsEvaluator_Destroy_Params params{};
|
||||
|
||||
params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE;
|
||||
params.pMetricsEvaluator = evaluator_ptr;
|
||||
|
||||
nvpw_call(NVPW_MetricsEvaluator_Destroy(¶ms));
|
||||
@@ -289,7 +294,8 @@ namespace
|
||||
|
||||
for (auto &raw_name : raw_metric_names)
|
||||
{
|
||||
NVPA_RawMetricRequest metricRequest = {NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE};
|
||||
NVPA_RawMetricRequest metricRequest{};
|
||||
metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE;
|
||||
metricRequest.pMetricName = raw_name;
|
||||
metricRequest.isolated = true;
|
||||
metricRequest.keepInstances = true;
|
||||
@@ -306,9 +312,9 @@ class metrics_config
|
||||
void create(const std::string &chip_name,
|
||||
const std::uint8_t *availability_image)
|
||||
{
|
||||
NVPW_CUDA_RawMetricsConfig_Create_V2_Params params = {
|
||||
NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE};
|
||||
NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{};
|
||||
|
||||
params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE;
|
||||
params.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
|
||||
params.pChipName = chip_name.c_str();
|
||||
params.pCounterAvailabilityImage = availability_image;
|
||||
@@ -321,9 +327,10 @@ class metrics_config
|
||||
|
||||
void set_availability_image(const std::uint8_t *availability_image)
|
||||
{
|
||||
NVPW_RawMetricsConfig_SetCounterAvailability_Params params = {
|
||||
NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_SetCounterAvailability_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
params.pCounterAvailabilityImage = availability_image;
|
||||
|
||||
@@ -332,9 +339,9 @@ class metrics_config
|
||||
|
||||
void begin_config_group()
|
||||
{
|
||||
NVPW_RawMetricsConfig_BeginPassGroup_Params params = {
|
||||
NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_BeginPassGroup_Params params{};
|
||||
|
||||
params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
|
||||
nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(¶ms));
|
||||
@@ -342,9 +349,9 @@ class metrics_config
|
||||
|
||||
void add_metrics(const std::vector<NVPA_RawMetricRequest> &raw_metric_requests)
|
||||
{
|
||||
NVPW_RawMetricsConfig_AddMetrics_Params params = {
|
||||
NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_AddMetrics_Params params{};
|
||||
|
||||
params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
params.pRawMetricRequests = raw_metric_requests.data();
|
||||
params.numMetricRequests = raw_metric_requests.size();
|
||||
@@ -354,9 +361,9 @@ class metrics_config
|
||||
|
||||
void end_config_group()
|
||||
{
|
||||
NVPW_RawMetricsConfig_EndPassGroup_Params params = {
|
||||
NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_EndPassGroup_Params params{};
|
||||
|
||||
params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
|
||||
nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(¶ms));
|
||||
@@ -364,9 +371,10 @@ class metrics_config
|
||||
|
||||
void generate()
|
||||
{
|
||||
NVPW_RawMetricsConfig_GenerateConfigImage_Params params = {
|
||||
NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_GenerateConfigImage_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
|
||||
nvpw_call(NVPW_RawMetricsConfig_GenerateConfigImage(¶ms));
|
||||
@@ -388,9 +396,9 @@ public:
|
||||
|
||||
[[nodiscard]] std::vector<std::uint8_t> get_config_image()
|
||||
{
|
||||
NVPW_RawMetricsConfig_GetConfigImage_Params params = {
|
||||
NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_GetConfigImage_Params params{};
|
||||
|
||||
params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
params.bytesAllocated = 0;
|
||||
params.pBuffer = nullptr;
|
||||
@@ -409,9 +417,9 @@ public:
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
NVPW_RawMetricsConfig_Destroy_Params params = {
|
||||
NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE};
|
||||
NVPW_RawMetricsConfig_Destroy_Params params{};
|
||||
|
||||
params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE;
|
||||
params.pRawMetricsConfig = raw_metrics_config;
|
||||
|
||||
NVPW_RawMetricsConfig_Destroy(¶ms);
|
||||
@@ -445,9 +453,9 @@ public:
|
||||
counter_data_builder(const std::string &chip_name,
|
||||
const std::uint8_t *pCounterAvailabilityImage)
|
||||
{
|
||||
NVPW_CUDA_CounterDataBuilder_Create_Params params = {
|
||||
NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE};
|
||||
NVPW_CUDA_CounterDataBuilder_Create_Params params{};
|
||||
|
||||
params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE;
|
||||
params.pChipName = chip_name.c_str();
|
||||
params.pCounterAvailabilityImage = pCounterAvailabilityImage;
|
||||
|
||||
@@ -461,9 +469,9 @@ public:
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
NVPW_CounterDataBuilder_Destroy_Params params = {
|
||||
NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE};
|
||||
NVPW_CounterDataBuilder_Destroy_Params params{};
|
||||
|
||||
params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE;
|
||||
params.pCounterDataBuilder = builder;
|
||||
|
||||
NVPW_CounterDataBuilder_Destroy(¶ms);
|
||||
@@ -487,9 +495,9 @@ void cupti_profiler::initialize_counter_data_prefix_image()
|
||||
counter_data_builder data_builder(m_chip_name, counter_availability_image);
|
||||
|
||||
{
|
||||
NVPW_CounterDataBuilder_AddMetrics_Params params = {
|
||||
NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE};
|
||||
NVPW_CounterDataBuilder_AddMetrics_Params params{};
|
||||
|
||||
params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE;
|
||||
params.pCounterDataBuilder = data_builder.builder;
|
||||
params.pRawMetricRequests = raw_metric_requests.data();
|
||||
params.numMetricRequests = raw_metric_requests.size();
|
||||
@@ -498,9 +506,10 @@ void cupti_profiler::initialize_counter_data_prefix_image()
|
||||
}
|
||||
|
||||
{
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params = {
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE};
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE;
|
||||
params.pCounterDataBuilder = data_builder.builder;
|
||||
params.bytesAllocated = 0;
|
||||
params.pBuffer = nullptr;
|
||||
@@ -521,9 +530,10 @@ namespace
|
||||
[[nodiscard]] std::size_t
|
||||
get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options)
|
||||
{
|
||||
CUpti_Profiler_CounterDataImage_CalculateSize_Params params = {
|
||||
CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_CounterDataImage_CalculateSize_Params params{};
|
||||
|
||||
params.structSize =
|
||||
CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE;
|
||||
params.pOptions = options;
|
||||
params.sizeofCounterDataImageOptions =
|
||||
CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
@@ -547,9 +557,10 @@ void cupti_profiler::initialize_counter_data_image()
|
||||
m_data_image.resize(get_counter_data_image_size(&counter_data_image_options));
|
||||
|
||||
{
|
||||
CUpti_Profiler_CounterDataImage_Initialize_Params params = {
|
||||
CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_CounterDataImage_Initialize_Params params{};
|
||||
|
||||
params.structSize =
|
||||
CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE;
|
||||
params.sizeofCounterDataImageOptions =
|
||||
CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
params.pOptions = &counter_data_image_options;
|
||||
@@ -560,9 +571,10 @@ void cupti_profiler::initialize_counter_data_image()
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params params = {
|
||||
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params params{};
|
||||
|
||||
params.structSize =
|
||||
CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE;
|
||||
params.counterDataImageSize = m_data_image.size();
|
||||
params.pCounterDataImage = &m_data_image[0];
|
||||
|
||||
@@ -573,9 +585,10 @@ void cupti_profiler::initialize_counter_data_image()
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params = {
|
||||
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params{};
|
||||
|
||||
params.structSize =
|
||||
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE;
|
||||
params.counterDataImageSize = m_data_image.size();
|
||||
params.pCounterDataImage = &m_data_image[0];
|
||||
params.counterDataScratchBufferSize = m_data_scratch_buffer.size();
|
||||
@@ -589,9 +602,8 @@ cupti_profiler::~cupti_profiler()
|
||||
{
|
||||
if (is_initialized())
|
||||
{
|
||||
CUpti_Profiler_DeInitialize_Params params = {
|
||||
CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_DeInitialize_Params params{};
|
||||
params.structSize = CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE;
|
||||
cuptiProfilerDeInitialize(¶ms);
|
||||
}
|
||||
}
|
||||
@@ -604,9 +616,9 @@ bool cupti_profiler::is_initialized() const
|
||||
void cupti_profiler::prepare_user_loop()
|
||||
{
|
||||
{
|
||||
CUpti_Profiler_BeginSession_Params params = {
|
||||
CUpti_Profiler_BeginSession_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_BeginSession_Params params{};
|
||||
|
||||
params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE;
|
||||
params.ctx = nullptr;
|
||||
params.counterDataImageSize = m_data_image.size();
|
||||
params.pCounterDataImage = &m_data_image[0];
|
||||
@@ -623,9 +635,9 @@ void cupti_profiler::prepare_user_loop()
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_SetConfig_Params params = {
|
||||
CUpti_Profiler_SetConfig_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_SetConfig_Params params{};
|
||||
|
||||
params.structSize = CUpti_Profiler_SetConfig_Params_STRUCT_SIZE;
|
||||
params.pConfig = &m_config_image[0];
|
||||
params.configSize = m_config_image.size();
|
||||
params.minNestingLevel = 1;
|
||||
@@ -639,25 +651,24 @@ void cupti_profiler::prepare_user_loop()
|
||||
void cupti_profiler::start_user_loop()
|
||||
{
|
||||
{
|
||||
CUpti_Profiler_BeginPass_Params params = {
|
||||
CUpti_Profiler_BeginPass_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_BeginPass_Params params{};
|
||||
params.structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerBeginPass(¶ms));
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_EnableProfiling_Params params = {
|
||||
CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_EnableProfiling_Params params{};
|
||||
params.structSize = CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerEnableProfiling(¶ms));
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_PushRange_Params params = {
|
||||
CUpti_Profiler_PushRange_Params_STRUCT_SIZE};
|
||||
CUpti_Profiler_PushRange_Params params{};
|
||||
|
||||
std::string rangeName = "nvbench";
|
||||
params.pRangeName = rangeName.c_str();
|
||||
|
||||
params.structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE;
|
||||
params.pRangeName = rangeName.c_str();
|
||||
|
||||
cupti_call(cuptiProfilerPushRange(¶ms));
|
||||
}
|
||||
@@ -666,25 +677,22 @@ void cupti_profiler::start_user_loop()
|
||||
void cupti_profiler::stop_user_loop()
|
||||
{
|
||||
{
|
||||
CUpti_Profiler_PopRange_Params params = {
|
||||
CUpti_Profiler_PopRange_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_PopRange_Params params{};
|
||||
params.structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerPopRange(¶ms));
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_DisableProfiling_Params params = {
|
||||
CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_DisableProfiling_Params params{};
|
||||
params.structSize = CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerDisableProfiling(¶ms));
|
||||
}
|
||||
}
|
||||
|
||||
bool cupti_profiler::is_replay_required()
|
||||
{
|
||||
CUpti_Profiler_EndPass_Params params = {
|
||||
CUpti_Profiler_EndPass_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_EndPass_Params params{};
|
||||
params.structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerEndPass(¶ms));
|
||||
|
||||
return !params.allPassesSubmitted;
|
||||
@@ -693,23 +701,20 @@ bool cupti_profiler::is_replay_required()
|
||||
void cupti_profiler::process_user_loop()
|
||||
{
|
||||
{
|
||||
CUpti_Profiler_FlushCounterData_Params params = {
|
||||
CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_FlushCounterData_Params params{};
|
||||
params.structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerFlushCounterData(¶ms));
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_UnsetConfig_Params params = {
|
||||
CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_UnsetConfig_Params params{};
|
||||
params.structSize = CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerUnsetConfig(¶ms));
|
||||
}
|
||||
|
||||
{
|
||||
CUpti_Profiler_EndSession_Params params = {
|
||||
CUpti_Profiler_EndSession_Params_STRUCT_SIZE};
|
||||
|
||||
CUpti_Profiler_EndSession_Params params{};
|
||||
params.structSize = CUpti_Profiler_EndSession_Params_STRUCT_SIZE;
|
||||
cupti_call(cuptiProfilerEndSession(¶ms));
|
||||
}
|
||||
}
|
||||
@@ -722,9 +727,9 @@ std::vector<double> cupti_profiler::get_counter_values()
|
||||
m_data_image.size());
|
||||
|
||||
{
|
||||
NVPW_CounterData_GetNumRanges_Params params = {
|
||||
NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE};
|
||||
NVPW_CounterData_GetNumRanges_Params params{};
|
||||
|
||||
params.structSize = NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE;
|
||||
params.pCounterDataImage = m_data_image.data();
|
||||
nvpw_call(NVPW_CounterData_GetNumRanges(&params));
|
||||
|
||||
@@ -745,9 +750,10 @@ std::vector<double> cupti_profiler::get_counter_values()
|
||||
eval_request request = evaluator.create_request(metric_name);
|
||||
|
||||
{
|
||||
NVPW_MetricsEvaluator_SetDeviceAttributes_Params params = {
|
||||
NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE};
|
||||
NVPW_MetricsEvaluator_SetDeviceAttributes_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE;
|
||||
params.pMetricsEvaluator = evaluator;
|
||||
params.pCounterDataImage = m_data_image.data();
|
||||
params.counterDataImageSize = m_data_image.size();
|
||||
@@ -756,9 +762,10 @@ std::vector<double> cupti_profiler::get_counter_values()
|
||||
}
|
||||
|
||||
{
|
||||
NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params = {
|
||||
NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE};
|
||||
NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params{};
|
||||
|
||||
params.structSize =
|
||||
NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE;
|
||||
params.pMetricsEvaluator = evaluator;
|
||||
params.pMetricEvalRequests = &request.request;
|
||||
params.numMetricEvalRequests = 1;
|
||||
|
||||
@@ -179,8 +179,9 @@ private:
|
||||
m_total_samples += batch_size;
|
||||
|
||||
// Predict number of remaining iterations:
|
||||
batch_size = (m_min_time - m_total_cuda_time) /
|
||||
(m_total_cuda_time / m_total_samples);
|
||||
batch_size = static_cast<nvbench::int64_t>(
|
||||
(m_min_time - m_total_cuda_time) /
|
||||
(m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples)));
|
||||
|
||||
m_timeout_timer.stop();
|
||||
const auto total_time = m_timeout_timer.get_duration();
|
||||
|
||||
@@ -30,6 +30,6 @@ std::string float64_axis::do_get_input_string(std::size_t i) const
|
||||
return fmt::format("{:0.5g}", m_values[i]);
|
||||
}
|
||||
|
||||
std::string float64_axis::do_get_description(std::size_t i) const { return {}; }
|
||||
std::string float64_axis::do_get_description(std::size_t) const { return {}; }
|
||||
|
||||
} // namespace nvbench
|
||||
|
||||
@@ -73,7 +73,7 @@ struct table_builder
|
||||
m_num_rows = nvbench::detail::transform_reduce(
|
||||
m_columns.cbegin(),
|
||||
m_columns.cend(),
|
||||
0ll,
|
||||
std::size_t{},
|
||||
[](const auto &a, const auto &b) { return a > b ? a : b; },
|
||||
[](const column &col) { return col.rows.size(); });
|
||||
std::for_each(m_columns.begin(),
|
||||
|
||||
@@ -420,7 +420,7 @@ std::string markdown_printer::do_format_item_rate(const summary &data)
|
||||
|
||||
std::string markdown_printer::do_format_bytes(const summary &data)
|
||||
{
|
||||
const auto bytes = data.get_int64("value");
|
||||
const auto bytes = static_cast<nvbench::float64_t>(data.get_int64("value"));
|
||||
if (bytes >= 1024. * 1024. * 1024.) // 1 GiB
|
||||
{
|
||||
return fmt::format("{:0.3f} GiB", bytes / (1024. * 1024. * 1024.));
|
||||
|
||||
@@ -187,8 +187,8 @@ std::string state::get_axis_values_as_string(bool color) const
|
||||
if (axis_type == named_values::type::int64 &&
|
||||
axes.get_int64_axis(name).is_power_of_two())
|
||||
{
|
||||
const nvbench::uint64_t value = m_axis_values.get_int64(name);
|
||||
const nvbench::uint64_t exponent = int64_axis::compute_log2(value);
|
||||
const nvbench::int64_t value = m_axis_values.get_int64(name);
|
||||
const nvbench::int64_t exponent = int64_axis::compute_log2(value);
|
||||
append_key_value(name, exponent, "2^{}");
|
||||
}
|
||||
else if (axis_type == named_values::type::float64)
|
||||
|
||||
@@ -138,7 +138,7 @@ void test_type_configs()
|
||||
lots_of_types_bench bench;
|
||||
bench.set_type_axes_names({"Integer", "Float", "Other"});
|
||||
|
||||
ASSERT(bench.num_type_configs == 16);
|
||||
static_assert(bench.num_type_configs == 16);
|
||||
|
||||
std::size_t idx = 0;
|
||||
fmt::memory_buffer buffer;
|
||||
|
||||
@@ -64,11 +64,12 @@ void test_fp_tolerance()
|
||||
// Make sure that the range is padded a bit for floats to prevent rounding
|
||||
// errors from skipping `end`. This test will trigger failures without
|
||||
// the padding.
|
||||
const nvbench::float32_t start = 0.1;
|
||||
const nvbench::float32_t stride = 1e-4;
|
||||
const nvbench::float32_t start = 0.1f;
|
||||
const nvbench::float32_t stride = 1e-4f;
|
||||
for (std::size_t size = 1; size < 1024; ++size)
|
||||
{
|
||||
const nvbench::float32_t end = start + stride * (size - 1);
|
||||
const nvbench::float32_t end =
|
||||
start + stride * static_cast<nvbench::float32_t>(size - 1);
|
||||
ASSERT_MSG(nvbench::range(start, end, stride).size() == size,
|
||||
"size={}", size);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user