diff --git a/CMakeLists.txt b/CMakeLists.txt index 70d2d16..f46d88b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,18 +6,31 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CUDA_STANDARD 17) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_LIST_DIR}") + set(NVBench_TOPLEVEL_PROJECT ON) +else() + set(NVBench_TOPLEVEL_PROJECT OFF) +endif() + include(cmake/NVBenchRapidsCMake.cmake) nvbench_load_rapids_cmake() project(NVBench - LANGUAGES CUDA CXX # CXX to work around issues with CUDA-only CMake projects. + LANGUAGES CUDA CXX VERSION 0.1.0 ) nvbench_init_rapids_cmake() +# See NVIDIA/NVBench#52 +find_package(CUDAToolkit REQUIRED) +set(cupti_default ON) +if (${CUDAToolkit_VERSION} VERSION_LESS 11.3) + set(cupti_default OFF) +endif() + option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) -option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ON) +option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default}) option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index 6676bb4..295feb5 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -1,10 +1,81 @@ +include(CheckCXXCompilerFlag) + +option(NVBench_ENABLE_WERROR + "Treat warnings as errors while compiling NVBench." + ${NVBench_TOPLEVEL_PROJECT} +) +mark_as_advanced(NVBench_ENABLE_WERROR) + # Builds all NVBench targets (libs, tests, examples, etc). add_custom_target(nvbench.all) set(NVBench_LIBRARY_OUTPUT_DIR "${CMAKE_BINARY_DIR}/lib") set(NVBench_EXECUTABLE_OUTPUT_DIR "${CMAKE_BINARY_DIR}/bin") +add_library(nvbench.build_interface INTERFACE) + +# TODO Why must this be installed/exported if it's just a private interface? +# CMake complains about it missing from the export set unless we export it. +# Is there way to avoid this? +set_target_properties(nvbench.build_interface PROPERTIES + EXPORT_NAME internal_build_interface +) + +function(nvbench_add_cxx_flag target_name type flag) + string(MAKE_C_IDENTIFIER "NVBench_CXX_FLAG_${flag}" var) + check_cxx_compiler_flag(${flag} ${var}) + + if (${${var}}) + target_compile_options(${target_name} ${type} + $<$:${flag}> + $<$:-Xcompiler=${flag}> + # FIXME nvc++ case + ) + endif() +endfunction() + +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/W4") + + if (NVBench_ENABLE_WERROR) + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/WX") + endif() + + # Suppress overly-pedantic/unavoidable warnings brought in with /W4: + # C4505: unreferenced local function has been removed + # The CUDA `host_runtime.h` header emits this for + # `__cudaUnregisterBinaryUtil`. + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/wd4505") +else() + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wall") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wextra") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wconversion") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Woverloaded-virtual") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wcast-qual") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wpointer-arith") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-local-typedef") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-parameter") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wvla") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wgnu") + + if (NVBench_ENABLE_WERROR) + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Werror") + endif() +endif() + +# CUDA-specific flags +target_compile_options(nvbench.build_interface INTERFACE + $<$:-Xcudafe=--display_error_number> + $<$:-Wno-deprecated-gpu-targets> +) +if (NVBench_ENABLE_WERROR) + target_compile_options(nvbench.build_interface INTERFACE + $<$:-Xcudafe=--promote_warnings> + ) +endif() + function(nvbench_config_target target_name) + target_link_libraries(${target_name} PRIVATE nvbench.build_interface) set_target_properties(${target_name} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}" LIBRARY_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}" diff --git a/cmake/NVBenchDependencies.cmake b/cmake/NVBenchDependencies.cmake index 761210c..5496b8f 100644 --- a/cmake/NVBenchDependencies.cmake +++ b/cmake/NVBenchDependencies.cmake @@ -39,7 +39,7 @@ rapids_cpm_find(nlohmann_json 3.9.1 ) # nlohmann_json release headers -add_library(nvbench_json INTERFACE) +add_library(nvbench_json INTERFACE IMPORTED) target_include_directories(nvbench_json SYSTEM INTERFACE "${nlohmann_json_SOURCE_DIR}/include" ) diff --git a/cmake/NVBenchExports.cmake b/cmake/NVBenchExports.cmake index b4f0f6e..732539e 100644 --- a/cmake/NVBenchExports.cmake +++ b/cmake/NVBenchExports.cmake @@ -23,14 +23,14 @@ macro(nvbench_generate_exports) rapids_export(BUILD NVBench EXPORT_SET nvbench-targets NAMESPACE "nvbench::" - GLOBAL_TARGETS nvbench main + GLOBAL_TARGETS nvbench main internal_build_interface LANGUAGES CUDA CXX FINAL_CODE_BLOCK nvbench_build_export_code_block ) rapids_export(INSTALL NVBench EXPORT_SET nvbench-targets NAMESPACE "nvbench::" - GLOBAL_TARGETS nvbench main + GLOBAL_TARGETS nvbench main internal_build_interface LANGUAGES CUDA CXX FINAL_CODE_BLOCK nvbench_install_export_code_block ) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 34c8763..a6adc80 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -21,8 +21,30 @@ foreach(example_src IN LISTS example_srcs) target_link_libraries(${example_name} PRIVATE nvbench::main) set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_17) add_test(NAME ${example_name} - COMMAND "$" --timeout 1 + COMMAND "$" --timeout 0.1 ) add_dependencies(nvbench.example.all ${example_name}) endforeach() + +# Silence some warnings from old thrust headers: +set(thrust_examples + auto_throughput + axes + exec_tag_sync + exec_tag_timer + skip + throughput +) +foreach (example IN LISTS thrust_examples) + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # C4324: structure was padded due to alignment specifier + nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4324") + + # warning C4201: nonstandard extension used: nameless struct/union: + # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3) + if (${CUDAToolkit_VERSION} VERSION_LESS 11.4) + nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4201") + endif() + endif() +endforeach() diff --git a/examples/auto_throughput.cu b/examples/auto_throughput.cu index 2e49e3c..14b6b94 100644 --- a/examples/auto_throughput.cu +++ b/examples/auto_throughput.cu @@ -68,8 +68,8 @@ void throughput_bench(nvbench::state &state, state.collect_stores_efficiency(); const auto threads_in_block = 256; - const auto blocks_in_grid = (elements + threads_in_block - 1) / - threads_in_block; + const auto blocks_in_grid = + static_cast((elements + threads_in_block - 1) / threads_in_block); state.exec([&](nvbench::launch &launch) { kernel diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 1aeb2f6..e99699c 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -39,10 +39,13 @@ endif() # output ( no PTX version info ) if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.1) - list(APPEND srcs json_printer.cxx) + set(json_printer_impl json_printer.cxx) + set(json_is_cu FALSE) else() - list(APPEND srcs json_printer.cu) + set(json_printer_impl json_printer.cu) + set(json_is_cu TRUE) endif() +list(APPEND srcs ${json_printer_impl}) # Generate doc strings from md files: include("../cmake/FileToString.cmake") @@ -61,31 +64,44 @@ nvbench_write_config_header("${NVBench_BINARY_DIR}/nvbench/config.cuh") # nvbench (nvbench::nvbench) add_library(nvbench SHARED ${srcs}) +nvbench_config_target(nvbench) target_include_directories(nvbench PUBLIC "$" "$" "$" ) -target_link_libraries(nvbench PUBLIC ${ctk_libraries}) -target_link_libraries(nvbench PRIVATE - fmt::fmt - nvbench_json - nvbench_git_revision +target_link_libraries(nvbench + PUBLIC + ${ctk_libraries} + PRIVATE + fmt::fmt + nvbench_json + nvbench_git_revision ) -nvbench_config_target(nvbench) target_compile_features(nvbench PUBLIC cuda_std_17 PRIVATE cxx_std_17) add_dependencies(nvbench.all nvbench) -# nvbench_main (nvbench::main) -add_library(nvbench_main OBJECT main.cu) -nvbench_config_target(nvbench_main) -target_link_libraries(nvbench_main PUBLIC nvbench) -set_target_properties(nvbench_main PROPERTIES EXPORT_NAME main) -add_dependencies(nvbench.all nvbench_main) +# nvbench.main (nvbench::main) +add_library(nvbench.main OBJECT main.cu) +nvbench_config_target(nvbench.main) +target_link_libraries(nvbench.main PUBLIC nvbench) +set_target_properties(nvbench.main PROPERTIES EXPORT_NAME main) +add_dependencies(nvbench.all nvbench.main) # Support add_subdirectory: add_library(nvbench::nvbench ALIAS nvbench) -add_library(nvbench::main ALIAS nvbench_main) +add_library(nvbench::main ALIAS nvbench.main) nvbench_setup_dep_dlls(nvbench) -nvbench_install_libraries(nvbench nvbench_main) +nvbench_install_libraries(nvbench nvbench.main nvbench.build_interface) + +# nvcc emits several unavoidable warnings while compiling nlohmann_json: +if (json_is_cu) + set_property(SOURCE ${json_printer_impl} APPEND PROPERTY COMPILE_OPTIONS + # error #186-D: pointless comparison of unsigned integer with zero + $<$:-Xcudafe=--diag_suppress=186> + # error #940-D: missing return statement at end of non-void function + # (the end of the function in hash.hpp(114) is unreachable) + $<$:-Xcudafe=--diag_suppress=940> + ) +endif() diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index c2f92c8..353855a 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -95,16 +95,16 @@ private: axes_type m_axes; }; -template +template axes_metadata::axes_metadata(nvbench::type_list) : axes_metadata{} { - using type_axes = nvbench::type_list; - constexpr auto num_type_axes = nvbench::tl::size::value; + using type_axes_list = nvbench::type_list; + constexpr auto num_type_axes = nvbench::tl::size::value; auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); auto names_iter = names.begin(); // contents will be moved from - nvbench::tl::foreach( + nvbench::tl::foreach( [&axes = m_axes, &names_iter]([[maybe_unused]] auto wrapped_type) { // This is always called before other axes are added, so the length of the // axes vector will be the type axis index: diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 93ae008..c981df8 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -39,7 +39,7 @@ std::unique_ptr benchmark_base::clone() const result->m_skip_time = m_skip_time; result->m_timeout = m_timeout; - return std::move(result); + return result; } benchmark_base &benchmark_base::set_devices(std::vector device_ids) diff --git a/nvbench/cpu_timer.cuh b/nvbench/cpu_timer.cuh index 06aa8dc..09d3c54 100644 --- a/nvbench/cpu_timer.cuh +++ b/nvbench/cpu_timer.cuh @@ -51,7 +51,7 @@ struct cpu_timer const auto duration = m_stop - m_start; const auto ns = std::chrono::duration_cast(duration).count(); - return ns * (1e-9); + return static_cast(ns) * (1e-9); } private: diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index a2bd432..6dcd81d 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -108,16 +108,16 @@ void cupti_profiler::initialize_profiler() m_device.get_sm_version()); } - CUpti_Profiler_Initialize_Params params = { - CUpti_Profiler_Initialize_Params_STRUCT_SIZE}; + CUpti_Profiler_Initialize_Params params{}; + params.structSize = CUpti_Profiler_Initialize_Params_STRUCT_SIZE; cupti_call(cuptiProfilerInitialize(¶ms)); } void cupti_profiler::initialize_chip_name() { - CUpti_Device_GetChipName_Params params = { - CUpti_Device_GetChipName_Params_STRUCT_SIZE}; - params.deviceIndex = m_device.get_id(); + CUpti_Device_GetChipName_Params params{}; + params.structSize = CUpti_Device_GetChipName_Params_STRUCT_SIZE; + params.deviceIndex = static_cast(m_device.get_id()); cupti_call(cuptiDeviceGetChipName(¶ms)); m_chip_name = std::string(params.pChipName); @@ -125,10 +125,10 @@ void cupti_profiler::initialize_chip_name() void cupti_profiler::initialize_availability_image() { - CUpti_Profiler_GetCounterAvailability_Params params = { - CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE}; + CUpti_Profiler_GetCounterAvailability_Params params{}; - params.ctx = m_device.get_context(); + params.structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE; + params.ctx = m_device.get_context(); cupti_call(cuptiProfilerGetCounterAvailability(¶ms)); @@ -141,8 +141,8 @@ void cupti_profiler::initialize_availability_image() void cupti_profiler::initialize_nvpw() { - NVPW_InitializeHost_Params params = {NVPW_InitializeHost_Params_STRUCT_SIZE}; - + NVPW_InitializeHost_Params params{}; + params.structSize = NVPW_InitializeHost_Params_STRUCT_SIZE; nvpw_call(NVPW_InitializeHost(¶ms)); } @@ -158,9 +158,11 @@ public: const std::string &metric_name) : evaluator_ptr(evaluator_ptr) { - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = { - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE}; + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = + {}; + params.structSize = + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator_ptr; params.pMetricName = metric_name.c_str(); params.pMetricEvalRequest = &request; @@ -174,9 +176,10 @@ public: { std::vector raw_dependencies; - NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params = { - NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE}; + NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params{}; + params.structSize = + NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator_ptr; params.pMetricEvalRequests = &request; params.numMetricEvalRequests = 1; @@ -209,9 +212,10 @@ public: const std::size_t counter_data_image_size = 0) { NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params - scratch_buffer_param = { - NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE}; + scratch_buffer_param{}; + scratch_buffer_param.structSize = + NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE; scratch_buffer_param.pChipName = chip_name.c_str(); scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image; @@ -220,9 +224,10 @@ public: scratch_buffer.resize(scratch_buffer_param.scratchBufferSize); - NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params = { - NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE}; + NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params{}; + evaluator_params.structSize = + NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE; evaluator_params.scratchBufferSize = scratch_buffer.size(); evaluator_params.pScratchBuffer = scratch_buffer.data(); evaluator_params.pChipName = chip_name.c_str(); @@ -240,9 +245,9 @@ public: { if (initialized) { - NVPW_MetricsEvaluator_Destroy_Params params = { - NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE}; + NVPW_MetricsEvaluator_Destroy_Params params{}; + params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator_ptr; nvpw_call(NVPW_MetricsEvaluator_Destroy(¶ms)); @@ -289,7 +294,8 @@ namespace for (auto &raw_name : raw_metric_names) { - NVPA_RawMetricRequest metricRequest = {NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE}; + NVPA_RawMetricRequest metricRequest{}; + metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE; metricRequest.pMetricName = raw_name; metricRequest.isolated = true; metricRequest.keepInstances = true; @@ -306,9 +312,9 @@ class metrics_config void create(const std::string &chip_name, const std::uint8_t *availability_image) { - NVPW_CUDA_RawMetricsConfig_Create_V2_Params params = { - NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE}; + NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; + params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE; params.activityKind = NVPA_ACTIVITY_KIND_PROFILER; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = availability_image; @@ -321,9 +327,10 @@ class metrics_config void set_availability_image(const std::uint8_t *availability_image) { - NVPW_RawMetricsConfig_SetCounterAvailability_Params params = { - NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_SetCounterAvailability_Params params{}; + params.structSize = + NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.pCounterAvailabilityImage = availability_image; @@ -332,9 +339,9 @@ class metrics_config void begin_config_group() { - NVPW_RawMetricsConfig_BeginPassGroup_Params params = { - NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_BeginPassGroup_Params params{}; + params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(¶ms)); @@ -342,9 +349,9 @@ class metrics_config void add_metrics(const std::vector &raw_metric_requests) { - NVPW_RawMetricsConfig_AddMetrics_Params params = { - NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_AddMetrics_Params params{}; + params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -354,9 +361,9 @@ class metrics_config void end_config_group() { - NVPW_RawMetricsConfig_EndPassGroup_Params params = { - NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_EndPassGroup_Params params{}; + params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(¶ms)); @@ -364,9 +371,10 @@ class metrics_config void generate() { - NVPW_RawMetricsConfig_GenerateConfigImage_Params params = { - NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_GenerateConfigImage_Params params{}; + params.structSize = + NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_GenerateConfigImage(¶ms)); @@ -388,9 +396,9 @@ public: [[nodiscard]] std::vector get_config_image() { - NVPW_RawMetricsConfig_GetConfigImage_Params params = { - NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_GetConfigImage_Params params{}; + params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -409,9 +417,9 @@ public: { if (initialized) { - NVPW_RawMetricsConfig_Destroy_Params params = { - NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE}; + NVPW_RawMetricsConfig_Destroy_Params params{}; + params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; NVPW_RawMetricsConfig_Destroy(¶ms); @@ -445,9 +453,9 @@ public: counter_data_builder(const std::string &chip_name, const std::uint8_t *pCounterAvailabilityImage) { - NVPW_CUDA_CounterDataBuilder_Create_Params params = { - NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE}; + NVPW_CUDA_CounterDataBuilder_Create_Params params{}; + params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = pCounterAvailabilityImage; @@ -461,9 +469,9 @@ public: { if (initialized) { - NVPW_CounterDataBuilder_Destroy_Params params = { - NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE}; + NVPW_CounterDataBuilder_Destroy_Params params{}; + params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE; params.pCounterDataBuilder = builder; NVPW_CounterDataBuilder_Destroy(¶ms); @@ -487,9 +495,9 @@ void cupti_profiler::initialize_counter_data_prefix_image() counter_data_builder data_builder(m_chip_name, counter_availability_image); { - NVPW_CounterDataBuilder_AddMetrics_Params params = { - NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE}; + NVPW_CounterDataBuilder_AddMetrics_Params params{}; + params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -498,9 +506,10 @@ void cupti_profiler::initialize_counter_data_prefix_image() } { - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params = { - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE}; + NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params{}; + params.structSize = + NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -521,9 +530,10 @@ namespace [[nodiscard]] std::size_t get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options) { - CUpti_Profiler_CounterDataImage_CalculateSize_Params params = { - CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE}; + CUpti_Profiler_CounterDataImage_CalculateSize_Params params{}; + params.structSize = + CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE; params.pOptions = options; params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; @@ -547,9 +557,10 @@ void cupti_profiler::initialize_counter_data_image() m_data_image.resize(get_counter_data_image_size(&counter_data_image_options)); { - CUpti_Profiler_CounterDataImage_Initialize_Params params = { - CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE}; + CUpti_Profiler_CounterDataImage_Initialize_Params params{}; + params.structSize = + CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE; params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; params.pOptions = &counter_data_image_options; @@ -560,9 +571,10 @@ void cupti_profiler::initialize_counter_data_image() } { - CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params params = { - CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE}; + CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params params{}; + params.structSize = + CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; @@ -573,9 +585,10 @@ void cupti_profiler::initialize_counter_data_image() } { - CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params = { - CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE}; + CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params{}; + params.structSize = + CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; params.counterDataScratchBufferSize = m_data_scratch_buffer.size(); @@ -589,9 +602,8 @@ cupti_profiler::~cupti_profiler() { if (is_initialized()) { - CUpti_Profiler_DeInitialize_Params params = { - CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE}; - + CUpti_Profiler_DeInitialize_Params params{}; + params.structSize = CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE; cuptiProfilerDeInitialize(¶ms); } } @@ -604,9 +616,9 @@ bool cupti_profiler::is_initialized() const void cupti_profiler::prepare_user_loop() { { - CUpti_Profiler_BeginSession_Params params = { - CUpti_Profiler_BeginSession_Params_STRUCT_SIZE}; + CUpti_Profiler_BeginSession_Params params{}; + params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE; params.ctx = nullptr; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; @@ -623,9 +635,9 @@ void cupti_profiler::prepare_user_loop() } { - CUpti_Profiler_SetConfig_Params params = { - CUpti_Profiler_SetConfig_Params_STRUCT_SIZE}; + CUpti_Profiler_SetConfig_Params params{}; + params.structSize = CUpti_Profiler_SetConfig_Params_STRUCT_SIZE; params.pConfig = &m_config_image[0]; params.configSize = m_config_image.size(); params.minNestingLevel = 1; @@ -639,25 +651,24 @@ void cupti_profiler::prepare_user_loop() void cupti_profiler::start_user_loop() { { - CUpti_Profiler_BeginPass_Params params = { - CUpti_Profiler_BeginPass_Params_STRUCT_SIZE}; - + CUpti_Profiler_BeginPass_Params params{}; + params.structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE; cupti_call(cuptiProfilerBeginPass(¶ms)); } { - CUpti_Profiler_EnableProfiling_Params params = { - CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE}; - + CUpti_Profiler_EnableProfiling_Params params{}; + params.structSize = CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE; cupti_call(cuptiProfilerEnableProfiling(¶ms)); } { - CUpti_Profiler_PushRange_Params params = { - CUpti_Profiler_PushRange_Params_STRUCT_SIZE}; + CUpti_Profiler_PushRange_Params params{}; std::string rangeName = "nvbench"; - params.pRangeName = rangeName.c_str(); + + params.structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE; + params.pRangeName = rangeName.c_str(); cupti_call(cuptiProfilerPushRange(¶ms)); } @@ -666,25 +677,22 @@ void cupti_profiler::start_user_loop() void cupti_profiler::stop_user_loop() { { - CUpti_Profiler_PopRange_Params params = { - CUpti_Profiler_PopRange_Params_STRUCT_SIZE}; - + CUpti_Profiler_PopRange_Params params{}; + params.structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE; cupti_call(cuptiProfilerPopRange(¶ms)); } { - CUpti_Profiler_DisableProfiling_Params params = { - CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE}; - + CUpti_Profiler_DisableProfiling_Params params{}; + params.structSize = CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE; cupti_call(cuptiProfilerDisableProfiling(¶ms)); } } bool cupti_profiler::is_replay_required() { - CUpti_Profiler_EndPass_Params params = { - CUpti_Profiler_EndPass_Params_STRUCT_SIZE}; - + CUpti_Profiler_EndPass_Params params{}; + params.structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE; cupti_call(cuptiProfilerEndPass(¶ms)); return !params.allPassesSubmitted; @@ -693,23 +701,20 @@ bool cupti_profiler::is_replay_required() void cupti_profiler::process_user_loop() { { - CUpti_Profiler_FlushCounterData_Params params = { - CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE}; - + CUpti_Profiler_FlushCounterData_Params params{}; + params.structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE; cupti_call(cuptiProfilerFlushCounterData(¶ms)); } { - CUpti_Profiler_UnsetConfig_Params params = { - CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE}; - + CUpti_Profiler_UnsetConfig_Params params{}; + params.structSize = CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE; cupti_call(cuptiProfilerUnsetConfig(¶ms)); } { - CUpti_Profiler_EndSession_Params params = { - CUpti_Profiler_EndSession_Params_STRUCT_SIZE}; - + CUpti_Profiler_EndSession_Params params{}; + params.structSize = CUpti_Profiler_EndSession_Params_STRUCT_SIZE; cupti_call(cuptiProfilerEndSession(¶ms)); } } @@ -722,9 +727,9 @@ std::vector cupti_profiler::get_counter_values() m_data_image.size()); { - NVPW_CounterData_GetNumRanges_Params params = { - NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE}; + NVPW_CounterData_GetNumRanges_Params params{}; + params.structSize = NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE; params.pCounterDataImage = m_data_image.data(); nvpw_call(NVPW_CounterData_GetNumRanges(¶ms)); @@ -745,9 +750,10 @@ std::vector cupti_profiler::get_counter_values() eval_request request = evaluator.create_request(metric_name); { - NVPW_MetricsEvaluator_SetDeviceAttributes_Params params = { - NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE}; + NVPW_MetricsEvaluator_SetDeviceAttributes_Params params{}; + params.structSize = + NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator; params.pCounterDataImage = m_data_image.data(); params.counterDataImageSize = m_data_image.size(); @@ -756,9 +762,10 @@ std::vector cupti_profiler::get_counter_values() } { - NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params = { - NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE}; + NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params{}; + params.structSize = + NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator; params.pMetricEvalRequests = &request.request; params.numMetricEvalRequests = 1; diff --git a/nvbench/detail/measure_hot.cuh b/nvbench/detail/measure_hot.cuh index 79f1bd4..76b0f6c 100644 --- a/nvbench/detail/measure_hot.cuh +++ b/nvbench/detail/measure_hot.cuh @@ -179,8 +179,9 @@ private: m_total_samples += batch_size; // Predict number of remaining iterations: - batch_size = (m_min_time - m_total_cuda_time) / - (m_total_cuda_time / m_total_samples); + batch_size = static_cast( + (m_min_time - m_total_cuda_time) / + (m_total_cuda_time / static_cast(m_total_samples))); m_timeout_timer.stop(); const auto total_time = m_timeout_timer.get_duration(); diff --git a/nvbench/float64_axis.cxx b/nvbench/float64_axis.cxx index e0e0cef..e75d2bf 100644 --- a/nvbench/float64_axis.cxx +++ b/nvbench/float64_axis.cxx @@ -30,6 +30,6 @@ std::string float64_axis::do_get_input_string(std::size_t i) const return fmt::format("{:0.5g}", m_values[i]); } -std::string float64_axis::do_get_description(std::size_t i) const { return {}; } +std::string float64_axis::do_get_description(std::size_t) const { return {}; } } // namespace nvbench diff --git a/nvbench/internal/table_builder.cuh b/nvbench/internal/table_builder.cuh index 6528d44..81fca0a 100644 --- a/nvbench/internal/table_builder.cuh +++ b/nvbench/internal/table_builder.cuh @@ -73,7 +73,7 @@ struct table_builder m_num_rows = nvbench::detail::transform_reduce( m_columns.cbegin(), m_columns.cend(), - 0ll, + std::size_t{}, [](const auto &a, const auto &b) { return a > b ? a : b; }, [](const column &col) { return col.rows.size(); }); std::for_each(m_columns.begin(), diff --git a/nvbench/markdown_printer.cu b/nvbench/markdown_printer.cu index b254d61..5e079ba 100644 --- a/nvbench/markdown_printer.cu +++ b/nvbench/markdown_printer.cu @@ -420,7 +420,7 @@ std::string markdown_printer::do_format_item_rate(const summary &data) std::string markdown_printer::do_format_bytes(const summary &data) { - const auto bytes = data.get_int64("value"); + const auto bytes = static_cast(data.get_int64("value")); if (bytes >= 1024. * 1024. * 1024.) // 1 GiB { return fmt::format("{:0.3f} GiB", bytes / (1024. * 1024. * 1024.)); diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 8a1af3a..931dbbc 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -187,8 +187,8 @@ std::string state::get_axis_values_as_string(bool color) const if (axis_type == named_values::type::int64 && axes.get_int64_axis(name).is_power_of_two()) { - const nvbench::uint64_t value = m_axis_values.get_int64(name); - const nvbench::uint64_t exponent = int64_axis::compute_log2(value); + const nvbench::int64_t value = m_axis_values.get_int64(name); + const nvbench::int64_t exponent = int64_axis::compute_log2(value); append_key_value(name, exponent, "2^{}"); } else if (axis_type == named_values::type::float64) diff --git a/testing/benchmark.cu b/testing/benchmark.cu index 79b1387..5685d16 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -138,7 +138,7 @@ void test_type_configs() lots_of_types_bench bench; bench.set_type_axes_names({"Integer", "Float", "Other"}); - ASSERT(bench.num_type_configs == 16); + static_assert(bench.num_type_configs == 16); std::size_t idx = 0; fmt::memory_buffer buffer; diff --git a/testing/range.cu b/testing/range.cu index 35e158e..53226cd 100644 --- a/testing/range.cu +++ b/testing/range.cu @@ -64,11 +64,12 @@ void test_fp_tolerance() // Make sure that the range is padded a bit for floats to prevent rounding // errors from skipping `end`. This test will trigger failures without // the padding. - const nvbench::float32_t start = 0.1; - const nvbench::float32_t stride = 1e-4; + const nvbench::float32_t start = 0.1f; + const nvbench::float32_t stride = 1e-4f; for (std::size_t size = 1; size < 1024; ++size) { - const nvbench::float32_t end = start + stride * (size - 1); + const nvbench::float32_t end = + start + stride * static_cast(size - 1); ASSERT_MSG(nvbench::range(start, end, stride).size() == size, "size={}", size); }