From 2fe4c8acec12236d16e738da4ab217cd077d6420 Mon Sep 17 00:00:00 2001 From: Johannes Graner Date: Mon, 15 Dec 2025 13:38:25 +0100 Subject: [PATCH] Add grouped convnd dataset tests for bwd_data, bwd_weight and make them parallel (#3380) * Parallelization in dataset generation * Parallelizable tests for fwd, bwd data, bwd weight with datasets * .gitignore generated datasets * Test parallelization script with round-robin GPU scheduling * Parallelization updates to test generation and running * Dataset paths relative to executable * Update output from test generation * Default to one GPU in test generation * Add small dataset tests to Jenkins * Update copyright lines * Update test_data/generate_test_dataset.sh Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Move trap disable * Common get path function --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> [ROCm/composable_kernel commit: fe35ba5dac168619462669192423ff40548d532d] --- .gitignore | 5 + Jenkinsfile | 10 +- test/common/csv_test_loader.hpp | 246 ++++ test/grouped_convnd_bwd_data/CMakeLists.txt | 4 + ...st_grouped_convnd_bwd_data_dataset_xdl.cpp | 317 +++++ test/grouped_convnd_bwd_weight/CMakeLists.txt | 4 + ..._grouped_convnd_bwd_weight_dataset_xdl.cpp | 258 ++++ .../test_grouped_convnd_fwd_dataset_xdl.cpp | 451 ++----- test_data/generate_test_dataset.sh | 198 ++- test_data/gtest_parallel.py | 1187 +++++++++++++++++ 10 files changed, 2349 insertions(+), 331 deletions(-) create mode 100644 test/common/csv_test_loader.hpp create mode 100644 test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_dataset_xdl.cpp create mode 100644 test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_dataset_xdl.cpp create mode 100644 test_data/gtest_parallel.py diff --git a/.gitignore b/.gitignore index d8468cf24e..98234268c1 100644 --- a/.gitignore +++ b/.gitignore @@ -83,6 +83,11 @@ __pycache__/ .cache/ +# Generated test data +test_data/* +!test_data/*.py +!test_data/*.sh + # Exceptions to build* patterns above # The experimental/builder directory should be tracked despite matching build* !experimental/builder diff --git a/Jenkinsfile b/Jenkinsfile index 5f03310cab..aea14c78b6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1476,15 +1476,19 @@ pipeline { setup_args = "NO_CK_BUILD" execute_args = """ cd ../build && \ ../script/cmake-ck-dev.sh ../ gfx90a && \ - make -j64 test_grouped_convnd_fwd_dataset_xdl && \ + make -j64 test_grouped_convnd_fwd_dataset_xdl \ + test_grouped_convnd_bwd_data_dataset_xdl \ + test_grouped_convnd_bwd_weight_dataset_xdl && \ cd ../test_data && \ # Dataset generation modes: # - small: ~60 test cases (minimal, quick testing - 3 models, 2 batch sizes, 2 image sizes) # - half: ~300 test cases (moderate coverage - 16 models, 3 batch sizes, 5 image sizes), ~ 17 hours testing time # - full: ~600 test cases (comprehensive - 16 models, 5 batch sizes, 9 image sizes), ~ 40 hours testing time - ./generate_test_dataset.sh half && \ + ./generate_test_dataset.sh small && \ cd ../build && \ - ./bin/test_grouped_convnd_fwd_dataset_xdl""" + ./bin/test_grouped_convnd_fwd_dataset_xdl && \ + ./bin/test_grouped_convnd_bwd_data_dataset_xdl && \ + ./bin/test_grouped_convnd_bwd_weight_dataset_xdl""" } steps{ buildHipClangJobAndReboot(setup_args:setup_args, build_type: 'Release', execute_cmd: execute_args) diff --git a/test/common/csv_test_loader.hpp b/test/common/csv_test_loader.hpp new file mode 100644 index 0000000000..78d3595f1a --- /dev/null +++ b/test/common/csv_test_loader.hpp @@ -0,0 
+1,246 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "ck/library/utility/convolution_parameter.hpp"
+
+namespace ck {
+namespace test {
+
+namespace fs = std::filesystem;
+
+// Helper function to find test_data directory relative to the test binary
+static std::string GetTestDataPath()
+{
+    // Get the path to the current executable
+    fs::path exe_path = fs::read_symlink("/proc/self/exe");
+
+    // Get the directory containing the executable
+    fs::path current_dir = exe_path.parent_path();
+
+    // Search for test_data directory by going up the directory tree
+    // This makes the code robust regardless of build directory depth
+    while(current_dir != current_dir.root_path())
+    {
+        fs::path test_data_path = current_dir / "test_data";
+        if(fs::exists(test_data_path) && fs::is_directory(test_data_path))
+        {
+            return test_data_path.string();
+        }
+        current_dir = current_dir.parent_path();
+    }
+
+    // If not found, return empty string
+    std::cerr << "ERROR: Could not find test_data directory relative to executable" << std::endl;
+    return "";
+}
+
+// CSV Reader Function for Loading Test Cases
+// Reads convolution parameters from CSV file and returns vector of ConvParam structures
+inline std::vector<ck::utils::conv::ConvParam> load_csv_test_cases(const std::string& filename)
+{
+    std::vector<ck::utils::conv::ConvParam> conv_params; // Return vector
+    std::ifstream file(filename);                        // Open CSV file
+
+    if(!file.is_open())
+    {
+        std::cerr << "ERROR: Cannot open CSV file: " << filename << std::endl;
+        return conv_params; // Return empty vector on error
+    }
+
+    std::string line;
+    int line_number = 0;
+
+    // Read file line by line
+    while(std::getline(file, line))
+    {
+        line_number++;
+        // Skip comment lines (starting with #) and empty lines
+        if(line.empty() || line[0] == '#')
+        {
+            continue;
+        }
+
+        // Skip header line (contains column names)
+        if(line.find("NDim,Groups,BatchSize") != std::string::npos)
+        {
+            continue;
+        }
+
+        // Parse CSV line using stringstream
+        std::stringstream ss(line);
+        std::string cell;
+        std::vector<std::string> row;
+
+        // Split line by commas
+        while(std::getline(ss, cell, ','))
+        {
+            row.push_back(cell);
+        }
+
+        // Validate row has correct number of columns
+        if(row.size() < 19)
+        { // Need at least 19 columns for 2D (excluding TestName)
+            std::cerr << "WARNING: Line " << line_number << " has insufficient columns ("
+                      << row.size() << "), skipping" << std::endl;
+            continue;
+        }
+
+        try
+        {
+            // Parse CSV data into ConvParam structure
+            // CSV Format:
+            // NDim,Groups,BatchSize,OutChannels,InChannels,KernelH,KernelW,InputH,InputW,OutputH,OutputW,StrideH,StrideW,DilationH,DilationW,LeftPadH,LeftPadW,RightPadH,RightPadW,TestName
+            int NDim        = std::stoi(row[0]);
+            int Groups      = std::stoi(row[1]);
+            int BatchSize   = std::stoi(row[2]);
+            int OutChannels = std::stoi(row[3]);
+            int InChannels  = std::stoi(row[4]);
+
+            if(NDim == 1)
+            {
+                // 1D Convolution: Need fewer columns for 1D parameters
+                if(row.size() < 13)
+                {
+                    std::cerr << "WARNING: 1D convolution on line " << line_number
+                              << " needs 13+ columns, has " << row.size() << ", skipping"
+                              << std::endl;
+                    continue;
+                }
+                // 1D Convolution: {NDim, Groups, BatchSize, OutChannels, InChannels,
+                // {KernelW}, {InputW}, {StrideW}, {DilationW}, {LeftPadW}, {RightPadW}}
+                ck::utils::conv::ConvParam param = {
+                    NDim,        // NDim = 1
+                    Groups,      // Groups
+                    BatchSize,   // Batch size
+                    OutChannels, // Output channels
+                    InChannels,  // Input channels
+                    {std::stoi(row[5])},
// Kernel: {W} + {std::stoi(row[7])}, // Input: {W} + {std::stoi(row[11])}, // Stride: {W} + {std::stoi(row[13])}, // Dilation: {W} + {std::stoi(row[15])}, // Left pad: {W} + {std::stoi(row[17])} // Right pad: {W} + }; + conv_params.push_back(param); + } + else if(NDim == 2) + { + // 2D Convolution: {NDim, Groups, BatchSize, OutChannels, InChannels, + // {KernelH,KernelW}, {InputH,InputW}, {StrideH,StrideW}, {DilationH,DilationW}, + // {LeftPadH,LeftPadW}, {RightPadH,RightPadW}} + ck::utils::conv::ConvParam param = { + NDim, // NDim = 2 + Groups, // Groups + BatchSize, // Batch size + OutChannels, // Output channels + InChannels, // Input channels + {std::stoi(row[5]), std::stoi(row[6])}, // Kernel: {H, W} + {std::stoi(row[7]), std::stoi(row[8])}, // Input: {H, W} + {std::stoi(row[11]), std::stoi(row[12])}, // Stride: {H, W} + {std::stoi(row[13]), std::stoi(row[14])}, // Dilation: {H, W} + {std::stoi(row[15]), std::stoi(row[16])}, // Left pad: {H, W} + {std::stoi(row[17]), std::stoi(row[18])} // Right pad: {H, W} + }; + conv_params.push_back(param); + } + else if(NDim == 3) + { + // 3D Convolution: Need more columns for 3D parameters + if(row.size() < 26) + { + std::cerr << "WARNING: 3D convolution on line " << line_number + << " needs 26+ columns, has " << row.size() << ", skipping" + << std::endl; + continue; + } + // 3D Convolution: {NDim, Groups, BatchSize, OutChannels, InChannels, + // {KernelD,KernelH,KernelW}, {InputD,InputH,InputW}, {OutputD,OutputH,OutputW}, + // {StrideD,StrideH,StrideW}, {DilationD,DilationH,DilationW}, + // {LeftPadD,LeftPadH,LeftPadW}, {RightPadD,RightPadH,RightPadW}} + ck::utils::conv::ConvParam param = { + NDim, // NDim = 3 + Groups, // Groups + BatchSize, // Batch size + OutChannels, // Output channels + InChannels, // Input channels + {std::stoi(row[5]), std::stoi(row[6]), std::stoi(row[7])}, // Kernel: {D, H, W} + {std::stoi(row[8]), std::stoi(row[9]), std::stoi(row[10])}, // Input: {D, H, W} + {std::stoi(row[14]), + std::stoi(row[15]), + std::stoi(row[16])}, // Stride: {D, H, W} + {std::stoi(row[17]), + std::stoi(row[18]), + std::stoi(row[19])}, // Dilation: {D, H, W} + {std::stoi(row[20]), + std::stoi(row[21]), + std::stoi(row[22])}, // Left pad: {D, H, W} + {std::stoi(row[23]), + std::stoi(row[24]), + std::stoi(row[25])} // Right pad: {D, H, W} + }; + conv_params.push_back(param); + } + else + { + std::cerr << "WARNING: Unsupported NDim=" << NDim << " on line " << line_number + << ", skipping" << std::endl; + } + } + catch(const std::exception& e) + { + std::cerr << "ERROR: Failed to parse line " << line_number << ": " << e.what() + << std::endl; + continue; + } + } + + file.close(); + std::cout << "Loaded " << conv_params.size() << " test cases from " << filename << std::endl; + return conv_params; +} + +// Helper function to load CSV test cases and populate conv_params vector +// Returns true if loading succeeded, false otherwise +inline bool load_and_populate_test_cases(const std::vector& csv_paths, + std::vector& conv_params, + const std::string& dimension_label) +{ + for(const auto& csv_path : csv_paths) + { + auto csv_cases = load_csv_test_cases(csv_path); + if(!csv_cases.empty()) + { + // Successfully loaded CSV data - add all test cases to conv_params + for(const auto& test_case : csv_cases) + { + conv_params.push_back(test_case); + } + std::cout << "Loaded " << csv_cases.size() << " " << dimension_label + << " test cases from " << csv_path << std::endl; + return true; + } + } + + // Failed to load from any path + std::cerr << "ERROR: Failed to 
load CSV test data from any of these locations:" << std::endl;
+    for(const auto& path : csv_paths)
+    {
+        std::cerr << " - " << path << std::endl;
+    }
+    std::cerr << "\nPlease ensure CSV test data exists in one of these locations." << std::endl;
+    std::cerr << "Run generate_test_dataset.sh in test_data/ to create test datasets." << std::endl;
+
+    return false;
+}
+
+} // namespace test
+} // namespace ck
diff --git a/test/grouped_convnd_bwd_data/CMakeLists.txt b/test/grouped_convnd_bwd_data/CMakeLists.txt
index 1da477ebb3..a9413bd25b 100644
--- a/test/grouped_convnd_bwd_data/CMakeLists.txt
+++ b/test/grouped_convnd_bwd_data/CMakeLists.txt
@@ -9,6 +9,10 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
         add_executable(test_grouped_convnd_bwd_data_xdl_large_cases test_grouped_convnd_bwd_data_xdl_large_cases.cpp)
         target_compile_options(test_grouped_convnd_bwd_data_xdl_large_cases PRIVATE -Wno-global-constructors -Wno-undef)
         target_link_libraries(test_grouped_convnd_bwd_data_xdl_large_cases PRIVATE gtest_main getopt::getopt utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance)
+
+        add_executable(test_grouped_convnd_bwd_data_dataset_xdl test_grouped_convnd_bwd_data_dataset_xdl.cpp)
+        target_compile_options(test_grouped_convnd_bwd_data_dataset_xdl PRIVATE -Wno-global-constructors -Wno-undef)
+        target_link_libraries(test_grouped_convnd_bwd_data_dataset_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance)
 endif()
 add_gtest_executable(test_grouped_convnd_bwd_data_wmma test_grouped_convnd_bwd_data_wmma.cpp)
 if(result EQUAL 0)
diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_dataset_xdl.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_dataset_xdl.cpp
new file mode 100644
index 0000000000..53b8ec32af
--- /dev/null
+++ b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_dataset_xdl.cpp
@@ -0,0 +1,317 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <cstdlib>          // Standard C library (exit codes, malloc)
+#include <iostream>         // C++ I/O streams (cout, cerr)
+#include <initializer_list> // C++ initializer list support (unused here)
+#include <vector>           // C++ vector container - stores test cases
+#include <string>           // String operations
+#include <gtest/gtest.h>    // Google Test framework - provides TEST_P, INSTANTIATE_TEST_SUITE_P
+
+#include "profiler/profile_grouped_conv_bwd_data_impl.hpp" // The actual GPU profiler that does convolution work
+#include "../common/csv_test_loader.hpp"                   // Shared CSV test case loader
+
+using namespace ck::tensor_layout::convolution; // Import tensor layout names (GNHWK, GKYXC, etc.)
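// Illustration (an assumed example, not taken from any generated dataset): the
// loader above consumes rows in the documented column order, so a 2D entry in
// conv_test_set_2d_dataset.csv would look like this, with TestName as an
// optional trailing column:
//
//   NDim,Groups,BatchSize,OutChannels,InChannels,KernelH,KernelW,InputH,InputW,OutputH,OutputW,StrideH,StrideW,DilationH,DilationW,LeftPadH,LeftPadW,RightPadH,RightPadW,TestName
//   2,1,16,64,32,3,3,56,56,56,56,1,1,1,1,1,1,1,1,example_3x3_s1
//
// The OutputH/OutputW fields (row[9]/row[10]) are present in the file but not
// consumed by the 2D branch of load_csv_test_cases, which reads strides from
// row[11]/row[12]; the output shape is implied by the other parameters.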
+ +// Load CSV data for 2D tests +static std::vector Get2DTestCases() +{ + static std::vector test_cases; + if(test_cases.empty()) + { + std::string test_data_dir = ck::test::GetTestDataPath(); + if(test_data_dir.empty()) + { + std::cerr << "FATAL: test_data directory not found" << std::endl; + return test_cases; + } + + std::vector csv_paths = {test_data_dir + "/conv_test_set_2d_dataset.csv"}; + bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "2D"); + if(!loaded) + { + std::cerr << "FATAL: Failed to load 2D test cases from " << csv_paths[0] << std::endl; + } + } + return test_cases; +} + +// Load CSV data for 3D tests +static std::vector Get3DTestCases() +{ + static std::vector test_cases; + if(test_cases.empty()) + { + std::string test_data_dir = ck::test::GetTestDataPath(); + if(test_data_dir.empty()) + { + std::cerr << "FATAL: test_data directory not found" << std::endl; + return test_cases; + } + + std::vector csv_paths = {test_data_dir + "/conv_test_set_3d_dataset.csv"}; + bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "3D"); + if(!loaded) + { + std::cerr << "FATAL: Failed to load 3D test cases from " << csv_paths[0] << std::endl; + } + } + return test_cases; +} + +// Helper template to run a single backward data convolution test with split_k +template +bool RunConvBwdDataTest(const ck::utils::conv::ConvParam& param, ck::index_t split_k) +{ + return ck::profiler::profile_grouped_conv_bwd_data_impl(true, // do_verification + 1, // init_method + false, // do_log + false, // time_kernel + param, // ConvParam + split_k, // Split-K value + -1); // instance_index +} + +// 2D Tests - GNHWK layout - Float - SplitK=1 +class TestGroupedConvndBwdData2dGNHWKFloatSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKFloatSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, float>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKFloatSplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - GNHWK layout - Float - SplitK=2 +class TestGroupedConvndBwdData2dGNHWKFloatSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKFloatSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, float>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKFloatSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - GNHWK layout - Half - SplitK=1 +class TestGroupedConvndBwdData2dGNHWKHalfSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKHalfSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, ck::half_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKHalfSplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - GNHWK layout - Half - SplitK=2 +class TestGroupedConvndBwdData2dGNHWKHalfSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKHalfSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, ck::half_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKHalfSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - GNHWK layout - BFloat16 - SplitK=1 +class TestGroupedConvndBwdData2dGNHWKBFloat16SplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKBFloat16SplitK1, ConvTest) +{ + 
EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, ck::bhalf_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKBFloat16SplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - GNHWK layout - BFloat16 - SplitK=2 +class TestGroupedConvndBwdData2dGNHWKBFloat16SplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dGNHWKBFloat16SplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, GNHWK, GKYXC, GNHWC, ck::bhalf_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dGNHWKBFloat16SplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Float - SplitK=1 +class TestGroupedConvndBwdData2dNHWGKFloatSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKFloatSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, float>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKFloatSplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Float - SplitK=2 +class TestGroupedConvndBwdData2dNHWGKFloatSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKFloatSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, float>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKFloatSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Half - SplitK=1 +class TestGroupedConvndBwdData2dNHWGKHalfSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKHalfSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, ck::half_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKHalfSplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Half - SplitK=2 +class TestGroupedConvndBwdData2dNHWGKHalfSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKHalfSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, ck::half_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKHalfSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - BFloat16 - SplitK=1 +class TestGroupedConvndBwdData2dNHWGKBFloat16SplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKBFloat16SplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, ck::bhalf_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKBFloat16SplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - BFloat16 - SplitK=2 +class TestGroupedConvndBwdData2dNHWGKBFloat16SplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData2dNHWGKBFloat16SplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<2, NHWGK, GKYXC, NHWGC, ck::bhalf_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData2dNHWGKBFloat16SplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 3D Tests - NDHWGK layout - Float - SplitK=1 +class TestGroupedConvndBwdData3dNDHWGKFloatSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKFloatSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, float>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + 
TestGroupedConvndBwdData3dNDHWGKFloatSplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Float - SplitK=2 +class TestGroupedConvndBwdData3dNDHWGKFloatSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKFloatSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, float>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData3dNDHWGKFloatSplitK2, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Half - SplitK=1 +class TestGroupedConvndBwdData3dNDHWGKHalfSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKHalfSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, ck::half_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData3dNDHWGKHalfSplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Half - SplitK=2 +class TestGroupedConvndBwdData3dNDHWGKHalfSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKHalfSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, ck::half_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData3dNDHWGKHalfSplitK2, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - BFloat16 - SplitK=1 +class TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, ck::bhalf_t>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - BFloat16 - SplitK=2 +class TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdDataTest<3, NDHWGK, GKZYXC, NDHWGC, ck::bhalf_t>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdData3dNDHWGKBFloat16SplitK2, + ::testing::ValuesIn(Get3DTestCases())); diff --git a/test/grouped_convnd_bwd_weight/CMakeLists.txt b/test/grouped_convnd_bwd_weight/CMakeLists.txt index e46113bea0..7b994f5bb8 100644 --- a/test/grouped_convnd_bwd_weight/CMakeLists.txt +++ b/test/grouped_convnd_bwd_weight/CMakeLists.txt @@ -4,6 +4,10 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") add_gtest_executable(test_grouped_convnd_bwd_weight test_grouped_convnd_bwd_weight.cpp) target_link_libraries(test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance device_grouped_convnd_bwd_weight_instance) + + add_executable(test_grouped_convnd_bwd_weight_dataset_xdl test_grouped_convnd_bwd_weight_dataset_xdl.cpp) + target_compile_options(test_grouped_convnd_bwd_weight_dataset_xdl PRIVATE -Wno-global-constructors -Wno-undef) + target_link_libraries(test_grouped_convnd_bwd_weight_dataset_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance device_grouped_convnd_bwd_weight_instance) elseif(DL_KERNELS) add_gtest_executable(test_grouped_convnd_bwd_weight test_grouped_convnd_bwd_weight.cpp) target_link_libraries(test_grouped_convnd_bwd_weight 
PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_dataset_xdl.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_dataset_xdl.cpp
new file mode 100644
index 0000000000..aff6ba8873
--- /dev/null
+++ b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_dataset_xdl.cpp
@@ -0,0 +1,258 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <cstdlib>          // Standard C library (exit codes, malloc)
+#include <iostream>         // C++ I/O streams (cout, cerr)
+#include <initializer_list> // C++ initializer list support (unused here)
+#include <vector>           // C++ vector container - stores test cases
+#include <string>           // String operations
+#include <gtest/gtest.h>    // Google Test framework - provides TEST_P, INSTANTIATE_TEST_SUITE_P
+
+#include "ck/utility/common_header.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
+#include "ck/host_utility/device_prop.hpp"
+
+#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp" // The actual GPU profiler that does convolution work
+#include "../common/csv_test_loader.hpp"                     // Shared CSV test case loader
+
+using namespace ck::tensor_layout::convolution;
+
+// Load CSV data for 2D tests
+static std::vector<ck::utils::conv::ConvParam> Get2DTestCases()
+{
+    static std::vector<ck::utils::conv::ConvParam> test_cases;
+    if(test_cases.empty())
+    {
+        std::string test_data_dir = ck::test::GetTestDataPath();
+        if(test_data_dir.empty())
+        {
+            std::cerr << "FATAL: test_data directory not found" << std::endl;
+            return test_cases;
+        }
+
+        std::vector<std::string> csv_paths = {test_data_dir + "/conv_test_set_2d_dataset.csv"};
+        bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "2D");
+        if(!loaded)
+        {
+            std::cerr << "FATAL: Failed to load 2D test cases from " << csv_paths[0] << std::endl;
+        }
+    }
+    return test_cases;
+}
+
+// Load CSV data for 3D tests
+static std::vector<ck::utils::conv::ConvParam> Get3DTestCases()
+{
+    static std::vector<ck::utils::conv::ConvParam> test_cases;
+    if(test_cases.empty())
+    {
+        std::string test_data_dir = ck::test::GetTestDataPath();
+        if(test_data_dir.empty())
+        {
+            std::cerr << "FATAL: test_data directory not found" << std::endl;
+            return test_cases;
+        }
+
+        std::vector<std::string> csv_paths = {test_data_dir + "/conv_test_set_3d_dataset.csv"};
+        bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "3D");
+        if(!loaded)
+        {
+            std::cerr << "FATAL: Failed to load 3D test cases from " << csv_paths[0] << std::endl;
+        }
+    }
+    return test_cases;
+}
+
+// Helper template to run a single backward weight convolution test
+template <ck::index_t NDimSpatial,
+          typename InLayout,
+          typename WeiLayout,
+          typename OutLayout,
+          typename InDataType,
+          typename WeiDataType,
+          typename OutDataType>
+bool RunConvBwdWeightTest(const ck::utils::conv::ConvParam& param, ck::index_t split_k)
+{
+    return ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial,
+                                                              InLayout,
+                                                              WeiLayout,
+                                                              OutLayout,
+                                                              InDataType,
+                                                              WeiDataType,
+                                                              OutDataType>(
+        true,                    // do_verification
+        1,                       // init_method
+        false,                   // do_log
+        false,                   // time_kernel
+        param,                   // ConvParam
+        std::to_string(split_k), // Split-K value as string
+        -1);                     // instance_index
+}
+
+// 2D Tests - NHWGK layout - Float - SplitK=1
+class TestGroupedConvndBwdWeight2dNHWGKFloatSplitK1
+    : public ::testing::TestWithParam<ck::utils::conv::ConvParam>
+{
+};
+TEST_P(TestGroupedConvndBwdWeight2dNHWGKFloatSplitK1, ConvTest)
+{
+    EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, float, float, float>(GetParam(), 1)));
+}
+INSTANTIATE_TEST_SUITE_P(Dataset,
+                         TestGroupedConvndBwdWeight2dNHWGKFloatSplitK1,
+                         ::testing::ValuesIn(Get2DTestCases()));
+
+// 2D Tests - NHWGK layout - Float - SplitK=2
+class TestGroupedConvndBwdWeight2dNHWGKFloatSplitK2
+    : public ::testing::TestWithParam<ck::utils::conv::ConvParam>
+{
+};
+TEST_P(TestGroupedConvndBwdWeight2dNHWGKFloatSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, float, float, float>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight2dNHWGKFloatSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Half - SplitK=1 +class TestGroupedConvndBwdWeight2dNHWGKHalfSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight2dNHWGKHalfSplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, ck::half_t, ck::half_t, ck::half_t>( + GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight2dNHWGKHalfSplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - Half - SplitK=2 +class TestGroupedConvndBwdWeight2dNHWGKHalfSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight2dNHWGKHalfSplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, ck::half_t, ck::half_t, ck::half_t>( + GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight2dNHWGKHalfSplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - BFloat16 - SplitK=1 +class TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, ck::bhalf_t, float, ck::bhalf_t>( + GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK1, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - NHWGK layout - BFloat16 - SplitK=2 +class TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<2, NHWGC, GKYXC, NHWGK, ck::bhalf_t, float, ck::bhalf_t>( + GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight2dNHWGKBFloat16SplitK2, + ::testing::ValuesIn(Get2DTestCases())); + +// 3D Tests - NDHWGK layout - Float - SplitK=1 +class TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK1, ConvTest) +{ + EXPECT_TRUE( + (RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, float, float, float>(GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Float - SplitK=2 +class TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK2, ConvTest) +{ + EXPECT_TRUE( + (RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, float, float, float>(GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKFloatSplitK2, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Half - SplitK=1 +class TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK1, ConvTest) +{ + EXPECT_TRUE( + (RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, ck::half_t, ck::half_t, ck::half_t>( + GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - Half - 
SplitK=2 +class TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK2, ConvTest) +{ + EXPECT_TRUE( + (RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, ck::half_t, ck::half_t, ck::half_t>( + GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKHalfSplitK2, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - BFloat16 - SplitK=1 +class TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK1 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK1, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, ck::bhalf_t, float, ck::bhalf_t>( + GetParam(), 1))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK1, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - NDHWGK layout - BFloat16 - SplitK=2 +class TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK2 + : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK2, ConvTest) +{ + EXPECT_TRUE((RunConvBwdWeightTest<3, NDHWGC, GKZYXC, NDHWGK, ck::bhalf_t, float, ck::bhalf_t>( + GetParam(), 2))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndBwdWeight3dNDHWGKBFloat16SplitK2, + ::testing::ValuesIn(Get3DTestCases())); diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp index 0928256817..c99f7ccf2f 100644 --- a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp +++ b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_dataset_xdl.cpp @@ -5,330 +5,165 @@ #include // C++ I/O streams (cout, cerr) #include // C++ initializer list support (unused here) #include // C++ vector container - stores test cases -#include // File I/O for CSV reading -#include // String stream for CSV parsing #include // String operations -#include // Google Test framework - provides TYPED_TEST, EXPECT_TRUE +#include // Google Test framework - provides TEST_P, INSTANTIATE_TEST_SUITE_P #include "profiler/profile_grouped_conv_fwd_impl.hpp" // The actual GPU profiler that does convolution work - -// CSV Reader Function for Loading Test Cases -// Reads convolution parameters from CSV file and returns vector of ConvParam structures -std::vector load_csv_test_cases(const std::string& filename) -{ - std::vector conv_params; // Return vector - std::ifstream file(filename); // Open CSV file - - if(!file.is_open()) - { - std::cerr << "ERROR: Cannot open CSV file: " << filename << std::endl; - return conv_params; // Return empty vector on error - } - - std::string line; - int line_number = 0; - - // Read file line by line - while(std::getline(file, line)) - { - line_number++; - // Skip comment lines (starting with #) and empty lines - if(line.empty() || line[0] == '#') - { - continue; - } - - // Skip header line (contains column names) - if(line.find("NDim,Groups,BatchSize") != std::string::npos) - { - continue; - } - - // Parse CSV line using stringstream - std::stringstream ss(line); - std::string cell; - std::vector row; - - // Split line by commas - while(std::getline(ss, cell, ',')) - { - row.push_back(cell); - } - - // Validate row has correct number of columns - if(row.size() < 19) - { // Need at least 19 columns for 2D (excluding TestName) - std::cerr << "WARNING: Line " << line_number << " has insufficient columns (" - << row.size() << "), skipping" << std::endl; - continue; - } - - try - { 
- // Parse CSV data into ConvParam structure - // CSV Format: - // NDim,Groups,BatchSize,OutChannels,InChannels,KernelH,KernelW,InputH,InputW,OutputH,OutputW,StrideH,StrideW,DilationH,DilationW,LeftPadH,LeftPadW,RightPadH,RightPadW,TestName - int NDim = std::stoi(row[0]); - int Groups = std::stoi(row[1]); - int BatchSize = std::stoi(row[2]); - int OutChannels = std::stoi(row[3]); - int InChannels = std::stoi(row[4]); - - if(NDim == 2) - { - // 2D Convolution: {NDim, Groups, BatchSize, OutChannels, InChannels, - // {KernelH,KernelW}, {InputH,InputW}, {StrideH,StrideW}, {DilationH,DilationW}, - // {LeftPadH,LeftPadW}, {RightPadH,RightPadW}} - ck::utils::conv::ConvParam param = { - NDim, // NDim = 2 - Groups, // Groups - BatchSize, // Batch size - OutChannels, // Output channels - InChannels, // Input channels - {std::stoi(row[5]), std::stoi(row[6])}, // Kernel: {H, W} - {std::stoi(row[7]), std::stoi(row[8])}, // Input: {H, W} - {std::stoi(row[11]), std::stoi(row[12])}, // Stride: {H, W} - {std::stoi(row[13]), std::stoi(row[14])}, // Dilation: {H, W} - {std::stoi(row[15]), std::stoi(row[16])}, // Left pad: {H, W} - {std::stoi(row[17]), std::stoi(row[18])} // Right pad: {H, W} - }; - conv_params.push_back(param); - } - else if(NDim == 3) - { - // 3D Convolution: Need more columns for 3D parameters - if(row.size() < 26) - { - std::cerr << "WARNING: 3D convolution on line " << line_number - << " needs 26+ columns, has " << row.size() << ", skipping" - << std::endl; - continue; - } - // 3D Convolution: {NDim, Groups, BatchSize, OutChannels, InChannels, - // {KernelD,KernelH,KernelW}, {InputD,InputH,InputW}, {OutputD,OutputH,OutputW}, - // {StrideD,StrideH,StrideW}, {DilationD,DilationH,DilationW}, - // {LeftPadD,LeftPadH,LeftPadW}, {RightPadD,RightPadH,RightPadW}} - ck::utils::conv::ConvParam param = { - NDim, // NDim = 3 - Groups, // Groups - BatchSize, // Batch size - OutChannels, // Output channels - InChannels, // Input channels - {std::stoi(row[5]), std::stoi(row[6]), std::stoi(row[7])}, // Kernel: {D, H, W} - {std::stoi(row[8]), std::stoi(row[9]), std::stoi(row[10])}, // Input: {D, H, W} - {std::stoi(row[14]), - std::stoi(row[15]), - std::stoi(row[16])}, // Stride: {D, H, W} - {std::stoi(row[17]), - std::stoi(row[18]), - std::stoi(row[19])}, // Dilation: {D, H, W} - {std::stoi(row[20]), - std::stoi(row[21]), - std::stoi(row[22])}, // Left pad: {D, H, W} - {std::stoi(row[23]), - std::stoi(row[24]), - std::stoi(row[25])} // Right pad: {D, H, W} - }; - conv_params.push_back(param); - } - else - { - std::cerr << "WARNING: Unsupported NDim=" << NDim << " on line " << line_number - << ", skipping" << std::endl; - } - } - catch(const std::exception& e) - { - std::cerr << "ERROR: Failed to parse line " << line_number << ": " << e.what() - << std::endl; - continue; - } - } - - file.close(); - std::cout << "Loaded " << conv_params.size() << " test cases from " << filename << std::endl; - return conv_params; -} - -// Template class that works with different data types and tensor layouts -template -class TestGroupedConvndFwd : public ::testing::Test // Inherit from Google Test base class -{ - protected: - using DataType = - std::tuple_element_t<0, Tuple>; // Extract data type from tuple (fp32, fp16, bf16, int8) - using InLayout = - std::tuple_element_t<1, Tuple>; // Extract input tensor layout (NHWGC, NDHWGC, etc.) - using WeiLayout = - std::tuple_element_t<2, Tuple>; // Extract weight tensor layout (GKYXC, GKZYXC, etc.) 
- using OutLayout = - std::tuple_element_t<3, Tuple>; // Extract output tensor layout (NHWGK, NDHWGK, etc.) - using IndexType = ck::long_index_t; // 64-bit integer type for tensor dimensions - - // THE KEY CONTAINER: This stores all test case parameters - // Each test will push_back() ConvParam structures here - std::vector conv_params; - - // Template function to run tests for N-dimensional spatial convolution (2D or 3D) - template - void Run() - { - EXPECT_FALSE(conv_params.empty()); // Google Test assertion: ensure we have test cases - bool pass = true; // Track overall pass/fail across all test cases - - // MAIN LOOP: Execute every test case that was added to conv_params - for(auto& param : conv_params) - { - // CALL THE ACTUAL GPU PROFILER - This is where convolution happens! - pass = pass && - ck::profiler::profile_grouped_conv_fwd_impl( // Index type (int64) - true, // do_verification: Compare GPU result with CPU reference - 1, // init_method: How to initialize random test data (1 = uniform -5 to 5) - false, // do_log: Don't print detailed tensor values - false, // time_kernel: Don't do performance timing (just correctness) - param); // ConvParam: {NDim, Groups, Batch, OutChannels, InChannels, - // KernelSize, InputSize, ...} - } - EXPECT_TRUE(pass); // Google Test assertion: ALL test cases must pass - } -}; +#include "../common/csv_test_loader.hpp" // Shared CSV test case loader using namespace ck::tensor_layout::convolution; // Import tensor layout names (NHWGC, GKYXC, etc.) -// GOOGLE TEST TYPE COMBINATIONS: Define what data types and layouts to test -// This creates 4 separate test instances for 2D convolution: -using KernelTypes2d = - ::testing::Types, // fp32 test - std::tuple, // fp16 test - std::tuple, // bfloat16 test - std::tuple>; // int8 test - -// This creates 3 separate test instances for 3D convolution (no int8 support for 3D): -using KernelTypes3d = - ::testing::Types, // fp32 3D test - std::tuple, // fp16 3D test - std::tuple>; // bfloat16 3D test - -// Create specialized test classes that inherit from the base template class -template -class TestGroupedConvndFwd2d : public TestGroupedConvndFwd // 2D convolution test class +// Load CSV data for 2D tests +static std::vector Get2DTestCases() { -}; - -template -class TestGroupedConvndFwd3d : public TestGroupedConvndFwd // 3D convolution test class -{ -}; - -// GOOGLE TEST MAGIC: Create test suites -// This tells Google Test to create 4 test instances for 2D (fp32, fp16, bf16, int8) -TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d); -// This tells Google Test to create 3 test instances for 3D (fp32, fp16, bf16) -TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d); - -// THE ACTUAL 2D TEST - This runs 4 times (once for each data type: fp32, fp16, bf16, int8) -TYPED_TEST(TestGroupedConvndFwd2d, Test2D) -{ - // LOAD TEST CASES FROM CSV FILE instead of hardcoded cases - // Try different locations for the CSV file (build directory vs source directory) - std::vector csv_paths = { - "../test_data/conv_test_set_2d_dataset.csv", // From build directory to source - }; - - bool loaded = false; - for(const auto& csv_path : csv_paths) + static std::vector test_cases; + if(test_cases.empty()) { - auto csv_cases = load_csv_test_cases(csv_path); - if(!csv_cases.empty()) + std::string test_data_dir = ck::test::GetTestDataPath(); + if(test_data_dir.empty()) { - // Successfully loaded CSV data - add all test cases to conv_params - for(const auto& test_case : csv_cases) - { - this->conv_params.push_back(test_case); - } - 
std::cout << "Loaded " << csv_cases.size() << " 2D test cases from " << csv_path - << std::endl; - loaded = true; - break; + std::cerr << "FATAL: test_data directory not found" << std::endl; + return test_cases; + } + + std::vector csv_paths = {test_data_dir + "/conv_test_set_2d_dataset.csv"}; + bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "2D"); + if(!loaded) + { + std::cerr << "FATAL: Failed to load 2D test cases from " << csv_paths[0] << std::endl; } } - - // FAIL if CSV loading fails - no fallback! - if(!loaded) - { - std::cerr << "ERROR: Failed to load CSV test data from any of these locations:" - << std::endl; - for(const auto& path : csv_paths) - { - std::cerr << " - " << path << std::endl; - } - std::cerr << "\nPlease ensure CSV test data exists in one of these locations." << std::endl; - std::cerr << "Run generate_test_dataset.sh in test_data/ to create test datasets." - << std::endl; - - // Force test failure - no test cases means test should fail - EXPECT_TRUE(loaded) << "CSV test data loading failed"; - } - - // Execute all test cases with 2D convolution - // This calls Run<2>() which loops through conv_params and calls GPU profiler for each - this->template Run<2>(); + return test_cases; } -// THE ACTUAL 3D TEST - This runs 3 times (once for each data type: fp32, fp16, bf16) -TYPED_TEST(TestGroupedConvndFwd3d, Test3D) +// Load CSV data for 3D tests +static std::vector Get3DTestCases() { - // LOAD TEST CASES FROM CSV FILE instead of hardcoded cases - // Try different locations for the CSV file (build directory vs source directory) - std::vector csv_paths = { - "../test_data/conv_test_set_3d_dataset.csv", // From build directory to source - }; - - bool loaded = false; - for(const auto& csv_path : csv_paths) + static std::vector test_cases; + if(test_cases.empty()) { - auto csv_cases = load_csv_test_cases(csv_path); - if(!csv_cases.empty()) + std::string test_data_dir = ck::test::GetTestDataPath(); + if(test_data_dir.empty()) { - // Successfully loaded CSV data - add all test cases to conv_params - for(const auto& test_case : csv_cases) - { - this->conv_params.push_back(test_case); - } - std::cout << "Loaded " << csv_cases.size() << " 3D test cases from " << csv_path - << std::endl; - loaded = true; - break; + std::cerr << "FATAL: test_data directory not found" << std::endl; + return test_cases; + } + + std::vector csv_paths = {test_data_dir + "/conv_test_set_3d_dataset.csv"}; + bool loaded = ck::test::load_and_populate_test_cases(csv_paths, test_cases, "3D"); + if(!loaded) + { + std::cerr << "FATAL: Failed to load 3D test cases from " << csv_paths[0] << std::endl; } } - - // FAIL if CSV loading fails - no fallback! - if(!loaded) - { - std::cerr << "ERROR: Failed to load CSV test data from any of these locations:" - << std::endl; - for(const auto& path : csv_paths) - { - std::cerr << " - " << path << std::endl; - } - std::cerr << "\nPlease ensure CSV test data exists in one of these locations." << std::endl; - std::cerr << "Run generate_test_dataset.sh in test_data/ to create test datasets." 
- << std::endl; - - // Force test failure - no test cases means test should fail - EXPECT_TRUE(loaded) << "CSV test data loading failed"; - } - - // Execute all test cases with 3D convolution - // This calls Run<3>() which loops through conv_params and calls GPU profiler for each - this->template Run<3>(); + return test_cases; } + +// Helper template to run a single convolution test +template +bool RunConvTest(const ck::utils::conv::ConvParam& param) +{ + using IndexType = ck::long_index_t; + return ck::profiler::profile_grouped_conv_fwd_impl(true, // do_verification + 1, // init_method + false, // do_log + false, // time_kernel + param); +} + +// 2D Tests - Float +class TestGroupedConvndFwd2dFloat : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd2dFloat, ConvTest) +{ + EXPECT_TRUE((RunConvTest<2, NHWGC, GKYXC, NHWGK, float>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd2dFloat, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - Half +class TestGroupedConvndFwd2dHalf : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd2dHalf, ConvTest) +{ + EXPECT_TRUE((RunConvTest<2, NHWGC, GKYXC, NHWGK, ck::half_t>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd2dHalf, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - BFloat16 +class TestGroupedConvndFwd2dBFloat16 : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd2dBFloat16, ConvTest) +{ + EXPECT_TRUE((RunConvTest<2, NHWGC, GKYXC, NHWGK, ck::bhalf_t>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd2dBFloat16, + ::testing::ValuesIn(Get2DTestCases())); + +// 2D Tests - Int8 +class TestGroupedConvndFwd2dInt8 : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd2dInt8, ConvTest) +{ + EXPECT_TRUE((RunConvTest<2, NHWGC, GKYXC, NHWGK, int8_t>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd2dInt8, + ::testing::ValuesIn(Get2DTestCases())); + +// 3D Tests - Float +class TestGroupedConvndFwd3dFloat : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd3dFloat, ConvTest) +{ + EXPECT_TRUE((RunConvTest<3, NDHWGC, GKZYXC, NDHWGK, float>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd3dFloat, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - Half +class TestGroupedConvndFwd3dHalf : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd3dHalf, ConvTest) +{ + EXPECT_TRUE((RunConvTest<3, NDHWGC, GKZYXC, NDHWGK, ck::half_t>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd3dHalf, + ::testing::ValuesIn(Get3DTestCases())); + +// 3D Tests - BFloat16 +class TestGroupedConvndFwd3dBFloat16 : public ::testing::TestWithParam +{ +}; +TEST_P(TestGroupedConvndFwd3dBFloat16, ConvTest) +{ + EXPECT_TRUE((RunConvTest<3, NDHWGC, GKZYXC, NDHWGK, ck::bhalf_t>(GetParam()))); +} +INSTANTIATE_TEST_SUITE_P(Dataset, + TestGroupedConvndFwd3dBFloat16, + ::testing::ValuesIn(Get3DTestCases())); diff --git a/test_data/generate_test_dataset.sh b/test_data/generate_test_dataset.sh index e9c4937445..27f45a3bc7 100755 --- a/test_data/generate_test_dataset.sh +++ b/test_data/generate_test_dataset.sh @@ -8,6 +8,20 @@ set -e # Exit on error set +x # Disable command echo (even if called with bash -x) +# Trap to kill all background jobs on script exit/interruption +cleanup() { + echo "" + echo "Cleaning up background processes..." 
+ # Kill all jobs in the current process group + jobs -p | xargs -r kill 2>/dev/null || true + wait 2>/dev/null || true + echo "Cleanup complete." + exit 1 +} + +# Set up trap for common termination signals +trap cleanup SIGINT SIGTERM EXIT + echo "==========================================" echo "CK Convolution Test Dataset Generator" echo "==========================================" @@ -18,7 +32,7 @@ if ! python3 -c "import torch" 2>/dev/null; then echo "PyTorch not found. Creating virtual environment..." # Create a virtual environment in the current directory - VENV_DIR="./pytorch_venv" + VENV_DIR="./.venv" if [ ! -d "$VENV_DIR" ]; then python3 -m venv $VENV_DIR || { echo "ERROR: Failed to create virtual environment." @@ -66,11 +80,71 @@ if ! $PYTHON_CMD -c "import torch; import sys; sys.exit(0 if torch.cuda.is_avail echo "Continuing anyway to generate placeholder data..." fi +# Parse command line arguments +CONFIG_MODE="full" # Default configuration mode: 'small', 'half' or 'full' +MAX_PARALLEL_JOBS=1 # Default number of parallel jobs +NUM_GPUS=1 # Number of GPUs to use (0 means no GPU assignment) + +# Process arguments +while [[ $# -gt 0 ]]; do + case $1 in + -j) + MAX_PARALLEL_JOBS="$2" + shift 2 + ;; + -j*) + MAX_PARALLEL_JOBS="${1#-j}" + shift + ;; + --gpus) + NUM_GPUS="$2" + shift 2 + ;; + small|half|full) + CONFIG_MODE="$1" + shift + ;; + *) + echo "Usage: $0 [small|half|full] [-j ] [--gpus ]" + echo " Configuration modes: small, half, full (default: full)" + echo " -j : Number of parallel jobs (default: 1)" + echo " --gpus : Number of GPUs to use (e.g., 8 for GPUs 0-7)" + exit 1 + ;; + esac +done + +# Setup GPU array if GPUs are requested +if [ $NUM_GPUS -gt 0 ]; then + # Auto-detect available GPUs + AVAILABLE_GPUS_COUNT=$(rocm-smi --showid 2>/dev/null | grep -oP 'GPU\[\K[0-9]+' | wc -l) + if [ "$AVAILABLE_GPUS_COUNT" -gt 0 ]; then + MAX_AVAILABLE=$AVAILABLE_GPUS_COUNT + else + MAX_AVAILABLE=0 + fi + + # Validate requested GPU count + if [ $NUM_GPUS -gt $MAX_AVAILABLE ]; then + echo "WARNING: Requested $NUM_GPUS GPUs but only $MAX_AVAILABLE available. Using $MAX_AVAILABLE GPUs." + NUM_GPUS=$MAX_AVAILABLE + fi + + # Build GPU array (0 to NUM_GPUS-1) + GPU_ARRAY=() + for ((i=0; i /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_2d.txt || true - + # Run in background + ( + # Set HIP_VISIBLE_DEVICES if GPU was assigned + if [ -n "$GPU_ID" ]; then + export HIP_VISIBLE_DEVICES=$GPU_ID + fi + + MIOPEN_ENABLE_LOGGING_CMD=1 $PYTHON_CMD run_model_with_miopen.py \ + --model $model --batch-size $batch_size --channels $channels --height $height --width $width --precision $precision \ + > /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_2d.txt || true + echo -e "${GREEN}[DONE]${NC} ${CYAN}2D${NC} ${YELLOW}$CONFIG_NAME${NC}" + ) & + + job_pids+=($!) + + # Limit number of parallel jobs + if [ ${#job_pids[@]} -ge $MAX_PARALLEL_JOBS ]; then + # Wait for any job to complete + wait -n + # Remove completed jobs from array + for i in "${!job_pids[@]}"; do + if ! kill -0 "${job_pids[$i]}" 2>/dev/null; then + unset 'job_pids[$i]' + fi + done + job_pids=("${job_pids[@]}") # Re-index array + fi done < $OUTPUT_DIR/model_configs_2d.csv +# Wait for all remaining 2D jobs to complete +echo "Waiting for remaining 2D jobs to complete..." +wait + +echo "All 2D models processed!" +echo "" + # Process 3D models from CSV configuration file echo "Processing 3D models from $OUTPUT_DIR/model_configs_3d.csv..." 
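The 2D loop above and the 3D loop below share one scheduling pattern: each model run is forked into a background subshell pinned to a round-robin GPU via HIP_VISIBLE_DEVICES, concurrency is capped with wait -n, and finished PIDs are pruned from the tracking array. A minimal standalone sketch of that pattern follows (ALL_CONFIGS and run_one_config are hypothetical stand-ins; wait -n requires bash 4.3+):

#!/usr/bin/env bash
# Sketch of the job-scheduling pattern used by generate_test_dataset.sh.
MAX_PARALLEL_JOBS=4
GPU_ARRAY=(0 1 2 3)
GPU_COUNTER=0
job_pids=()

for config in "${ALL_CONFIGS[@]}"; do                         # hypothetical config list
    GPU_ID=${GPU_ARRAY[$((GPU_COUNTER % ${#GPU_ARRAY[@]}))]}  # round-robin device pick
    GPU_COUNTER=$((GPU_COUNTER + 1))
    (
        export HIP_VISIBLE_DEVICES=$GPU_ID                    # pin this job to one GPU
        run_one_config "$config"                              # hypothetical worker command
    ) &
    job_pids+=($!)
    if [ ${#job_pids[@]} -ge $MAX_PARALLEL_JOBS ]; then
        wait -n                                               # block until any one job exits
        for i in "${!job_pids[@]}"; do                        # drop PIDs that have finished
            kill -0 "${job_pids[$i]}" 2>/dev/null || unset 'job_pids[$i]'
        done
        job_pids=("${job_pids[@]}")                           # re-index the array
    fi
done
wait                                                          # drain the remaining jobs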
@@ -175,6 +291,10 @@ CURRENT_3D_CONFIG=0 echo "Total 3D configurations to process: $TOTAL_3D_CONFIGS" echo "" +# Reset job tracking array +declare -a job_pids=() +# GPU counter continues from 2D models for round-robin assignment + # Read 3D configurations from CSV (skip comments and header) while IFS=',' read -r config_name model batch_size channels temporal_size height width precision; do # Skip comments and empty lines @@ -185,21 +305,59 @@ while IFS=',' read -r config_name model batch_size channels temporal_size height # Increment counter CURRENT_3D_CONFIG=$((CURRENT_3D_CONFIG + 1)) - # Build configuration command for 3D models CONFIG="--model $model --batch-size $batch_size --channels $channels --temporal-size $temporal_size --height $height --width $width --precision $precision" CONFIG_NAME="$config_name" - echo -e "${GREEN}[${CURRENT_3D_CONFIG}/${TOTAL_3D_CONFIGS}]${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC}" + # Assign GPU in round-robin fashion if GPUs are specified + if [ $NUM_GPUS -gt 0 ]; then + GPU_ID=${GPU_ARRAY[$((GPU_COUNTER % NUM_GPUS))]} + GPU_COUNTER=$((GPU_COUNTER + 1)) + echo -e "${GREEN}[${CURRENT_3D_CONFIG}/${TOTAL_3D_CONFIGS}]${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC} ${PURPLE}[GPU ${GPU_ID}]${NC} - Starting in background" + else + GPU_ID="" + echo -e "${GREEN}[${CURRENT_3D_CONFIG}/${TOTAL_3D_CONFIGS}]${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC} - Starting in background" + fi + # Run in background + ( + # Set HIP_VISIBLE_DEVICES if GPU was assigned + if [ -n "$GPU_ID" ]; then + export HIP_VISIBLE_DEVICES=$GPU_ID + fi + + MIOPEN_ENABLE_LOGGING_CMD=1 $PYTHON_CMD run_model_with_miopen.py \ + --model $model --batch-size $batch_size --channels $channels --temporal-size $temporal_size --height $height --width $width --precision $precision \ + > /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_3d.txt || true + echo -e "${GREEN}[DONE]${NC} ${CYAN}3D${NC} ${YELLOW}$CONFIG_NAME${NC}" + ) & - # Actual run with logging (suppress stdout, only capture stderr with MIOpen commands) - MIOPEN_ENABLE_LOGGING_CMD=1 $PYTHON_CMD run_model_with_miopen.py \ - --model $model --batch-size $batch_size --channels $channels --temporal-size $temporal_size --height $height --width $width --precision $precision \ - > /dev/null 2>> $OUTPUT_DIR/${model}_miopen_log_3d.txt || true + job_pids+=($!) + + # Limit number of parallel jobs + if [ ${#job_pids[@]} -ge $MAX_PARALLEL_JOBS ]; then + # Wait for any job to complete + wait -n + # Remove completed jobs from array + for i in "${!job_pids[@]}"; do + if ! kill -0 "${job_pids[$i]}" 2>/dev/null; then + unset 'job_pids[$i]' + fi + done + job_pids=("${job_pids[@]}") # Re-index array + fi done < $OUTPUT_DIR/model_configs_3d.csv +# Wait for all remaining 3D jobs to complete +echo "Waiting for remaining 3D jobs to complete..." +wait + +echo "All 3D models processed!" +echo "" + +# Disable trap on successful completion +trap - SIGINT SIGTERM EXIT echo "" echo "Step 3: Converting MIOpen commands to CSV test cases" @@ -311,7 +469,7 @@ if [ $COUNT_3D -gt 0 ]; then fi echo " - Intermediate files in: $OUTPUT_DIR/" echo "" -echo "To use these datasets:" -echo " 1. Build the test: cd ../script && make -j64 test_grouped_convnd_fwd_dataset_xdl" -echo " 2. Run the test: ./bin/test_grouped_convnd_fwd_dataset_xdl" +echo "To use these datasets for direction (bwd_data, bwd_weight, or fwd):" +echo " 1. Build the test: cd ../script && make -j64 test_grouped_convnd__dataset_xdl" +echo " 2. 
Run the test: ./bin/test_grouped_convnd__dataset_xdl" echo "" diff --git a/test_data/gtest_parallel.py b/test_data/gtest_parallel.py new file mode 100644 index 0000000000..9ea9ee79b0 --- /dev/null +++ b/test_data/gtest_parallel.py @@ -0,0 +1,1187 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# This file has been modified to allow round-robin GPU scheduling. +# Original file can be found at +# https://github.com/google/gtest-parallel/blob/cd488bdedc1d2cffb98201a17afc1b298b0b90f1/gtest_parallel.py +# Changes from the original file are subject to the following license: +# SPDX-License-Identifier: MIT +# +# Copyright 2013 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +from functools import total_ordering +import gzip +import io +import json +import multiprocessing +import optparse +import os +import re +import shutil +import signal +import subprocess +import sys +import tempfile +import threading +import time + +if sys.version_info.major >= 3: + long = int + import _pickle as cPickle + import _thread as thread +else: + import cPickle + import thread + +from pickle import HIGHEST_PROTOCOL as PICKLE_HIGHEST_PROTOCOL + +if sys.platform == "win32": + import msvcrt +else: + import fcntl + + +# An object that catches SIGINT sent to the Python process and notices +# if processes passed to wait() die by SIGINT (we need to look for +# both of those cases, because pressing Ctrl+C can result in either +# the main process or one of the subprocesses getting the signal). +# +# Before a SIGINT is seen, wait(p) will simply call p.wait() and +# return the result. Once a SIGINT has been seen (in the main process +# or a subprocess, including the one the current call is waiting for), +# wait(p) will call p.terminate() and raise ProcessWasInterrupted. +class SigintHandler(object): + class ProcessWasInterrupted(Exception): + pass + + sigint_returncodes = { + -signal.SIGINT, # Unix + -1073741510, # Windows + } + + def __init__(self): + self.__lock = threading.Lock() + self.__processes = set() + self.__got_sigint = False + signal.signal(signal.SIGINT, lambda signal_num, frame: self.interrupt()) + + def __on_sigint(self): + self.__got_sigint = True + while self.__processes: + try: + self.__processes.pop().terminate() + except OSError: + pass + + def interrupt(self): + with self.__lock: + self.__on_sigint() + + def got_sigint(self): + with self.__lock: + return self.__got_sigint + + def wait(self, p, timeout_per_test): + with self.__lock: + if self.__got_sigint: + p.terminate() + self.__processes.add(p) + try: + code = p.wait(timeout_per_test) + except subprocess.TimeoutExpired: + p.terminate() + self.__processes.remove(p) + code = -errno.ETIME + with self.__lock: + self.__processes.discard(p) + if code in self.sigint_returncodes: + self.__on_sigint() + if self.__got_sigint: + raise self.ProcessWasInterrupted + return code + + +sigint_handler = SigintHandler() + + +# Return the width of the terminal, or None if it couldn't be +# determined (e.g. 
because we're not being run interactively). +def term_width(out): + if not out.isatty(): + return None + try: + p = subprocess.Popen( + ["stty", "size"], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + (out, err) = p.communicate() + if p.returncode != 0 or err: + return None + return int(out.split()[1]) + except (IndexError, OSError, ValueError): + return None + + +# Output transient and permanent lines of text. If several transient +# lines are written in sequence, the new will overwrite the old. We +# use this to ensure that lots of unimportant info (tests passing) +# won't drown out important info (tests failing). +class Outputter(object): + def __init__(self, out_file): + self.__out_file = out_file + self.__previous_line_was_transient = False + self.__width = term_width(out_file) # Line width, or None if not a tty. + + def transient_line(self, msg): + if self.__width is None: + self.__out_file.write(msg + "\n") + self.__out_file.flush() + else: + self.__out_file.write("\r" + msg[: self.__width].ljust(self.__width)) + self.__previous_line_was_transient = True + + def flush_transient_output(self): + if self.__previous_line_was_transient: + self.__out_file.write("\n") + self.__previous_line_was_transient = False + + def permanent_line(self, msg): + self.flush_transient_output() + self.__out_file.write(msg + "\n") + if self.__width is None: + self.__out_file.flush() + + +def get_available_gpus(num_gpus): + """Get list of available GPU IDs based on HIP_VISIBLE_DEVICES and num_gpus. + + Returns a list of GPU IDs to use. + If HIP_VISIBLE_DEVICES is set, we return the first min(num_gpus, len(HIP_VISIBLE_DEVICES)) GPU IDs from it. + If not set, we return GPU IDs 0 to num_gpus-1. + """ + hip_visible = os.environ.get("HIP_VISIBLE_DEVICES", None) + + # Treat empty string as not set + if hip_visible is not None and hip_visible.strip(): + # Parse HIP_VISIBLE_DEVICES to get the list of available GPU IDs + try: + available_gpu_ids = [ + gpu_id.strip() for gpu_id in hip_visible.split(",") if gpu_id.strip() + ] + except ValueError: + sys.stderr.write( + "Warning: Invalid HIP_VISIBLE_DEVICES format, using GPU 0\n" + ) + return ["0"] + + # If parsing resulted in empty list, treat as not set + if not available_gpu_ids: + return [str(i) for i in range(num_gpus)] + + # Use the first min(num_gpus, len(available_gpu_ids)) GPUs from the list + num_to_use = min(num_gpus, len(available_gpu_ids)) + return available_gpu_ids[:num_to_use] + else: + # If HIP_VISIBLE_DEVICES is not set or empty, use GPU IDs 0 to num_gpus-1 + return [str(i) for i in range(num_gpus)] + + +def get_save_file_path(): + """Return path to file for saving transient data.""" + if sys.platform == "win32": + default_cache_path = os.path.join(os.path.expanduser("~"), "AppData", "Local") + cache_path = os.environ.get("LOCALAPPDATA", default_cache_path) + else: + # We don't use xdg module since it's not a standard. + default_cache_path = os.path.join(os.path.expanduser("~"), ".cache") + cache_path = os.environ.get("XDG_CACHE_HOME", default_cache_path) + + if os.path.isdir(cache_path): + return os.path.join(cache_path, "gtest-parallel") + else: + sys.stderr.write("Directory {} does not exist".format(cache_path)) + return os.path.join(os.path.expanduser("~"), ".gtest-parallel-times") + + +@total_ordering +class Task(object): + """Stores information about a task (single execution of a test). + + This class stores information about the test to be executed (gtest binary and + test name), and its result (log file, exit code and runtime). 
+ Each task is uniquely identified by the gtest binary, the test name and an + execution number that increases each time the test is executed. + Additionaly we store the last execution time, so that next time the test is + executed, the slowest tests are run first. + """ + + def __init__( + self, + test_binary, + test_name, + test_command, + execution_number, + last_execution_time, + output_dir, + ): + self.test_name = test_name + self.output_dir = output_dir + self.test_binary = test_binary + self.test_command = test_command + self.execution_number = execution_number + self.last_execution_time = last_execution_time + + self.exit_code = None + self.runtime_ms = None + + self.test_id = (test_binary, test_name) + self.task_id = (test_binary, test_name, self.execution_number) + + self.log_file = Task._logname( + self.output_dir, self.test_binary, test_name, self.execution_number + ) + + def __sorting_key(self): + # Unseen or failing tests (both missing execution time) take precedence over + # execution time. Tests are greater (seen as slower) when missing times so + # that they are executed first. + return (1 if self.last_execution_time is None else 0, self.last_execution_time) + + def __eq__(self, other): + return self.__sorting_key() == other.__sorting_key() + + def __ne__(self, other): + return not (self == other) + + def __lt__(self, other): + return self.__sorting_key() < other.__sorting_key() + + @staticmethod + def _normalize(string): + return re.sub("[^A-Za-z0-9]", "_", string) + + @staticmethod + def _logname(output_dir, test_binary, test_name, execution_number): + # Store logs to temporary files if there is no output_dir. + if output_dir is None: + (log_handle, log_name) = tempfile.mkstemp( + prefix="gtest_parallel_", suffix=".log" + ) + os.close(log_handle) + return log_name + + log_name = "%s-%s-%d.log" % ( + Task._normalize(os.path.basename(test_binary)), + Task._normalize(test_name), + execution_number, + ) + + return os.path.join(output_dir, log_name) + + def run(self, timeout_per_test, gpu_id=None): + begin = time.time() + with open(self.log_file, "w") as log: + # Set up environment with GPU assignment if specified + env = os.environ.copy() + if gpu_id is not None: + env["HIP_VISIBLE_DEVICES"] = str(gpu_id) + + # Get the absolute path to the test binary and its directory + # This handles both relative and absolute paths correctly + abs_test_binary = os.path.abspath(self.test_binary) + test_binary_dir = os.path.dirname(abs_test_binary) + + # Update the test command to use the absolute path + abs_test_command = [abs_test_binary] + self.test_command[1:] + + task = subprocess.Popen( + abs_test_command, stdout=log, stderr=log, env=env, cwd=test_binary_dir + ) + try: + self.exit_code = sigint_handler.wait(task, timeout_per_test) + except sigint_handler.ProcessWasInterrupted: + thread.exit() + self.runtime_ms = int(1000 * (time.time() - begin)) + self.last_execution_time = None if self.exit_code else self.runtime_ms + + +class TaskManager(object): + """Executes the tasks and stores the passed, failed and interrupted tasks. + + When a task is run, this class keeps track if it passed, failed or was + interrupted. After a task finishes it calls the relevant functions of the + Logger, TestResults and TestTimes classes, and in case of failure, retries the + test as specified by the --retry_failed flag. 
+ """ + + def __init__( + self, + times, + logger, + test_results, + task_factory, + times_to_retry, + initial_execution_number, + ): + self.times = times + self.logger = logger + self.test_results = test_results + self.task_factory = task_factory + self.times_to_retry = times_to_retry + self.initial_execution_number = initial_execution_number + + self.global_exit_code = 0 + + self.passed = [] + self.failed = [] + self.started = {} + self.timed_out = [] + self.execution_number = {} + + self.lock = threading.Lock() + + def __get_next_execution_number(self, test_id): + with self.lock: + next_execution_number = self.execution_number.setdefault( + test_id, self.initial_execution_number + ) + self.execution_number[test_id] += 1 + return next_execution_number + + def __register_start(self, task): + with self.lock: + self.started[task.task_id] = task + + def register_exit(self, task): + self.logger.log_exit(task) + self.times.record_test_time( + task.test_binary, task.test_name, task.last_execution_time + ) + if self.test_results: + self.test_results.log( + task.test_name, task.runtime_ms / 1000.0, task.exit_code + ) + + with self.lock: + self.started.pop(task.task_id) + if task.exit_code == 0: + self.passed.append(task) + elif task.exit_code == -errno.ETIME: + self.timed_out.append(task) + else: + self.failed.append(task) + + def run_task(self, task, timeout_per_test, gpu_id=None): + for try_number in range(self.times_to_retry + 1): + self.__register_start(task) + task.run(timeout_per_test, gpu_id) + self.register_exit(task) + + if task.exit_code == 0: + break + + if try_number < self.times_to_retry: + execution_number = self.__get_next_execution_number(task.test_id) + # We need create a new Task instance. Each task represents a single test + # execution, with its own runtime, exit code and log file. + task = self.task_factory( + task.test_binary, + task.test_name, + task.test_command, + execution_number, + task.last_execution_time, + task.output_dir, + ) + + with self.lock: + if task.exit_code != 0: + self.global_exit_code = task.exit_code + + +class FilterFormat(object): + def __init__(self, output_dir): + if sys.stdout.isatty(): + # stdout needs to be unbuffered since the output is interactive. 
+ if isinstance(sys.stdout, io.TextIOWrapper): + # workaround for https://bugs.python.org/issue17404 + sys.stdout = io.TextIOWrapper( + sys.stdout.detach(), + line_buffering=True, + write_through=True, + newline="\n", + ) + else: + sys.stdout = os.fdopen(sys.stdout.fileno(), "w", 0) + + self.output_dir = output_dir + + self.total_tasks = 0 + self.finished_tasks = 0 + self.out = Outputter(sys.stdout) + self.stdout_lock = threading.Lock() + + def move_to(self, destination_dir, tasks): + if self.output_dir is None: + return + + destination_dir = os.path.join(self.output_dir, destination_dir) + os.makedirs(destination_dir) + for task in tasks: + shutil.move(task.log_file, destination_dir) + + def print_tests(self, message, tasks, print_try_number, print_test_command): + self.out.permanent_line("%s (%s/%s):" % (message, len(tasks), self.total_tasks)) + for task in sorted(tasks): + runtime_ms = "Interrupted" + if task.runtime_ms is not None: + runtime_ms = "%d ms" % task.runtime_ms + if print_test_command: + try: + cmd_str = " ".join(task.test_command) + except TypeError: + cmd_str = task.test_command + self.out.permanent_line( + "%11s: %s%s" + % ( + runtime_ms, + cmd_str, + (" (try #%d)" % task.execution_number) + if print_try_number + else "", + ) + ) + else: + self.out.permanent_line( + "%11s: %s %s%s" + % ( + runtime_ms, + task.test_binary, + task.test_name, + (" (try #%d)" % task.execution_number) + if print_try_number + else "", + ) + ) + + def log_exit(self, task): + with self.stdout_lock: + self.finished_tasks += 1 + self.out.transient_line( + "[%d/%d] %s (%d ms)" + % ( + self.finished_tasks, + self.total_tasks, + task.test_name, + task.runtime_ms, + ) + ) + if task.exit_code != 0: + signal_name = None + if task.exit_code < 0: + try: + signal_name = signal.Signals(-task.exit_code).name + except ValueError: + pass + + with open(task.log_file) as f: + for line in f.readlines(): + self.out.permanent_line(line.rstrip()) + if task.exit_code is None: + self.out.permanent_line( + "[%d/%d] %s aborted after %d ms" + % ( + self.finished_tasks, + self.total_tasks, + task.test_name, + task.runtime_ms, + ) + ) + elif task.exit_code == -errno.ETIME: + self.out.permanent_line( + "\033[31m[ TIMEOUT ]\033[0m %s timed out after %d s" + % (task.test_name, task.runtime_ms / 1000) + ) + elif signal_name is not None: + self.out.permanent_line( + "[%d/%d] %s killed by signal %s (%d ms)" + % ( + self.finished_tasks, + self.total_tasks, + task.test_name, + signal_name, + task.runtime_ms, + ) + ) + else: + self.out.permanent_line( + "[%d/%d] %s returned with exit code %d (%d ms)" + % ( + self.finished_tasks, + self.total_tasks, + task.test_name, + task.exit_code, + task.runtime_ms, + ) + ) + + if self.output_dir is None: + # Try to remove the file 100 times (sleeping for 0.1 second in between). + # This is a workaround for a process handle seemingly holding on to the + # file for too long inside os.subprocess. This workaround is in place + # until we figure out a minimal repro to report upstream (or a better + # suspect) to prevent os.remove exceptions. + num_tries = 100 + for i in range(num_tries): + try: + os.remove(task.log_file) + except OSError as e: + if e.errno is not errno.ENOENT: + if i is num_tries - 1: + self.out.permanent_line( + "Could not remove temporary log file: " + str(e) + ) + else: + time.sleep(0.1) + continue + break + + def log_tasks(self, total_tasks): + self.total_tasks += total_tasks + self.out.transient_line("[0/%d] Running tests..." 
% self.total_tasks) + + def summarize(self, passed_tasks, failed_tasks, interrupted_tasks): + stats = {} + + def add_stats(stats, task, idx): + task_key = (task.test_binary, task.test_name) + if task_key not in stats: + # (passed, failed, interrupted) task_key is added as tie breaker to get + # alphabetic sorting on equally-stable tests + stats[task_key] = [0, 0, 0, task_key] + stats[task_key][idx] += 1 + + for task in passed_tasks: + add_stats(stats, task, 0) + for task in failed_tasks: + add_stats(stats, task, 1) + for task in interrupted_tasks: + add_stats(stats, task, 2) + + self.out.permanent_line("SUMMARY:") + for task_key in sorted(stats, key=stats.__getitem__): + (num_passed, num_failed, num_interrupted, _) = stats[task_key] + (test_binary, task_name) = task_key + total_runs = num_passed + num_failed + num_interrupted + if num_passed == total_runs: + continue + self.out.permanent_line( + " %s %s passed %d / %d times%s." + % ( + test_binary, + task_name, + num_passed, + total_runs, + "" + if num_interrupted == 0 + else (" (%d interrupted)" % num_interrupted), + ) + ) + + def flush(self): + self.out.flush_transient_output() + + +class CollectTestResults(object): + def __init__(self, json_dump_filepath): + self.test_results_lock = threading.Lock() + self.json_dump_file = open(json_dump_filepath, "w") + self.test_results = { + "interrupted": False, + "path_delimiter": ".", + # Third version of the file format. See the link in the flag description + # for details. + "version": 3, + "seconds_since_epoch": int(time.time()), + "num_failures_by_type": { + "PASS": 0, + "FAIL": 0, + "TIMEOUT": 0, + }, + "tests": {}, + } + + def log(self, test, runtime_seconds, exit_code): + if exit_code is None: + actual_result = "TIMEOUT" + elif exit_code == 0: + actual_result = "PASS" + else: + actual_result = "FAIL" + with self.test_results_lock: + self.test_results["num_failures_by_type"][actual_result] += 1 + results = self.test_results["tests"] + for name in test.split("."): + results = results.setdefault(name, {}) + + if results: + results["actual"] += " " + actual_result + results["times"].append(runtime_seconds) + else: # This is the first invocation of the test + results["actual"] = actual_result + results["times"] = [runtime_seconds] + results["time"] = runtime_seconds + results["expected"] = "PASS" + + def dump_to_file_and_close(self): + json.dump(self.test_results, self.json_dump_file) + self.json_dump_file.close() + + +# Record of test runtimes. Has built-in locking. +class TestTimes(object): + class LockedFile(object): + def __init__(self, filename, mode): + self._filename = filename + self._mode = mode + self._fo = None + + def __enter__(self): + self._fo = open(self._filename, self._mode) + + # Regardless of opening mode we always seek to the beginning of file. + # This simplifies code working with LockedFile and also ensures that + # we lock (and unlock below) always the same region in file on win32. + self._fo.seek(0) + + try: + if sys.platform == "win32": + # We are locking here fixed location in file to use it as + # an exclusive lock on entire file. + msvcrt.locking(self._fo.fileno(), msvcrt.LK_LOCK, 1) + else: + fcntl.flock(self._fo.fileno(), fcntl.LOCK_EX) + except IOError: + self._fo.close() + raise + + return self._fo + + def __exit__(self, exc_type, exc_value, traceback): + # Flush any buffered data to disk. This is needed to prevent race + # condition which happens from the moment of releasing file lock + # till closing the file. 
+ self._fo.flush() + + try: + if sys.platform == "win32": + self._fo.seek(0) + msvcrt.locking(self._fo.fileno(), msvcrt.LK_UNLCK, 1) + else: + fcntl.flock(self._fo.fileno(), fcntl.LOCK_UN) + finally: + self._fo.close() + + return exc_value is None + + def __init__(self, save_file): + "Create new object seeded with saved test times from the given file." + self.__times = {} # (test binary, test name) -> runtime in ms + + # Protects calls to record_test_time(); other calls are not + # expected to be made concurrently. + self.__lock = threading.Lock() + + try: + with TestTimes.LockedFile(save_file, "rb") as fd: + times = TestTimes.__read_test_times_file(fd) + except IOError: + # We couldn't obtain the lock. + return + + # Discard saved times if the format isn't right. + if type(times) is not dict: + return + for (test_binary, test_name), runtime in times.items(): + if ( + type(test_binary) is not str + or type(test_name) is not str + or type(runtime) not in {int, long, type(None)} + ): + return + + self.__times = times + + def get_test_time(self, binary, testname): + """Return the last duration for the given test as an integer number of + milliseconds, or None if the test failed or if there's no record for it.""" + return self.__times.get((binary, testname), None) + + def record_test_time(self, binary, testname, runtime_ms): + """Record that the given test ran in the specified number of + milliseconds. If the test failed, runtime_ms should be None.""" + with self.__lock: + self.__times[(binary, testname)] = runtime_ms + + def write_to_file(self, save_file): + "Write all the times to file." + try: + with TestTimes.LockedFile(save_file, "a+b") as fd: + times = TestTimes.__read_test_times_file(fd) + + if times is None: + times = self.__times + else: + times.update(self.__times) + + # We erase data from file while still holding a lock to it. This + # way reading old test times and appending new ones are atomic + # for external viewer. + fd.seek(0) + fd.truncate() + with gzip.GzipFile(fileobj=fd, mode="wb") as gzf: + cPickle.dump(times, gzf, PICKLE_HIGHEST_PROTOCOL) + except IOError: + pass # ignore errors---saving the times isn't that important + + @staticmethod + def __read_test_times_file(fd): + try: + with gzip.GzipFile(fileobj=fd, mode="rb") as gzf: + times = cPickle.load(gzf) + except Exception: + # File doesn't exist, isn't readable, is malformed---whatever. + # Just ignore it. 
+ return None + else: + return times + + +def find_tests(binaries, additional_args, options, times): + test_count = 0 + tasks = [] + for test_binary in binaries: + command = [test_binary] + additional_args + if options.gtest_also_run_disabled_tests: + command += ["--gtest_also_run_disabled_tests"] + + list_command = command + ["--gtest_list_tests"] + if options.gtest_filter != "": + list_command += ["--gtest_filter=" + options.gtest_filter] + + # Get absolute path and directory for the test binary + abs_test_binary = os.path.abspath(test_binary) + test_binary_dir = os.path.dirname(abs_test_binary) + + # Create list command with absolute path + abs_list_command = [abs_test_binary] + additional_args + ["--gtest_list_tests"] + if options.gtest_also_run_disabled_tests: + abs_list_command += ["--gtest_also_run_disabled_tests"] + if options.gtest_filter != "": + abs_list_command += ["--gtest_filter=" + options.gtest_filter] + + try: + # Run the list command from the binary's directory so relative paths work + test_list = subprocess.check_output( + abs_list_command, stderr=subprocess.STDOUT, cwd=test_binary_dir + ) + except subprocess.CalledProcessError as e: + sys.exit("%s: %s\n%s" % (test_binary, str(e), e.output)) + + try: + test_list = test_list.split("\n") + except TypeError: + # subprocess.check_output() returns bytes in python3 + test_list = test_list.decode(sys.stdout.encoding).split("\n") + + command += ["--gtest_color=" + options.gtest_color] + + test_group = "" + for line in test_list: + if not line.strip(): + continue + if line[0] != " ": + # Remove comments for typed tests and strip whitespace. + test_group = line.split("#")[0].strip() + continue + # Remove comments for parameterized tests and strip whitespace. + line = line.split("#")[0].strip() + if not line: + continue + + test_name = test_group + line + if not options.gtest_also_run_disabled_tests and "DISABLED_" in test_name: + continue + + # Skip PRE_ tests which are used by Chromium. + if ".PRE_" in test_name: + continue + + last_execution_time = times.get_test_time(test_binary, test_name) + if options.failed and last_execution_time is not None: + continue + + test_command = command + ["--gtest_filter=" + test_name] + if (test_count - options.shard_index) % options.shard_count == 0: + for execution_number in range(options.repeat): + tasks.append( + Task( + test_binary, + test_name, + test_command, + execution_number + 1, + last_execution_time, + options.output_dir, + ) + ) + + test_count += 1 + + # Sort the tasks to run the slowest tests first, so that faster ones can be + # finished in parallel. + return sorted(tasks, reverse=True) + + +def execute_tasks( + tasks, + pool_size, + task_manager, + timeout_seconds, + timeout_per_test, + serialize_test_cases, + available_gpus=None, +): + class WorkerFn(object): + def __init__(self, tasks, running_groups, timeout_per_test, available_gpus): + self.tasks = tasks + self.running_groups = running_groups + self.timeout_per_test = timeout_per_test + self.available_gpus = available_gpus + self.task_lock = threading.Lock() + self.task_counter = 0 + + def __call__(self): + while True: + gpu_id = None + with self.task_lock: + for task_id in range(len(self.tasks)): + task = self.tasks[task_id] + + if self.running_groups is not None: + test_group = task.test_name.split(".")[0] + if test_group in self.running_groups: + # Try to find other non-running test group. 
+ continue + else: + self.running_groups.add(test_group) + + # Assign GPU in round-robin fashion if GPUs are available + if self.available_gpus: + gpu_id = self.available_gpus[ + self.task_counter % len(self.available_gpus) + ] + self.task_counter += 1 + + del self.tasks[task_id] + break + else: + # Either there is no tasks left or number or remaining test + # cases (groups) is less than number or running threads. + return + + task_manager.run_task(task, self.timeout_per_test, gpu_id) + + if self.running_groups is not None: + with self.task_lock: + self.running_groups.remove(test_group) + + def start_daemon(func): + t = threading.Thread(target=func) + t.daemon = True + t.start() + return t + + timeout = None + try: + if timeout_seconds: + timeout = threading.Timer(timeout_seconds, sigint_handler.interrupt) + timeout.start() + running_groups = set() if serialize_test_cases else None + worker_fn = WorkerFn(tasks, running_groups, timeout_per_test, available_gpus) + workers = [start_daemon(worker_fn) for _ in range(pool_size)] + for worker in workers: + worker.join() + finally: + if timeout: + timeout.cancel() + for task in list(task_manager.started.values()): + task.runtime_ms = timeout_seconds * 1000 + task_manager.register_exit(task) + + +def default_options_parser(): + parser = optparse.OptionParser( + usage="usage: %prog [options] binary [binary ...] -- [additional args]" + ) + + parser.add_option( + "-d", + "--output_dir", + type="string", + default=None, + help="Output directory for test logs. Logs will be " + "available under gtest-parallel-logs/, so " + "--output_dir=/tmp will results in all logs being " + "available under /tmp/gtest-parallel-logs/.", + ) + parser.add_option( + "-r", + "--repeat", + type="int", + default=1, + help="Number of times to execute all the tests.", + ) + parser.add_option( + "--retry_failed", + type="int", + default=0, + help="Number of times to repeat failed tests.", + ) + parser.add_option( + "--failed", + action="store_true", + default=False, + help="run only failed and new tests", + ) + parser.add_option( + "-w", + "--workers", + type="int", + default=multiprocessing.cpu_count(), + help="number of workers to spawn", + ) + parser.add_option( + "--gpus", + type="int", + default=1, + help="number of GPUs to use for parallel execution (default: 1)", + ) + parser.add_option( + "--gtest_color", type="string", default="yes", help="color output" + ) + parser.add_option("--gtest_filter", type="string", default="", help="test filter") + parser.add_option( + "--gtest_also_run_disabled_tests", + action="store_true", + default=False, + help="run disabled tests too", + ) + parser.add_option( + "--print_test_times", + action="store_true", + default=False, + help="list the run time of each test at the end of execution", + ) + parser.add_option( + "--print_test_command", + action="store_true", + default=False, + help="Print full test command instead of name", + ) + parser.add_option( + "--shard_count", + type="int", + default=1, + help="total number of shards (for sharding test execution " + "between multiple machines)", + ) + parser.add_option( + "--shard_index", + type="int", + default=0, + help="zero-indexed number identifying this shard (for " + "sharding test execution between multiple machines)", + ) + parser.add_option( + "--dump_json_test_results", + type="string", + default=None, + help="Saves the results of the tests as a JSON machine-" + "readable file. 
The format of the file is specified at " + "https://www.chromium.org/developers/the-json-test-results-format", + ) + parser.add_option( + "--timeout", + type="int", + default=None, + help="Interrupt all remaining processes after the given time (in seconds).", + ) + parser.add_option( + "--timeout_per_test", + type="int", + default=None, + help="Interrupt single processes after the given time (in seconds).", + ) + parser.add_option( + "--serialize_test_cases", + action="store_true", + default=False, + help="Do not run tests from the same test case in parallel.", + ) + return parser + + +def main(): + # Remove additional arguments (anything after --). + additional_args = [] + + for i in range(len(sys.argv)): + if sys.argv[i] == "--": + additional_args = sys.argv[i + 1 :] + sys.argv = sys.argv[:i] + break + + parser = default_options_parser() + (options, binaries) = parser.parse_args() + + if options.output_dir is not None and not os.path.isdir(options.output_dir): + parser.error( + "--output_dir value must be an existing directory, " + 'current value is "%s"' % options.output_dir + ) + + # Append gtest-parallel-logs to log output, this is to avoid deleting user + # data if an user passes a directory where files are already present. If a + # user specifies --output_dir=Docs/, we'll create Docs/gtest-parallel-logs + # and clean that directory out on startup, instead of nuking Docs/. + if options.output_dir: + options.output_dir = os.path.join(options.output_dir, "gtest-parallel-logs") + + if binaries == []: + parser.print_usage() + sys.exit(1) + + if options.shard_count < 1: + parser.error( + "Invalid number of shards: %d. Must be at least 1." % options.shard_count + ) + if not (0 <= options.shard_index < options.shard_count): + parser.error( + "Invalid shard index: %d. Must be between 0 and %d " + "(less than the number of shards)." + % (options.shard_index, options.shard_count - 1) + ) + + # Check that all test binaries have an unique basename. That way we can ensure + # the logs are saved to unique files even when two different binaries have + # common tests. + unique_binaries = set(os.path.basename(binary) for binary in binaries) + assert len(unique_binaries) == len(binaries), ( + "All test binaries must have an unique basename." + ) + + if options.output_dir: + # Remove files from old test runs. + if os.path.isdir(options.output_dir): + shutil.rmtree(options.output_dir) + # Create directory for test log output. + try: + os.makedirs(options.output_dir) + except OSError as e: + # Ignore errors if this directory already exists. 
+ if e.errno != errno.EEXIST or not os.path.isdir(options.output_dir): + raise e + + test_results = None + if options.dump_json_test_results is not None: + test_results = CollectTestResults(options.dump_json_test_results) + + save_file = get_save_file_path() + + times = TestTimes(save_file) + logger = FilterFormat(options.output_dir) + + task_manager = TaskManager( + times, logger, test_results, Task, options.retry_failed, options.repeat + 1 + ) + + # Get available GPUs based on HIP_VISIBLE_DEVICES and --gpus option + available_gpus = get_available_gpus(options.gpus) if options.gpus > 0 else None + + tasks = find_tests(binaries, additional_args, options, times) + logger.log_tasks(len(tasks)) + execute_tasks( + tasks, + options.workers, + task_manager, + options.timeout, + options.timeout_per_test, + options.serialize_test_cases, + available_gpus, + ) + + print_try_number = options.retry_failed > 0 or options.repeat > 1 + if task_manager.passed: + logger.move_to("passed", task_manager.passed) + if options.print_test_times: + logger.print_tests( + "PASSED TESTS", + task_manager.passed, + print_try_number, + options.print_test_command, + ) + + if task_manager.failed: + logger.print_tests( + "FAILED TESTS", + task_manager.failed, + print_try_number, + options.print_test_command, + ) + logger.move_to("failed", task_manager.failed) + + if task_manager.timed_out: + logger.print_tests( + "TIMED OUT TESTS", + task_manager.timed_out, + print_try_number, + options.print_test_command, + ) + logger.move_to("timed_out", task_manager.timed_out) + + if task_manager.started: + logger.print_tests( + "INTERRUPTED TESTS", + task_manager.started.values(), + print_try_number, + options.print_test_command, + ) + logger.move_to("interrupted", task_manager.started.values()) + + if options.repeat > 1 and (task_manager.failed or task_manager.started): + logger.summarize( + task_manager.passed, task_manager.failed, task_manager.started.values() + ) + + logger.flush() + times.write_to_file(save_file) + if test_results: + test_results.dump_to_file_and_close() + + if sigint_handler.got_sigint(): + return -signal.SIGINT + + return task_manager.global_exit_code + + +if __name__ == "__main__": + sys.exit(main())
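
Notes on the parallelization patterns introduced above. The sketches that follow are illustrative only: every name, path, or value called out as a placeholder is an assumption for the example, not something taken from the patch.

Round-robin GPU pinning appears in both layers. generate_test_dataset.sh assigns GPU_ARRAY[GPU_COUNTER % NUM_GPUS] to each background dataset job, and gtest_parallel.py assigns available_gpus[task_counter % len(available_gpus)] to each test; in both cases the choice reaches the child process by exporting HIP_VISIBLE_DEVICES so it sees a single device (a typical invocation, assuming the build-directory layout from the Jenkins stage, might be python3 ../test_data/gtest_parallel.py --workers 16 --gpus 4 ./bin/test_grouped_convnd_fwd_dataset_xdl). A minimal Python sketch of the pattern, with placeholder commands and GPU ids:

    import itertools
    import os
    import subprocess

    def launch_round_robin(commands, gpu_ids):
        # Each child gets HIP_VISIBLE_DEVICES set to exactly one GPU id,
        # cycling through the pool so jobs spread evenly across devices.
        pool = itertools.cycle(gpu_ids)
        procs = []
        for cmd in commands:
            env = os.environ.copy()
            env["HIP_VISIBLE_DEVICES"] = next(pool)
            procs.append(subprocess.Popen(cmd, env=env))
        return [p.wait() for p in procs]

    # Hypothetical usage: four no-op jobs spread across GPUs 0 and 1.
    launch_round_robin([["true"]] * 4, ["0", "1"])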
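
The shell script bounds concurrency by collecting PIDs in job_pids, calling wait -n once the array reaches MAX_PARALLEL_JOBS, and pruning finished PIDs with kill -0. In Python the same cap falls out of a fixed-size worker pool; a sketch under that assumption (the command list is a placeholder):

    import subprocess
    from concurrent.futures import ThreadPoolExecutor

    def run_bounded(commands, max_parallel_jobs):
        # At most max_parallel_jobs children run at once; extra submissions
        # queue until a worker frees up, mirroring the wait -n loop above.
        with ThreadPoolExecutor(max_workers=max_parallel_jobs) as pool:
            futures = [pool.submit(subprocess.run, cmd) for cmd in commands]
            return [f.result().returncode for f in futures]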
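
Expected behaviour of get_available_gpus(), shown as a hypothetical session (the device ids are made up): when HIP_VISIBLE_DEVICES is set, --gpus N selects the first N visible ids; when it is unset or empty, the function falls back to ids 0..N-1.

    import os

    os.environ["HIP_VISIBLE_DEVICES"] = "2,3,5"
    assert get_available_gpus(2) == ["2", "3"]  # first two visible ids

    del os.environ["HIP_VISIBLE_DEVICES"]
    assert get_available_gpus(2) == ["0", "1"]  # default numbering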
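
Scheduling order comes from Task.__sorting_key(): tests with no recorded time (new tests, or tests that failed last run, since Task.run() stores None as last_execution_time on failure) sort as slowest and therefore run first, and among timed tests the slower ones start earlier because find_tests() sorts in reverse. A standalone illustration of the key:

    def sorting_key(last_execution_time):
        # Mirrors Task.__sorting_key(): a missing time outranks any real one.
        return (1 if last_execution_time is None else 0, last_execution_time)

    tasks = [("fast", 12), ("slow", 900), ("new_or_failed", None)]
    ordered = sorted(tasks, key=lambda t: sorting_key(t[1]), reverse=True)
    assert [name for name, _ in ordered] == ["new_or_failed", "slow", "fast"]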
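
The shard filter in find_tests(), (test_count - options.shard_index) % options.shard_count == 0, hands each machine an interleaved slice of the test list so that disjoint shards cover every test exactly once. The arithmetic, checked on toy indices:

    shard_count, shard_index = 3, 1
    mine = [i for i in range(10) if (i - shard_index) % shard_count == 0]
    assert mine == [1, 4, 7]  # shard 1 of 3 over tests 0..9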
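
CollectTestResults.log() builds the Chromium JSON test-results layout: the test name is split on "." into nested dicts, and repeated runs of the same test append to its "actual" and "times" fields. A hypothetical two-run example (the output path and test name are made up):

    results = CollectTestResults("/tmp/results.json")  # placeholder path
    results.log("ConvTest.Fwd2D", 1.5, 0)  # first run passes
    results.log("ConvTest.Fwd2D", 1.8, 1)  # retry fails
    results.dump_to_file_and_close()
    # tests["ConvTest"]["Fwd2D"] now holds:
    #   {"actual": "PASS FAIL", "times": [1.5, 1.8],
    #    "time": 1.5, "expected": "PASS"}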
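
Recorded runtimes persist across invocations in a gzip-compressed pickle at get_save_file_path(), and TestTimes.LockedFile holds an exclusive lock (fcntl.flock on POSIX, msvcrt.locking on Windows) around each read-merge-write so concurrent runs update the cache without clobbering each other. A hypothetical round trip (the binary and test names are made up):

    save_file = get_save_file_path()
    times = TestTimes(save_file)
    times.record_test_time("./bin/some_test", "Suite.Case", 250)  # runtime in ms
    times.write_to_file(save_file)
    # A later run seeds its slowest-first ordering from the same cache:
    assert TestTimes(save_file).get_test_time("./bin/some_test", "Suite.Case") == 250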
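
Two timeout mechanisms coexist: --timeout arms a single threading.Timer that fires sigint_handler.interrupt() and tears down every outstanding subprocess at once, while --timeout_per_test bounds each individual p.wait() and maps expiry to -errno.ETIME. A reduced sketch of the one-shot global timer, simplified to a single child (command and timeout are placeholders):

    import subprocess
    import threading

    def run_with_global_timeout(cmd, timeout_seconds):
        # One timer bounds the whole run, unlike a per-job deadline.
        p = subprocess.Popen(cmd)
        timer = threading.Timer(timeout_seconds, p.terminate)
        timer.start()
        try:
            return p.wait()
        finally:
            timer.cancel()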