From 71c266900a61377c828c38960197ba40fd15f5d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= Date: Wed, 8 Oct 2025 07:47:17 +0000 Subject: [PATCH] Fix new unit projects. --- test/ck_tile/CMakeLists.txt | 1 - .../coordinate_transform/CMakeLists.txt | 5 +- test/ck_tile/tensor_descriptor/CMakeLists.txt | 5 +- test/ck_tile/tensor_view/CMakeLists.txt | 5 +- test/ck_tile/tensor_view/test_tensor_view.cpp | 106 +++++++----------- test/ck_tile/tile_distribution/CMakeLists.txt | 4 - .../test_tile_distribution.cpp | 50 --------- .../transform_conv_to_gemm/CMakeLists.txt | 5 +- ...test_transform_conv_bwd_weight_to_gemm.cpp | 33 +++--- 9 files changed, 62 insertions(+), 152 deletions(-) delete mode 100644 test/ck_tile/tile_distribution/CMakeLists.txt delete mode 100644 test/ck_tile/tile_distribution/test_tile_distribution.cpp diff --git a/test/ck_tile/CMakeLists.txt b/test/ck_tile/CMakeLists.txt index cca5145e27..ab75d2106a 100644 --- a/test/ck_tile/CMakeLists.txt +++ b/test/ck_tile/CMakeLists.txt @@ -35,4 +35,3 @@ add_subdirectory(transform_conv_to_gemm) add_subdirectory(coordinate_transform) add_subdirectory(tensor_view) add_subdirectory(tensor_descriptor) -add_subdirectory(tile_distribution) diff --git a/test/ck_tile/coordinate_transform/CMakeLists.txt b/test/ck_tile/coordinate_transform/CMakeLists.txt index f4145b8bbb..9fd4e0fa6d 100644 --- a/test/ck_tile/coordinate_transform/CMakeLists.txt +++ b/test/ck_tile/coordinate_transform/CMakeLists.txt @@ -1,4 +1 @@ -add_gtest_executable(test_coordinate_transform test_coordinate_transform.cpp) -if(result EQUAL 0) - target_link_libraries(test_coordinate_transform PRIVATE utility) -endif() \ No newline at end of file +add_gtest_executable(test_ck_tile_coordinate_transform test_coordinate_transform.cpp) diff --git a/test/ck_tile/tensor_descriptor/CMakeLists.txt b/test/ck_tile/tensor_descriptor/CMakeLists.txt index 61338dd07f..0433884620 100644 --- a/test/ck_tile/tensor_descriptor/CMakeLists.txt +++ b/test/ck_tile/tensor_descriptor/CMakeLists.txt @@ -1,4 +1 @@ -add_gtest_executable(test_tensor_descriptor test_tensor_descriptor.cpp) -if(result EQUAL 0) - target_link_libraries(test_tensor_descriptor PRIVATE utility) -endif() \ No newline at end of file +add_gtest_executable(test_ck_tile_tensor_descriptor test_tensor_descriptor.cpp) diff --git a/test/ck_tile/tensor_view/CMakeLists.txt b/test/ck_tile/tensor_view/CMakeLists.txt index db79356d3c..69a2c9acf4 100644 --- a/test/ck_tile/tensor_view/CMakeLists.txt +++ b/test/ck_tile/tensor_view/CMakeLists.txt @@ -1,4 +1 @@ -add_gtest_executable(test_tensor_view test_tensor_view.cpp) -if(result EQUAL 0) - target_link_libraries(test_tensor_view PRIVATE utility) -endif() \ No newline at end of file +add_gtest_executable(test_ck_tile_tensor_view test_tensor_view.cpp) diff --git a/test/ck_tile/tensor_view/test_tensor_view.cpp b/test/ck_tile/tensor_view/test_tensor_view.cpp index 21a4adc829..877ae26975 100644 --- a/test/ck_tile/tensor_view/test_tensor_view.cpp +++ b/test/ck_tile/tensor_view/test_tensor_view.cpp @@ -253,21 +253,6 @@ TEST_F(TestTensorView, StaticDistributedTensor) hip_check_error(hipFree(output_device)); } -template -__device__ void print_distributed_index(const DistributedIndex& idx) -{ - printf("["); - for(auto i = 0; i < idx.impl_.size(); i++) - { - printf("%d", idx.impl_[i]); - if(i < idx.impl_.size() - 1) - { - printf(", "); - } - } - printf("]"); -} - __global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int* output, bool) { constexpr index_t global_shape_0 = 4; @@ -316,16 +301,6 @@ __global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int* constexpr index_t x0_size = reduce_on_sequence(hs_lengths_0, multiplies{}, number<1>{}); constexpr index_t x1_size = reduce_on_sequence(hs_lengths_1, multiplies{}, number<1>{}); - if(threadIdx.x == 0 && blockIdx.x == 0) - { - printf("\n- Tile distribution created:\n"); - printf(" X dimensions: %d\n", distribution.get_num_of_dimension_x()); - printf(" Y dimensions: %d\n", distribution.get_num_of_dimension_y()); - printf(" P dimensions: %d\n", distribution.get_num_of_dimension_p()); - printf(" X lengths: [%d, %d]\n", x0_size, x1_size); - } - block_sync_lds(); - auto global_view = make_naive_tensor_view_packed( input, make_tuple(global_shape_0, global_shape_1)); @@ -393,13 +368,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_modify_input) output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost)); // Verify the 4x4 matrix is correctly organized as 2x2 blocks - // Expected matrix: - // 2 4 6 8 - // 10 12 14 16 - // 18 20 22 24 - // 26 28 30 32 - - std::vector expected_output = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + // clang-format off + std::vector expected_output = + { + 2, 4, 6, 8, + 10, 12, 14, 16, + 18, 20, 22, 24, + 26, 28, 30, 32 + }; + // clang-format on EXPECT_EQ(output_host, expected_output); @@ -540,9 +517,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks) // clang-format off std::vector data_host = { - 1, 2, 3 ,4, - 5, 6, 7, 8, - 9, 10, 11, 12, + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; // clang-format on @@ -571,13 +548,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks) output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost)); // Verify the 4x4 matrix is correctly organized as 2x2 blocks - // Expected matrix: - // 1 2 - // 5 6 - // 11 12 - // 15 16 - - std::vector expected_output = {1, 2, 5, 6, 11, 12, 15, 16}; + // clang-format off + std::vector expected_output = + { + 1, 2, + 5, 6, + 11, 12, + 15, 16 + }; + // clang-format on EXPECT_EQ(output_host, expected_output); @@ -637,7 +616,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu {0, 0}); // We have only threadblock //------------------------------------------------------------ - // CShuffle epilogue similation + // CShuffle epilogue simulation //------------------------------------------------------------ // Allocate and prepare LDS @@ -659,6 +638,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu constexpr index_t MPerIterationShuffle = 2; constexpr index_t NPerIterationShuffle = 2; + constexpr index_t VectorSize = 1; auto lds_tile = make_static_distributed_tensor(distribution); @@ -692,16 +672,14 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu sequence, false>; - using TileEncodingPattern = - tile_distribution_encoding_pattern_2d<4, // Block size - MPerIterationShuffle, - NPerIterationShuffle, - 2, // Vector size - tile_distribution_pattern::sparse_row, - 1>; // Number of wave groups - - constexpr auto output_tile_distribution = - TileEncodingPattern::make_2d_static_tile_distribution(); + constexpr auto output_tile_distribution = make_static_tile_distribution( + tile_distribution_encoding, + tuple, + sequence<1, 1, NPerIterationShuffle, VectorSize>>, + tuple, sequence<1, 2>>, + tuple, sequence<2, 2>>, + sequence<1, 1, 2, 2>, + sequence<0, 3, 0, 3>>{}); // Copy the tile at one go from register to LDS. block_sync_lds(); @@ -737,9 +715,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_ // clang-format off std::vector data_host = { - 1, 2, 3 ,4, - 5, 6, 7, 8, - 9, 10, 11, 12, + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; // clang-format on @@ -769,13 +747,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_ output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost)); // Verify the 4x4 matrix is correctly organized as 2x2 blocks - // Expected matrix: - // 1 2 - // 5 6 - // 11 12 - // 15 16 - - std::vector expected_output = {1, 2, 5, 6, 11, 12, 15, 16}; + // clang-format off + std::vector expected_output = + { + 1, 2, + 5, 6, + 11, 12, + 15, 16 + }; + // clang-format on EXPECT_EQ(output_host, expected_output); diff --git a/test/ck_tile/tile_distribution/CMakeLists.txt b/test/ck_tile/tile_distribution/CMakeLists.txt deleted file mode 100644 index e030a5c906..0000000000 --- a/test/ck_tile/tile_distribution/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_gtest_executable(test_tile_distribution test_tile_distribution.cpp) -if(result EQUAL 0) - target_link_libraries(test_tile_distribution PRIVATE utility) -endif() \ No newline at end of file diff --git a/test/ck_tile/tile_distribution/test_tile_distribution.cpp b/test/ck_tile/tile_distribution/test_tile_distribution.cpp deleted file mode 100644 index 8ec86f4cfc..0000000000 --- a/test/ck_tile/tile_distribution/test_tile_distribution.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include - -#include "ck_tile/core/tensor/tile_distribution.hpp" -#include "ck_tile/core/tensor/tensor_descriptor.hpp" - -using namespace ck_tile; - -class TestTileDistribution : public ::testing::Test -{ - protected: - void SetUp() override {} - void TearDown() override {} -}; - -TEST_F(TestTileDistribution, 4x4_matrix_2x2_blocks) -{ - constexpr index_t MRepeat = 1; - constexpr index_t NRepeat = 1; - constexpr index_t MWarpPerBlock = 1; - constexpr index_t NWarpPerBlock = 1; - constexpr index_t MThreadPerWarp = 2; - constexpr index_t NThreadPerWarp = 2; - constexpr index_t MVectorPerThread = 2; - constexpr index_t NVectorPerThread = 2; - - // Tile distribution encoding for 4x4 matrix as 2x2 blocks - constexpr auto matrix_4x4_dstr_encoding = tile_distribution_encoding< - sequence<>, // No reduction dims - tuple, - sequence>, - tuple, sequence<1, 2>>, // 2D thread grid mapping - tuple, sequence<2, 2>>, // Warp arrangement - sequence<1, 1, 2, 2>, // Dimension order - sequence<0, 3, 0, 3>>{}; // Each thread has 2x2 blocks. - - constexpr auto matrix_4x4_dstr = make_static_tile_distribution(matrix_4x4_dstr_encoding); - - EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_x(), 2); - EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_y(), 2); - EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_p(), 1); - EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_r(), 0); - - const auto distributed_spans = matrix_4x4_dstr.get_distributed_spans(); - EXPECT_EQ(distributed_spans.size(), 2); - EXPECT_EQ(distributed_spans[number<0>{}].impl_.size(), 1); // M dimension - EXPECT_EQ(distributed_spans[number<1>{}].impl_.size(), 1); // N dimension - EXPECT_EQ(distributed_spans[number<0>{}].impl_[0], 4); // M dimension - EXPECT_EQ(distributed_spans[number<1>{}].impl_[0], 4); // N dimension -} diff --git a/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt b/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt index ae6d65ab42..4564894206 100644 --- a/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt +++ b/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt @@ -1,4 +1 @@ -add_gtest_executable(test_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp) -if(result EQUAL 0) - target_link_libraries(test_transform_conv_bwd_weight_to_gemm PRIVATE utility) -endif() \ No newline at end of file +add_gtest_executable(test_ck_tile_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp) diff --git a/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp b/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp index 9d8693a667..b0e220cf69 100644 --- a/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp +++ b/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp @@ -24,6 +24,9 @@ struct TestConfig static constexpr ConvolutionSpecialization ConvSpec = ConvolutionSpecialization::Default; static constexpr bool SplitN = false; static constexpr index_t NumberOfGroupsToMerge = NumGroupsToMerge; + static constexpr index_t VectorSizeA = 2; + static constexpr index_t VectorSizeB = 4; + static constexpr index_t VectorSizeC = 8; using ADataType = float; using CDataType = float; @@ -31,6 +34,9 @@ struct TestConfig using TransformType = TransformConvBwdWeightToGemmWi_); EXPECT_EQ(transform.Wo_, this->Wo_); EXPECT_EQ(transform.X_, this->X_); - EXPECT_EQ(transform.ZYX_, this->X_); } else if constexpr(NDim == 2) { @@ -233,7 +238,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor) EXPECT_EQ(transform.Wo_, this->Wo_); EXPECT_EQ(transform.Y_, this->Y_); EXPECT_EQ(transform.X_, this->X_); - EXPECT_EQ(transform.ZYX_, this->Y_ * this->X_); } else if constexpr(NDim == 3) { @@ -259,7 +263,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor) EXPECT_EQ(transform.Z_, this->Z_); EXPECT_EQ(transform.Y_, this->Y_); EXPECT_EQ(transform.X_, this->X_); - EXPECT_EQ(transform.ZYX_, this->Z_ * this->Y_ * this->X_); } } @@ -302,15 +305,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors) { EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3); EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3); - EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4); + EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2); EXPECT_EQ(in_grid_desc.get_length(I2), Gm); EXPECT_EQ(in_grid_desc.get_length(I3), this->C_); - EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension - EXPECT_EQ(wei_grid_desc.get_length(I1), Gm); - EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_); - EXPECT_EQ(wei_grid_desc.get_length(I3), this->X_ * this->C_); + EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_); + EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->X_ * this->C_); EXPECT_EQ(out_grid_desc.get_length(I1), Gm); EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Wo_); @@ -356,15 +357,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors) { EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3); EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3); - EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4); + EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2); EXPECT_EQ(in_grid_desc.get_length(I3), Gm); EXPECT_EQ(in_grid_desc.get_length(I4), this->C_); - EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension - EXPECT_EQ(wei_grid_desc.get_length(I1), Gm); - EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_); - EXPECT_EQ(wei_grid_desc.get_length(I3), this->Y_ * this->X_ * this->C_); + EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_); + EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Y_ * this->X_ * this->C_); EXPECT_EQ(out_grid_desc.get_length(I1), Gm); EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Ho_ * this->Wo_); @@ -411,15 +410,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors) { EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3); EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3); - EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4); + EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2); EXPECT_EQ(in_grid_desc.get_length(I4), Gm); EXPECT_EQ(in_grid_desc.get_length(I5), this->C_); - EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension - EXPECT_EQ(wei_grid_desc.get_length(I1), Gm); - EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_); - EXPECT_EQ(wei_grid_desc.get_length(I3), this->Z_ * this->Y_ * this->X_ * this->C_); + EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_); + EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Z_ * this->Y_ * this->X_ * this->C_); EXPECT_EQ(out_grid_desc.get_length(I1), Gm); EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Do_ * this->Ho_ * this->Wo_);