Fix new unit projects.

2026-06-30 19:57:40 +00:00 · 2025-10-08 07:47:17 +00:00
parent 0c00794e14
commit 71c266900a
9 changed files with 62 additions and 152 deletions
--- a/test/ck_tile/CMakeLists.txt
+++ b/test/ck_tile/CMakeLists.txt
@@ -35,4 +35,3 @@ add_subdirectory(transform_conv_to_gemm)
 add_subdirectory(coordinate_transform)
 add_subdirectory(tensor_view)
 add_subdirectory(tensor_descriptor)
-add_subdirectory(tile_distribution)
--- a/test/ck_tile/coordinate_transform/CMakeLists.txt
+++ b/test/ck_tile/coordinate_transform/CMakeLists.txt
@@ -1,4 +1 @@
-add_gtest_executable(test_coordinate_transform test_coordinate_transform.cpp)
-if(result EQUAL 0)
-    target_link_libraries(test_coordinate_transform PRIVATE utility)
-endif()
+add_gtest_executable(test_ck_tile_coordinate_transform test_coordinate_transform.cpp)
--- a/test/ck_tile/tensor_descriptor/CMakeLists.txt
+++ b/test/ck_tile/tensor_descriptor/CMakeLists.txt
@@ -1,4 +1 @@
-add_gtest_executable(test_tensor_descriptor test_tensor_descriptor.cpp)
-if(result EQUAL 0)
-    target_link_libraries(test_tensor_descriptor PRIVATE utility)
-endif()
+add_gtest_executable(test_ck_tile_tensor_descriptor test_tensor_descriptor.cpp)
--- a/test/ck_tile/tensor_view/CMakeLists.txt
+++ b/test/ck_tile/tensor_view/CMakeLists.txt
@@ -1,4 +1 @@
-add_gtest_executable(test_tensor_view test_tensor_view.cpp)
-if(result EQUAL 0)
-    target_link_libraries(test_tensor_view PRIVATE utility)
-endif()
+add_gtest_executable(test_ck_tile_tensor_view test_tensor_view.cpp)
--- a/test/ck_tile/tensor_view/test_tensor_view.cpp
+++ b/test/ck_tile/tensor_view/test_tensor_view.cpp
@@ -253,21 +253,6 @@ TEST_F(TestTensorView, StaticDistributedTensor)
    hip_check_error(hipFree(output_device));
 }

-template <typename DistributedIndex>
-__device__ void print_distributed_index(const DistributedIndex& idx)
-{
-    printf("[");
-    for(auto i = 0; i < idx.impl_.size(); i++)
-    {
-        printf("%d", idx.impl_[i]);
-        if(i < idx.impl_.size() - 1)
-        {
-            printf(", ");
-        }
-    }
-    printf("]");
-}
-
 __global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int* output, bool)
 {
    constexpr index_t global_shape_0 = 4;
@@ -316,16 +301,6 @@ __global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int*
    constexpr index_t x0_size = reduce_on_sequence(hs_lengths_0, multiplies{}, number<1>{});
    constexpr index_t x1_size = reduce_on_sequence(hs_lengths_1, multiplies{}, number<1>{});

-    if(threadIdx.x == 0 && blockIdx.x == 0)
-    {
-        printf("\n- Tile distribution created:\n");
-        printf("  X dimensions: %d\n", distribution.get_num_of_dimension_x());
-        printf("  Y dimensions: %d\n", distribution.get_num_of_dimension_y());
-        printf("  P dimensions: %d\n", distribution.get_num_of_dimension_p());
-        printf("  X lengths: [%d, %d]\n", x0_size, x1_size);
-    }
-    block_sync_lds();
-
    auto global_view = make_naive_tensor_view_packed<address_space_enum::global>(
        input, make_tuple(global_shape_0, global_shape_1));

@@ -393,13 +368,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_modify_input)
        output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));

    // Verify the 4x4 matrix is correctly organized as 2x2 blocks
-    // Expected matrix:
-    //  2  4  6  8
-    //  10 12 14 16
-    //  18 20 22 24
-    //  26 28 30 32
-
-    std::vector<int> expected_output = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+    // clang-format off
+    std::vector<int> expected_output = 
+        {
+            2,  4,  6,  8, 
+            10, 12, 14, 16, 
+            18, 20, 22, 24, 
+            26, 28, 30, 32
+        };
+    // clang-format on

    EXPECT_EQ(output_host, expected_output);

@@ -540,9 +517,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks)
    // clang-format off
    std::vector<int> data_host = 
      {
-        1, 2, 3 ,4,
-        5, 6, 7, 8, 
-        9, 10, 11, 12,
+        1,  2,  3,  4,
+        5,  6,  7,  8, 
+        9,  10, 11, 12,
        13, 14, 15, 16
      };
    // clang-format on
@@ -571,13 +548,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks)
        output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));

    // Verify the 4x4 matrix is correctly organized as 2x2 blocks
-    // Expected matrix:
-    //  1  2
-    //  5  6
-    //  11 12
-    //  15 16
-
-    std::vector<int> expected_output = {1, 2, 5, 6, 11, 12, 15, 16};
+    // clang-format off
+    std::vector<int> expected_output = 
+        {
+            1,  2, 
+            5,  6, 
+            11, 12, 
+            15, 16
+        };
+    // clang-format on

    EXPECT_EQ(output_host, expected_output);

@@ -637,7 +616,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
                         {0, 0}); // We have only threadblock

    //------------------------------------------------------------
-    // CShuffle epilogue similation
+    // CShuffle epilogue simulation
    //------------------------------------------------------------

    // Allocate and prepare LDS
@@ -659,6 +638,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu

    constexpr index_t MPerIterationShuffle = 2;
    constexpr index_t NPerIterationShuffle = 2;
+    constexpr index_t VectorSize           = 1;

    auto lds_tile = make_static_distributed_tensor<int>(distribution);

@@ -692,16 +672,14 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
                                         sequence<MPerIterationShuffle, NPerIterationShuffle>,
                                         false>;

-    using TileEncodingPattern =
-        tile_distribution_encoding_pattern_2d<4, // Block size
-                                              MPerIterationShuffle,
-                                              NPerIterationShuffle,
-                                              2, // Vector size
-                                              tile_distribution_pattern::sparse_row,
-                                              1>; // Number of wave groups
-
-    constexpr auto output_tile_distribution =
-        TileEncodingPattern::make_2d_static_tile_distribution();
+    constexpr auto output_tile_distribution = make_static_tile_distribution(
+        tile_distribution_encoding<sequence<>,
+                                   tuple<sequence<1, 1, MPerIterationShuffle, VectorSize>,
+                                         sequence<1, 1, NPerIterationShuffle, VectorSize>>,
+                                   tuple<sequence<1, 2>, sequence<1, 2>>,
+                                   tuple<sequence<1, 1>, sequence<2, 2>>,
+                                   sequence<1, 1, 2, 2>,
+                                   sequence<0, 3, 0, 3>>{});

    // Copy the tile at one go from register to LDS.
    block_sync_lds();
@@ -737,9 +715,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_
    // clang-format off
    std::vector<int> data_host = 
      {
-        1, 2, 3 ,4,
-        5, 6, 7, 8, 
-        9, 10, 11, 12,
+        1,  2,  3,  4,
+        5,  6,  7,  8, 
+        9,  10, 11, 12,
        13, 14, 15, 16
      };
    // clang-format on
@@ -769,13 +747,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_
        output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));

    // Verify the 4x4 matrix is correctly organized as 2x2 blocks
-    // Expected matrix:
-    //  1  2
-    //  5  6
-    //  11 12
-    //  15 16
-
-    std::vector<int> expected_output = {1, 2, 5, 6, 11, 12, 15, 16};
+    // clang-format off
+    std::vector<int> expected_output = 
+        {
+            1,  2, 
+            5,  6, 
+            11, 12, 
+            15, 16
+        };
+    // clang-format on

    EXPECT_EQ(output_host, expected_output);

--- a/test/ck_tile/tile_distribution/CMakeLists.txt
+++ b/test/ck_tile/tile_distribution/CMakeLists.txt
@@ -1,4 +0,0 @@
-add_gtest_executable(test_tile_distribution test_tile_distribution.cpp)
-if(result EQUAL 0)
-    target_link_libraries(test_tile_distribution PRIVATE utility)
-endif()
--- a/test/ck_tile/tile_distribution/test_tile_distribution.cpp
+++ b/test/ck_tile/tile_distribution/test_tile_distribution.cpp
@@ -1,50 +0,0 @@
-#include <gtest/gtest.h>
-#include <vector>
-
-#include "ck_tile/core/tensor/tile_distribution.hpp"
-#include "ck_tile/core/tensor/tensor_descriptor.hpp"
-
-using namespace ck_tile;
-
-class TestTileDistribution : public ::testing::Test
-{
-    protected:
-    void SetUp() override {}
-    void TearDown() override {}
-};
-
-TEST_F(TestTileDistribution, 4x4_matrix_2x2_blocks)
-{
-    constexpr index_t MRepeat          = 1;
-    constexpr index_t NRepeat          = 1;
-    constexpr index_t MWarpPerBlock    = 1;
-    constexpr index_t NWarpPerBlock    = 1;
-    constexpr index_t MThreadPerWarp   = 2;
-    constexpr index_t NThreadPerWarp   = 2;
-    constexpr index_t MVectorPerThread = 2;
-    constexpr index_t NVectorPerThread = 2;
-
-    // Tile distribution encoding for 4x4 matrix as 2x2 blocks
-    constexpr auto matrix_4x4_dstr_encoding = tile_distribution_encoding<
-        sequence<>, // No reduction dims
-        tuple<sequence<MRepeat, MWarpPerBlock, MThreadPerWarp, MVectorPerThread>,
-              sequence<NRepeat, NWarpPerBlock, NThreadPerWarp, NVectorPerThread>>,
-        tuple<sequence<1, 2>, sequence<1, 2>>, // 2D thread grid mapping
-        tuple<sequence<1, 1>, sequence<2, 2>>, // Warp arrangement
-        sequence<1, 1, 2, 2>,                  // Dimension order
-        sequence<0, 3, 0, 3>>{};               // Each thread has 2x2 blocks.
-
-    constexpr auto matrix_4x4_dstr = make_static_tile_distribution(matrix_4x4_dstr_encoding);
-
-    EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_x(), 2);
-    EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_y(), 2);
-    EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_p(), 1);
-    EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_r(), 0);
-
-    const auto distributed_spans = matrix_4x4_dstr.get_distributed_spans();
-    EXPECT_EQ(distributed_spans.size(), 2);
-    EXPECT_EQ(distributed_spans[number<0>{}].impl_.size(), 1); // M dimension
-    EXPECT_EQ(distributed_spans[number<1>{}].impl_.size(), 1); // N dimension
-    EXPECT_EQ(distributed_spans[number<0>{}].impl_[0], 4);     // M dimension
-    EXPECT_EQ(distributed_spans[number<1>{}].impl_[0], 4);     // N dimension
-}
--- a/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt
+++ b/test/ck_tile/transform_conv_to_gemm/CMakeLists.txt
@@ -1,4 +1 @@
-add_gtest_executable(test_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp)
-if(result EQUAL 0)
-    target_link_libraries(test_transform_conv_bwd_weight_to_gemm PRIVATE utility)
-endif()
+add_gtest_executable(test_ck_tile_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp)
--- a/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp
+++ b/test/ck_tile/transform_conv_to_gemm/test_transform_conv_bwd_weight_to_gemm.cpp
@@ -24,6 +24,9 @@ struct TestConfig
    static constexpr ConvolutionSpecialization ConvSpec = ConvolutionSpecialization::Default;
    static constexpr bool SplitN                        = false;
    static constexpr index_t NumberOfGroupsToMerge      = NumGroupsToMerge;
+    static constexpr index_t VectorSizeA                = 2;
+    static constexpr index_t VectorSizeB                = 4;
+    static constexpr index_t VectorSizeC                = 8;

    using ADataType = float;
    using CDataType = float;
@@ -31,6 +34,9 @@ struct TestConfig

    using TransformType = TransformConvBwdWeightToGemm<NDimSpatial,
                                                       ConvSpec,
+                                                       VectorSizeA,
+                                                       VectorSizeB,
+                                                       VectorSizeC,
                                                       NumGroupsToMerge,
                                                       SplitN,
                                                       ADataType,
@@ -210,7 +216,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
        EXPECT_EQ(transform.Wi_, this->Wi_);
        EXPECT_EQ(transform.Wo_, this->Wo_);
        EXPECT_EQ(transform.X_, this->X_);
-        EXPECT_EQ(transform.ZYX_, this->X_);
    }
    else if constexpr(NDim == 2)
    {
@@ -233,7 +238,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
        EXPECT_EQ(transform.Wo_, this->Wo_);
        EXPECT_EQ(transform.Y_, this->Y_);
        EXPECT_EQ(transform.X_, this->X_);
-        EXPECT_EQ(transform.ZYX_, this->Y_ * this->X_);
    }
    else if constexpr(NDim == 3)
    {
@@ -259,7 +263,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
        EXPECT_EQ(transform.Z_, this->Z_);
        EXPECT_EQ(transform.Y_, this->Y_);
        EXPECT_EQ(transform.X_, this->X_);
-        EXPECT_EQ(transform.ZYX_, this->Z_ * this->Y_ * this->X_);
    }
 }

@@ -302,15 +305,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
        {
            EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
            EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
-            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
+            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);

            EXPECT_EQ(in_grid_desc.get_length(I2), Gm);
            EXPECT_EQ(in_grid_desc.get_length(I3), this->C_);

-            EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
-            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
-            EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
-            EXPECT_EQ(wei_grid_desc.get_length(I3), this->X_ * this->C_);
+            EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
+            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->X_ * this->C_);

            EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
            EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Wo_);
@@ -356,15 +357,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
        {
            EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
            EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
-            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
+            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);

            EXPECT_EQ(in_grid_desc.get_length(I3), Gm);
            EXPECT_EQ(in_grid_desc.get_length(I4), this->C_);

-            EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
-            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
-            EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
-            EXPECT_EQ(wei_grid_desc.get_length(I3), this->Y_ * this->X_ * this->C_);
+            EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
+            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Y_ * this->X_ * this->C_);

            EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
            EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Ho_ * this->Wo_);
@@ -411,15 +410,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
        {
            EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
            EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
-            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
+            EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);

            EXPECT_EQ(in_grid_desc.get_length(I4), Gm);
            EXPECT_EQ(in_grid_desc.get_length(I5), this->C_);

-            EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
-            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
-            EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
-            EXPECT_EQ(wei_grid_desc.get_length(I3), this->Z_ * this->Y_ * this->X_ * this->C_);
+            EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
+            EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Z_ * this->Y_ * this->X_ * this->C_);

            EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
            EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Do_ * this->Ho_ * this->Wo_);