Fix new unit projects.

This commit is contained in:
Ville Pietilä
2025-10-08 07:47:17 +00:00
parent 0c00794e14
commit 71c266900a
9 changed files with 62 additions and 152 deletions

View File

@@ -35,4 +35,3 @@ add_subdirectory(transform_conv_to_gemm)
add_subdirectory(coordinate_transform)
add_subdirectory(tensor_view)
add_subdirectory(tensor_descriptor)
add_subdirectory(tile_distribution)

View File

@@ -1,4 +1 @@
add_gtest_executable(test_coordinate_transform test_coordinate_transform.cpp)
if(result EQUAL 0)
target_link_libraries(test_coordinate_transform PRIVATE utility)
endif()
add_gtest_executable(test_ck_tile_coordinate_transform test_coordinate_transform.cpp)

View File

@@ -1,4 +1 @@
add_gtest_executable(test_tensor_descriptor test_tensor_descriptor.cpp)
if(result EQUAL 0)
target_link_libraries(test_tensor_descriptor PRIVATE utility)
endif()
add_gtest_executable(test_ck_tile_tensor_descriptor test_tensor_descriptor.cpp)

View File

@@ -1,4 +1 @@
add_gtest_executable(test_tensor_view test_tensor_view.cpp)
if(result EQUAL 0)
target_link_libraries(test_tensor_view PRIVATE utility)
endif()
add_gtest_executable(test_ck_tile_tensor_view test_tensor_view.cpp)

View File

@@ -253,21 +253,6 @@ TEST_F(TestTensorView, StaticDistributedTensor)
hip_check_error(hipFree(output_device));
}
template <typename DistributedIndex>
__device__ void print_distributed_index(const DistributedIndex& idx)
{
printf("[");
for(auto i = 0; i < idx.impl_.size(); i++)
{
printf("%d", idx.impl_[i]);
if(i < idx.impl_.size() - 1)
{
printf(", ");
}
}
printf("]");
}
__global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int* output, bool)
{
constexpr index_t global_shape_0 = 4;
@@ -316,16 +301,6 @@ __global__ void test_4x4_matrix_2x2_blocks_modify_input_kernel(int* input, int*
constexpr index_t x0_size = reduce_on_sequence(hs_lengths_0, multiplies{}, number<1>{});
constexpr index_t x1_size = reduce_on_sequence(hs_lengths_1, multiplies{}, number<1>{});
if(threadIdx.x == 0 && blockIdx.x == 0)
{
printf("\n- Tile distribution created:\n");
printf(" X dimensions: %d\n", distribution.get_num_of_dimension_x());
printf(" Y dimensions: %d\n", distribution.get_num_of_dimension_y());
printf(" P dimensions: %d\n", distribution.get_num_of_dimension_p());
printf(" X lengths: [%d, %d]\n", x0_size, x1_size);
}
block_sync_lds();
auto global_view = make_naive_tensor_view_packed<address_space_enum::global>(
input, make_tuple(global_shape_0, global_shape_1));
@@ -393,13 +368,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_modify_input)
output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));
// Verify the 4x4 matrix is correctly organized as 2x2 blocks
// Expected matrix:
// 2 4 6 8
// 10 12 14 16
// 18 20 22 24
// 26 28 30 32
std::vector<int> expected_output = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
// clang-format off
std::vector<int> expected_output =
{
2, 4, 6, 8,
10, 12, 14, 16,
18, 20, 22, 24,
26, 28, 30, 32
};
// clang-format on
EXPECT_EQ(output_host, expected_output);
@@ -540,9 +517,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks)
// clang-format off
std::vector<int> data_host =
{
1, 2, 3 ,4,
5, 6, 7, 8,
9, 10, 11, 12,
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
};
// clang-format on
@@ -571,13 +548,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks)
output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));
// Verify the 4x4 matrix is correctly organized as 2x2 blocks
// Expected matrix:
// 1 2
// 5 6
// 11 12
// 15 16
std::vector<int> expected_output = {1, 2, 5, 6, 11, 12, 15, 16};
// clang-format off
std::vector<int> expected_output =
{
1, 2,
5, 6,
11, 12,
15, 16
};
// clang-format on
EXPECT_EQ(output_host, expected_output);
@@ -637,7 +616,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
{0, 0}); // We have only threadblock
//------------------------------------------------------------
// CShuffle epilogue similation
// CShuffle epilogue simulation
//------------------------------------------------------------
// Allocate and prepare LDS
@@ -659,6 +638,7 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
constexpr index_t MPerIterationShuffle = 2;
constexpr index_t NPerIterationShuffle = 2;
constexpr index_t VectorSize = 1;
auto lds_tile = make_static_distributed_tensor<int>(distribution);
@@ -692,16 +672,14 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
sequence<MPerIterationShuffle, NPerIterationShuffle>,
false>;
using TileEncodingPattern =
tile_distribution_encoding_pattern_2d<4, // Block size
MPerIterationShuffle,
NPerIterationShuffle,
2, // Vector size
tile_distribution_pattern::sparse_row,
1>; // Number of wave groups
constexpr auto output_tile_distribution =
TileEncodingPattern::make_2d_static_tile_distribution();
constexpr auto output_tile_distribution = make_static_tile_distribution(
tile_distribution_encoding<sequence<>,
tuple<sequence<1, 1, MPerIterationShuffle, VectorSize>,
sequence<1, 1, NPerIterationShuffle, VectorSize>>,
tuple<sequence<1, 2>, sequence<1, 2>>,
tuple<sequence<1, 1>, sequence<2, 2>>,
sequence<1, 1, 2, 2>,
sequence<0, 3, 0, 3>>{});
// Copy the tile at one go from register to LDS.
block_sync_lds();
@@ -737,9 +715,9 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_
// clang-format off
std::vector<int> data_host =
{
1, 2, 3 ,4,
5, 6, 7, 8,
9, 10, 11, 12,
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
};
// clang-format on
@@ -769,13 +747,15 @@ TEST_F(TestTensorView, StaticDistributedTensor4x4Matrix2x2Blocks_get_sub_blocks_
output_host.data(), output_device, total_elements * sizeof(int), hipMemcpyDeviceToHost));
// Verify the 4x4 matrix is correctly organized as 2x2 blocks
// Expected matrix:
// 1 2
// 5 6
// 11 12
// 15 16
std::vector<int> expected_output = {1, 2, 5, 6, 11, 12, 15, 16};
// clang-format off
std::vector<int> expected_output =
{
1, 2,
5, 6,
11, 12,
15, 16
};
// clang-format on
EXPECT_EQ(output_host, expected_output);

View File

@@ -1,4 +0,0 @@
add_gtest_executable(test_tile_distribution test_tile_distribution.cpp)
if(result EQUAL 0)
target_link_libraries(test_tile_distribution PRIVATE utility)
endif()

View File

@@ -1,50 +0,0 @@
#include <gtest/gtest.h>
#include <vector>
#include "ck_tile/core/tensor/tile_distribution.hpp"
#include "ck_tile/core/tensor/tensor_descriptor.hpp"
using namespace ck_tile;
class TestTileDistribution : public ::testing::Test
{
protected:
void SetUp() override {}
void TearDown() override {}
};
TEST_F(TestTileDistribution, 4x4_matrix_2x2_blocks)
{
constexpr index_t MRepeat = 1;
constexpr index_t NRepeat = 1;
constexpr index_t MWarpPerBlock = 1;
constexpr index_t NWarpPerBlock = 1;
constexpr index_t MThreadPerWarp = 2;
constexpr index_t NThreadPerWarp = 2;
constexpr index_t MVectorPerThread = 2;
constexpr index_t NVectorPerThread = 2;
// Tile distribution encoding for 4x4 matrix as 2x2 blocks
constexpr auto matrix_4x4_dstr_encoding = tile_distribution_encoding<
sequence<>, // No reduction dims
tuple<sequence<MRepeat, MWarpPerBlock, MThreadPerWarp, MVectorPerThread>,
sequence<NRepeat, NWarpPerBlock, NThreadPerWarp, NVectorPerThread>>,
tuple<sequence<1, 2>, sequence<1, 2>>, // 2D thread grid mapping
tuple<sequence<1, 1>, sequence<2, 2>>, // Warp arrangement
sequence<1, 1, 2, 2>, // Dimension order
sequence<0, 3, 0, 3>>{}; // Each thread has 2x2 blocks.
constexpr auto matrix_4x4_dstr = make_static_tile_distribution(matrix_4x4_dstr_encoding);
EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_x(), 2);
EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_y(), 2);
EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_p(), 1);
EXPECT_EQ(matrix_4x4_dstr.get_num_of_dimension_r(), 0);
const auto distributed_spans = matrix_4x4_dstr.get_distributed_spans();
EXPECT_EQ(distributed_spans.size(), 2);
EXPECT_EQ(distributed_spans[number<0>{}].impl_.size(), 1); // M dimension
EXPECT_EQ(distributed_spans[number<1>{}].impl_.size(), 1); // N dimension
EXPECT_EQ(distributed_spans[number<0>{}].impl_[0], 4); // M dimension
EXPECT_EQ(distributed_spans[number<1>{}].impl_[0], 4); // N dimension
}

View File

@@ -1,4 +1 @@
add_gtest_executable(test_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp)
if(result EQUAL 0)
target_link_libraries(test_transform_conv_bwd_weight_to_gemm PRIVATE utility)
endif()
add_gtest_executable(test_ck_tile_transform_conv_bwd_weight_to_gemm test_transform_conv_bwd_weight_to_gemm.cpp)

View File

@@ -24,6 +24,9 @@ struct TestConfig
static constexpr ConvolutionSpecialization ConvSpec = ConvolutionSpecialization::Default;
static constexpr bool SplitN = false;
static constexpr index_t NumberOfGroupsToMerge = NumGroupsToMerge;
static constexpr index_t VectorSizeA = 2;
static constexpr index_t VectorSizeB = 4;
static constexpr index_t VectorSizeC = 8;
using ADataType = float;
using CDataType = float;
@@ -31,6 +34,9 @@ struct TestConfig
using TransformType = TransformConvBwdWeightToGemm<NDimSpatial,
ConvSpec,
VectorSizeA,
VectorSizeB,
VectorSizeC,
NumGroupsToMerge,
SplitN,
ADataType,
@@ -210,7 +216,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
EXPECT_EQ(transform.Wi_, this->Wi_);
EXPECT_EQ(transform.Wo_, this->Wo_);
EXPECT_EQ(transform.X_, this->X_);
EXPECT_EQ(transform.ZYX_, this->X_);
}
else if constexpr(NDim == 2)
{
@@ -233,7 +238,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
EXPECT_EQ(transform.Wo_, this->Wo_);
EXPECT_EQ(transform.Y_, this->Y_);
EXPECT_EQ(transform.X_, this->X_);
EXPECT_EQ(transform.ZYX_, this->Y_ * this->X_);
}
else if constexpr(NDim == 3)
{
@@ -259,7 +263,6 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, Constructor)
EXPECT_EQ(transform.Z_, this->Z_);
EXPECT_EQ(transform.Y_, this->Y_);
EXPECT_EQ(transform.X_, this->X_);
EXPECT_EQ(transform.ZYX_, this->Z_ * this->Y_ * this->X_);
}
}
@@ -302,15 +305,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
{
EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);
EXPECT_EQ(in_grid_desc.get_length(I2), Gm);
EXPECT_EQ(in_grid_desc.get_length(I3), this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I3), this->X_ * this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->X_ * this->C_);
EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Wo_);
@@ -356,15 +357,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
{
EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);
EXPECT_EQ(in_grid_desc.get_length(I3), Gm);
EXPECT_EQ(in_grid_desc.get_length(I4), this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I3), this->Y_ * this->X_ * this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Y_ * this->X_ * this->C_);
EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Ho_ * this->Wo_);
@@ -411,15 +410,13 @@ TYPED_TEST(TestTransformConvBwdWeightToGemm, GridDescriptors)
{
EXPECT_EQ(out_grid_desc.get_num_of_dimension(), 3);
EXPECT_EQ(in_grid_desc.get_num_of_dimension(), NDim + 3);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 4);
EXPECT_EQ(wei_grid_desc.get_num_of_dimension(), 2);
EXPECT_EQ(in_grid_desc.get_length(I4), Gm);
EXPECT_EQ(in_grid_desc.get_length(I5), this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), 1); // Padding dimension
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm);
EXPECT_EQ(wei_grid_desc.get_length(I2), this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I3), this->Z_ * this->Y_ * this->X_ * this->C_);
EXPECT_EQ(wei_grid_desc.get_length(I0), Gm * this->K_);
EXPECT_EQ(wei_grid_desc.get_length(I1), Gm * this->Z_ * this->Y_ * this->X_ * this->C_);
EXPECT_EQ(out_grid_desc.get_length(I1), Gm);
EXPECT_EQ(out_grid_desc.get_length(I2), this->N_ * this->Do_ * this->Ho_ * this->Wo_);