mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 04:49:54 +00:00
[CK_BUILDER] Refactor builder factory code. (#3276)
Refactor the builder factory code into multiple files and subdirectories and a ck_tile::builder::factory namespace.
The factory implements compile-time dispatch from high-level signature and algorithm descriptors to our existing specialized convolution kernel implementations.
Major changes in this PR:
Dispatch logic is explicit in the function make_conv_instance instead of implicit in template specialization selection.
Helper code is moved to a subdirectory builder/factory/helpers.
Helpers now have unit tests.
Factories are moved to their own files.
Code moved to namespaces ck_tile::builder::factory and ck_tile::builder::factory::internal.
This does not yet fix the problem of bad error messages, but the make_conv_instance function makes the poor error messages clear. The choice of algorithm must be much more robust (perhaps with explicit enumeration in the algorithm descriptor), so that the dispatch doesn't fail.
Quality changes:
Making dispatch explicit rather than implicit will improve robustness, readability, maintainability, testability, and extensibility.
Separating code into separate files and subdirectories helps readability and extensibility.
Adding unit tests for helpers documents behavior and will enable more complex logic and functionality.
Separating files (especially unit tests) helps clarify includes and dependencies and makes code easier to refactor.
[ROCm/composable_kernel commit: 280bc42191]
This commit is contained in:
@@ -73,12 +73,17 @@ endfunction()
|
||||
# They should complete in under 10ms each and are suitable for frequent execution
|
||||
# during development.
|
||||
add_ck_builder_test(test_ckb_conv_builder
|
||||
test_bwd_weight_instance_traits.cpp
|
||||
test_conv_builder.cpp
|
||||
test_fwd_instance_traits.cpp
|
||||
test_bwd_weight_instance_traits.cpp
|
||||
test_bwd_data_instance_traits.cpp
|
||||
test_instance_traits_util.cpp
|
||||
)
|
||||
|
||||
unit_conv_elementwise_op.cpp
|
||||
unit_conv_tensor_layout.cpp
|
||||
unit_conv_tensor_type.cpp
|
||||
unit_conv_thread_block.cpp
|
||||
unit_conv_tuning_params.cpp)
|
||||
|
||||
# Tests the inline diff utility used for comparing strings in tests assertions
|
||||
add_ck_builder_test(test_ckb_inline_diff test_inline_diff.cpp)
|
||||
|
||||
@@ -91,7 +91,7 @@ static_assert(LdsTransferDescriptor<LdsTransfer>);
|
||||
|
||||
struct Epilogue
|
||||
{
|
||||
size_t m_per_wave_per_shuffle;
|
||||
size_t m_xdl_per_wave_per_shuffle;
|
||||
size_t n_per_wave_per_shuffle;
|
||||
size_t scalar_per_vector;
|
||||
};
|
||||
|
||||
0
experimental/builder/test/test_ckb_conv_builder.cpp
Normal file
0
experimental/builder/test/test_ckb_conv_builder.cpp
Normal file
@@ -67,11 +67,11 @@ struct DefaultAlgorithm
|
||||
ckb::test::TransferABC transfer{
|
||||
.a =
|
||||
{
|
||||
.block_transfer = {.k0 = 4, .m_n = 256, .k1 = 8},
|
||||
.block_transfer = {.k0 = 1, .m_n = 128, .k1 = 2},
|
||||
.lds_transfer = {.src_vector_dim = 2,
|
||||
.src_scalar_per_vector = 8,
|
||||
.lds_dst_scalar_per_vector = 8,
|
||||
.is_direct_load = true,
|
||||
.src_scalar_per_vector = 2,
|
||||
.lds_dst_scalar_per_vector = 2,
|
||||
.is_direct_load = false,
|
||||
.lds_padding = false},
|
||||
.block_transfer_access_order = {.order = {0, 1, 2}},
|
||||
.src_access_order = {.order = {0, 1, 2}},
|
||||
@@ -79,11 +79,11 @@ struct DefaultAlgorithm
|
||||
},
|
||||
.b =
|
||||
{
|
||||
.block_transfer = {.k0 = 4, .m_n = 256, .k1 = 8},
|
||||
.block_transfer = {.k0 = 1, .m_n = 128, .k1 = 2},
|
||||
.lds_transfer = {.src_vector_dim = 2,
|
||||
.src_scalar_per_vector = 8,
|
||||
.lds_dst_scalar_per_vector = 8,
|
||||
.is_direct_load = true,
|
||||
.src_scalar_per_vector = 2,
|
||||
.lds_dst_scalar_per_vector = 2,
|
||||
.is_direct_load = false,
|
||||
.lds_padding = false},
|
||||
.block_transfer_access_order = {.order = {0, 1, 2}},
|
||||
.src_access_order = {.order = {0, 1, 2}},
|
||||
@@ -92,9 +92,9 @@ struct DefaultAlgorithm
|
||||
{
|
||||
.thread_cluster_dims =
|
||||
{.m_block = 1, .m_wave_per_xdl = 32, .n_block = 1, .n_wave_per_xdl = 8},
|
||||
.epilogue = {.m_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.epilogue = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 2},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -144,22 +144,22 @@ TEST(ConvDescriptionTest, DefaultInstanceHasDetailedDescription)
|
||||
" │ ├─ Spatial thread distribution over the data tile: 0×1×2\n"
|
||||
" │ ├─ The order of accessing data tile axes: 0×1×2\n"
|
||||
" │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n"
|
||||
" │ ├─ Vector access (GMEM read) instruction size: 8\n"
|
||||
" │ ├─ Vector access (LDS write) instruction size: 8\n"
|
||||
" │ └─ LDS data layout padding (to prevent bank conflicts): 8\n"
|
||||
" │ ├─ Vector access (GMEM read) instruction size: 2\n"
|
||||
" │ ├─ Vector access (LDS write) instruction size: 2\n"
|
||||
" │ └─ LDS data layout padding (to prevent bank conflicts): 2\n"
|
||||
" ├─ B Tile transfer: \n"
|
||||
" │ ├─ Tile dimensions: 4×256×8×\n"
|
||||
" │ ├─ The innermost K subdimension size: 8\n"
|
||||
" │ ├─ Spatial thread distribution over the data tile: 0×1×2\n"
|
||||
" │ ├─ The order of accessing data tile axes: 0×1×2\n"
|
||||
" │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n"
|
||||
" │ ├─ Vector access (GMEM read) instruction size: 8\n"
|
||||
" │ ├─ Vector access (LDS write) instruction size: 8\n"
|
||||
" │ └─ LDS data layout padding (to prevent bank conflicts): 8\n"
|
||||
" │ ├─ Vector access (GMEM read) instruction size: 2\n"
|
||||
" │ ├─ Vector access (LDS write) instruction size: 2\n"
|
||||
" │ └─ LDS data layout padding (to prevent bank conflicts): 2\n"
|
||||
" └─ C Tile transfer: \n"
|
||||
" ├─ Data shuffle (number of gemm instructions per iteration): 1×1\n"
|
||||
" ├─ Spatial thread distribution used to store data: 1×32×1×8\n"
|
||||
" └─ Vector access (GMEM write) instruction size: 8"));
|
||||
" └─ Vector access (GMEM write) instruction size: 2"));
|
||||
}
|
||||
|
||||
// NOTE: BackwardDataInstanceHasDetailedDescription test is disabled because ConvFactory
|
||||
|
||||
37
experimental/builder/test/unit_conv_elementwise_op.cpp
Normal file
37
experimental/builder/test/unit_conv_elementwise_op.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include "ck_tile/builder/factory/helpers/conv_elementwise_op.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
using ::ck_tile::builder::factory::internal::ElementwiseOps;
|
||||
using enum ::ck_tile::builder::ElementwiseOperation;
|
||||
|
||||
TEST(ConvElementwiseOp, AssignsOpsForPassThrough)
|
||||
{
|
||||
using Ops = ElementwiseOps<PASS_THROUGH>;
|
||||
|
||||
EXPECT_TRUE(
|
||||
(std::is_same_v<Ops::AElementwiseOp, ck::tensor_operation::element_wise::PassThrough>));
|
||||
EXPECT_TRUE(
|
||||
(std::is_same_v<Ops::BElementwiseOp, ck::tensor_operation::element_wise::PassThrough>));
|
||||
EXPECT_TRUE(
|
||||
(std::is_same_v<Ops::CDEElementwiseOp, ck::tensor_operation::element_wise::PassThrough>));
|
||||
}
|
||||
|
||||
TEST(ConvElementwiseOp, AssignsOpsForScale)
|
||||
{
|
||||
using Ops = ElementwiseOps<SCALE>;
|
||||
|
||||
EXPECT_TRUE(
|
||||
(std::is_same_v<Ops::AElementwiseOp, ck::tensor_operation::element_wise::PassThrough>));
|
||||
EXPECT_TRUE(
|
||||
(std::is_same_v<Ops::BElementwiseOp, ck::tensor_operation::element_wise::PassThrough>));
|
||||
EXPECT_TRUE((std::is_same_v<Ops::CDEElementwiseOp, ck::tensor_operation::element_wise::Scale>));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
119
experimental/builder/test/unit_conv_tensor_layout.cpp
Normal file
119
experimental/builder/test/unit_conv_tensor_layout.cpp
Normal file
@@ -0,0 +1,119 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <type_traits>
|
||||
|
||||
// Include the helper file we're testing
|
||||
#include "ck_tile/builder/factory/helpers/conv_tensor_layout.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace ckb = ::ck_tile::builder;
|
||||
using ::ck_tile::builder::factory::internal::ConvTensorLayouts;
|
||||
using ::ck_tile::builder::factory::internal::GetTensorLayout;
|
||||
using enum ::ck_tile::builder::ConvDirection;
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor1D_NWGC_GKXC_NWGK)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout1D::NWGC_GKXC_NWGK, 1, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NWGC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NWGK>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKXC_NGKW)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout1D::NGCW_GKXC_NGKW, 1, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NGCW>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NGKW>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor1D_GNWC_GKXC_GNWK)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout1D::GNWC_GKXC_GNWK, 1, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::GNWC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::GNWK>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor1D_NGCW_GKCX_NGKW)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout1D::NGCW_GKCX_NGKW, 1, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NGCW>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKCX>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NGKW>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKYXC_NGKHW)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout2D::NGCHW_GKYXC_NGKHW, 2, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NGCHW>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKYXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NGKHW>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor2D_NHWGC_GKYXC_NHWGK)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout2D::NHWGC_GKYXC_NHWGK, 2, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NHWGC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKYXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NHWGK>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor2D_GNHWC_GKYXC_GNHWK)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout2D::GNHWC_GKYXC_GNHWK, 2, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::GNHWC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKYXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::GNHWK>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor2D_NGCHW_GKCYX_NGKHW)
|
||||
{
|
||||
using TensorLayouts = ConvTensorLayouts<ckb::GroupConvLayout2D::NGCHW_GKCYX_NGKHW, 2, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NGCHW>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKCYX>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NGKHW>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor3D_NGCDHW_GKCZYX_NGKDHW)
|
||||
{
|
||||
using TensorLayouts =
|
||||
ConvTensorLayouts<ckb::GroupConvLayout3D::NGCDHW_GKCZYX_NGKDHW, 3, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NGCDHW>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKCZYX>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NGKDHW>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor3D_NDHWGC_GKZYXC_NDHWGK)
|
||||
{
|
||||
using TensorLayouts =
|
||||
ConvTensorLayouts<ckb::GroupConvLayout3D::NDHWGC_GKZYXC_NDHWGK, 3, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::NDHWGC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKZYXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::NDHWGK>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorLayout, AssignsLayoutsFor3D_GNDHWC_GKZYXC_GNDHWK)
|
||||
{
|
||||
using TensorLayouts =
|
||||
ConvTensorLayouts<ckb::GroupConvLayout3D::GNDHWC_GKZYXC_GNDHWK, 3, FORWARD>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ALayout, ck::tensor_layout::convolution::GNDHWC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::BLayout, ck::tensor_layout::convolution::GKZYXC>));
|
||||
EXPECT_TRUE((std::is_same_v<TensorLayouts::ELayout, ck::tensor_layout::convolution::GNDHWK>));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
79
experimental/builder/test/unit_conv_tensor_type.cpp
Normal file
79
experimental/builder/test/unit_conv_tensor_type.cpp
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include "ck_tile/builder/factory/helpers/conv_tensor_type.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
using ck_tile::builder::factory::internal::ConvTensorTypes;
|
||||
|
||||
TEST(ConvTensorType, AssignsTypesForFP16)
|
||||
{
|
||||
using Types = ConvTensorTypes<ckb::DataType::FP16>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<Types::ADataType, ck::half_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BDataType, ck::half_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::EDataType, ck::half_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AccDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AComputeType, ck::half_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BComputeType, ck::half_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::CShuffleDataType, ck::half_t>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorType, AssignsTypesForBF16)
|
||||
{
|
||||
using Types = ConvTensorTypes<ckb::DataType::BF16>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<Types::ADataType, ck::bhalf_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BDataType, ck::bhalf_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::EDataType, ck::bhalf_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AccDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AComputeType, ck::bhalf_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BComputeType, ck::bhalf_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::CShuffleDataType, ck::bhalf_t>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorType, AssignsTypesForFP32)
|
||||
{
|
||||
using Types = ConvTensorTypes<ckb::DataType::FP32>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<Types::ADataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::EDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AccDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AComputeType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BComputeType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::CShuffleDataType, float>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorType, AssignsTypesForI8)
|
||||
{
|
||||
using Types = ConvTensorTypes<ckb::DataType::I8>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<Types::ADataType, int8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BDataType, int8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::EDataType, int8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AccDataType, int32_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AComputeType, int8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BComputeType, int8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::CShuffleDataType, int8_t>));
|
||||
}
|
||||
|
||||
TEST(ConvTensorType, AssignsTypesForFP8)
|
||||
{
|
||||
using Types = ConvTensorTypes<ckb::DataType::FP8>;
|
||||
|
||||
EXPECT_TRUE((std::is_same_v<Types::ADataType, ck::f8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BDataType, ck::f8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::EDataType, ck::f8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AccDataType, float>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::AComputeType, ck::f8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::BComputeType, ck::f8_t>));
|
||||
EXPECT_TRUE((std::is_same_v<Types::CShuffleDataType, ck::f8_t>));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
35
experimental/builder/test/unit_conv_thread_block.cpp
Normal file
35
experimental/builder/test/unit_conv_thread_block.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "ck_tile/builder/factory/helpers/conv_thread_block.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
using ::ck_tile::builder::factory::internal::ConvBlock;
|
||||
using ::ck_tile::builder::factory::internal::SetThreadBlockInfo;
|
||||
|
||||
TEST(ConvThreadBlock, AssignsThreadBlockAndTileSize)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
struct ThreadBlock
|
||||
{
|
||||
int block_size = 256;
|
||||
struct TileSize
|
||||
{
|
||||
int m = 128;
|
||||
int n = 128;
|
||||
int k = 16;
|
||||
} tile_size;
|
||||
} thread_block;
|
||||
} kAlgorithm;
|
||||
constexpr ConvBlock block_info = SetThreadBlockInfo<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(block_info.block_size, 256);
|
||||
EXPECT_EQ(block_info.per_block.m, 128);
|
||||
EXPECT_EQ(block_info.per_block.n, 128);
|
||||
EXPECT_EQ(block_info.per_block.k, 16);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
90
experimental/builder/test/unit_conv_tuning_params.cpp
Normal file
90
experimental/builder/test/unit_conv_tuning_params.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck_tile/builder/factory/helpers/conv_tuning_params.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace ckb = ::ck_tile::builder;
|
||||
using namespace ck_tile::builder;
|
||||
using namespace ck_tile::builder::factory::internal;
|
||||
|
||||
TEST(ConvTuningParams, AssignsBlockGemmParams)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
struct BlockGemm
|
||||
{
|
||||
ckb::PipelineVersion pipeline_version = ckb::PipelineVersion::V3;
|
||||
ckb::PipelineScheduler scheduler = ckb::PipelineScheduler::INTRAWAVE;
|
||||
} block_gemm;
|
||||
} kAlgorithm;
|
||||
constexpr auto block_gemm = SetBlockGemm<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(block_gemm.pipeline_version, ck::BlockGemmPipelineVersion::v3);
|
||||
EXPECT_EQ(block_gemm.scheduler, ck::BlockGemmPipelineScheduler::Intrawave);
|
||||
}
|
||||
|
||||
TEST(ConvTuningParams, AssignsLoopSchedulerParam)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
ckb::PipelineScheduler loop_scheduler = ckb::PipelineScheduler::INTERWAVE;
|
||||
} kAlgorithm;
|
||||
constexpr auto loop_scheduler = SetLoopScheduler<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(loop_scheduler, ck::LoopScheduler::Interwave);
|
||||
}
|
||||
|
||||
TEST(ConvTuningParams, AssignsGridwiseGemmPipelineVersion)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
struct GridwiseGemm
|
||||
{
|
||||
ckb::PipelineVersion pipeline_version = ckb::PipelineVersion::V4;
|
||||
} gridwise_gemm;
|
||||
} kAlgorithm;
|
||||
constexpr auto pipeline_version = SetGridwiseGemmPipelineVersion<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(pipeline_version, ck::PipelineVersion::v4);
|
||||
}
|
||||
|
||||
TEST(ConvTuningParams, AssignsGemmSpecialization)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
ckb::GemmSpecialization gemm_specialization = ckb::GemmSpecialization::MNKPadding;
|
||||
} kAlgorithm;
|
||||
constexpr auto gemm_spec = SetGemmSpecialization<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(gemm_spec, ck::tensor_operation::device::GemmSpecialization::MNKPadding);
|
||||
}
|
||||
|
||||
TEST(ConvTuningParams, AssignsBlockGemmPipelineVersion)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
ckb::PipelineVersion pipeline_version = ckb::PipelineVersion::V2;
|
||||
} kAlgorithm;
|
||||
constexpr auto pipeline_version = SetBlockGemmPipelineVersion<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(pipeline_version, ck::BlockGemmPipelineVersion::v2);
|
||||
}
|
||||
|
||||
TEST(ConvTuningParams, AssignsFwdConvSpecialization)
|
||||
{
|
||||
constexpr struct Algorithm
|
||||
{
|
||||
ckb::ConvFwdSpecialization fwd_specialization =
|
||||
ckb::ConvFwdSpecialization::FILTER_1X1_STRIDE1_PAD0;
|
||||
} kAlgorithm;
|
||||
constexpr auto conv_spec = SetFwdConvSpecialization<kAlgorithm>();
|
||||
|
||||
EXPECT_EQ(conv_spec,
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -66,9 +66,9 @@ constexpr TransferABC FwdTransfer_4x64x1{
|
||||
{
|
||||
.thread_cluster_dims =
|
||||
{.m_block = 1, .m_wave_per_xdl = 32, .n_block = 1, .n_wave_per_xdl = 8},
|
||||
.epilogue = {.m_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.epilogue = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -99,9 +99,9 @@ constexpr TransferABC FwdTransfer_4x64x1_fp8{
|
||||
{
|
||||
.thread_cluster_dims =
|
||||
{.m_block = 1, .m_wave_per_xdl = 32, .n_block = 1, .n_wave_per_xdl = 8},
|
||||
.epilogue = {.m_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.epilogue = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -132,9 +132,9 @@ constexpr TransferABC FwdTransfer_4x16x1{
|
||||
{
|
||||
.thread_cluster_dims =
|
||||
{.m_block = 1, .m_wave_per_xdl = 16, .n_block = 1, .n_wave_per_xdl = 4},
|
||||
.epilogue = {.m_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.epilogue = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
|
||||
},
|
||||
};
|
||||
@@ -166,9 +166,9 @@ constexpr TransferABC FwdTransfer_4x32x1{
|
||||
{
|
||||
.thread_cluster_dims =
|
||||
{.m_block = 1, .m_wave_per_xdl = 32, .n_block = 1, .n_wave_per_xdl = 4},
|
||||
.epilogue = {.m_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.epilogue = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user