mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
[CK_BUILDER] First fwd convolution builder implementation (#3070)
* Add experimental builder infrastructure for composable_kernel - Add experimental/builder directory with README documentation. - Create initial test infrastructure with CMakeLists.txt and placeholder test. - Update root CMakeLists.txt to support CK_EXPERIMENTAL_BUILDER option. - Update .gitignore to not treat `experimental/builder` as a CMake build directory. This establishes the directory structure for a high-level builder pattern that will provide a semantically-clear interface for constructing CK operations, with initial focus on convolution kernels for MIOpen integration. * Fix clang formatting. * Fix CMake build infrastructure for experimental builder - Add experimental/builder CMakeLists.txt with proper subdirectory structure - Add placeholder include/ck_tile/builder CMakeLists.txt for header installation - Fix gtest.cmake to use include_guard to prevent multiple inclusions - Update root CMakeLists.txt to include full builder directory instead of just tests * Scope C++20 settingto the test code Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Remove redundant GTest::gtest linkage Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Introduce basic types, and convolution algorithm concepts and limits. * Add convolution signature concepts. * Add convolution factory. * Finalize conv factory implementation for fwd convolutions. * Add type definitions for testing. * Add placeholder test. * Add convolution builder definition. * Fully functional fwd conv builder. * Test improvements. * Clean-up include headers. * Enable the limit checks for the convolution algorithm parameters. * Remove dead code. * clang formatting. * Add more tests and missing conv specialization argument. * clang formatting. * Add explicit handling of the tensor layouts. * Add complete 2D/3D layout support to CK Builder - Add missing 2D layouts: GNHWC_GKYXC_GNHWK, NGCHW_GKCYX_NGKHW - Add missing 3D layout: GNDHWC_GKZYXC_GNDHWK - Add 1D layouts (NWGC, NGCW, GNWC, NGCW_GKCX) for future support - Add 3 tests for new 2D/3D layouts - All tests pass (5/5) * Add tests for remaining 2D/3D layouts - Add test for 2D NGCHW_GKYXC_NGKHW (channels-first) with Filter1x1Stride1Pad0 - Add test for 3D NDHWGC_GKZYXC_NDHWGK (channels-last) - All 7 tests pass (complete coverage for all 2D/3D forward layouts) * Change enum converters to consteval. * 7 tests with pipeline and specialization| Test # | Dim | Type | Layout | Pipeline | Specialization | |--------|-----|------|----------------------|----------|-------------------------| | 1 | 2D | BF16 | NHWGC_GKYXC_NHWGK | V1 | DEFAULT | | 2 | 2D | FP16 | GNHWC_GKYXC_GNHWK | V3 | FILTER_1X1_PAD0 | | 3 | 2D | FP32 | NGCHW_GKCYX_NGKHW | V4 | FILTER_1X1_STRIDE1_PAD0 | | 4 | 2D | BF16 | NHWGC_GKYXC_NHWGK | V5 | FILTER_3x3 | | 5 | 3D | FP32 | NGCDHW_GKCZYX_NGKDHW | V1 | FILTER_1X1_PAD0 | | 6 | 3D | BF16 | GNDHWC_GKZYXC_GNDHWK | V3 | DEFAULT | | 7 | 3D | FP16 | NDHWGC_GKZYXC_NDHWGK | V4 | FILTER_1X1_PAD0 | * Add missing convolution layouts and provide better compile-time error in instance traits. * Fix clang formatting. * Changed I8 -> S8. * Fix signature. * Rename concepts and corresponding members. * Rename LDS related parameters. * Remove ODD_C specialization. Add V2 pipeline. * Add missing types. * Add elementwise operation to the conv signature. * Improve compile-time error message for unsupported elementwise ops. * Separate different fwd conv builder tests into separate compilation units. * Fix layout to string and add name to old CK PassThrough elementwise op. * Enable both CK and CK Tile tensor layouts in instance traits. * Fix clang-format. --------- Co-authored-by: John Shumway <jshumway@amd.com> Co-authored-by: John Shumway <john.shumwayjr@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: JH-Leon-KIM-AMD <jeonghyun.kim@amd.com>
This commit is contained in:
103
experimental/builder/test/utils/ckb_conv_test_common.hpp
Normal file
103
experimental/builder/test/utils/ckb_conv_test_common.hpp
Normal file
@@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "impl/conv_algorithm_types.hpp"
|
||||
#include "impl/conv_signature_types.hpp"
|
||||
#include "ck_tile/builder/conv_builder.hpp"
|
||||
|
||||
namespace ck_tile::builder::test_utils {
|
||||
|
||||
using namespace ck_tile::builder;
|
||||
using namespace test;
|
||||
|
||||
// Common test base class
|
||||
class FwdConvBuilderTestBase : public ::testing::Test
|
||||
{
|
||||
};
|
||||
|
||||
// Common test implementation
|
||||
template <auto FwdConvSignature,
|
||||
ThreadBlock FwdThreadBlock,
|
||||
BlockGemmPipelineVersion FwdPipelineVersion,
|
||||
ConvFwdSpecialization FwdConvSpecialization>
|
||||
constexpr void run_test()
|
||||
{
|
||||
constexpr GridwiseGemm FwdGemmParams{.ak1 = 8,
|
||||
.bk1 = 8,
|
||||
.m_per_xdl = 32,
|
||||
.n_per_xdl = 32,
|
||||
.m_xdl_per_wave = 4,
|
||||
.n_xdl_per_wave = 4};
|
||||
|
||||
constexpr BlockTransferABC FwdBlockTransfer{.block_transfer_a = {.k0 = 4, .m_n = 64, .k1 = 1},
|
||||
.block_transfer_b = {.k0 = 4, .m_n = 64, .k1 = 1},
|
||||
.thread_cluster_dims_c = {.m_block = 1,
|
||||
.m_wave_per_xdl = 32,
|
||||
.n_block = 1,
|
||||
.n_wave_per_xdl = 8},
|
||||
.lds_transfer_a = {.src_vector_dim = 2,
|
||||
.src_scalar_per_vector = 2,
|
||||
.lds_dst_scalar_per_vector = 8,
|
||||
.is_direct_load = false,
|
||||
.lds_padding = false},
|
||||
.lds_transfer_b = {.src_vector_dim = 2,
|
||||
.src_scalar_per_vector = 8,
|
||||
.lds_dst_scalar_per_vector = 8,
|
||||
.is_direct_load = false,
|
||||
.lds_padding = false},
|
||||
.epilogue_c = {.m_xdl_per_wave_per_shuffle = 1,
|
||||
.n_xdl_per_wave_per_shuffle = 1,
|
||||
.scalar_per_vector = 8},
|
||||
.block_transfer_access_order_a = {1, 0, 2},
|
||||
.block_transfer_access_order_b = {1, 0, 2},
|
||||
.src_access_order_a = {1, 0, 2},
|
||||
.src_access_order_b = {1, 0, 2}};
|
||||
|
||||
constexpr ConvAlgorithm FwdConvAlgorithm{.thread_block = FwdThreadBlock,
|
||||
.gridwise_gemm = FwdGemmParams,
|
||||
.block_transfer = FwdBlockTransfer,
|
||||
.pipeline_version = FwdPipelineVersion,
|
||||
.fwd_specialization = FwdConvSpecialization};
|
||||
|
||||
using Builder = ConvBuilder<FwdConvSignature, FwdConvAlgorithm>;
|
||||
|
||||
auto instance = typename Builder::Instance{};
|
||||
|
||||
const auto kernel_string = instance.GetTypeString();
|
||||
std::cout << "Generated kernel: " << kernel_string << std::endl;
|
||||
EXPECT_GT(kernel_string.size(), 0);
|
||||
|
||||
EXPECT_TRUE(kernel_string.starts_with("DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3"));
|
||||
|
||||
// Verify pipeline version is correct
|
||||
if(FwdPipelineVersion == BlockGemmPipelineVersion::V1)
|
||||
EXPECT_TRUE(kernel_string.find("BlkGemmPipelineVersion: v1") != std::string::npos);
|
||||
else if(FwdPipelineVersion == BlockGemmPipelineVersion::V3)
|
||||
EXPECT_TRUE(kernel_string.find("BlkGemmPipelineVersion: v3") != std::string::npos);
|
||||
else if(FwdPipelineVersion == BlockGemmPipelineVersion::V4)
|
||||
EXPECT_TRUE(kernel_string.find("BlkGemmPipelineVersion: v4") != std::string::npos);
|
||||
else if(FwdPipelineVersion == BlockGemmPipelineVersion::V5)
|
||||
EXPECT_TRUE(kernel_string.find("BlkGemmPipelineVersion: v5") != std::string::npos);
|
||||
|
||||
// Verify specialization is correct
|
||||
if(FwdConvSpecialization == ConvFwdSpecialization::DEFAULT)
|
||||
EXPECT_TRUE(kernel_string.find("Default") != std::string::npos);
|
||||
else if(FwdConvSpecialization == ConvFwdSpecialization::FILTER_1X1_PAD0)
|
||||
EXPECT_TRUE(kernel_string.find("Filter1x1Pad0") != std::string::npos);
|
||||
else if(FwdConvSpecialization == ConvFwdSpecialization::FILTER_1X1_STRIDE1_PAD0)
|
||||
EXPECT_TRUE(kernel_string.find("Filter1x1Stride1Pad0") != std::string::npos);
|
||||
else if(FwdConvSpecialization == ConvFwdSpecialization::FILTER_3x3)
|
||||
EXPECT_TRUE(kernel_string.find("Filter3x3") != std::string::npos);
|
||||
|
||||
const auto invoker_ptr = instance.MakeInvokerPointer();
|
||||
EXPECT_NE(invoker_ptr, nullptr);
|
||||
}
|
||||
|
||||
// Common thread block configurations
|
||||
constexpr ThreadBlock DefaultThreadBlock{.block_size = 256,
|
||||
.tile_size = {.m = 256, .n = 256, .k = 32}};
|
||||
|
||||
constexpr ThreadBlock SmallThreadBlock{.block_size = 256,
|
||||
.tile_size = {.m = 128, .n = 128, .k = 32}};
|
||||
|
||||
} // namespace ck_tile::builder::test_utils
|
||||
Reference in New Issue
Block a user