[CK_BUILDER] Add grouped conv fwd ck tile profiler (#3518)

* [BUILDER] Add grouped conv fwd ck tile profiler

* [CK TILE] Fix grouped conv kernels splitk and double lds

* Updates

* Fixes

* Move to ckProfiler

* Fixes

* fix

* fix

* Change instances to empty list by default

* fix

* fix

* Update grouped_convolution_signatures.hpp

* Update grouped_convolution_forward_tile_algs.hpp

* [CK TILE] Add grouped convolution forward tests (#3556)

* [CK TILE] Add grouped convolution forward tests

* fix jenkins

* fixes

* comments fixes

* unit test

* unit test fix

* Move instances outside builder

* fix includes

* clang format fix

* readme fix

* fix includes

* fixes

[ROCm/composable_kernel commit: 0727e85e52]
This commit is contained in:
Bartłomiej Kocot
2026-01-20 06:29:01 +01:00
committed by GitHub
parent 1a5d3590ef
commit d15cc593ea
44 changed files with 3083 additions and 65 deletions

View File

@@ -19,6 +19,18 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
target_link_libraries(test_grouped_convnd_fwd_large_cases PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
endif()
# The CK Tile grouped convolution forward test is only considered when the GPU
# targets include gfx9 and CK_EXPERIMENTAL_BUILDER is set.
if(GPU_TARGETS MATCHES "gfx9")
if(CK_EXPERIMENTAL_BUILDER)
# TODO: Reenable after the instance fixes
# NOTE: every command below is commented out, so this guard currently adds no
# target to the build. The instance library is linked only if its target exists.
# add_executable(test_grouped_convnd_fwd_tile test_grouped_convnd_fwd_tile.cpp)
# target_compile_options(test_grouped_convnd_fwd_tile PRIVATE -Wno-global-constructors -Wno-undef -Wno-c++20-compat)
# target_link_libraries(test_grouped_convnd_fwd_tile PRIVATE gtest_main getopt::getopt utility)
# if(TARGET device_grouped_conv_fwd_tile_instances)
# target_link_libraries(test_grouped_convnd_fwd_tile PRIVATE device_grouped_conv_fwd_tile_instances)
# endif()
endif()
endif()
add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp)
if(result EQUAL 0)
target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility)

View File

@@ -0,0 +1,273 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck_tile/builder/testing/conv_fwd_ck_tile.hpp"
#include "ck_tile/host/device_prop.hpp"
#include "profiler/grouped_convolution_forward_tile_algs.hpp"
// TODO: Remove limitation of conv fwd gpu reference which does not support right pad
// While this macro is non-zero, conv_args_append() silently drops every case
// that has a non-zero entry in its right padding.
#define CK_CONV_FWD_REF_SKIP_RIGHT_PAD_CASES 1
// Bit mask selecting which appended cases Run() executes: case i is skipped
// when bit i is clear. main() overrides it from argv[1].
static ck::index_t args_mask = 0xffff;
// Set by main() from argv[2]; the usage text says -1 means "all". It is not
// read anywhere else in the code visible in this file.
static ck::index_t instance_index = -1;
// Short aliases for the builder namespaces used throughout this test.
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace ckp = ck_tile::builder::profiling;
template <ck_tile::index_t num_spatial_dim_,
ckb::DataType data_type_,
ckb::DataType acc_data_type_,
ckb::TensorLayout in_layout_,
ckb::TensorLayout wei_layout_,
ckb::TensorLayout out_layout_>
struct SignatureDetails
{
static constexpr ck_tile::index_t num_spatial_dim = num_spatial_dim_;
static constexpr ckb::DataType data_type = data_type_;
static constexpr ckb::DataType acc_data_type = acc_data_type_;
static constexpr ckb::TensorLayout in_layout = in_layout_;
static constexpr ckb::TensorLayout wei_layout = wei_layout_;
static constexpr ckb::TensorLayout out_layout = out_layout_;
};
/// Typed gtest fixture that collects convolution problem descriptions and runs
/// them through ckp::run_grouped_conv_forward_tile_algs.
///
/// @tparam SignatureDetailsType Type exposing num_spatial_dim, data_type,
///         acc_data_type, in_layout, wei_layout and out_layout as static
///         constexpr members (see SignatureDetails).
template <typename SignatureDetailsType>
class TestGroupedConvndFwdTile : public ::testing::Test
{
    protected:
    /// Forward-convolution signature assembled from SignatureDetailsType.
    static constexpr auto SIGNATURE =
        ckt::ConvSignature{.spatial_dim            = SignatureDetailsType::num_spatial_dim,
                           .direction              = ckb::ConvDirection::FORWARD,
                           .data_type              = SignatureDetailsType::data_type,
                           .accumulation_data_type = SignatureDetailsType::acc_data_type,
                           .input  = {.config = {.layout = SignatureDetailsType::in_layout}},
                           .weight = {.config = {.layout = SignatureDetailsType::wei_layout}},
                           .output = {.config = {.layout = SignatureDetailsType::out_layout}}};

    /// Cases appended by conv_args_append() and consumed, in order, by Run().
    std::vector<ckt::Args<SIGNATURE>> conv_args;

    /// Tells whether bit `case_idx` of the global args_mask is set.
    ///
    /// The original expression `args_mask & (1 << i)` shifted an int by an
    /// unbounded index, which is undefined once the index reaches the width of
    /// int. Indices the mask cannot represent are reported as not selected.
    static bool is_case_selected(std::size_t case_idx)
    {
        // Assumes 8-bit bytes when converting the mask size to a bit count.
        constexpr std::size_t mask_bits = sizeof(args_mask) * 8;
        if(case_idx >= mask_bits)
        {
            return false;
        }
        // Widen to an unsigned type so the shift is well defined for every
        // in-range index, including the sign bit of the mask.
        const auto mask = static_cast<unsigned long long>(args_mask);
        return ((mask >> case_idx) & 1ULL) != 0;
    }

    /// Runs every selected case and expects all of them to pass.
    ///
    /// For each case the inputs/outputs are allocated, the inputs initialised,
    /// the tensor descriptors printed to stdout, and the tile algorithms run
    /// with kernel timing disabled. A single EXPECT_TRUE at the end reports
    /// the combined result. Also expects that at least one case was appended.
    ///
    /// @tparam NDimSpatial Not used by the body; kept for the callers'
    ///         `Run<2>()` / `Run<3>()` spelling.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_args.empty());
        bool pass = true;
        for(std::size_t i = 0; i < conv_args.size(); i++)
        {
            if(!is_case_selected(i))
            {
                continue;
            }
            auto& args   = conv_args[i];
            auto inputs  = alloc_inputs(args);
            auto outputs = alloc_outputs(args);
            ckt::init_inputs(args, inputs.get());
            std::cout << args.make_input_descriptor() << std::endl;
            std::cout << args.make_weight_descriptor() << std::endl;
            std::cout << args.make_output_descriptor() << std::endl;
            // Only the pass flag is consumed; the timing and operation name
            // are received but not inspected by this test.
            float avg_time;
            std::string op_name;
            bool case_passed;
            std::tie(case_passed, avg_time, op_name) = ckp::run_grouped_conv_forward_tile_algs(
                args,
                inputs.get(),
                outputs.get(),
                ck_tile::stream_config{nullptr, false /*time_kernel*/});
            pass = pass && case_passed;
        }
        EXPECT_TRUE(pass);
    }

    /// Appends one convolution problem to conv_args.
    ///
    /// The first (unnamed) parameter is ignored; callers in this file pass the
    /// number of spatial dimensions there.
    ///
    /// @param G                      Number of groups.
    /// @param N                      Batch size.
    /// @param K                      Output channels.
    /// @param C                      Input channels.
    /// @param filter_spatial_lengths Filter extent per spatial dimension.
    /// @param input_spatial_lengths  Image extent per spatial dimension.
    /// @param conv_filter_strides    Filter strides.
    /// @param conv_filter_dilations  Filter dilations.
    /// @param input_left_pads        Left padding.
    /// @param input_right_pads       Right padding; while
    ///        CK_CONV_FWD_REF_SKIP_RIGHT_PAD_CASES is non-zero, a case with any
    ///        non-zero entry here is dropped without being appended.
    void conv_args_append(std::size_t,
                          std::size_t G,
                          std::size_t N,
                          std::size_t K,
                          std::size_t C,
                          const std::vector<std::size_t>& filter_spatial_lengths,
                          const std::vector<std::size_t>& input_spatial_lengths,
                          const std::vector<std::size_t>& conv_filter_strides,
                          const std::vector<std::size_t>& conv_filter_dilations,
                          const std::vector<std::size_t>& input_left_pads,
                          const std::vector<std::size_t>& input_right_pads)
    {
#if CK_CONV_FWD_REF_SKIP_RIGHT_PAD_CASES
        // See the TODO next to the macro: the GPU reference does not support
        // right padding yet, so such cases are skipped.
        for(const std::size_t right_pad : input_right_pads)
        {
            if(right_pad != 0)
            {
                return;
            }
        }
#endif
        constexpr auto NDim = SignatureDetailsType::num_spatial_dim;

        ckt::Args<SIGNATURE> args = {
            .lengths =
                {
                    .batch_size      = N,
                    .groups          = G,
                    .input_channels  = C,
                    .output_channels = K,
                    .image           = ckt::filter_extent_from_vector<NDim>(input_spatial_lengths),
                    .filter          = ckt::filter_extent_from_vector<NDim>(filter_spatial_lengths),
                },
            .filter_strides     = ckt::filter_extent_from_vector<NDim>(conv_filter_strides),
            .filter_dilation    = ckt::filter_extent_from_vector<NDim>(conv_filter_dilations),
            .input_left_pad     = ckt::filter_extent_from_vector<NDim>(input_left_pads),
            .input_right_pad    = ckt::filter_extent_from_vector<NDim>(input_right_pads),
            .a_elementwise_op   = {},
            .b_elementwise_op   = {},
            .cde_elementwise_op = {},
        };
        conv_args.push_back(args);
    }
};
using KernelTypes2d = ::testing::Types<SignatureDetails<2,
ckb::DataType::FP32,
ckb::DataType::FP32,
ckb::TensorLayout::NHWGC,
ckb::TensorLayout::GKYXC,
ckb::TensorLayout::NHWGK>,
SignatureDetails<2,
ckb::DataType::FP16,
ckb::DataType::FP32,
ckb::TensorLayout::NHWGC,
ckb::TensorLayout::GKYXC,
ckb::TensorLayout::NHWGK>,
SignatureDetails<2,
ckb::DataType::BF16,
ckb::DataType::FP32,
ckb::TensorLayout::NHWGC,
ckb::TensorLayout::GKYXC,
ckb::TensorLayout::NHWGK>>;
using KernelTypes3d = ::testing::Types<SignatureDetails<3,
ckb::DataType::FP32,
ckb::DataType::FP32,
ckb::TensorLayout::NDHWGC,
ckb::TensorLayout::GKZYXC,
ckb::TensorLayout::NDHWGK>,
SignatureDetails<3,
ckb::DataType::FP16,
ckb::DataType::FP32,
ckb::TensorLayout::NDHWGC,
ckb::TensorLayout::GKZYXC,
ckb::TensorLayout::NDHWGK>,
SignatureDetails<3,
ckb::DataType::BF16,
ckb::DataType::FP32,
ckb::TensorLayout::NDHWGC,
ckb::TensorLayout::GKZYXC,
ckb::TensorLayout::NDHWGK>>;
/// Fixture for the 2D typed test suite. It adds nothing to the shared base
/// fixture; it exists so the 2D suite has its own name.
template <typename Details>
class TestGroupedConvndFwdTile2d : public TestGroupedConvndFwdTile<Details>
{
};
/// Fixture for the 3D typed test suite. It adds nothing to the shared base
/// fixture; it exists so the 3D suite has its own name.
template <typename Details>
class TestGroupedConvndFwdTile3d : public TestGroupedConvndFwdTile<Details>
{
};
// Register the typed suites: each fixture is instantiated once per entry of
// its type list.
TYPED_TEST_SUITE(TestGroupedConvndFwdTile2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwdTile3d, KernelTypes3d);
// Runs the 2D forward-convolution cases. The order of the table matters: the
// position of a case is the bit index tested against args_mask in Run().
TYPED_TEST(TestGroupedConvndFwdTile2d, Test2D)
{
    using Dims = std::vector<std::size_t>;

    // One row per conv_args_append() call, fields in the call's argument order.
    struct ConvCase
    {
        std::size_t groups;
        std::size_t batch;
        std::size_t out_channels;
        std::size_t in_channels;
        Dims filter;
        Dims image;
        Dims strides;
        Dims dilations;
        Dims left_pads;
        Dims right_pads;
    };

    const ConvCase cases[] = {
        {3, 5, 96, 200, {1, 1}, {73, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 32, 32, {1, 1}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 32, 32, {2, 2}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 32, 32, {3, 3}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 32, 32, {5, 5}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 32, 32, {9, 9}, {128, 128}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 32, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}},
        {2, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 32, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {96, 1, 1, 1, {1, 1}, {120, 160}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {96, 1, 1, 1, {3, 3}, {120, 160}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
    };

    this->conv_args.clear();
    for(const ConvCase& c : cases)
    {
        this->conv_args_append(2,
                               c.groups,
                               c.batch,
                               c.out_channels,
                               c.in_channels,
                               c.filter,
                               c.image,
                               c.strides,
                               c.dilations,
                               c.left_pads,
                               c.right_pads);
    }
    this->template Run<2>();
}
// Runs the 3D forward-convolution cases. The order of the table matters: the
// position of a case is the bit index tested against args_mask in Run().
TYPED_TEST(TestGroupedConvndFwdTile3d, Test3D)
{
    using Dims = std::vector<std::size_t>;

    // One row per conv_args_append() call, fields in the call's argument order.
    struct ConvCase
    {
        std::size_t groups;
        std::size_t batch;
        std::size_t out_channels;
        std::size_t in_channels;
        Dims filter;
        Dims image;
        Dims strides;
        Dims dilations;
        Dims left_pads;
        Dims right_pads;
    };

    const ConvCase cases[] = {
        {3, 5, 96, 200, {1, 1, 1}, {37, 37, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {1, 1, 1}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {2, 2, 2}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {5, 5, 5}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {9, 9, 9}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 32, 32, {1, 1, 1}, {16, 16, 16}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {96, 1, 1, 1, {1, 1, 1}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {96, 1, 1, 1, {3, 3, 3}, {120, 40, 20}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
    };

    this->conv_args.clear();
    for(const ConvCase& c : cases)
    {
        this->conv_args_append(3,
                               c.groups,
                               c.batch,
                               c.out_channels,
                               c.in_channels,
                               c.filter,
                               c.image,
                               c.strides,
                               c.dilations,
                               c.left_pads,
                               c.right_pads);
    }
    this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
args_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: args_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}