[CK_BUILDER] Add grouped conv fwd ck tile profiler (#3518)

* [BUILDER] Add grouped conv fwd ck tile profiler

* [CK TILE] Fix grouped conv kernels splitk and double lds

* Updates

* Fixes

* Move to ckProfiler

* Fixes

* fix

* fix

* Change instances to empty list by default

* fix

* fix

* Update grouped_convolution_signatures.hpp

* Update grouped_convolution_forward_tile_algs.hpp

* [CK TILE] Add grouped convolution forward tests (#3556)

* [CK TILE] Add grouped convolution forward tests

* fix jenkins

* fixes

* comments fixes

* unit test

* unit test fix

* Move instances outside builder

* fix includes

* clang format fix

* readme fix

* fix includes

* fixes
This commit is contained in:
Bartłomiej Kocot
2026-01-20 06:29:01 +01:00
committed by GitHub
parent 0517d43d31
commit 0727e85e52
44 changed files with 3083 additions and 65 deletions

View File

@@ -0,0 +1,84 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "utils/ckb_conv_tile_test_configs.hpp"
#include "utils/ckb_conv_test_utils.hpp"
#include "utils/conv_algorithm_type_utils.hpp"
#include "ck_tile/builder/testing/conv_fwd_ck_tile.hpp"
#include "ck_tile/builder/testing/conv_fwd_reference.hpp"
#include "ck_tile/host/device_prop.hpp"
#include "testing_utils.hpp"
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace cku = ck_tile::builder::test_utils;
// Convolution signature used for both the builder under test and the reference
// instance below: a 2D forward convolution with FP16 data and FP32 accumulation,
// with NHWGC input, GKYXC weight and NHWGK output tensor layouts.
constexpr auto SIGNATURE =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// Algorithm description for the tile grouped-convolution kernel under test.
// It is assembled from the shared presets in the test utilities (cku) and sets
// num_groups_to_merge to 1 with split_image and explicit_gemm both false.
constexpr auto ALGORITHM =
cku::ConvAlgorithm_Tile_GroupedConvolutionKernel{}
.with_tile_specializations(ckb::TileConvSpecialization::DEFAULT)
.with_tile_thread_block(cku::FwdTileThreadBlock_64x64x64)
.with_tile_block_gemm(cku::TileBlockGemmDesc_16x16_v3_intrawave)
.with_tile_transfer(cku::FwdTileTransfer_4x4x4)
.with_tile_optimizations(ckt::TileOptimizations{
.num_groups_to_merge = 1, .split_image = false, .explicit_gemm = false});
// Builder for the SIGNATURE/ALGORITHM pair; Instance is the type the test runs.
using Builder = ckb::ConvBuilder<SIGNATURE, ALGORITHM>;
using Instance = Builder::Instance;
// Instance built from the same SIGNATURE with ConvAlgorithm_Reference; the test
// uses its output as the expected result.
using Reference = ckb::ConvBuilder<SIGNATURE, ckt::ConvAlgorithm_Reference{}>::Instance;
// End-to-end check: runs the built tile convolution Instance and the Reference
// instance on the same inputs and expects the two outputs to match.
TEST(Fwd2DFp16_CShufV3_NHWGC, EndToEnd)
{
    // Only run on devices whose reported name begins with "gfx9".
    const bool is_gfx9 = ck_tile::get_device_name().starts_with("gfx9");
    if(!is_gfx9)
    {
        GTEST_SKIP() << "unsupported architecture";
    }

    // Problem description: sizes, strides, dilations, padding and
    // default-constructed elementwise operations.
    ckt::Args<SIGNATURE> problem = {
        .lengths            = {.batch_size      = 16,
                               .groups          = 1,
                               .input_channels  = 32,
                               .output_channels = 48,
                               .image           = {.width = 56, .height = 64},
                               .filter          = {.width = 3, .height = 5}},
        .filter_strides     = {.width = 1, .height = 1},
        .filter_dilation    = {.width = 1, .height = 1},
        .input_left_pad     = {.width = 0, .height = 0},
        .input_right_pad    = {.width = 0, .height = 0},
        .a_elementwise_op   = {},
        .b_elementwise_op   = {},
        .cde_elementwise_op = {},
    };

    // One set of inputs shared by both runs, plus a separate output buffer for
    // the kernel under test and for the reference.
    auto in_tensors = alloc_inputs(problem);
    auto actual     = alloc_outputs(problem);
    auto expected   = alloc_outputs(problem);
    ckt::init_inputs(problem, in_tensors.get());

    // Kernel under test.
    auto tile_conv = Instance{};
    ckt::run(tile_conv, problem, in_tensors.get(), actual.get());

    // Reference implementation on the same inputs.
    auto reference_conv = Reference{};
    ckt::run(reference_conv, problem, in_tensors.get(), expected.get());

    EXPECT_THAT(actual.get(), ck_tile::test::MatchesReference(problem, expected.get()));
}

View File

@@ -16,6 +16,8 @@ struct TensorConfig
// Optional data types, override the type defined in the signature if provided.
DataType data_type{DataType::UNDEFINED_DATA_TYPE};
DataType compute_type{DataType::UNDEFINED_DATA_TYPE};
constexpr bool operator==(const TensorConfig& other) const = default;
};
template <TensorConfig... Configs>
@@ -31,6 +33,8 @@ struct TensorOperation
return TensorOperation<Configs..., TensorConfig{AuxiliaryConfigs}...>{
.elementwise_operation = this->elementwise_operation};
}
constexpr bool operator==(const TensorOperation& other) const = default;
};
template <typename Op = TensorOperation<>>
@@ -38,6 +42,8 @@ struct ConvolutionTensor
{
TensorConfig config;
Op operation{};
constexpr bool operator==(const ConvolutionTensor& other) const = default;
};
template <typename InputTensor = ConvolutionTensor<>,
@@ -52,6 +58,8 @@ struct ConvSignature
InputTensor input;
WeightTensor weight;
OutputTensor output;
constexpr bool operator==(const ConvSignature& other) const = default;
};
} // namespace ck_tile::builder::test

View File

@@ -190,6 +190,8 @@ TEST(TensorDescriptor, IsPacked)
ckt::make_descriptor<dt>(ckt::Extent{10, 11, 12}, ckt::Extent{1, 100, 1100}).is_packed());
EXPECT_FALSE(
ckt::make_descriptor<dt>(ckt::Extent{30, 20, 10}, ckt::Extent{1, 1, 1}).is_packed());
EXPECT_TRUE(
ckt::make_descriptor<dt>(ckt::Extent{30, 20, 1}, ckt::Extent{1, 30, 30}).is_packed());
}
TEST(TensorDescriptor, PrintExtent)

View File

@@ -3,8 +3,8 @@
#pragma once
#include "impl/conv_algorithm_types.hpp"
#include "impl/conv_signature_types.hpp"
#include "../impl/conv_algorithm_types.hpp"
#include "../impl/conv_signature_types.hpp"
#include "ck_tile/builder/conv_builder.hpp"
namespace ck_tile::builder::test_utils {