[rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)

[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler
 (#5516)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

We want to close the performance gap between old CK and CK Tile for bwd
data convolutions. To achieve this, we need two things:

- Configurations for the old CK kernel instances such that we can map
them into CK Tile instances.
- Support in CK profiler to run the CK Tile instance with the same API
as for old CK instances.

## Technical Details

Extracted kernel configurations from old CK. The codegen Python script
for CK Tile convs is extended to also support bwd data. The generated
instances are added to the CMake build (target
`device_grouped_conv_bwd_data_tile_instances`).
A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the
CK Profiler. The API is the same as for old CK's profiler op
`grouped_conv_bwd_data`.
This commit is contained in:
Ville Pietilä
2026-03-25 14:36:11 +00:00
committed by assistant-librarian[bot]
parent 1834e318da
commit ec2dbfbfde
29 changed files with 1588 additions and 956 deletions

View File

@@ -1,177 +1,8 @@
#include "../../builder/test/utils/ckb_conv_tile_test_configs.hpp"
#include "ck_tile/builder/testing/conv/fwd.hpp"
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
#include "signatures.hpp"
// Short aliases for the CK Tile builder namespaces referenced in this file.
// Only `ckb` and `ckt` are used by the signature constants visible here;
// `cku` and `ckf` are not referenced in the visible portion of this file.
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace cku = ck_tile::builder::test_utils;
namespace ckf = ck_tile::builder::factory;
// Compile-time `ckt::ConvSignature` constants for grouped convolutions.
//
// One constant is defined for every combination of
//   - spatial dimension: 2 or 3,
//   - data type:         FP32, BF16 or FP16,
//   - direction:         FORWARD, BACKWARD_WEIGHT or BACKWARD_DATA.
//
// Naming scheme: SIGNATURE_<input layout>_<data type>_<direction>.
//
// Properties shared by all constants below:
//   - the accumulation data type is always FP32, regardless of `data_type`;
//   - 2D signatures use input/weight/output layouts NHWGC / GKYXC / NHWGK;
//   - 3D signatures use input/weight/output layouts NDHWGC / GKZYXC / NDHWGK.
namespace ck_tile::builder::profiling {
// Forward Signatures
// 2D forward, FP32 data.
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D forward, BF16 data.
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D forward, FP16 data.
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 3D forward, FP32 data.
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D forward, BF16 data.
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D forward, FP16 data.
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Weight Signatures
// 2D backward-weight, FP32 data.
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D backward-weight, BF16 data.
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D backward-weight, FP16 data.
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 3D backward-weight, FP32 data.
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D backward-weight, BF16 data.
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D backward-weight, FP16 data.
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Data Signatures
// 2D backward-data, FP32 data.
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D backward-data, BF16 data.
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 2D backward-data, FP16 data.
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
// 3D backward-data, FP32 data.
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D backward-data, BF16 data.
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// 3D backward-data, FP16 data.
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
} // namespace ck_tile::builder::profiling