mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
[CK_TILE] Add depthwise conv2d forward kernel (FP16/FP32) (#6838) ## Motivation CK currently has no kernel optimized for depthwise convolution (G=C_in=C_out, C=K=1 per group) and existing generic paths perform poorly for this workload. This PR adds a dedicated depthwise conv forward kernel in CK Tile. ## Technical Details Adds a dedicated depthwise conv2d forward op to CK Tile that performs direct convolution rather than falling back to the generic GEMM path. The kernel is templatized by filter size, stride, and data type, and compiled into ~60 instances covering common configurations (kernel 3/5/7/9, stride 1/2, FP16/FP32). Supports both CDNA (gfx942/gfx950) and RDNA (gfx1100/gfx1200) architectures. ## Test Plan - [x] Correctness and performance validated on gfx942, gfx950, and gfx1100, with ckProfiler `grouped_conv_fwd` as baseline. - [ ] MI300A (gfx942) and gfx1200 validation. ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. AICK-1137 --------- Co-authored-by: GenDu <Gen.Du@amd.com>
217 lines
13 KiB
C++
217 lines
13 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#include <tuple>
|
|
|
|
#include "../../builder/test/impl/conv_signature_types.hpp"
|
|
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
|
|
|
namespace ck_tile::builder::profiling {
|
|
|
|
namespace ckb = ck_tile::builder;
|
|
namespace ckt = ck_tile::builder::test;
|
|
|
|
// 2D forward convolution signature: FP32 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP32_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D forward convolution signature: BF16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_BF16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D forward convolution signature: FP16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 3D forward convolution signature: FP32 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D forward convolution signature: BF16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D forward convolution signature: FP16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
/////////////////////////////////////////
|
|
// FWD signatures (NGCHW; NGCDHW variants are not defined here)
|
|
//////////////////////////////////////////
|
|
|
|
// 2D forward convolution signature: FP32 data with FP32 accumulation;
// input layout NGCHW, weight layout GKCYX, output layout NGKHW.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NGCHW_FP32_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NGCHW}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKCYX}},
                       .output = {.config = {.layout = ckb::TensorLayout::NGKHW}}};
|
|
|
|
// 2D forward convolution signature: FP16 data with FP32 accumulation;
// input layout NGCHW, weight layout GKCYX, output layout NGKHW.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NGCHW_FP16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NGCHW}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKCYX}},
                       .output = {.config = {.layout = ckb::TensorLayout::NGKHW}}};
|
|
|
|
// 2D forward convolution signature: BF16 data with FP32 accumulation;
// input layout NGCHW, weight layout GKCYX, output layout NGKHW.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NGCHW_BF16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NGCHW}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKCYX}},
                       .output = {.config = {.layout = ckb::TensorLayout::NGKHW}}};
|
|
/////////////////////////////////////////
|
|
// BWD WEIGHT signatures
|
|
//////////////////////////////////////////
|
|
|
|
// 2D backward-weight convolution signature: BF16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D backward-weight convolution signature: FP16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D backward-weight convolution signature: FP32 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 3D backward-weight convolution signature: BF16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D backward-weight convolution signature: FP16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D backward-weight convolution signature: FP32 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_WEIGHT,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
/////////////////////////////////////////
|
|
// BWD DATA signatures
|
|
//////////////////////////////////////////
|
|
|
|
// 2D backward-data convolution signature: BF16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D backward-data convolution signature: FP16 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 2D backward-data convolution signature: FP32 data with FP32 accumulation;
// input layout NHWGC, weight layout GKYXC, output layout NHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
|
|
|
// 3D backward-data convolution signature: BF16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D backward-data convolution signature: FP16 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
// 3D backward-data convolution signature: FP32 data with FP32 accumulation;
// input layout NDHWGC, weight layout GKZYXC, output layout NDHWGK.
// `inline` gives this header-defined constant one definition shared by all
// translation units instead of a separate internal-linkage copy per TU.
inline constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::BACKWARD_DATA,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
|
|
|
} // namespace ck_tile::builder::profiling
|