[CK_BUILDER] Instance traits for conv bwd weight algorithms (#3498)

Added instance traits for the following bwd weight conv algorithms

DeviceGroupedConvBwdWeight_Xdl_CShuffleV3
DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
DeviceGroupedConvBwdWeight_Wmma_CShuffle
DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle
DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
DeviceGroupedConvBwdWeight_Dl
DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle
DeviceGroupedConvBwdWeightMultipleD_Wmma_CShuffleV3
Also added unit tests for the instance traits of those bwd weight algorithms that are currently exposed by the narrow CK build for MIOpen.
---------

Co-authored-by: Ville Pietilä <>

[ROCm/composable_kernel commit: 6e8c401e33]
This commit is contained in:
Ville Pietilä
2025-12-31 15:41:15 -08:00
committed by GitHub
parent 67b61ccf5c
commit ba9dbd433a
25 changed files with 3206 additions and 2 deletions

View File

@@ -119,13 +119,29 @@ add_ck_builder_test(test_ckb_conv_builder
# - Group convolution (v3, standard, large tensor, WMMA, DL variants)
# - Backward weight group convolution (XDL)
# Requires kernel compilation to validate the generated strings through the base class.
add_ck_builder_test(test_ckb_instance_string
set(INSTANCE_STRING_TESTS
test_instance_string_fwd_grp_conv_v3.cpp
test_instance_string_fwd_grp_conv.cpp
test_instance_string_fwd_grp_conv_large_tensor.cpp
test_instance_string_fwd_grp_conv_wmma.cpp
test_instance_string_fwd_grp_conv_dl.cpp
test_instance_string_bwd_weight_grp_conv_xdl.cpp)
test_instance_string_bwd_weight_grp_conv_xdl.cpp
test_instance_string_bwd_weight_grp_conv_dl.cpp
test_instance_string_bwd_weight_grp_conv_multiple_d_xdl.cpp
test_instance_string_bwd_weight_grp_conv_two_stage_xdl.cpp
test_instance_string_bwd_weight_grp_conv_xdl_v3.cpp
)
if (CK_USE_WMMA)
list(APPEND INSTANCE_STRING_TESTS
test_instance_string_bwd_weight_grp_conv_wmma_v3.cpp
test_instance_string_bwd_weight_grp_conv_multiple_d_wmma_v3.cpp
test_instance_string_bwd_weight_grp_conv_two_stage_wmma_v3.cpp
)
endif()
add_ck_builder_test(test_ckb_instance_string ${INSTANCE_STRING_TESTS})
# Tests the forward convolution builder across multiple data types and dimensions.
# Individual tests are split into separate files to enable parallel compilation.

View File

@@ -0,0 +1,79 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_dl_f16_instances instantiated for
// 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts and the Default specialization.
using InstanceTuple =
    ck::tensor_operation::device::instance::device_grouped_conv_bwd_weight_dl_f16_instances<
        2,                                     // NDimSpatial
        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Instance string the test expects describe() to report for DeviceInstance
// (per the original note: based on the generic instance).
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str = "DeviceGroupedConvBwdWeight_Dl"
    "<2"                 // NDimSpatial
    ",GNHWC"             // InLayout
    ",GKYXC"             // WeiLayout
    ",GNHWK"             // OutLayout
    ",fp16"              // InDataType
    ",fp16"              // WeiDataType
    ",fp16"              // OutDataType
    ",fp32"              // AccDataType
    ",PassThrough"       // InElementwiseOperation
    ",PassThrough"       // WeiElementwiseOperation
    ",PassThrough"       // OutElementwiseOperation
    ",Default"           // ConvBackwardWeightSpecialization
    ",256"               // BlockSize
    ",128"               // MPerBlock
    ",128"               // NPerBlock
    ",16"                // K0PerBlock
    ",1"                 // K1
    ",4"                 // M1PerThread
    ",4"                 // N1PerThread
    ",1"                 // KPerThread
    ",Seq(8,2)"          // M1N1ThreadClusterM1Xs
    ",Seq(8,2)"          // M1N1ThreadClusterN1Xs
    ",Seq(1,8,1,1,1)"    // ABlockTransferThreadSliceLengths_K0_M0_M1_K1
    ",Seq(1,2,1,128,1)"  // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
    ",Seq(0,2,3,1,4)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(0,2,3,1,4)"    // ABlockTransferSrcAccessOrder
    ",Seq(1,1,1,1,1)"    // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
    ",Seq(0,2,3,1,4)"    // ABlockTransferSrcVectorTensorContiguousDimOrder
    ",Seq(1,1,1,1,1)"    // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
    ",Seq(1,1,1,8,1)"    // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
    ",Seq(1,16,1,16,1)"  // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
    ",Seq(0,1,4,2,3)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(0,1,4,2,3)"    // BBlockTransferSrcAccessOrder
    ",Seq(1,1,1,1,1)"    // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1
    ",Seq(0,1,4,2,3)"    // BBlockTransferSrcVectorTensorContiguousDimOrder
    ",Seq(1,1,1,1,1)"    // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1
    ",Seq(0,1,2,3,4,5)"  // CThreadTransferSrcDstAccessOrder
    ",5"                 // CThreadTransferSrcDstVectorDim
    ",1"                 // CThreadTransferDstScalarPerVector
    ">";
// Calls describe() through a BaseOperator pointer and compares the reported
// instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvDl)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace

View File

@@ -0,0 +1,86 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_scale_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_wmma_c_shuffle_f16_scale_instances
// instantiated for 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts and the Default
// specialization.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_wmma_c_shuffle_f16_scale_instances<
        2,                                     // NDimSpatial
        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Instance string the test expects describe() to report for DeviceInstance
// (per the original note: based on the generic instance).
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str =
    "DeviceGroupedConvBwdWeightMultipleD_Wmma_CShuffleV3"
    "<2"              // NDimSpatial
    ",GNHWC"          // InLayout
    ",GKYXC"          // WeiLayout
    ",GNHWK"          // OutLayout
    ",EmptyTuple"     // DsLayout
    ",fp16"           // InDataType
    ",fp16"           // WeiDataType
    ",fp16"           // OutDataType
    ",fp32"           // AccDataType
    ",EmptyTuple"     // DsDataType
    ",PassThrough"    // InElementwiseOperation
    ",Scale"          // WeiElementwiseOperation
    ",PassThrough"    // OutElementwiseOperation
    ",Default"        // ConvBackwardWeightSpecialization
    ",64"             // BlockSize
    ",64"             // MPerBlock
    ",64"             // NPerBlock
    ",32"             // KPerBlock
    ",8"              // ABK1
    ",16"             // MPerWmma
    ",16"             // NPerWmma
    ",4"              // MRepeat
    ",2"              // NRepeat
    ",Seq(4,8,1)"     // ABlockTransferThreadClusterLengths_AK0_M_AK1
    ",Seq(2,0,1)"     // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"     // ABlockTransferSrcAccessOrder
    ",1"              // ABlockTransferSrcVectorDim
    ",2"              // ABlockTransferSrcScalarPerVector
    ",4"              // ABlockTransferDstScalarPerVector_AK1
    ",true"           // ABlockLdsAddExtraM
    ",Seq(4,8,1)"     // BBlockTransferThreadClusterLengths_BK0_N_BK1
    ",Seq(2,0,1)"     // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"     // BBlockTransferSrcAccessOrder
    ",1"              // BBlockTransferSrcVectorDim
    ",2"              // BBlockTransferSrcScalarPerVector
    ",4"              // BBlockTransferDstScalarPerVector_BK1
    ",true"           // BBlockLdsAddExtraN
    ",1"              // CShuffleMRepeatPerShuffle
    ",1"              // CShuffleNRepeatPerShuffle
    ",Seq(1,16,1,4)"  // CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
    ",2"              // CShuffleBlockTransferScalarPerVector_NPerBlock
    ",Intrawave"      // BlkGemmPipeSched
    ",v1"             // BlkGemmPipelineVer
    ",fp16"           // ComputeTypeA
    ",fp16"           // ComputeTypeB
    ">";
// Calls describe() through a BaseOperator pointer and compares the reported
// instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvMultipleDWmmaV3)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace

View File

@@ -0,0 +1,84 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_scale_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_xdl_c_shuffle_f16_scale_instances
// instantiated for 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts and the Default
// specialization.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_xdl_c_shuffle_f16_scale_instances<
        2,                                     // NDimSpatial
        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Instance string the test expects describe() to report for DeviceInstance
// (per the original note: based on the generic instance).
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str =
    "DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle"
    "<2"              // NDimSpatial
    ",GNHWC"          // InLayout
    ",GKYXC"          // WeiLayout
    ",GNHWK"          // OutLayout
    ",EmptyTuple"     // DsLayout
    ",fp16"           // InDataType
    ",fp16"           // WeiDataType
    ",fp16"           // OutDataType
    ",fp32"           // AccDataType
    ",EmptyTuple"     // DsDataType
    ",PassThrough"    // InElementwiseOperation
    ",Scale"          // WeiElementwiseOperation
    ",PassThrough"    // OutElementwiseOperation
    ",Default"        // ConvBackwardWeightSpecialization
    ",64"             // BlockSize
    ",64"             // MPerBlock
    ",64"             // NPerBlock
    ",4"              // K0PerBlock
    ",8"              // K1
    ",32"             // MPerXDL
    ",32"             // NPerXDL
    ",2"              // MXdlPerWave
    ",2"              // NXdlPerWave
    ",Seq(1,4,8,2)"   // ABlockTransferThreadClusterLengths_K0_M_K1
    ",Seq(0,3,1,2)"   // ABlockTransferThreadClusterArrangeOrder
    ",Seq(0,2,1,3)"   // ABlockTransferSrcAccessOrder
    ",2"              // ABlockTransferSrcVectorDim
    ",2"              // ABlockTransferSrcScalarPerVector
    ",4"              // ABlockTransferDstScalarPerVector_K1
    ",true"           // ABlockLdsAddExtraM
    ",Seq(1,4,8,2)"   // BBlockTransferThreadClusterLengths_K0_N_K1
    ",Seq(0,3,1,2)"   // BBlockTransferThreadClusterArrangeOrder
    ",Seq(0,2,1,3)"   // BBlockTransferSrcAccessOrder
    ",2"              // BBlockTransferSrcVectorDim
    ",2"              // BBlockTransferSrcScalarPerVector
    ",4"              // BBlockTransferDstScalarPerVector_K1
    ",true"           // BBlockLdsAddExtraN
    ",1"              // CShuffleMXdlPerWavePerShuffle
    ",1"              // CShuffleNXdlPerWavePerShuffle
    ",Seq(1,16,1,4)"  // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
    ",2"              // CBlockTransferScalarPerVector_NWaveNPerXdl
    ",fp16"           // ComputeTypeA
    ",fp16"           // ComputeTypeB
    ">";
// Calls describe() through a BaseOperator pointer and compares the reported
// instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvMultipleDXdl)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace

View File

@@ -0,0 +1,90 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_two_stage_wmma_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test:
// device_grouped_conv_bwd_weight_two_stage_nhwgc_wmma_c_shuffle_f16_instances instantiated for
// 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts, the Default specialization, the
// Intrawave scheduler and pipeline version v1.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_two_stage_nhwgc_wmma_c_shuffle_f16_instances<
        2,                                     // NDimSpatial
        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default,
        ck::BlockGemmPipelineScheduler::Intrawave,
        ck::BlockGemmPipelineVersion::v1>;

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Instance string the test expects describe() to report for the first instance of the tuple
// (BlockSize=32, MPerBlock=16, NPerBlock=16, etc.).
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str =
    "DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3"
    "<2"             // NDimSpatial
    ",GNHWC"         // InLayout
    ",GKYXC"         // WeiLayout
    ",GNHWK"         // OutLayout
    ",fp16"          // InDataType
    ",fp16"          // WeiDataType
    ",fp16"          // OutDataType
    ",fp32"          // AccDataType
    ",PassThrough"   // InElementwiseOperation
    ",PassThrough"   // WeiElementwiseOperation
    ",PassThrough"   // OutElementwiseOperation
    ",Default"       // ConvBackwardWeightSpecialization
    ",32"            // BlockSize
    ",16"            // MPerBlock
    ",16"            // NPerBlock
    ",32"            // KPerBlock
    ",8"             // ABK1
    ",16"            // MPerWmma
    ",16"            // NPerWmma
    ",1"             // MRepeat
    ",1"             // NRepeat
    ",Seq(4,8,1)"    // ABlockTransferThreadClusterLengths_AK0_M_AK1
    ",Seq(2,0,1)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // ABlockTransferSrcAccessOrder
    ",1"             // ABlockTransferSrcVectorDim
    ",1"             // ABlockTransferSrcScalarPerVector
    ",4"             // ABlockTransferDstScalarPerVector_AK1
    ",false"         // ABlockLdsAddExtraM
    ",Seq(4,8,1)"    // BBlockTransferThreadClusterLengths_BK0_N_BK1
    ",Seq(2,0,1)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // BBlockTransferSrcAccessOrder
    ",1"             // BBlockTransferSrcVectorDim
    ",1"             // BBlockTransferSrcScalarPerVector
    ",4"             // BBlockTransferDstScalarPerVector_BK1
    ",false"         // BBlockLdsAddExtraN
    ",1"             // CShuffleMRepeatPerShuffle
    ",1"             // CShuffleNRepeatPerShuffle
    ",Seq(1,4,1,8)"  // CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
    ",1"             // CShuffleBlockTransferScalarPerVector_NPerBlock
    ",Intrawave"     // BlkGemmPipeSched
    ",v1"            // BlkGemmPipelineVer
    ",1"             // NumGroupsToMerge
    ",fp16"          // ComputeTypeA
    ",fp16"          // ComputeTypeB
    ",1"             // TransposeTransferSrcScalarPerVector
    ",1"             // TransposeTransferDstScalarPerVector
    ">";
// Calls describe() through a BaseOperator pointer and compares the reported
// instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvTwoStageWmmaV3)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace

View File

@@ -0,0 +1,90 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_two_stage_xdl_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test:
// device_grouped_conv_bwd_weight_two_stage_nhwgc_xdl_c_shuffle_f16_generic_instances
// instantiated for 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts, the Default
// specialization, the Intrawave scheduler and pipeline version v1.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_two_stage_nhwgc_xdl_c_shuffle_f16_generic_instances<
        2,                                     // NDimSpatial
        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default,
        ck::BlockGemmPipelineScheduler::Intrawave,
        ck::BlockGemmPipelineVersion::v1>;

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Instance string the test expects describe() to report for the first instance of the tuple.
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str =
    "DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle"
    "<2"             // NDimSpatial
    ",GNHWC"         // InLayout
    ",GKYXC"         // WeiLayout
    ",GNHWK"         // OutLayout
    ",fp16"          // InDataType
    ",fp16"          // WeiDataType
    ",fp16"          // OutDataType
    ",fp32"          // AccDataType
    ",PassThrough"   // InElementwiseOperation
    ",PassThrough"   // WeiElementwiseOperation
    ",PassThrough"   // OutElementwiseOperation
    ",Default"       // ConvBackwardWeightSpecialization
    ",64"            // BlockSize
    ",16"            // MPerBlock
    ",16"            // NPerBlock
    ",32"            // KPerBlock
    ",8"             // K1
    ",16"            // MPerXDL
    ",16"            // NPerXDL
    ",1"             // MXdlPerWave
    ",1"             // NXdlPerWave
    ",Seq(4,8,1)"    // ABlockTransferThreadClusterLengths_K0_M_K1
    ",Seq(2,0,1)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // ABlockTransferSrcAccessOrder
    ",1"             // ABlockTransferSrcVectorDim
    ",1"             // ABlockTransferSrcScalarPerVector
    ",4"             // ABlockTransferDstScalarPerVector_K1
    ",false"         // ABlockLdsAddExtraM
    ",Seq(4,8,1)"    // BBlockTransferThreadClusterLengths_K0_N_K1
    ",Seq(2,0,1)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // BBlockTransferSrcAccessOrder
    ",1"             // BBlockTransferSrcVectorDim
    ",1"             // BBlockTransferSrcScalarPerVector
    ",4"             // BBlockTransferDstScalarPerVector_K1
    ",false"         // BBlockLdsAddExtraN
    ",1"             // CShuffleMXdlPerWavePerShuffle
    ",1"             // CShuffleNXdlPerWavePerShuffle
    ",Seq(1,8,1,8)"  // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
    ",1"             // CBlockTransferScalarPerVector_NWaveNPerXdl
    ",Intrawave"     // BlkGemmPipeSched
    ",v1"            // BlkGemmPipelineVer
    ",1"             // NumGroupsToMerge
    ",fp16"          // ComputeTypeA
    ",fp16"          // ComputeTypeB
    ",1"             // TransposeTransferSrcScalarPerVector
    ",1"             // TransposeTransferDstScalarPerVector
    ">";
// Calls describe() through a BaseOperator pointer and compares the reported
// instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvTwoStageXdl)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace

View File

@@ -0,0 +1,90 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
// #ifdef _NOT_DEFINED_
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_wmma_c_shuffle_bf16_instances
// instantiated for 2 spatial dimensions, NHWGC / GKYXC / NHWGK layouts and ConvBwdWeightDefault.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_wmma_c_shuffle_bf16_instances<
        2,                                                    // NDimSpatial
        ck::tensor_operation::device::instance::NHWGC,        // InLayout
        ck::tensor_operation::device::instance::GKYXC,        // WeiLayout
        ck::tensor_operation::device::instance::NHWGK,        // OutLayout
        ck::tensor_operation::device::instance::ConvBwdWeightDefault>;
// Complete instance string the test expects describe() to report for DeviceInstance.
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
// NOTE(review): InstanceTuple selects the *_bf16_instances tuple, yet the data-type and
// compute-type fields below say fp16 — confirm the expected values against the actual
// first instance before relying on this test.
const std::string expected_str = "DeviceGroupedConvBwdWeight_Wmma_CShuffle"
    "<2"             // NDimSpatial
    ",NHWGC"         // InLayout
    ",GKYXC"         // WeiLayout
    ",NHWGK"         // OutLayout
    ",fp16"          // InDataType
    ",fp16"          // WeiDataType
    ",fp16"          // OutDataType
    ",fp32"          // AccDataType
    ",PassThrough"   // InElementwiseOperation
    ",PassThrough"   // WeiElementwiseOperation
    ",PassThrough"   // OutElementwiseOperation
    ",Default"       // ConvBackwardWeightSpecialization
    ",64"            // BlockSize
    ",32"            // MPerBlock
    ",32"            // NPerBlock
    ",32"            // KPerBlock
    ",8"             // ABK1
    ",16"            // MPerWmma
    ",16"            // NPerWmma
    ",2"             // MRepeat
    ",1"             // NRepeat
    ",Seq(4,8,1)"    // ABlockTransferThreadClusterLengths_AK0_M_AK1
    ",Seq(2,0,1)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // ABlockTransferSrcAccessOrder
    ",1"             // ABlockTransferSrcVectorDim
    ",2"             // ABlockTransferSrcScalarPerVector
    ",2"             // ABlockTransferDstScalarPerVector_AK1
    ",false"         // ABlockLdsAddExtraM
    ",Seq(4,16,1)"   // BBlockTransferThreadClusterLengths_BK0_N_BK1
    ",Seq(2,0,1)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // BBlockTransferSrcAccessOrder
    ",1"             // BBlockTransferSrcVectorDim
    ",2"             // BBlockTransferSrcScalarPerVector
    ",2"             // BBlockTransferDstScalarPerVector_BK1
    ",false"         // BBlockLdsAddExtraN
    ",1"             // CShuffleMRepeatPerShuffle
    ",1"             // CShuffleNRepeatPerShuffle
    ",Seq(1,8,1,8)"  // CShuffleBlockTransferClusterLengths
    ",2"             // CShuffleBlockTransferScalarPerVector_NPerBlock
    ",Intrawave"     // BlkGemmPipeSched
    ",v1"            // BlkGemmPipelineVer
    ",fp16"          // ComputeTypeA
    ",fp16"          // ComputeTypeB
    ",1"             // MaxTransposeTransferSrcScalarPerVector
    ",1"             // MaxTransposeTransferDstScalarPerVector
    ">";
// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;

// Calls describe() through a BaseOperator pointer for the WMMA CShuffle (non-V3) variant and
// compares the reported instance string with expected_str.
// The test used to be named ...GrpConvWmmaV3 although expected_str describes
// DeviceGroupedConvBwdWeight_Wmma_CShuffle; it is renamed so the InstanceString suite does not
// carry a misleading (and potentially duplicated) V3 test name.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvWmma)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace
// #endif

View File

@@ -0,0 +1,90 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
// #ifdef _NOT_DEFINED_
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_v3_wmma_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_v3_wmma_c_shuffle_f16_instances
// instantiated for 2 spatial dimensions, NHWGC / GKYXC / NHWGK layouts and ConvBwdWeightDefault.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_v3_wmma_c_shuffle_f16_instances<
        2,                                                    // NDimSpatial
        ck::tensor_operation::device::instance::NHWGC,        // InLayout
        ck::tensor_operation::device::instance::GKYXC,        // WeiLayout
        ck::tensor_operation::device::instance::NHWGK,        // OutLayout
        ck::tensor_operation::device::instance::ConvBwdWeightDefault>;
// Complete instance string the test expects describe() to report for DeviceInstance.
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str = "DeviceGroupedConvBwdWeight_Wmma_CShuffleV3"
    "<2"             // NDimSpatial
    ",NHWGC"         // InLayout
    ",GKYXC"         // WeiLayout
    ",NHWGK"         // OutLayout
    ",fp16"          // InDataType
    ",fp16"          // WeiDataType
    ",fp16"          // OutDataType
    ",fp32"          // AccDataType
    ",PassThrough"   // InElementwiseOperation
    ",PassThrough"   // WeiElementwiseOperation
    ",PassThrough"   // OutElementwiseOperation
    ",Default"       // ConvBackwardWeightSpecialization
    ",64"            // BlockSize
    ",32"            // MPerBlock
    ",32"            // NPerBlock
    ",32"            // KPerBlock
    ",8"             // ABK1
    ",16"            // MPerWmma
    ",16"            // NPerWmma
    ",2"             // MRepeat
    ",1"             // NRepeat
    ",Seq(4,8,1)"    // ABlockTransferThreadClusterLengths_AK0_M_AK1
    ",Seq(2,0,1)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // ABlockTransferSrcAccessOrder
    ",1"             // ABlockTransferSrcVectorDim
    ",2"             // ABlockTransferSrcScalarPerVector
    ",2"             // ABlockTransferDstScalarPerVector_AK1
    ",false"         // ABlockLdsAddExtraM
    ",Seq(4,16,1)"   // BBlockTransferThreadClusterLengths_BK0_N_BK1
    ",Seq(2,0,1)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // BBlockTransferSrcAccessOrder
    ",1"             // BBlockTransferSrcVectorDim
    ",2"             // BBlockTransferSrcScalarPerVector
    ",2"             // BBlockTransferDstScalarPerVector_BK1
    ",false"         // BBlockLdsAddExtraN
    ",1"             // CShuffleMRepeatPerShuffle
    ",1"             // CShuffleNRepeatPerShuffle
    ",Seq(1,8,1,8)"  // CShuffleBlockTransferClusterLengths
    ",2"             // CShuffleBlockTransferScalarPerVector_NPerBlock
    ",Intrawave"     // BlkGemmPipeSched
    ",v1"            // BlkGemmPipelineVer
    ",fp16"          // ComputeTypeA
    ",fp16"          // ComputeTypeB
    ",1"             // MaxTransposeTransferSrcScalarPerVector
    ",1"             // MaxTransposeTransferDstScalarPerVector
    ">";
// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;

// Calls describe() through a BaseOperator pointer for the WMMA V3 variant and compares the
// reported instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvWmmaV3)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace
// #endif

View File

@@ -0,0 +1,86 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <gtest/gtest.h>
#include "ck_tile/builder/reflect/instance_traits.hpp"
#include "ck_tile/builder/reflect/conv_description.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_v3_xdl_instance.hpp"
namespace {
namespace ckr = ck_tile::reflect;
// Instance list under test: device_grouped_conv_bwd_weight_v3_xdl_c_shuffle_f16_instances
// instantiated for 2 spatial dimensions, GNHWC / GKYXC / GNHWK layouts, ConvBwdWeightDefault,
// the Intrawave scheduler and pipeline version v1.
using InstanceTuple = ck::tensor_operation::device::instance::
    device_grouped_conv_bwd_weight_v3_xdl_c_shuffle_f16_instances<
        2,                                                            // NDimSpatial
        ck::tensor_operation::device::instance::GNHWC,                // InLayout
        ck::tensor_operation::device::instance::GKYXC,                // WeiLayout
        ck::tensor_operation::device::instance::GNHWK,                // OutLayout
        ck::tensor_operation::device::instance::ConvBwdWeightDefault, // ConvBwdWeightSpecialization
        ck::BlockGemmPipelineScheduler::Intrawave,                    // BlkGemmPipeSched
        ck::BlockGemmPipelineVersion::v1>;                            // BlkGemmPipelineVer

// The test exercises element 0 of the tuple.
using DeviceInstance = std::tuple_element_t<0, InstanceTuple>;
// Complete instance string the test expects describe() to report for DeviceInstance
// (per the original note: based on the generic instance).
// Declared const: the value is only ever read by the test, so it must not be mutable
// namespace-scope state.
const std::string expected_str =
    "DeviceGroupedConvBwdWeight_Xdl_CShuffleV3"
    "<2"             // NDimSpatial
    ",GNHWC"         // InLayout
    ",GKYXC"         // WeiLayout
    ",GNHWK"         // OutLayout
    ",fp16"          // InDataType
    ",fp16"          // WeiDataType
    ",fp16"          // OutDataType
    ",fp32"          // AccDataType
    ",PassThrough"   // InElementwiseOperation
    ",PassThrough"   // WeiElementwiseOperation
    ",PassThrough"   // OutElementwiseOperation
    ",Default"       // ConvBackwardWeightSpecialization
    ",64"            // BlockSize
    ",32"            // MPerBlock
    ",32"            // NPerBlock
    ",32"            // K0PerBlock
    ",8"             // K1
    ",32"            // MPerXDL
    ",32"            // NPerXDL
    ",1"             // MXdlPerWave
    ",1"             // NXdlPerWave
    ",Seq(4,8,1)"    // ABlockTransferThreadClusterLengths_K0_M_K1
    ",Seq(2,0,1)"    // ABlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // ABlockTransferSrcAccessOrder
    ",1"             // ABlockTransferSrcVectorDim
    ",2"             // ABlockTransferSrcScalarPerVector
    ",2"             // ABlockTransferDstScalarPerVector_K1
    ",false"         // ABlockLdsAddExtraM
    ",Seq(4,16,1)"   // BBlockTransferThreadClusterLengths_K0_N_K1
    ",Seq(2,0,1)"    // BBlockTransferThreadClusterArrangeOrder
    ",Seq(1,0,2)"    // BBlockTransferSrcAccessOrder
    ",1"             // BBlockTransferSrcVectorDim
    ",2"             // BBlockTransferSrcScalarPerVector
    ",2"             // BBlockTransferDstScalarPerVector_K1
    ",false"         // BBlockLdsAddExtraN
    ",1"             // CShuffleMXdlPerWavePerShuffle
    ",1"             // CShuffleNXdlPerWavePerShuffle
    ",Seq(1,8,1,8)"  // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
    ",2"             // CBlockTransferScalarPerVector_NWaveNPerXdl
    ",Intrawave"     // BlkGemmPipeSched
    ",v1"            // BlkGemmPipelineVer
    ",fp16"          // ComputeTypeA
    ",fp16"          // ComputeTypeB
    ">";
// Calls describe() through a BaseOperator pointer for the XDL V3 variant and compares the
// reported instance string with expected_str.
TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvXdlV3)
{
    DeviceInstance instance;
    ck::tensor_operation::device::BaseOperator* const op = &instance;

    const auto description = op->describe();
    // Abort here on a null description so the dereference below is safe.
    ASSERT_NE(description, nullptr);
    EXPECT_EQ(description->instance_string(), expected_str);
}
} // namespace