From adfab9db7e648fd45ceed49ae9b59a8947cfc03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <>
Date: Tue, 30 Dec 2025 05:52:15 -0500
Subject: [PATCH] Add unit tests for instance strings.

---
 ...instance_string_bwd_weight_grp_conv_dl.cpp | 79 ++++++++++++++++
 ...bwd_weight_grp_conv_multiple_d_wmma_v3.cpp | 85 +++++++++++++++++
 ...ing_bwd_weight_grp_conv_multiple_d_xdl.cpp | 83 +++++++++++++++++
 ..._bwd_weight_grp_conv_two_stage_wmma_v3.cpp | 88 ++++++++++++++++++
 ...ring_bwd_weight_grp_conv_two_stage_xdl.cpp | 88 ++++++++++++++++++
 ...nce_string_bwd_weight_grp_conv_wmma_v3.cpp | 91 +++++++++++++++++++
 ...ance_string_bwd_weight_grp_conv_xdl_v3.cpp | 85 +++++++++++++++++
 7 files changed, 599 insertions(+)
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_dl.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_wmma_v3.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_xdl.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_wmma_v3.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_xdl.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_wmma_v3.cpp
 create mode 100644 experimental/builder/test/test_instance_string_bwd_weight_grp_conv_xdl_v3.cpp

diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_dl.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_dl.cpp
new file mode 100644
index 0000000000..9758e39901
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_dl.cpp
@@ -0,0 +1,79 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_dl_f16_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_dl_f16_instances<
+        2,                                     // NDimSpatial
+        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
+        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
+        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
+        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected string based on the generic instance
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeight_Dl"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",PassThrough"      // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",256"              // BlockSize
+    ",128"              // MPerBlock
+    ",128"              // NPerBlock
+    ",16"               // K0PerBlock
+    ",1"                // K1
+    ",4"                // M1PerThread
+    ",4"                // N1PerThread
+    ",1"                // KPerThread
+    ",Seq(8,2)"         // M1N1ThreadClusterM1Xs
+    ",Seq(8,2)"         // M1N1ThreadClusterN1Xs
+    ",Seq(1,8,1,1,1)"   // ABlockTransferThreadSliceLengths_K0_M0_M1_K1
+    ",Seq(1,2,1,128,1)" // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
+    ",Seq(0,2,3,1,4)"   // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(0,2,3,1,4)"   // ABlockTransferSrcAccessOrder
+    ",Seq(1,1,1,1,1)"   // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
+    ",Seq(0,2,3,1,4)"   // ABlockTransferSrcVectorTensorContiguousDimOrder
+    ",Seq(1,1,1,1,1)"   // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
+    ",Seq(1,1,1,8,1)"   // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
+    ",Seq(1,16,1,16,1)" // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
+    ",Seq(0,1,4,2,3)"   // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(0,1,4,2,3)"   // BBlockTransferSrcAccessOrder
+    ",Seq(1,1,1,1,1)"   // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1
+    ",Seq(0,1,4,2,3)"   // BBlockTransferSrcVectorTensorContiguousDimOrder
+    ",Seq(1,1,1,1,1)"   // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1
+    ",Seq(0,1,2,3,4,5)" // CThreadTransferSrcDstAccessOrder
+    ",5"                // CThreadTransferSrcDstVectorDim
+    ",1"                // CThreadTransferDstScalarPerVector
+    ">";
+
+// Test describe() through base class pointer for DL variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvDl)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_wmma_v3.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_wmma_v3.cpp
new file mode 100644
index 0000000000..d610ea666d
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_wmma_v3.cpp
@@ -0,0 +1,85 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_scale_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_wmma_c_shuffle_f16_scale_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_wmma_c_shuffle_f16_scale_instances<
+        2,                                     // NDimSpatial
+        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
+        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
+        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
+        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected string based on the generic instance
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeightMultipleD_Wmma_CShuffleV3"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",EmptyTuple"       // DsLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",EmptyTuple"       // DsDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",Scale"            // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",64"               // BlockSize
+    ",64"               // MPerBlock
+    ",64"               // NPerBlock
+    ",32"               // KPerBlock
+    ",8"                // ABK1
+    ",16"               // MPerWmma
+    ",16"               // NPerWmma
+    ",4"                // MRepeat
+    ",2"                // NRepeat
+    ",Seq(4,8,1)"       // ABlockTransferThreadClusterLengths_AK0_M_AK1
+    ",Seq(2,0,1)"       // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // ABlockTransferSrcAccessOrder
+    ",1"                // ABlockTransferSrcVectorDim
+    ",2"                // ABlockTransferSrcScalarPerVector
+    ",4"                // ABlockTransferDstScalarPerVector_AK1
+    ",true"             // ABlockLdsAddExtraM
+    ",Seq(4,8,1)"       // BBlockTransferThreadClusterLengths_BK0_N_BK1
+    ",Seq(2,0,1)"       // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // BBlockTransferSrcAccessOrder
+    ",1"                // BBlockTransferSrcVectorDim
+    ",2"                // BBlockTransferSrcScalarPerVector
+    ",4"                // BBlockTransferDstScalarPerVector_BK1
+    ",true"             // BBlockLdsAddExtraN
+    ",1"                // CShuffleMRepeatPerShuffle
+    ",1"                // CShuffleNRepeatPerShuffle
+    ",Seq(1,16,1,4)"    // CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+    ",2"                // CShuffleBlockTransferScalarPerVector_NPerBlock
+    ",Intrawave"        // BlkGemmPipeSched
+    ",v1"               // BlkGemmPipelineVer
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ">";
+
+// Test describe() through base class pointer for multiple-D WMMA V3 variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvMultipleDWmmaV3)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_xdl.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_xdl.cpp
new file mode 100644
index 0000000000..d9a6d2fbdf
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_multiple_d_xdl.cpp
@@ -0,0 +1,83 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_scale_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_xdl_c_shuffle_f16_scale_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_xdl_c_shuffle_f16_scale_instances<
+        2,                                     // NDimSpatial
+        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
+        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
+        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
+        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default>;
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected string based on the generic instance
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",EmptyTuple"       // DsLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",EmptyTuple"       // DsDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",Scale"            // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",64"               // BlockSize
+    ",64"               // MPerBlock
+    ",64"               // NPerBlock
+    ",4"                // K0PerBlock
+    ",8"                // K1
+    ",32"               // MPerXDL
+    ",32"               // NPerXDL
+    ",2"                // MXdlPerWave
+    ",2"                // NXdlPerWave
+    ",Seq(1,4,8,2)"     // ABlockTransferThreadClusterLengths_K0_M_K1
+    ",Seq(0,3,1,2)"     // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(0,2,1,3)"     // ABlockTransferSrcAccessOrder
+    ",2"                // ABlockTransferSrcVectorDim
+    ",2"                // ABlockTransferSrcScalarPerVector
+    ",4"                // ABlockTransferDstScalarPerVector_K1
+    ",true"             // ABlockLdsAddExtraM
+    ",Seq(1,4,8,2)"     // BBlockTransferThreadClusterLengths_K0_N_K1
+    ",Seq(0,3,1,2)"     // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(0,2,1,3)"     // BBlockTransferSrcAccessOrder
+    ",2"                // BBlockTransferSrcVectorDim
+    ",2"                // BBlockTransferSrcScalarPerVector
+    ",4"                // BBlockTransferDstScalarPerVector_K1
+    ",true"             // BBlockLdsAddExtraN
+    ",1"                // CShuffleMXdlPerWavePerShuffle
+    ",1"                // CShuffleNXdlPerWavePerShuffle
+    ",Seq(1,16,1,4)"    // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+    ",2"                // CBlockTransferScalarPerVector_NWaveNPerXdl
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ">";
+
+// Test describe() through base class pointer for multiple-D XDL variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvMultipleDXdl)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_wmma_v3.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_wmma_v3.cpp
new file mode 100644
index 0000000000..476ec7bb63
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_wmma_v3.cpp
@@ -0,0 +1,88 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_two_stage_wmma_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_two_stage_nhwgc_wmma_c_shuffle_f16_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_two_stage_nhwgc_wmma_c_shuffle_f16_instances<
+        2,                                     // NDimSpatial
+        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
+        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
+        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
+        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default,
+        ck::BlockGemmPipelineScheduler::Intrawave,
+        ck::BlockGemmPipelineVersion::v1>;
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected string based on the first instance (BlockSize=32, MPerBlock=16, NPerBlock=16, etc.)
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",PassThrough"      // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",32"               // BlockSize
+    ",16"               // MPerBlock
+    ",16"               // NPerBlock
+    ",32"               // KPerBlock
+    ",8"                // ABK1
+    ",16"               // MPerWmma
+    ",16"               // NPerWmma
+    ",1"                // MRepeat
+    ",1"                // NRepeat
+    ",Seq(4,8,1)"       // ABlockTransferThreadClusterLengths_AK0_M_AK1
+    ",Seq(2,0,1)"       // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // ABlockTransferSrcAccessOrder
+    ",1"                // ABlockTransferSrcVectorDim
+    ",1"                // ABlockTransferSrcScalarPerVector
+    ",4"                // ABlockTransferDstScalarPerVector_AK1
+    ",false"            // ABlockLdsAddExtraM
+    ",Seq(4,8,1)"       // BBlockTransferThreadClusterLengths_BK0_N_BK1
+    ",Seq(2,0,1)"       // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // BBlockTransferSrcAccessOrder
+    ",1"                // BBlockTransferSrcVectorDim
+    ",1"                // BBlockTransferSrcScalarPerVector
+    ",4"                // BBlockTransferDstScalarPerVector_BK1
+    ",false"            // BBlockLdsAddExtraN
+    ",1"                // CShuffleMRepeatPerShuffle
+    ",1"                // CShuffleNRepeatPerShuffle
+    ",Seq(1,4,1,8)"     // CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+    ",1"                // CShuffleBlockTransferScalarPerVector_NPerBlock
+    ",Intrawave"        // BlkGemmPipeSched
+    ",v1"               // BlkGemmPipelineVer
+    ",1"                // NumGroupsToMerge
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ",1"                // TransposeTransferSrcScalarPerVector
+    ",1"                // TransposeTransferDstScalarPerVector
+    ">";
+
+// Test describe() through base class pointer for two-stage WMMA V3 variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvTwoStageWmmaV3)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_xdl.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_xdl.cpp
new file mode 100644
index 0000000000..7393757e4f
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_two_stage_xdl.cpp
@@ -0,0 +1,88 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_two_stage_xdl_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_two_stage_nhwgc_xdl_c_shuffle_f16_generic_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_two_stage_nhwgc_xdl_c_shuffle_f16_generic_instances<
+        2,                                     // NDimSpatial
+        ck::tensor_layout::convolution::GNHWC, // ALayout (InLayout)
+        ck::tensor_layout::convolution::GKYXC, // BLayout (WeiLayout)
+        ck::tensor_layout::convolution::GNHWK, // ELayout (OutLayout)
+        ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default,
+        ck::BlockGemmPipelineScheduler::Intrawave,
+        ck::BlockGemmPipelineVersion::v1>;
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected string based on the first instance in the tuple
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",PassThrough"      // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",64"               // BlockSize
+    ",16"               // MPerBlock
+    ",16"               // NPerBlock
+    ",32"               // KPerBlock
+    ",8"                // K1
+    ",16"               // MPerXDL
+    ",16"               // NPerXDL
+    ",1"                // MXdlPerWave
+    ",1"                // NXdlPerWave
+    ",Seq(4,8,1)"       // ABlockTransferThreadClusterLengths_K0_M_K1
+    ",Seq(2,0,1)"       // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // ABlockTransferSrcAccessOrder
+    ",1"                // ABlockTransferSrcVectorDim
+    ",1"                // ABlockTransferSrcScalarPerVector
+    ",4"                // ABlockTransferDstScalarPerVector_K1
+    ",false"            // ABlockLdsAddExtraM
+    ",Seq(4,8,1)"       // BBlockTransferThreadClusterLengths_K0_N_K1
+    ",Seq(2,0,1)"       // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // BBlockTransferSrcAccessOrder
+    ",1"                // BBlockTransferSrcVectorDim
+    ",1"                // BBlockTransferSrcScalarPerVector
+    ",4"                // BBlockTransferDstScalarPerVector_K1
+    ",false"            // BBlockLdsAddExtraN
+    ",1"                // CShuffleMXdlPerWavePerShuffle
+    ",1"                // CShuffleNXdlPerWavePerShuffle
+    ",Seq(1,8,1,8)"     // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+    ",1"                // CBlockTransferScalarPerVector_NWaveNPerXdl
+    ",Intrawave"        // BlkGemmPipeSched
+    ",v1"               // BlkGemmPipelineVer
+    ",1"                // NumGroupsToMerge
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ",1"                // TransposeTransferSrcScalarPerVector
+    ",1"                // TransposeTransferDstScalarPerVector
+    ">";
+
+// Test describe() through base class pointer for two-stage XDL variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvTwoStageXdl)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_wmma_v3.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_wmma_v3.cpp
new file mode 100644
index 0000000000..1b7f599e80
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_wmma_v3.cpp
@@ -0,0 +1,91 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_v3_wmma_instance.hpp"
+
+namespace {
+
+// Instance list under test: device_grouped_conv_bwd_weight_v3_wmma_c_shuffle_f16_instances
+// instantiated for NDimSpatial = 2 with the NHWGC / GKYXC / NHWGK layouts and the
+// ConvBwdWeightDefault specialization.
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_v3_wmma_c_shuffle_f16_instances<
+        2,                                             // NDimSpatial
+        ck::tensor_operation::device::instance::NHWGC, // InLayout
+        ck::tensor_operation::device::instance::GKYXC, // WeiLayout
+        ck::tensor_operation::device::instance::NHWGK, // OutLayout
+        ck::tensor_operation::device::instance::ConvBwdWeightDefault>;
+
+// The expectation below describes the first instance in the tuple, so that is the
+// one the test instantiates.
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected complete instance string. Adjacent literals are concatenated by the
+// compiler; the trailing comment on each piece names the template parameter it
+// represents.
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeight_Wmma_CShuffleV3"
+    "<2"                // NDimSpatial
+    ",NHWGC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",NHWGK"            // OutLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",PassThrough"      // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",64"               // BlockSize
+    ",32"               // MPerBlock
+    ",32"               // NPerBlock
+    ",32"               // KPerBlock
+    ",8"                // ABK1
+    ",16"               // MPerWmma
+    ",16"               // NPerWmma
+    ",2"                // MRepeat
+    ",1"                // NRepeat
+    ",Seq(4,8,1)"       // ABlockTransferThreadClusterLengths_AK0_M_AK1
+    ",Seq(2,0,1)"       // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // ABlockTransferSrcAccessOrder
+    ",1"                // ABlockTransferSrcVectorDim
+    ",2"                // ABlockTransferSrcScalarPerVector
+    ",2"                // ABlockTransferDstScalarPerVector_AK1
+    ",false"            // ABlockLdsAddExtraM
+    ",Seq(4,16,1)"      // BBlockTransferThreadClusterLengths_BK0_N_BK1
+    ",Seq(2,0,1)"       // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // BBlockTransferSrcAccessOrder
+    ",1"                // BBlockTransferSrcVectorDim
+    ",2"                // BBlockTransferSrcScalarPerVector
+    ",2"                // BBlockTransferDstScalarPerVector_BK1
+    ",false"            // BBlockLdsAddExtraN
+    ",1"                // CShuffleMRepeatPerShuffle
+    ",1"                // CShuffleNRepeatPerShuffle
+    ",Seq(1,8,1,8)"     // CShuffleBlockTransferClusterLengths
+    ",2"                // CShuffleBlockTransferScalarPerVector_NPerBlock
+    ",Intrawave"        // BlkGemmPipeSched
+    ",v1"               // BlkGemmPipelineVer
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ",1"                // MaxTransposeTransferSrcScalarPerVector
+    ",1"                // MaxTransposeTransferDstScalarPerVector
+    ">";
+
+// Test describe() through base class pointer for WMMA V3 variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvWmmaV3)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace
diff --git a/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_xdl_v3.cpp b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_xdl_v3.cpp
new file mode 100644
index 0000000000..d3b6acbd14
--- /dev/null
+++ b/experimental/builder/test/test_instance_string_bwd_weight_grp_conv_xdl_v3.cpp
@@ -0,0 +1,85 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#include <string>
+#include <tuple>
+#include <gtest/gtest.h>
+#include "ck_tile/builder/reflect/instance_traits.hpp"
+#include "ck_tile/builder/reflect/conv_description.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_v3_xdl_instance.hpp"
+
+namespace {
+
+// Use the first instance from device_grouped_conv_bwd_weight_v3_xdl_c_shuffle_f16_instances
+using InstanceTuple = ck::tensor_operation::device::instance::
+    device_grouped_conv_bwd_weight_v3_xdl_c_shuffle_f16_instances<
+        2,                                                            // NDimSpatial
+        ck::tensor_operation::device::instance::GNHWC,                // InLayout
+        ck::tensor_operation::device::instance::GKYXC,                // WeiLayout
+        ck::tensor_operation::device::instance::GNHWK,                // OutLayout
+        ck::tensor_operation::device::instance::ConvBwdWeightDefault, // ConvBwdWeightSpecialization
+        ck::BlockGemmPipelineScheduler::Intrawave,                    // BlkGemmPipeSched
+        ck::BlockGemmPipelineVersion::v1>;                            // BlkGemmPipelineVer
+
+using DeviceInstance = typename std::tuple_element<0, InstanceTuple>::type;
+
+// Expected complete instance string based on the generic instance
+const std::string expected_str =
+    "DeviceGroupedConvBwdWeight_Xdl_CShuffleV3"
+    "<2"                // NDimSpatial
+    ",GNHWC"            // InLayout
+    ",GKYXC"            // WeiLayout
+    ",GNHWK"            // OutLayout
+    ",fp16"             // InDataType
+    ",fp16"             // WeiDataType
+    ",fp16"             // OutDataType
+    ",fp32"             // AccDataType
+    ",PassThrough"      // InElementwiseOperation
+    ",PassThrough"      // WeiElementwiseOperation
+    ",PassThrough"      // OutElementwiseOperation
+    ",Default"          // ConvBackwardWeightSpecialization
+    ",64"               // BlockSize
+    ",32"               // MPerBlock
+    ",32"               // NPerBlock
+    ",32"               // K0PerBlock
+    ",8"                // K1
+    ",32"               // MPerXDL
+    ",32"               // NPerXDL
+    ",1"                // MXdlPerWave
+    ",1"                // NXdlPerWave
+    ",Seq(4,8,1)"       // ABlockTransferThreadClusterLengths_K0_M_K1
+    ",Seq(2,0,1)"       // ABlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // ABlockTransferSrcAccessOrder
+    ",1"                // ABlockTransferSrcVectorDim
+    ",2"                // ABlockTransferSrcScalarPerVector
+    ",2"                // ABlockTransferDstScalarPerVector_K1
+    ",false"            // ABlockLdsAddExtraM
+    ",Seq(4,16,1)"      // BBlockTransferThreadClusterLengths_K0_N_K1
+    ",Seq(2,0,1)"       // BBlockTransferThreadClusterArrangeOrder
+    ",Seq(1,0,2)"       // BBlockTransferSrcAccessOrder
+    ",1"                // BBlockTransferSrcVectorDim
+    ",2"                // BBlockTransferSrcScalarPerVector
+    ",2"                // BBlockTransferDstScalarPerVector_K1
+    ",false"            // BBlockLdsAddExtraN
+    ",1"                // CShuffleMXdlPerWavePerShuffle
+    ",1"                // CShuffleNXdlPerWavePerShuffle
+    ",Seq(1,8,1,8)"     // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+    ",2"                // CBlockTransferScalarPerVector_NWaveNPerXdl
+    ",Intrawave"        // BlkGemmPipeSched
+    ",v1"               // BlkGemmPipelineVer
+    ",fp16"             // ComputeTypeA
+    ",fp16"             // ComputeTypeB
+    ">";
+
+// Test describe() through base class pointer for XDL V3 variant
+TEST(InstanceString, DescribeReturnsCorrectValueForBwdWeightGrpConvXdlV3)
+{
+    DeviceInstance device_instance;
+    ck::tensor_operation::device::BaseOperator* base_ptr = &device_instance;
+    auto desc = base_ptr->describe();
+    ASSERT_NE(desc, nullptr); // fatal on failure, so desc is never dereferenced when null
+    EXPECT_EQ(desc->instance_string(), expected_str);
+}
+
+} // namespace