mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[rocm-libraries] ROCm/rocm-libraries#4273 (commit 591f504)
[CK] Add fwd conv group merging to v3 conv instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes Added conv group merging to the (universal) V3 fwd conv pipeline. The new instance improves fwd conv performance when the number of input/output channel per group is low. On MI300 (`gfx942`) we get | CK prof command | Baseline (TFLOPS) | V3 group merging (TFLOPS) | |:-----|:------:|------:| | grouped_conv_fwd 1 1 1 0 1 0 1 2 32 32 4 4 3 3 200 200 1 1 1 1 1 1 1 1 | 3.86035 | 8.36796 | | grouped_conv_fwd 1 1 1 0 1 0 1 2 32 32 8 8 3 3 200 200 2 2 1 1 1 1 1 1 | 10.1867 | 13.4677 | | grouped_conv_fwd 1 1 1 0 1 0 1 2 32 32 8 8 3 3 100 100 1 2 1 1 1 1 1 1 | 11.7875 | 16.3657 |
This commit is contained in:
committed by
assistant-librarian[bot]
parent
4266f867d6
commit
57d26db844
@@ -83,7 +83,8 @@ TEST(InstanceTraits, V3ExtractsAllFieldsCorrectly)
|
||||
ck::BlockGemmPipelineVersion::v1, // BlkGemmPipelineVer
|
||||
ck::half_t, // AComputeDataType
|
||||
ck::half_t, // BComputeDataType
|
||||
false>;
|
||||
false, // DirectLoad
|
||||
1>; // NumGroupsToMerge
|
||||
|
||||
// Use InstanceTraits to extract compile-time information
|
||||
using Traits = ck_tile::reflect::InstanceTraits<DeviceInstance>;
|
||||
@@ -225,7 +226,8 @@ TEST(InstanceTraits, V3InstanceStringReturnsCorrectFormat)
|
||||
ck::BlockGemmPipelineVersion::v1, // BlkGemmPipelineVer
|
||||
ck::half_t, // AComputeDataType
|
||||
ck::half_t, // BComputeDataType
|
||||
false>; // DirectLoad
|
||||
false, // DirectLoad
|
||||
1>; // NumGroupsToMerge
|
||||
|
||||
std::string instance_str = ck_tile::reflect::instance_string<DeviceInstance>();
|
||||
|
||||
@@ -278,7 +280,8 @@ TEST(InstanceTraits, V3InstanceStringReturnsCorrectFormat)
|
||||
",v1" // BlkGemmPipelineVer
|
||||
",fp16" // AComputeDataType
|
||||
",fp16" // BComputeDataType
|
||||
",false>"; // DirectLoad
|
||||
",false" // DirectLoad
|
||||
",1>"; // NumGroupsToMerge
|
||||
|
||||
EXPECT_EQ(instance_str, expected_str);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user