From 78635fd74ec86a1d337df69982cb7cc189d35d51 Mon Sep 17 00:00:00 2001 From: kiefer Date: Thu, 28 Aug 2025 15:57:30 +0000 Subject: [PATCH] Add 1D instances --- .../gpu/grouped_convolution_forward.hpp | 8 +- ...ed_convolution_forward_wmma_cshufflev3.inc | 78 +++++++++---------- .../gpu/grouped_conv1d_fwd/CMakeLists.txt | 6 +- ...shufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp | 55 +++++++++++++ ...cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp | 55 +++++++++++++ ...shufflev3_gnwc_gkxc_gnwk_int8_instance.cpp | 55 +++++++++++++ test/grouped_convnd_fwd/CMakeLists.txt | 7 +- 7 files changed, 215 insertions(+), 49 deletions(-) create mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp create mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp create mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instance.cpp diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp index 644f91eec1..48775f1164 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp @@ -735,7 +735,7 @@ struct DeviceOperationInstanceFactory && is_same_v && is_same_v) { - // add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instances(op_ptrs); + add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instances(op_ptrs); } #endif #ifdef CK_ENABLE_BF16 @@ -745,7 +745,8 @@ struct DeviceOperationInstanceFactory && is_same_v) { - // add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instances(op_ptrs); + add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instances( + op_ptrs); } #endif #ifdef CK_ENABLE_INT8 @@ -753,7 +754,8 @@ struct DeviceOperationInstanceFactory && is_same_v && is_same_v) { - // add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instances(op_ptrs); + add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instances( + op_ptrs); } #endif } diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc index ddc0bbe73e..e6dcc6ec1f 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc @@ -10,51 +10,51 @@ namespace instance { #ifdef CK_ENABLE_BF16 // grouped conv1d forward, GNWC/GKXC/GNWK -// void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instances( -// std::vector>>& instances); +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instances( + std::vector>>& instances); #endif #ifdef CK_ENABLE_FP16 -// void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instances( -// std::vector>>& instances); +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instances( + std::vector>>& instances); #endif #ifdef CK_ENABLE_INT8 -// void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instances( -// std::vector>>& instances); +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instances( + std::vector>>& instances); #endif #ifdef CK_ENABLE_BF16 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt index ca4ea515bb..e445851ae2 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt @@ -1,7 +1,11 @@ -# ONLY XDL_KERNELS +# ONLY XDL_AND_WMMA_KERNELS add_instance_library(device_grouped_conv1d_fwd_instance xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp + + wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp + wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp + wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instance.cpp ) diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp new file mode 100644 index 0000000000..49373e8864 --- /dev/null +++ b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instance.cpp @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" +#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" + +namespace ck { +namespace tensor_operation { +namespace device { +namespace instance { + +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_bf16_instances( + std::vector>>& instances) +{ + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwdDefault>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1S1P0>{}); +} + +} // namespace instance +} // namespace device +} // namespace tensor_operation +} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp new file mode 100644 index 0000000000..300d6e91b2 --- /dev/null +++ b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instance.cpp @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" +#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" + +namespace ck { +namespace tensor_operation { +namespace device { +namespace instance { + +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_f16_instances( + std::vector>>& instances) +{ + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_f16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwdDefault>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_f16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_f16_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1S1P0>{}); +} + +} // namespace instance +} // namespace device +} // namespace tensor_operation +} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instance.cpp new file mode 100644 index 0000000000..100ae12ff4 --- /dev/null +++ b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/wmma/device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instance.cpp @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" +#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" + +namespace ck { +namespace tensor_operation { +namespace device { +namespace instance { + +void add_device_grouped_conv1d_fwd_wmma_cshufflev3_gnwc_gkxc_gnwk_int8_instances( + std::vector>>& instances) +{ + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_int8_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwdDefault>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_int8_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_int8_instances<1, + GNWC, + GKXC, + Empty_Tuple, + GNWK, + ConvFwd1x1S1P0>{}); +} + +} // namespace instance +} // namespace device +} // namespace tensor_operation +} // namespace ck diff --git a/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt index 6ff19ef558..2a3e7f6def 100644 --- a/test/grouped_convnd_fwd/CMakeLists.txt +++ b/test/grouped_convnd_fwd/CMakeLists.txt @@ -1,11 +1,6 @@ -# TODO: Put the 3d instances back if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12") add_gtest_executable(test_grouped_convnd_fwd test_grouped_convnd_fwd.cpp) - if((GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12") AND (NOT GPU_TARGETS MATCHES "gfx9")) - target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance) - else() - target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance) - endif() + target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance) endif() if(GPU_TARGETS MATCHES "gfx9")