Add dynamic elementwise op (#1426)

* Add dynamic elementwise op Co-authored-by: ThruptiRajLakshmanaGowda <thruptiraj.lakshmanagowda@amd.com> * CI issues fix * Custom parameter value for dynamic functions - Comments addressed --------- Co-authored-by: ThruptiRajLakshmanaGowda <thruptiraj.lakshmanagowda@amd.com> Co-authored-by: ThruptiRajLakshmanaGowda <tlakshma@amd.com>
2026-05-19 20:40:07 +00:00 · 2024-10-26 15:22:37 +02:00
parent 54f0e6f4bb
commit 31bf253aeb
32 changed files with 2194 additions and 170 deletions
--- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/CMakeLists.txt
@@ -0,0 +1,8 @@
+# ONLY XDL_KERNELS
+set(GROUPED_CONV2D_FWD_DYNAMIC_OP
+   xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
+   xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
+   xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
+   xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp)
+
+add_instance_library(device_grouped_conv2d_fwd_dynamic_op_instance ${GROUPED_CONV2D_FWD_DYNAMIC_OP})
--- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
+                                                                NHWGC,
+                                                                GKYXC,
+                                                                ck::Tuple<>,
+                                                                NHWGK,
+                                                                BF16,
+                                                                BF16,
+                                                                ck::Tuple<>,
+                                                                BF16,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
+                                                                NHWGC,
+                                                                GKYXC,
+                                                                ck::Tuple<>,
+                                                                NHWGK,
+                                                                F16,
+                                                                F16,
+                                                                ck::Tuple<>,
+                                                                F16,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
+                                                                NHWGC,
+                                                                GKYXC,
+                                                                ck::Tuple<>,
+                                                                NHWGK,
+                                                                F32,
+                                                                F32,
+                                                                ck::Tuple<>,
+                                                                F32,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<2,
+                                                             NHWGC,
+                                                             GKYXC,
+                                                             Tuple<>,
+                                                             NHWGK,
+                                                             ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
+                                                                NHWGC,
+                                                                GKYXC,
+                                                                ck::Tuple<>,
+                                                                NHWGK,
+                                                                int8_t,
+                                                                int8_t,
+                                                                ck::Tuple<>,
+                                                                int8_t,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<2,
+                                                              NHWGC,
+                                                              GKYXC,
+                                                              Tuple<>,
+                                                              NHWGK,
+                                                              ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/CMakeLists.txt
@@ -0,0 +1,8 @@
+# ONLY XDL_KERNELS
+set(GROUPED_CONV3D_FWD_DYNAMIC_OP
+   xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
+   xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
+   xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
+   xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp)
+
+add_instance_library(device_grouped_conv3d_fwd_dynamic_op_instance ${GROUPED_CONV3D_FWD_DYNAMIC_OP})
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                BF16,
+                                                                BF16,
+                                                                ck::Tuple<>,
+                                                                BF16,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                F16,
+                                                                F16,
+                                                                ck::Tuple<>,
+                                                                F16,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                F32,
+                                                                F32,
+                                                                ck::Tuple<>,
+                                                                F32,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<3,
+                                                             NDHWGC,
+                                                             GKZYXC,
+                                                             Tuple<>,
+                                                             NDHWGK,
+                                                             ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_dynamic_op_instance.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                int8_t,
+                                                                int8_t,
+                                                                ck::Tuple<>,
+                                                                int8_t,
+                                                                PassThrough,
+                                                                PassThrough,
+                                                                DynamicUnaryOp>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<3,
+                                                              NDHWGC,
+                                                              GKZYXC,
+                                                              Tuple<>,
+                                                              NDHWGK,
+                                                              ConvFwd1x1S1P0>{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck