From fda0e3e6eeedf767929845b3ac89ba6de1b6e4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Kocot?= Date: Tue, 2 Jan 2024 11:36:45 +0100 Subject: [PATCH] Revert "[SWDEV-435347] disable instances failed with mainlien compiler (#1077)" (#1101) This reverts commit df67a32c08db7a0eddc3b3d197d9f2d7c7e590d8. [ROCm/composable_kernel commit: a35e466c86cbb513d7900825d7dca4698541808b] --- ...rouped_convolution_forward_scaleadd_ab.hpp | 43 +++++------ ..._ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp | 73 +++++++++---------- 2 files changed, 56 insertions(+), 60 deletions(-) diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp index 348bcaef8a..1bea403afa 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp @@ -23,20 +23,19 @@ using ScaleAdd = ck::tensor_operation::element_wise::ScaleAdd; #ifdef CK_ENABLE_BF16 // grouped conv3d forward multi AB scaleadd, NDHWGC/GKZYXC/NDHWGK -// TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347 -// void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( -// std::vector, -// NDHWGK, -// ck::Tuple, -// ck::Tuple, -// ck::Tuple<>, -// BF16, -// ScaleAdd, -// ScaleAdd, -// PassThrough>>>& instances); +void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( + std::vector, + NDHWGK, + ck::Tuple, + ck::Tuple, + ck::Tuple<>, + BF16, + ScaleAdd, + ScaleAdd, + PassThrough>>>& instances); #endif #ifdef CK_ENABLE_FP16 @@ -152,15 +151,13 @@ struct DeviceOperationInstanceFactory> && - // is_same_v> && - // is_same_v && is_same_v) - // { - // add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( - // op_ptrs); - // } + if constexpr(is_same_v> && + is_same_v> && + is_same_v && is_same_v) + { + add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( + op_ptrs); + } #endif #ifdef CK_ENABLE_INT8 if constexpr(is_same_v> && diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp index d5b9da86c1..c7801f02ce 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp @@ -9,43 +9,42 @@ namespace tensor_operation { namespace device { namespace instance { -// TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347 -// void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( -// std::vector, -// NDHWGK, -// ck::Tuple, -// ck::Tuple, -// ck::Tuple<>, -// BF16, -// ScaleAdd, -// ScaleAdd, -// PassThrough>>>& instances) -// { -// add_device_operation_instances( -// instances, -// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, -// NDHWGC, -// GKZYXC, -// NDHWGK, -// ConvFwdDefault>{}); -// add_device_operation_instances( -// instances, -// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, -// NDHWGC, -// GKZYXC, -// NDHWGK, -// ConvFwd1x1P0>{}); -// add_device_operation_instances( -// instances, -// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, -// NDHWGC, -// GKZYXC, -// NDHWGK, -// ConvFwd1x1S1P0>{}); -// } +void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances( + std::vector, + NDHWGK, + ck::Tuple, + ck::Tuple, + ck::Tuple<>, + BF16, + ScaleAdd, + ScaleAdd, + PassThrough>>>& instances) +{ + add_device_operation_instances( + instances, + device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, + NDHWGC, + GKZYXC, + NDHWGK, + ConvFwdDefault>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, + NDHWGC, + GKZYXC, + NDHWGK, + ConvFwd1x1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3, + NDHWGC, + GKZYXC, + NDHWGK, + ConvFwd1x1S1P0>{}); +} } // namespace instance } // namespace device