diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp
index 348bcaef8a..1bea403afa 100644
--- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp
@@ -23,20 +23,19 @@ using ScaleAdd = ck::tensor_operation::element_wise::ScaleAdd;
 
 #ifdef CK_ENABLE_BF16
 // grouped conv3d forward multi AB scaleadd, NDHWGC/GKZYXC/NDHWGK
-// TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347
-// void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
-//     std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
-//                                                                 NDHWGC,
-//                                                                 GKZYXC,
-//                                                                 ck::Tuple<>,
-//                                                                 NDHWGK,
-//                                                                 ck::Tuple<BF16, BF16>,
-//                                                                 ck::Tuple<BF16, BF16>,
-//                                                                 ck::Tuple<>,
-//                                                                 BF16,
-//                                                                 ScaleAdd,
-//                                                                 ScaleAdd,
-//                                                                 PassThrough>>>& instances);
+void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                ck::Tuple<BF16, BF16>,
+                                                                ck::Tuple<BF16, BF16>,
+                                                                ck::Tuple<>,
+                                                                BF16,
+                                                                ScaleAdd,
+                                                                ScaleAdd,
+                                                                PassThrough>>>& instances);
 #endif
 
 #ifdef CK_ENABLE_FP16
@@ -152,15 +151,13 @@ struct DeviceOperationInstanceFactory<
         }
 #endif
 #ifdef CK_ENABLE_BF16
-        // TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347
-        // if constexpr(is_same_v<ADataType, ck::Tuple<BF16, BF16>> &&
-        //              is_same_v<BDataType, ck::Tuple<BF16, BF16>> &&
-        //              is_same_v<EDataType, BF16> && is_same_v<DsDataType, ck::Tuple<>>)
-        // {
-        //     add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
-        //         op_ptrs);
-        // }
+        if constexpr(is_same_v<ADataType, ck::Tuple<BF16, BF16>> &&
+                     is_same_v<BDataType, ck::Tuple<BF16, BF16>> &&
+                     is_same_v<EDataType, BF16> && is_same_v<DsDataType, ck::Tuple<>>)
+        {
+            add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
+                op_ptrs);
+        }
 #endif
 #ifdef CK_ENABLE_INT8
         if constexpr(is_same_v<ADataType, ck::Tuple<int8_t, int8_t>> &&
diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
index d5b9da86c1..c7801f02ce 100644
--- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
@@ -9,43 +9,42 @@ namespace tensor_operation {
 namespace device {
 namespace instance {
 
-// TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347
-// void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
-//     std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
-//                                                                 NDHWGC,
-//                                                                 GKZYXC,
-//                                                                 ck::Tuple<>,
-//                                                                 NDHWGK,
-//                                                                 ck::Tuple<BF16, BF16>,
-//                                                                 ck::Tuple<BF16, BF16>,
-//                                                                 ck::Tuple<>,
-//                                                                 BF16,
-//                                                                 ScaleAdd,
-//                                                                 ScaleAdd,
-//                                                                 PassThrough>>>& instances)
-// {
-//     add_device_operation_instances(
-//         instances,
-//         device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
-//                                                                NDHWGC,
-//                                                                GKZYXC,
-//                                                                NDHWGK,
-//                                                                ConvFwdDefault>{});
-//     add_device_operation_instances(
-//         instances,
-//         device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
-//                                                                NDHWGC,
-//                                                                GKZYXC,
-//                                                                NDHWGK,
-//                                                                ConvFwd1x1P0>{});
-//     add_device_operation_instances(
-//         instances,
-//         device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
-//                                                                NDHWGC,
-//                                                                GKZYXC,
-//                                                                NDHWGK,
-//                                                                ConvFwd1x1S1P0>{});
-// }
+void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
+    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
+                                                                NDHWGC,
+                                                                GKZYXC,
+                                                                ck::Tuple<>,
+                                                                NDHWGK,
+                                                                ck::Tuple<BF16, BF16>,
+                                                                ck::Tuple<BF16, BF16>,
+                                                                ck::Tuple<>,
+                                                                BF16,
+                                                                ScaleAdd,
+                                                                ScaleAdd,
+                                                                PassThrough>>>& instances)
+{
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
+                                                               NDHWGC,
+                                                               GKZYXC,
+                                                               NDHWGK,
+                                                               ConvFwdDefault>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
+                                                               NDHWGC,
+                                                               GKZYXC,
+                                                               NDHWGK,
+                                                               ConvFwd1x1P0>{});
+    add_device_operation_instances(
+        instances,
+        device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
+                                                               NDHWGC,
+                                                               GKZYXC,
+                                                               NDHWGK,
+                                                               ConvFwd1x1S1P0>{});
+}
 
 } // namespace instance
 } // namespace device