From ea7f5faa3ee46bb415bdc731bb162ae8b8be5310 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@streamhpc.com>
Date: Fri, 24 Oct 2025 18:13:20 +0200
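Subject: [PATCH] ck-builder: ck factory grouped conv fwd scale

Add a CK factory instance test for grouped convolution forward with the
Scale element-wise operation (3 spatial dims, NDHWGC/GKZYXC/NDHWGK),
covering the F32, F32 with TF32 compute type, F16, BF16 and S8 cases.
The expected instance lists currently contain only an empty string.

Also rename the second declaration of
add_device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instances
in grouped_convolution_forward_scale.hpp to
add_device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_tf32_instances.
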
---
 experimental/builder/test/CMakeLists.txt      |   1 +
 ...tory_grouped_convolution_forward_scale.cpp | 115 ++++++++++++++++++
 .../gpu/grouped_convolution_forward_scale.hpp |   2 +-
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 experimental/builder/test/test_ck_factory_grouped_convolution_forward_scale.cpp
diff --git a/experimental/builder/test/CMakeLists.txt b/experimental/builder/test/CMakeLists.txt
index 003dfacb35..2dbc1bc0ca 100644
--- a/experimental/builder/test/CMakeLists.txt
+++ b/experimental/builder/test/CMakeLists.txt
@@ -33,4 +33,5 @@ endfunction()
 add_ck_factory_test(test_testing_utils test_testing_utils.cpp)
 add_ck_factory_test(test_ck_factory_grouped_convolution_forward test_ck_factory_grouped_convolution_forward.cpp)
 add_ck_factory_test(test_ck_factory_grouped_convolution_forward_convscale test_ck_factory_grouped_convolution_forward_convscale.cpp)
+add_ck_factory_test(test_ck_factory_grouped_convolution_forward_scale test_ck_factory_grouped_convolution_forward_scale.cpp)
 add_ck_factory_test(test_ck_factory_grouped_convolution_forward_scaleadd_ab test_ck_factory_grouped_convolution_forward_scaleadd_ab.cpp)
diff --git a/experimental/builder/test/test_ck_factory_grouped_convolution_forward_scale.cpp b/experimental/builder/test/test_ck_factory_grouped_convolution_forward_scale.cpp
new file mode 100644
index 0000000000..428d1c81f3
--- /dev/null
+++ b/experimental/builder/test/test_ck_factory_grouped_convolution_forward_scale.cpp
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
+
+#include <ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp>
+
+#include "testing_utils.hpp"
+
+using ck_tile::test::InstanceSet;
+using ck_tile::test::InstancesMatch;
+
+namespace {
+// Settings shared by every test case below: spatial-dimension count and tensor layout tags.
+constexpr static auto NumDimSpatial = 3;
+using InLayout                      = ck::tensor_layout::convolution::NDHWGC;
+using WeiLayout                     = ck::tensor_layout::convolution::GKZYXC;
+using OutLayout                     = ck::tensor_layout::convolution::NDHWGK;
+
+using ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD;
+using ck::tensor_operation::element_wise::PassThrough;
+using ck::tensor_operation::element_wise::Scale;
+// Device-op type each case asks the factory for; T is the input, weight and output data type.
+template <typename T, typename ComputeType = T>
+using DeviceOp = DeviceGroupedConvFwdMultipleABD<NumDimSpatial,
+                                                 InLayout,
+                                                 WeiLayout,
+                                                 ck::Tuple<>, // DsLayout
+                                                 OutLayout,
+                                                 T,           // InDataType
+                                                 T,           // WeiDataType
+                                                 ck::Tuple<>, // DsDataType
+                                                 T,           // OutDataType
+                                                 PassThrough,
+                                                 PassThrough,
+                                                 Scale,
+                                                 ComputeType>;
+
+} // namespace
+
+template <typename Case> // Case provides a DeviceOp alias and a static `expected` list
+struct CkFactoryTestConvFwd : public testing::Test
+{
+    static auto get_actual_instances()
+    {
+        return InstanceSet::from_factory<typename Case::DeviceOp>();
+    }
+    // Builds the reference InstanceSet from Case::expected.
+    static auto get_expected_instances() { return InstanceSet(Case::expected); }
+};
+
+struct F32
+{
+    using DeviceOp = ::DeviceOp<float>; // T = float; ComputeType defaults to T
+    // NOTE(review): only "" is listed - presumably a placeholder; confirm before merging.
+    constexpr static auto expected = {
+        // clang-format off
+        ""
+        // clang-format on
+    };
+};
+
+struct F32_TF32
+{
+    using DeviceOp = ::DeviceOp<float, ck::tf32_t>; // T = float; ComputeType = ck::tf32_t
+    // NOTE(review): only "" is listed - presumably a placeholder; confirm before merging.
+    constexpr static auto expected = {
+        // clang-format off
+        ""
+        // clang-format on
+    };
+};
+
+struct F16
+{
+    using DeviceOp = ::DeviceOp<ck::half_t>; // T = ck::half_t; ComputeType defaults to T
+    // NOTE(review): only "" is listed - presumably a placeholder; confirm before merging.
+    constexpr static auto expected = {
+        // clang-format off
+        ""
+        // clang-format on
+    };
+};
+
+struct BF16
+{
+    using DeviceOp = ::DeviceOp<ck::bhalf_t>; // T = ck::bhalf_t; ComputeType defaults to T
+    // NOTE(review): only "" is listed - presumably a placeholder; confirm before merging.
+    constexpr static auto expected = {
+        // clang-format off
+        ""
+        // clang-format on
+    };
+};
+
+struct S8
+{
+    using DeviceOp = ::DeviceOp<int8_t>; // T = int8_t; ComputeType defaults to T
+    // NOTE(review): only "" is listed - presumably a placeholder; confirm before merging.
+    constexpr static auto expected = {
+        // clang-format off
+        ""
+        // clang-format on
+    };
+};
+
+using TestTypes = ::testing::Types<F32, F32_TF32, F16, BF16, S8>;
+
+TYPED_TEST_SUITE(CkFactoryTestConvFwd, TestTypes);
+// Runs once per case: the factory's instance set is checked against the case's expected set.
+TYPED_TEST(CkFactoryTestConvFwd, TestInstances)
+{
+    auto actual   = TestFixture::get_actual_instances();
+    auto expected = TestFixture::get_expected_instances();
+
+    EXPECT_THAT(actual, InstancesMatch(expected));
+}
diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp
index d11c80babf..2d14ee908e 100644
--- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp
@@ -69,7 +69,7 @@ void add_device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instances(
                                                                 PassThrough,
                                                                 Scale>>>& instances);
 
-void add_device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instances(
+void add_device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_tf32_instances(
     std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                 NDHWGC,
                                                                 GKZYXC,