Fix per-layer conv2d int8 CPU verification reference path (#6656)

Case: `example_conv2d_fwd_xdl_perlayer_quantization_int8.exe 1 0`

## Motivation

## Technical Details

## Test Plan

## Test Result

## Submission Checklist

- [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
2026-05-14 02:02:46 +00:00 · 2026-04-23 22:08:50 +08:00
parent 1bcace3e41
commit 2aea22cae1
1 changed file with 11 additions and 4 deletions
--- a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc
+++ b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc
@@ -108,28 +108,35 @@ bool run_grouped_conv_fwd(bool do_verification,

    if(do_verification)
    {
+        Tensor<AccDataType> c_host(out_g_n_k_wos_desc);
+
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InDataType,
                                                                     WeiDataType,
-                                                                     OutDataType,
+                                                                     AccDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
-                                                                     OutElementOp>();
+                                                                     PassThrough>();

        auto ref_invoker  = ref_conv.MakeInvoker();
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
-                                                  out_host,
+                                                  c_host,
                                                  conv_param.conv_filter_strides_,
                                                  conv_param.conv_filter_dilations_,
                                                  conv_param.input_left_pads_,
                                                  conv_param.input_right_pads_,
                                                  in_element_op,
                                                  wei_element_op,
-                                                  out_element_op);
+                                                  PassThrough{});

        ref_invoker.Run(ref_argument);

+        out_host.ForEach([&](auto&, auto idx)
+        {
+            out_element_op(out_host(idx), c_host(idx));
+        });
+
        out_device_buf.FromDevice(out_device.mData.data());

        pass &=