Fix per-layer conv2d int8 CPU verification reference path (#6656)

Case: `example_conv2d_fwd_xdl_perlayer_quantization_int8.exe 1 0`

## Motivation

<!-- Explain the purpose of this PR and the goals it aims to achieve.
-->

## Technical Details

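<!-- Explain the changes along with any relevant GitHub links. -->
The host reference convolution now writes its raw result into an `AccDataType` tensor (`c_host`) using `PassThrough` as the output element op; `out_element_op` is then applied element by element to produce `out_host` for comparison against the device output.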

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at <https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests>.
This commit is contained in:
KateJu
2026-04-23 22:08:50 +08:00
committed by GitHub
parent 1bcace3e41
commit 2aea22cae1

View File

@@ -108,28 +108,35 @@ bool run_grouped_conv_fwd(bool do_verification,
if(do_verification)
{
Tensor<AccDataType> c_host(out_g_n_k_wos_desc);
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
InDataType,
WeiDataType,
OutDataType,
AccDataType,
InElementOp,
WeiElementOp,
OutElementOp>();
PassThrough>();
auto ref_invoker = ref_conv.MakeInvoker();
auto ref_argument = ref_conv.MakeArgument(in,
wei,
out_host,
c_host,
conv_param.conv_filter_strides_,
conv_param.conv_filter_dilations_,
conv_param.input_left_pads_,
conv_param.input_right_pads_,
in_element_op,
wei_element_op,
out_element_op);
PassThrough{});
ref_invoker.Run(ref_argument);
out_host.ForEach([&](auto&, auto idx)
{
out_element_op(out_host(idx), c_host(idx));
});
out_device_buf.FromDevice(out_device.mData.data());
pass &=