diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc index 02228d7654..26c3165446 100644 --- a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc +++ b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc @@ -108,28 +108,35 @@ bool run_grouped_conv_fwd(bool do_verification, if(do_verification) { + Tensor c_host(out_g_n_k_wos_desc); + auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd(); + PassThrough>(); auto ref_invoker = ref_conv.MakeInvoker(); auto ref_argument = ref_conv.MakeArgument(in, wei, - out_host, + c_host, conv_param.conv_filter_strides_, conv_param.conv_filter_dilations_, conv_param.input_left_pads_, conv_param.input_right_pads_, in_element_op, wei_element_op, - out_element_op); + PassThrough{}); ref_invoker.Run(ref_argument); + out_host.ForEach([&](auto&, auto idx) + { + out_element_op(out_host(idx), c_host(idx)); + }); + out_device_buf.FromDevice(out_device.mData.data()); pass &=