diff --git a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp index 7ac3c4e239..9431a8cde4 100644 --- a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp +++ b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp @@ -68,7 +68,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle using DeviceReduceInstance = ck::tensor_operation::device::DeviceReduceMultiBlock& input, host_output_scaled_casted_transposed(m, k) = y1; const OutputDataType y_fabs = ck::type_convert(ck::math::abs(ck::type_convert(y0))); - host_output_amax(0) = ck::math::max(y_fabs, host_output_amax(0)); + host_output_amax(0) = ck::type_convert(ck::math::max( + ck::type_convert(y_fabs), ck::type_convert(host_output_amax(0)))); } } }