mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
Extend available elementwise operations with conv examples (#995)
* Extend available elementwise operations with conv examples
* Fixes
* Remove unneeded convert
* Update CMakeFile and dir name
This commit is contained in:
@@ -128,11 +128,9 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
}
|
||||
}
|
||||
|
||||
float v_out;
|
||||
|
||||
arg.out_element_op_(v_out, v_acc);
|
||||
|
||||
arg.output_(g, n, k, wo) = ck::type_convert<OutDataType>(v_out);
|
||||
OutDataType v_out;
|
||||
arg.out_element_op_(v_out, ck::type_convert<OutDataType>(v_acc));
|
||||
arg.output_(g, n, k, wo) = v_out;
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func,
|
||||
@@ -184,11 +182,9 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
}
|
||||
}
|
||||
|
||||
float v_out;
|
||||
|
||||
arg.out_element_op_(v_out, v_acc);
|
||||
|
||||
arg.output_(g, n, k, ho, wo) = ck::type_convert<OutDataType>(v_out);
|
||||
OutDataType v_out;
|
||||
arg.out_element_op_(v_out, ck::type_convert<OutDataType>(v_acc));
|
||||
arg.output_(g, n, k, ho, wo) = v_out;
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func,
|
||||
@@ -253,11 +249,9 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
}
|
||||
}
|
||||
|
||||
float v_out;
|
||||
|
||||
arg.out_element_op_(v_out, v_acc);
|
||||
|
||||
arg.output_(g, n, k, d_o, ho, wo) = ck::type_convert<OutDataType>(v_out);
|
||||
OutDataType v_out;
|
||||
arg.out_element_op_(v_out, ck::type_convert<OutDataType>(v_acc));
|
||||
arg.output_(g, n, k, d_o, ho, wo) = v_out;
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func,
|
||||
|
||||
Reference in New Issue
Block a user