mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 05:31:24 +00:00
Update to the Reduction API and instances (#476)
* Simplify the macros for declaring and defining the add_device_reduce_instance_xxxx() instances * Change the types of lengths and strides from std::vector to std::array for the reduction device interfaces * Remove DeviceSoftmaxImpl's dependency on DeviceReduceMultiblock * Split the cpp and hpp files for reduction instances to enable more parallel compilation * Remove the use of macros for declaring reduction instances and instance references * Update to add_device_reduce_instance_xxxx templated functions * Use ReduceOperation+InElementwiseOp+AccElementwiseOp to replace the ReduceOpId in defining add_reduce_instance_xxxx() templates * Change return format
This commit is contained in:
@@ -138,13 +138,17 @@ bool reduce_multiblock_atomic_add_test(bool do_verification,
|
||||
if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size())
|
||||
return;
|
||||
|
||||
std::array<int, ShapeType::NumReduceDim_> a_reduceDims;
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), a_reduceDims.begin());
|
||||
|
||||
result = reduce_multiblock_atomic_add_impl<InOutDataType,
|
||||
AccDataType,
|
||||
ReduceOpId,
|
||||
ShapeType::Rank_,
|
||||
ShapeType::NumReduceDim_,
|
||||
PropagateNan>(
|
||||
do_verification, init_method, time_kernel, inLengths, reduceDims, alpha, beta);
|
||||
do_verification, init_method, time_kernel, inLengths, a_reduceDims, alpha, beta);
|
||||
|
||||
matched = true;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user