mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 08:50:17 +00:00
Update to the Reduction API and instances (#476)
* Simplify the macros for declaring and defining the add_device_reduce_instance_xxxx() instances * Change the types of lengths and strides from std::vector to std::array for the reduction device interfaces * Remove DeviceSoftmaxImpl's dependency on DeviceReduceMultiblock * Split the cpp and hpp files for reduction instances to enable more parallel compilation * Remove the use of macros for declaring reduction instances and instance references * Update to add_device_reduce_instance_xxxx templated functions * Use ReduceOperation+InElementwiseOp+AccElementwiseOp to replace the ReduceOpId in defining add_reduce_instance_xxxx() templates * Change return format
This commit is contained in:
@@ -140,6 +140,10 @@ bool reduce_blockwise_test(bool do_verification,
|
||||
if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size())
|
||||
return;
|
||||
|
||||
std::array<int, ShapeType::NumReduceDim_> arrReduceDims;
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
|
||||
|
||||
result = reduce_blockwise_impl<InOutDataType,
|
||||
AccDataType,
|
||||
ReduceOpId,
|
||||
@@ -147,7 +151,7 @@ bool reduce_blockwise_test(bool do_verification,
|
||||
ShapeType::NumReduceDim_,
|
||||
PropagateNan,
|
||||
OutputIndex>(
|
||||
do_verification, init_method, time_kernel, inLengths, reduceDims, alpha, beta);
|
||||
do_verification, init_method, time_kernel, inLengths, arrReduceDims, alpha, beta);
|
||||
|
||||
matched = true;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user