Update to the Reduction API and instances (#476)

* Simplify the macros for declaring and defining the add_device_reduce_instance_xxxx() instances

* Change the types of lengths and strides from std::vector to std::array for the reduction device interfaces

* Remove DeviceSoftmaxImpl's dependency on DeviceReduceMultiblock

* Split the cpp and hpp files for reduction instances to enable more parallel compiling

* Remove the use of macros for declaring reduction instances and instance references

* Update to add_device_reduce_instance_xxxx templated functions

* Use ReduceOperation+InElementwiseOp+AccElementwiseOp to replace the ReduceOpId in defining add_reduce_instance_xxxx() templates

* Change return format
This commit is contained in:
Qianfeng
2022-10-25 23:39:11 +08:00
committed by GitHub
parent 6ea9257e9d
commit dda3a0a10b
209 changed files with 4646 additions and 2279 deletions

View File

@@ -140,6 +140,10 @@ bool reduce_blockwise_test(bool do_verification,
if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size())
return;
std::array<int, ShapeType::NumReduceDim_> arrReduceDims;
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
result = reduce_blockwise_impl<InOutDataType,
AccDataType,
ReduceOpId,
@@ -147,7 +151,7 @@ bool reduce_blockwise_test(bool do_verification,
ShapeType::NumReduceDim_,
PropagateNan,
OutputIndex>(
do_verification, init_method, time_kernel, inLengths, reduceDims, alpha, beta);
do_verification, init_method, time_kernel, inLengths, arrReduceDims, alpha, beta);
matched = true;
});