mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Update to the Reduction API and instances (#476)
* Simplify the macros for declaring and defining the add_device_reduce_instance_xxxx() instances * Change the types of lengths and strides from std::vector to std::array for the reduction device interfaces * Remove DeviceSoftmaxImpl's dependency on DeviceReduceMultiblock * Split the cpp and hpp files for reduction instances to enable more parallel compilation * Remove the use of macros for declaring reduction instances and instance references * Update to add_device_reduce_instance_xxxx templated functions * Use ReduceOperation+InElementwiseOp+AccElementwiseOp to replace the ReduceOpId in defining add_reduce_instance_xxxx() templates * Change return format
This commit is contained in:
@@ -29,7 +29,7 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
|
||||
int init_method,
|
||||
bool time_kernel,
|
||||
const std::vector<size_t>& inLengths,
|
||||
const std::vector<int>& reduceDims,
|
||||
const std::array<int, NumReduceDim>& reduceDims,
|
||||
float alpha,
|
||||
float beta)
|
||||
|
||||
@@ -37,6 +37,8 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
|
||||
using namespace ck;
|
||||
using namespace ck::tensor_operation::device;
|
||||
|
||||
constexpr index_t NumOutDim = (Rank - NumReduceDim == 0) ? 1 : Rank - NumReduceDim;
|
||||
|
||||
constexpr bool op_support_atomic_add =
|
||||
(ReduceOpId == ReduceTensorOp::ADD || ReduceOpId == ReduceTensorOp::AVG);
|
||||
|
||||
@@ -84,7 +86,7 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
|
||||
|
||||
std::vector<size_t> outLengths;
|
||||
|
||||
std::vector<int> invariantDims = get_invariant_dims<Rank, NumReduceDim>(reduceDims);
|
||||
auto invariantDims = get_invariant_dims<Rank, NumReduceDim>(reduceDims);
|
||||
|
||||
if(invariantDims.empty())
|
||||
outLengths.push_back(1);
|
||||
@@ -169,22 +171,22 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
|
||||
acc_elementwise_op);
|
||||
};
|
||||
|
||||
std::vector<ck::index_t> i_inLengths;
|
||||
std::vector<ck::index_t> i_inStrides;
|
||||
std::vector<ck::index_t> i_outLengths;
|
||||
std::vector<ck::index_t> i_outStrides;
|
||||
std::array<index_t, Rank> arrInLengths;
|
||||
std::array<index_t, Rank> arrInStrides;
|
||||
std::array<index_t, NumOutDim> arrOutLengths;
|
||||
std::array<index_t, NumOutDim> arrOutStrides;
|
||||
|
||||
i_inLengths.assign(inLengths.begin(), inLengths.end());
|
||||
i_inStrides.assign(inStrides.begin(), inStrides.end());
|
||||
i_outLengths.assign(outLengths.begin(), outLengths.end());
|
||||
i_outStrides.assign(outStrides.begin(), outStrides.end());
|
||||
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
|
||||
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
|
||||
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
|
||||
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
|
||||
|
||||
auto reduce = DeviceReduceInstance{};
|
||||
|
||||
auto argument_ptr = reduce.MakeArgumentPointer(i_inLengths,
|
||||
i_inStrides,
|
||||
i_outLengths,
|
||||
i_outStrides,
|
||||
auto argument_ptr = reduce.MakeArgumentPointer(arrInLengths,
|
||||
arrInStrides,
|
||||
arrOutLengths,
|
||||
arrOutStrides,
|
||||
reduceDims,
|
||||
alpha,
|
||||
beta,
|
||||
|
||||
Reference in New Issue
Block a user