mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Reduction external API and client examples (#493)
* Change to the DeviceReduce base class template to include all problem description information * Add external api for reduction * Add client example to test the reduction external api * Spelling correction * Re-implement the host_reduction to follow the DeviceReduce base API format * Change the reduce profiler to call the external API for collecting device instances * Rename reduce client example directory from 08_reduce to 12_reduce * Remove (void) before the functional call * Tiny update in reduce client example * Tiny update in profile_reduce_impl.hpp * Rename the reduce client example directory Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
This commit is contained in:
@@ -73,8 +73,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
|
||||
static_for<0, NumReduction, 1>{}([&](auto I) {
|
||||
using OutDataType = remove_cvref_t<decltype(OutDataTypeTuple{}[I])>;
|
||||
flag =
|
||||
flag && ck::reduce::InMemoryDataOperatonSupportedOnDataType<OutMemoryDataOperation,
|
||||
OutDataType>::value;
|
||||
flag && ck::reduce::InMemoryDataOperationSupportedOnDataType<OutMemoryDataOperation,
|
||||
OutDataType>::value;
|
||||
});
|
||||
|
||||
return flag;
|
||||
|
||||
@@ -40,8 +40,16 @@ template <typename InDataType,
|
||||
index_t InSrcVectorDim,
|
||||
index_t InSrcVectorSize,
|
||||
index_t OutDstVectorSize>
|
||||
struct DeviceReduceMultiBlock
|
||||
: public DeviceReduce<Rank, NumReduceDim, InElementwiseOperation, AccElementwiseOperation>
|
||||
struct DeviceReduceMultiBlock : public DeviceReduce<InDataType,
|
||||
AccDataType,
|
||||
OutDataType,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
ReduceOperation,
|
||||
InElementwiseOperation,
|
||||
AccElementwiseOperation,
|
||||
PropagateNan,
|
||||
OutputIndex>
|
||||
{
|
||||
static_assert(Rank <= 6, "Bigger Rank size is not supported!");
|
||||
static_assert(BlockSize == MThreadClusterSize * KThreadClusterSize,
|
||||
@@ -67,8 +75,8 @@ struct DeviceReduceMultiBlock
|
||||
static constexpr bool use_multiblock =
|
||||
(OutMemoryDataOperation == InMemoryDataOperationEnum::AtomicAdd);
|
||||
|
||||
static_assert(ck::reduce::InMemoryDataOperatonSupportedOnDataType<OutMemoryDataOperation,
|
||||
OutDataType>::value,
|
||||
static_assert(ck::reduce::InMemoryDataOperationSupportedOnDataType<OutMemoryDataOperation,
|
||||
OutDataType>::value,
|
||||
"The OutDataType must support the specified OutMemoryDataOperation!");
|
||||
|
||||
static_assert(!use_multiblock || (use_multiblock && !OutputIndex),
|
||||
|
||||
@@ -35,8 +35,17 @@ template <typename InDataType,
|
||||
index_t InSrcVectorDim,
|
||||
index_t InSrcVectorSize,
|
||||
index_t OutDstVectorSize>
|
||||
struct DeviceReduceThreadWise
|
||||
: public DeviceReduce<Rank, NumReduceDim, InElementwiseOperation, AccElementwiseOperation>
|
||||
struct DeviceReduceThreadWise : public DeviceReduce<InDataType,
|
||||
AccDataType,
|
||||
OutDataType,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
ReduceOperation,
|
||||
InElementwiseOperation,
|
||||
AccElementwiseOperation,
|
||||
PropagateNan,
|
||||
OutputIndex>
|
||||
|
||||
{
|
||||
static_assert(Rank <= 6, "Bigger Rank size is not supported!");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user