mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Update to gemm_reduce and batched_gemm_reduce (#213)
* [Experimental] Change to gemm+reduce and batched-gemm+reduce
* Use threadwise-reduce function to improve the gridwise_gemm_reduce_xdl_cshuffle kernel
* Tiny fix in device_batched_gemm_xdl.hpp
* clang-format library/src/utility/conv_fwd_util.cpp
This commit is contained in:
@@ -5,20 +5,6 @@ namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace element_wise {
|
||||
|
||||
struct ReduceSum
|
||||
{
|
||||
__host__ __device__ static constexpr float GetReduceZeroValue() { return float(0); }
|
||||
|
||||
__host__ __device__ void Reduce(float& acc, float v) const { acc += v; }
|
||||
};
|
||||
|
||||
struct ReduceSquareSum
|
||||
{
|
||||
__host__ __device__ static constexpr float GetReduceZeroValue() { return float(0); }
|
||||
|
||||
__host__ __device__ void Reduce(float& acc, float v) const { acc += v * v; }
|
||||
};
|
||||
|
||||
} // namespace element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
|
||||
Reference in New Issue
Block a user