mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Gemm+Reduce Fusion (#128)
* add gridwise gemm v4r1 * rename * adding gemm+reduce * adding gemm+reduce * adding gemm+reduce * adding gemm+reduce * use sfc in shuffling * remove hardcode * remove hardcode * refactor * fix build * adding gemm+reduce * adding gemm+reduce * adding gemm+reduce * adding gemm+reduce * adding gemm+reduce * format * clean * adding gemm+reduce * adding profiler for gemm+reduce * adding gemm+reduce profiler * fix build * clean up * gemm+reduce * fix build * update DeviceGemm_Xdl_CShuffle; update enum to enum class * clean up * add test for gemm+reduce * clean up * refactor * fix build * fix build
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
#ifndef CK_ELEMENT_WISE_OPERATION_HPP
|
||||
#define CK_ELEMENT_WISE_OPERATION_HPP
|
||||
|
||||
#pragma once
|
||||
#include "data_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
@@ -365,4 +363,3 @@ struct UnarySqrt<double, double>
|
||||
} // namespace element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
#include "data_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace element_wise {
|
||||
|
||||
/// Reduction operator that accumulates a plain sum of float values.
///
/// Usage sketch: start an accumulator at GetReduceZeroValue() and call
/// Reduce() once per input element.
struct ReduceSum
{
    /// @return 0.0f, the starting value for a sum accumulator.
    __host__ __device__ static constexpr float GetReduceZeroValue() { return float(0); }

    /// Adds one element to the running sum.
    /// @param acc running accumulator, updated in place
    /// @param v   value to add
    __host__ __device__ void Reduce(float& acc, float v) const { acc += v; }
};
|
||||
|
||||
/// Reduction operator that accumulates the sum of squares of float values.
///
/// Usage sketch: start an accumulator at GetReduceZeroValue() and call
/// Reduce() once per input element.
struct ReduceSquareSum
{
    /// @return 0.0f, the starting value for a sum-of-squares accumulator.
    __host__ __device__ static constexpr float GetReduceZeroValue() { return float(0); }

    /// Adds the square of one element to the running sum.
    /// @param acc running accumulator, updated in place
    /// @param v   value whose square (v * v) is added
    __host__ __device__ void Reduce(float& acc, float v) const { acc += v * v; }
};
|
||||
|
||||
} // namespace element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
Reference in New Issue
Block a user