mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
Gemm+Reduce Fusion (#128)
* add gridwise gemm v4r1
* rename
* adding gemm+reduce
* adding gemm+reduce
* adding gemm+reduce
* adding gemm+reduce
* use sfc in shuffling
* remove hardcode
* remove hardcode
* refactor
* fix build
* adding gemm+reduce
* adding gemm+reduce
* adding gemm+reduce
* adding gemm+reduce
* adding gemm+reduce
* format
* clean
* adding gemm+reduce
* adding profiler for gemm+reduce
* adding gemm+reduce profiler
* fix build
* clean up
* gemm+reduce
* fix build
* update DeviceGemm_Xdl_CShuffle; update enum to enum class
* clean up
* add test for gemm+reduce
* clean up
* refactor
* fix build
* fix build
[ROCm/composable_kernel commit: f95267f166]
This commit is contained in:
@@ -7,6 +7,8 @@ DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
|
||||
|
||||
// Accessor for the raw device pointer stored in mpDeviceBuf.
// The pointer is returned as-is; this object keeps holding it.
void* DeviceMem::GetDeviceBuffer()
{
    return mpDeviceBuf;
}
|
||||
|
||||
// Accessor for mMemSize, the size recorded for this buffer
// (the same value is used as the byte count for the memcpy/memset calls).
std::size_t DeviceMem::GetBufferSize()
{
    return mMemSize;
}
|
||||
|
||||
void DeviceMem::ToDevice(const void* p)
|
||||
{
|
||||
hipGetErrorString(
|
||||
@@ -18,6 +20,8 @@ void DeviceMem::FromDevice(void* p)
|
||||
hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
// Writes zero into all mMemSize bytes of the device buffer.
void DeviceMem::SetZero()
{
    const auto status = hipMemset(mpDeviceBuf, 0, mMemSize);

    // NOTE(review): the message returned here is discarded, so a failed
    // hipMemset goes unreported -- confirm whether that is intentional.
    hipGetErrorString(status);
}
|
||||
|
||||
// Releases the device allocation referenced by mpDeviceBuf.
DeviceMem::~DeviceMem()
{
    const auto status = hipFree(mpDeviceBuf);

    // NOTE(review): the error string is computed but not used, so a failing
    // hipFree is silently ignored here.
    hipGetErrorString(status);
}
|
||||
|
||||
struct KernelTimerImpl
|
||||
|
||||
@@ -64,6 +64,8 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
|
||||
os << "}" << std::endl;
|
||||
}
|
||||
|
||||
#if 1
|
||||
// FIXME: remove
|
||||
float bf16_to_f32_(ck::bhalf_t src_val)
|
||||
{
|
||||
union
|
||||
@@ -74,8 +76,10 @@ float bf16_to_f32_(ck::bhalf_t src_val)
|
||||
return u.fp32;
|
||||
}
|
||||
|
||||
// FIXME: remove
|
||||
void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst)
|
||||
{
|
||||
for(int i = 0; i < src.mData.size(); ++i)
|
||||
dst.mData[i] = bf16_to_f32_(src.mData[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user