mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
initial stream-k implementation with example (#699)
* initial stream-k implementation with example * fix unexpected change in err * improve a little bit performance by reorganize pipeline. * improve perf a little bit by swizzle block idx * add profiler * update example * fix spelling * shrink karg for streamk * support dynamic buffer using memory coherence glc_slc bit from template * control memory coherence while construct dynamic buffer * update reduction for streamk(not ready yet) * Add template parameter to make_dynamic_buffer to support amd_buffer coherence setting * fix build issue * fix several bug * now result is correct, everything works (but has scratch) * remove scratch by manually reset coordinate * update device code * fix a bug in final reduce * fix something in example * update async memset * fix enum as camel case * modify coherence enum name * clean code and use atomic streamk by default * remove unused var * throw exception if have empty pointer * fix format * fix CI warning * fix type in init * modify CI error * filter out on gfx10+ * restore changed example code --------- Co-authored-by: Qianfeng Zhang <Qianfeng.Zhang@amd.com>
This commit is contained in:
@@ -10,20 +10,57 @@ DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
|
||||
hip_check_error(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
|
||||
}
|
||||
|
||||
void DeviceMem::Realloc(std::size_t mem_size)
|
||||
{
|
||||
if(mpDeviceBuf)
|
||||
{
|
||||
hip_check_error(hipFree(mpDeviceBuf));
|
||||
}
|
||||
mMemSize = mem_size;
|
||||
hip_check_error(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
|
||||
}
|
||||
|
||||
void* DeviceMem::GetDeviceBuffer() const { return mpDeviceBuf; }
|
||||
|
||||
std::size_t DeviceMem::GetBufferSize() const { return mMemSize; }
|
||||
|
||||
void DeviceMem::ToDevice(const void* p) const
|
||||
{
|
||||
hip_check_error(hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
|
||||
if(mpDeviceBuf)
|
||||
{
|
||||
hip_check_error(
|
||||
hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("ToDevice with an empty pointer");
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceMem::FromDevice(void* p) const
|
||||
{
|
||||
hip_check_error(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
|
||||
if(mpDeviceBuf)
|
||||
{
|
||||
hip_check_error(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("FromDevice with an empty pointer");
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceMem::SetZero() const { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); }
|
||||
void DeviceMem::SetZero() const
|
||||
{
|
||||
if(mpDeviceBuf)
|
||||
{
|
||||
hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize));
|
||||
}
|
||||
}
|
||||
|
||||
DeviceMem::~DeviceMem() { hip_check_error(hipFree(mpDeviceBuf)); }
|
||||
DeviceMem::~DeviceMem()
|
||||
{
|
||||
if(mpDeviceBuf)
|
||||
{
|
||||
hip_check_error(hipFree(mpDeviceBuf));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user