mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
* Squashed 'src/composable_kernel/' content from commit f6edda611 git-subtree-dir: src/composable_kernel git-subtree-split: f6edda6119 * add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files * Squashed 'src/composable_kernel/' changes from f6edda611..5781adf5c 5781adf5c Update develop (#5) (#6) 97e6d514f Merge pull request #4 from ROCmSoftwarePlatform/separate_online_compile 7b1ec41e5 refactor 49c33aaea refactor 54b3e73d1 rename git-subtree-dir: src/composable_kernel git-subtree-split: 5781adf5cf * fix * refactor * remove online compilation from CK * refactor * fix * add ctest * add c-style pointer cast * vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast * fix clang warning suppression * tidy * suppress cppcheck * fix enum issue * revert changes to hip build * fix kernel filename * update CK build script * rename * rename * make inner product compatible on gfx900 * Update src/include/miopen/solver/ck_utility_common.hpp Co-authored-by: JD <Jehandad.Khan@amd.com> * compiler parameter use stream * use int instead of index_t in kernel wrapper * DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element * refactor * refactor * change cmakelist * change ck common utility * fix Co-authored-by: JD <Jehandad.Khan@amd.com>
81 lines
1.7 KiB
C++
81 lines
1.7 KiB
C++
#ifndef DEVICE_HPP
|
|
#define DEVICE_HPP
|
|
|
|
#include <cstddef>
#include <cstdio>
#include <memory>

#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
|
|
|
|
/// Handle describing a buffer through a raw pointer and a byte count.
///
/// Only the declarations live in this header; every member function is
/// defined in another translation unit, so the exact allocation / copy
/// behaviour is not visible here.
///
/// NOTE(review): a destructor is declared but no copy constructor or copy
/// assignment is. If the destructor releases `mpDeviceBuf`, copying a
/// DeviceMem would release the same pointer twice -- confirm against the
/// implementation and consider deleting the copy operations.
/// NOTE(review): the single-argument constructor is not `explicit`, so a
/// std::size_t converts implicitly to DeviceMem -- confirm this is intended.
struct DeviceMem
{
    /// A DeviceMem cannot be created without a size.
    DeviceMem() = delete;

    /// @param mem_size Size associated with the buffer
    ///                 (presumably the number of bytes to allocate -- confirm).
    DeviceMem(std::size_t mem_size);

    /// @return Untyped pointer to the buffer (presumably `mpDeviceBuf` -- confirm).
    void* GetDeviceBuffer();

    /// Transfer from `p` into the buffer.
    /// @param p Source pointer; read-only for this call.
    ///          Presumably must reference at least `mMemSize` bytes -- confirm.
    void ToDevice(const void* p);

    /// Transfer from the buffer into `p`.
    /// @param p Destination pointer.
    ///          Presumably must have room for at least `mMemSize` bytes -- confirm.
    void FromDevice(void* p);

    ~DeviceMem();

    void* mpDeviceBuf;    ///< Raw, untyped pointer to the buffer.
    std::size_t mMemSize; ///< Size recorded for the buffer (presumably in bytes -- confirm).
};
|
|
|
|
/// Implementation type of KernelTimer; only forward-declared in this header.
struct KernelTimerImpl;

/// Timer with a Start()/End() interface whose implementation is hidden behind
/// an opaque KernelTimerImpl.
///
/// The constructor and destructor are declared here and defined elsewhere, so
/// this header never needs the complete KernelTimerImpl type even though it is
/// held in a std::unique_ptr.
///
/// Because of the std::unique_ptr member, a KernelTimer is not copyable.
struct KernelTimer
{
    KernelTimer();
    ~KernelTimer();

    /// Mark the beginning of the timed interval.
    void Start();

    /// Mark the end of the timed interval.
    void End();

    /// @return The elapsed time as a float. The unit is decided by the
    ///         implementation (presumably milliseconds -- confirm).
    float GetElapsedTime() const;

    /// Owning pointer to the hidden implementation object.
    std::unique_ptr<KernelTimerImpl> impl;
};
|
|
|
|
// Alias for the HIP stream handle type (hipStream_t).
using device_stream_t = hipStream_t;
|
|
|
|
template <typename... Args, typename F>
|
|
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
|
|
{
|
|
hipStream_t stream_id = nullptr;
|
|
|
|
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
|
|
}
|
|
|
|
template <typename... Args, typename F>
|
|
float launch_and_time_kernel(
|
|
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
|
|
{
|
|
KernelTimer timer;
|
|
|
|
printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
|
|
__func__,
|
|
grid_dim.x,
|
|
grid_dim.y,
|
|
grid_dim.z,
|
|
block_dim.x,
|
|
block_dim.y,
|
|
block_dim.z);
|
|
|
|
printf("Warm up\n");
|
|
|
|
hipStream_t stream_id = nullptr;
|
|
|
|
// warm up
|
|
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
|
|
|
|
printf("Start running %d times...\n", nrepeat);
|
|
|
|
timer.Start();
|
|
|
|
for(int i = 0; i < nrepeat; ++i)
|
|
{
|
|
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
|
|
}
|
|
|
|
timer.End();
|
|
|
|
return timer.GetElapsedTime() / nrepeat;
|
|
}
|
|
|
|
#endif
|