Files
composable_kernel/host/host_tensor/include/device.hpp
Chao Liu 6fe3627a9e Composable kernel init integration v3 (#1097)
* Squashed 'src/composable_kernel/' content from commit f6edda611

git-subtree-dir: src/composable_kernel
git-subtree-split: f6edda6119

* add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files

* Squashed 'src/composable_kernel/' changes from f6edda611..5781adf5c

5781adf5c Update develop (#5) (#6)
97e6d514f Merge pull request #4 from ROCmSoftwarePlatform/separate_online_compile
7b1ec41e5 refactor
49c33aaea refactor
54b3e73d1 rename

git-subtree-dir: src/composable_kernel
git-subtree-split: 5781adf5cf

* fix

* refactor

* remove online compilation from CK

* refactor

* fix

* add ctest

* add c-style pointer cast

* vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast

* fix clang warning suppression

* tidy

* suppress cppcheck

* fix enum issue

* revert chagnes to hip build

* fix kernel filename

* update CK build script

* rename

* rename

* make innner product compatiable on gfx900

* Update src/include/miopen/solver/ck_utility_common.hpp

Co-authored-by: JD <Jehandad.Khan@amd.com>

* compiler parameter use stream

* use int instead of index_t in kernel wrapper

* DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element

* refactor

* refactor

* change cmakelist

* change ck common utility

* fix

Co-authored-by: JD <Jehandad.Khan@amd.com>
2021-08-19 10:55:03 -05:00

81 lines
1.7 KiB
C++

#ifndef DEVICE_HPP
#define DEVICE_HPP
#include <memory>
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
struct DeviceMem
{
DeviceMem() = delete;
DeviceMem(std::size_t mem_size);
void* GetDeviceBuffer();
void ToDevice(const void* p);
void FromDevice(void* p);
~DeviceMem();
void* mpDeviceBuf;
std::size_t mMemSize;
};
struct KernelTimerImpl;
struct KernelTimer
{
KernelTimer();
~KernelTimer();
void Start();
void End();
float GetElapsedTime() const;
std::unique_ptr<KernelTimerImpl> impl;
};
using device_stream_t = hipStream_t;
template <typename... Args, typename F>
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
{
hipStream_t stream_id = nullptr;
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}
template <typename... Args, typename F>
float launch_and_time_kernel(
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
{
KernelTimer timer;
printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
__func__,
grid_dim.x,
grid_dim.y,
grid_dim.z,
block_dim.x,
block_dim.y,
block_dim.z);
printf("Warm up\n");
hipStream_t stream_id = nullptr;
// warm up
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
printf("Start running %d times...\n", nrepeat);
timer.Start();
for(int i = 0; i < nrepeat; ++i)
{
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}
timer.End();
return timer.GetElapsedTime() / nrepeat;
}
#endif