mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
* tuning para, * testing on v100 * add fp16 * remove deprecated tensor descriptor * sync with miopen * update build script Co-authored-by: Jing Zhang <jizhan@amd.com>
26 lines
479 B
C++
26 lines
479 B
C++
#ifndef CK_SYNCHRONIZATION_AMD_HPP
#define CK_SYNCHRONIZATION_AMD_HPP
#include "config.hpp"
namespace ck {
|
|
|
|
__device__ void __llvm_amdgcn_s_barrier() __asm("llvm.amdgcn.s.barrier");
|
|
|
|
__device__ void block_sync_lds()
|
|
{
|
|
#if CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
|
|
asm volatile("\
|
|
s_waitcnt lgkmcnt(0) \n \
|
|
s_barrier \
|
|
" ::);
|
|
#else
|
|
__llvm_amdgcn_s_barrier();
|
|
#endif
|
|
}
|
|
|
|
__device__ void block_sync_lds_vmem() { __llvm_amdgcn_s_barrier(); }
|
|
|
|
} // namespace ck
#endif