mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 05:31:24 +00:00
* start * read for gridwise gemm * add MakeBGridDescriptor_K0_N0_N1_N2_N3_K1 * add thread copy desc and register buffer * add K0PerBlock dim * add read global data * finish gridwise gemm * finish blockwise gemm * add print data * add smallest config * add compare code for gridwis gemm * fix NXdlPerWave * fix k0perthread and gridewis gemm main loop * remove b matrix lds alloc * fix name * add test code * create b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3 from parameter * add double register * modify b_thread_desc_ * add float * fp16 tag * add tail for pipeline * finish main loop * optimize main loop * start clear gridwise gemm * clear code * clear redundant code * change file name * change file name * fix bug after merge develop * fix input parameters * using MultiK0 control b load data loop * fix some config * 4 buffer * fix bug * one can use * change read order * change buffer array to tuple * change to 8 buffer * interleave buffer load * change to 16 * read 8 buffer * add data buffer to template * fix after merge develop(head file) * format * change to 4 buffer * remove unnecessary lambda fun
33 lines
517 B
C++
33 lines
517 B
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck/ck.hpp"
|
|
|
|
namespace ck {
|
|
|
|
__device__ void block_sync_lds()
|
|
{
|
|
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
|
|
asm volatile("\
|
|
s_waitcnt lgkmcnt(0) \n \
|
|
s_barrier \
|
|
" ::);
|
|
#else
|
|
__syncthreads();
|
|
#endif
|
|
}
|
|
__device__ void s_nop()
|
|
{
|
|
#if 1
|
|
asm volatile("\
|
|
s_nop 0 \n \
|
|
" ::);
|
|
#else
|
|
__builtin_amdgcn_sched_barrier(0);
|
|
#endif
|
|
}
|
|
|
|
} // namespace ck
|