mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
tempsave, trload+asyncload done
This commit is contained in:
@@ -12,12 +12,14 @@
|
||||
|
||||
#define CK_TILE_S_CNT_MAX 0b1100'1111'0111'1111
|
||||
#define CK_TILE_VMCNT(cnt) \
|
||||
([]() { static_assert((cnt) < 0b111111, "VMCNT only has 6 bits"); }(), \
|
||||
((cnt)&0b1111) | (((cnt)&0b110000) << 10))
|
||||
#define CK_TILE_EXPCNT(cnt) \
|
||||
([]() { static_assert((cnt) < 0b111, "EXP only has 3 bits"); }(), ((cnt) << 4))
|
||||
#define CK_TILE_LGKMCNT(cnt) \
|
||||
([]() { static_assert((cnt) < 0b1111, "LGKM only has 4 bits"); }(), ((cnt) << 8))
|
||||
([]() { static_assert((cnt) < (1 << 6), "VMCNT only has 6 bits"); }(), \
|
||||
((cnt)&0b1111) | (((cnt)&0b110000) << 10) | 0b0000'1111'0111'0000) // cnt[5:4] -> bits 15:14 (<<10, not <<14); EXPCNT/LGKMCNT fields left at max
|
||||
#define CK_TILE_EXPCNT(cnt) \
|
||||
([]() { static_assert((cnt) < (1 << 3), "EXP only has 3 bits"); }(), \
|
||||
((cnt) << 4) | 0b1100'1111'0000'1111)
|
||||
#define CK_TILE_LGKMCNT(cnt) \
|
||||
([]() { static_assert((cnt) < (1 << 4), "LGKM only has 4 bits"); }(), \
|
||||
((cnt) << 8) | 0b1100'0000'0111'1111)
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
@@ -127,7 +129,7 @@ template <index_t vmcnt>
|
||||
CK_TILE_DEVICE void block_sync_lds_direct_load()
|
||||
{
|
||||
// We don't sync the lds insts here.
|
||||
__builtin_amdgcn_s_waitcnt(CK_TILE_VMCNT(vmcnt));
|
||||
__builtin_amdgcn_s_waitcnt(CK_TILE_S_CNT_MAX & CK_TILE_VMCNT(vmcnt));
|
||||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
|
||||
|
||||
@@ -433,6 +433,8 @@ struct tile_window_with_static_distribution
|
||||
// data index [y0, y1, ...]
|
||||
constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
|
||||
|
||||
// printf("Tid: %02d, tr_load_idx: %d\n",
|
||||
// get_thread_local_1d_id(),bottom_tensor_thread_coord.get_offset());
|
||||
// read from bottom tensor
|
||||
const vector_t vec_value =
|
||||
this->get_bottom_tensor_view()
|
||||
|
||||
Reference in New Issue
Block a user