[CK-Tile] Add the API to load SGPR (#2878)

* Have a workable version for SGPR

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* substitute with the new sgpr read api

* update the CHANGELOG

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* change to static for logic

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.
This commit is contained in:
Thomas Ning
2025-09-23 01:23:56 -07:00
committed by GitHub
parent b6e8994386
commit 2cbbf5dcb3
40 changed files with 273 additions and 167 deletions

View File

@@ -487,7 +487,7 @@ struct GemmPipelineAgBgCrCompV4 : public BaseGemmPipelineAgBgCrCompV4<Problem>
if(HasHotLoop)
{
// minus 2 because we have ping-pong double buffer.
index_t iCounter = __builtin_amdgcn_readfirstlane(num_loop - 2);
index_t iCounter = amd_wave_read_first_lane(num_loop - 2);
do
{
// ping

View File

@@ -178,7 +178,7 @@ struct GemmPipelineAgBgCrCompV5 : public BaseGemmPipelineAgBgCrCompV5<Problem>
index_t warp_id = get_warp_id();
index_t operation_id =
__builtin_amdgcn_readfirstlane(get_warp_id()); // 0 - Memory read, 1 - block-gemm
amd_wave_read_first_lane(get_warp_id()); // 0 - Memory read, 1 - block-gemm
auto a_offset = (warp_id == 0) ? make_array(0, 0) : make_array(0, KPerBlock);
auto b_offset = (warp_id == 0) ? make_array(0, 0) : make_array(0, KPerBlock);
@@ -336,7 +336,7 @@ struct GemmPipelineAgBgCrCompV5 : public BaseGemmPipelineAgBgCrCompV5<Problem>
MemoryOpsStep(warp_id);
}
index_t num_compute_steps = __builtin_amdgcn_readfirstlane(num_loop);
index_t num_compute_steps = amd_wave_read_first_lane(num_loop);
while(num_compute_steps > 1)
{
block_sync_lds();