[CK-Tile] Add the API to load SGPR (#2878)

* Have a workable version for SGPR

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* substitute with the new sgpr read api

* update the CHANGELOG

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.

* change to static for logic

* have a workable version for atomic add

* Revert "have a workable version for atomic add"

This reverts commit 792377a590c26cfff9c8f545d9a9e8484a7422eb.
This commit is contained in:
Thomas Ning
2025-09-23 01:23:56 -07:00
committed by GitHub
parent b6e8994386
commit 2cbbf5dcb3
40 changed files with 273 additions and 167 deletions

View File

@@ -96,9 +96,9 @@ struct TopkSoftmaxKernel
if(block_row_id > kargs.num_rows)
return;
-index_t block_os_inp = __builtin_amdgcn_readfirstlane(block_row_id * kargs.stride_input);
-index_t block_os_out = __builtin_amdgcn_readfirstlane(block_row_id * kargs.stride_output);
-index_t num_rows_rem = __builtin_amdgcn_readfirstlane(kargs.num_rows - block_row_id);
+index_t block_os_inp = amd_wave_read_first_lane(block_row_id * kargs.stride_input);
+index_t block_os_out = amd_wave_read_first_lane(block_row_id * kargs.stride_output);
+index_t num_rows_rem = amd_wave_read_first_lane(kargs.num_rows - block_row_id);
const auto input_window = [&]() {
const InputType* p_input =