Fix for Add the API to load SGPR (#2913)

* Revert "Revert "[CK-Tile] Add the API to load SGPR  (#2878)" (#2904)"

This reverts commit f161b5b738.

* Fix: sgpr minor issue

* cyclic dependency resolved

* clang formatted

* removing unused variable

* clang formatted

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
Khushbu Agarwal
2025-09-25 10:32:42 -07:00
committed by GitHub
parent 64e61b8647
commit b56e5d1d79
41 changed files with 224 additions and 173 deletions

View File

@@ -184,17 +184,17 @@ struct FusedMoeGemmPipeline_FlatmmUk
index_t nr_1 = kargs.hidden_size / BlockShape::Warp_N1;
index_t kr_1 = shared_intermediate_size_1 / BlockShape::Warp_K1;
const IndexDataType expert_id = __builtin_amdgcn_readfirstlane(
const IndexDataType expert_id = amd_wave_read_first_lane(
reinterpret_cast<const IndexDataType*>(kargs.sorted_expert_ids_ptr)[sorted_tile_id]);
index_t expert_stride_0 = shared_intermediate_size_0 * kargs.hidden_size;
index_t expert_stride_1 = shared_intermediate_size_1 * kargs.hidden_size;
// nr*kr*w
index_t interm_idx_nr0 = __builtin_amdgcn_readfirstlane(
index_t interm_idx_nr0 = amd_wave_read_first_lane(
intermediate_tile_id *
BlockShape::Block_Nr0); // intermediate_tile_id * Block_N / (N in W)
index_t interm_idx_kr1 = __builtin_amdgcn_readfirstlane(
index_t interm_idx_kr1 = amd_wave_read_first_lane(
intermediate_tile_id *
BlockShape::Block_Kr1); // intermediate_tile_id * Block_N / (N in W)