mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 20:51:23 +00:00
Fix for "Add the API to load SGPR" (#2913)
* Revert "Revert "[CK-Tile] Add the API to load SGPR (#2878)" (#2904)"
This reverts commit f161b5b738.
* Fix: minor SGPR issue
* cyclic dependency resolved
* clang formatted
* removing unused variable
* clang formatted
---------
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
@@ -240,7 +240,7 @@ struct FusedMoeGemmKernel
 if constexpr(UseUK)
 {
     __shared__ CK_TILE_LDS_ADDR char smem[GetSmemSize()];
-    IndexDataType num_sorted_tiles = __builtin_amdgcn_readfirstlane(
+    IndexDataType num_sorted_tiles = amd_wave_read_first_lane(
         *reinterpret_cast<const IndexDataType*>(kargs.num_sorted_tiles_ptr));

     num_sorted_tiles = num_sorted_tiles / BlockShape::Block_M0;
@@ -261,7 +261,7 @@ struct FusedMoeGemmKernel
 {
     // allocate LDS
     // __shared__ char smem_ptr[GetSmemSize()];
-    IndexDataType num_sorted_tiles = __builtin_amdgcn_readfirstlane(
+    IndexDataType num_sorted_tiles = amd_wave_read_first_lane(
         *reinterpret_cast<const IndexDataType*>(kargs.num_sorted_tiles_ptr));
     constexpr index_t hidden_radio_0 = IsGateOnly ? 1 : 2;

@@ -283,14 +283,14 @@ struct FusedMoeGemmKernel
     return;

 const IndexDataType expert_id =
-    __builtin_amdgcn_readfirstlane(reinterpret_cast<const IndexDataType*>(
+    amd_wave_read_first_lane(reinterpret_cast<const IndexDataType*>(
         kargs.sorted_expert_ids_ptr)[sorted_tile_id]);

 // index along intermediate_size
 // index_t hidden_idx = __builtin_amdgcn_readfirstlane(intermediate_tile_id *
 // BlockShape::Block_N0);
 index_t interm_idx_nr =
-    __builtin_amdgcn_readfirstlane(intermediate_tile_id * BlockShape::Block_Nr0);
+    amd_wave_read_first_lane(intermediate_tile_id * BlockShape::Block_Nr0);

 const auto a_coord = Pipeline::GetACoord(); // 2d thread offset, [i_row, i_col]
 const auto sorted_token_id =
@@ -756,7 +756,7 @@ struct MoeSortingKernel
     void* smem) const
 {
     const index_t tid = static_cast<index_t>(threadIdx.x);
-    const index_t wid = __builtin_amdgcn_readfirstlane(tid / get_warp_size());
+    const index_t wid = amd_wave_read_first_lane(tid / get_warp_size());
     const index_t lid = __lane_id();
     constexpr index_t block_size = 256; // blockDim.x;
     const index_t sub_tokens = smem_rows - 2; // sub_tokens_mdiv.divisor;
Reference in New Issue
Block a user