[CK_TILE] Allow switching between SGPR/VGPR get_warp_id() return values (#2669)

* Allow return VGPR get_warp_id() value

* Avoid using SALU in async_load_raw()
This commit is contained in:
Po Yen Chen
2025-08-22 10:17:05 +08:00
committed by GitHub
parent 4a7ecce096
commit 0db21053e6
2 changed files with 16 additions and 4 deletions

View File

@@ -98,9 +98,18 @@ CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }
// Use these instead
CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }
CK_TILE_DEVICE index_t get_warp_id()
template <bool ReturnSgpr = true>
CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {})
{
return __builtin_amdgcn_readfirstlane(threadIdx.x / get_warp_size());
const index_t warp_id = threadIdx.x / get_warp_size();
if constexpr(ReturnSgpr)
{
return __builtin_amdgcn_readfirstlane(warp_id);
}
else
{
return warp_id;
}
}
CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }

View File

@@ -288,8 +288,11 @@ struct tile_window_with_static_distribution
sizeof(LdsDataType) -
size_per_buf;
const index_t m0_init_value = size_per_buf + size_per_wave * get_warp_id();
m0_set_with_memory(m0_init_value); // This should be wave independent
// Use VALU so the compiler can optimize redundant/repeated computations
const index_t m0_init_value =
size_per_buf + size_per_wave * get_warp_id(/*ReturnSgpr=*/bool_constant<false>{});
m0_set_with_memory(
__builtin_amdgcn_readfirstlane(m0_init_value)); // This should be wave independent
using Traits = typename Base::Traits;