mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-25 09:37:42 +00:00
[CK_TILE] Allow switching between SGPR/VGPR get_warp_id() return values (#2669)
* Allow return VGPR get_warp_id() value * Avoid using SALU in async_load_raw()
This commit is contained in:
@@ -98,9 +98,18 @@ CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }
|
||||
// Use these instead
|
||||
CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }
|
||||
|
||||
CK_TILE_DEVICE index_t get_warp_id()
|
||||
template <bool ReturnSgpr = true>
|
||||
CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {})
|
||||
{
|
||||
return __builtin_amdgcn_readfirstlane(threadIdx.x / get_warp_size());
|
||||
const index_t warp_id = threadIdx.x / get_warp_size();
|
||||
if constexpr(ReturnSgpr)
|
||||
{
|
||||
return __builtin_amdgcn_readfirstlane(warp_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
return warp_id;
|
||||
}
|
||||
}
|
||||
|
||||
CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }
|
||||
|
||||
@@ -288,8 +288,11 @@ struct tile_window_with_static_distribution
|
||||
sizeof(LdsDataType) -
|
||||
size_per_buf;
|
||||
|
||||
const index_t m0_init_value = size_per_buf + size_per_wave * get_warp_id();
|
||||
m0_set_with_memory(m0_init_value); // This should be wave independent
|
||||
// Use VALU so the compiler can optimize redundant/repeated computations
|
||||
const index_t m0_init_value =
|
||||
size_per_buf + size_per_wave * get_warp_id(/*ReturnSgpr=*/bool_constant<false>{});
|
||||
m0_set_with_memory(
|
||||
__builtin_amdgcn_readfirstlane(m0_init_value)); // This should be wave independent
|
||||
|
||||
using Traits = typename Base::Traits;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user