Merge commit 'f161b5b738781c71bd5f2c191561b81f679ba9ed' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-23 23:11:18 +00:00
parent c39d5ca2c5
commit a55a7e37ec
40 changed files with 167 additions and 273 deletions

View File

@@ -156,7 +156,7 @@ struct Reduce
const auto merged_reduce_len =
transformed_x_tensor.get_tensor_descriptor().get_lengths().at(number<1>{});
index_t num_n_tile_iteration =
-amd_wave_read_first_lane(integer_divide_ceil(merged_reduce_len, S::Block_N));
+__builtin_amdgcn_readfirstlane(integer_divide_ceil(merged_reduce_len, S::Block_N));
auto block_reduce2d = Policy::template GetBlockReduce2d<Problem>();
auto block_reduce2d_sync = Policy::template GetBlockReduce2dSync<Problem>();
@@ -167,7 +167,7 @@ struct Reduce
auto y_compute = block_reduce2d.template MakeYBlockTile<XTensorType>();
set_tile(y_compute, reduce_func.template GetIdentityValue<ComputeDataType>());
-for(int iN = amd_wave_read_first_lane(0); iN < num_n_tile_iteration; ++iN)
+for(int iN = __builtin_amdgcn_readfirstlane(0); iN < num_n_tile_iteration; ++iN)
{
const auto x = load_tile(x_window);
block_reduce2d(x, y_compute, reduce_func);