Merge commit '79aae7c7f71404bdb80d6db52bc6401e0e221d42' into develop

This commit is contained in:
assistant-librarian[bot]
2025-11-27 00:36:02 +00:00
parent 2044d0dd35
commit e7c7922385
41 changed files with 360 additions and 174 deletions

View File

@@ -436,12 +436,14 @@ struct PoolKernel
// Main reduction loop - with index tracking
for(int k_tile = amd_wave_read_first_lane(0); k_tile < num_k_tiles; ++k_tile)
{
const auto x_tile = load_tile(x_window);
const auto x_tile = load_tile(x_window);
const auto& in_tensor_padded_ref =
in_tensor_padded; // structured bindings cannot be captured prior to cpp20
auto index_calculator = [&](const auto& x_indices) {
// Get global coordinates in the 2D matrix space (M, N)
const auto global_M = x_indices.at(number<0>{}) + iM;
const auto global_N = (k_tile * S::Block_N) + x_indices.at(number<1>{});
return in_tensor_padded.get_tensor_descriptor().calculate_offset(
return in_tensor_padded_ref.get_tensor_descriptor().calculate_offset(
make_tuple(global_M, global_N));
};