mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[CK Tile] enable building examples by default (#3259)
* remove EXCLUDE_FROM_ALL from ck-tile examples -> +15 min build time w/ 64 threads for a single arch
* fix cpp17 compile error in the ck-tile examples

---------

Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
This commit is contained in:
@@ -436,12 +436,14 @@ struct PoolKernel
|
||||
// Main reduction loop - with index tracking
|
||||
for(int k_tile = amd_wave_read_first_lane(0); k_tile < num_k_tiles; ++k_tile)
|
||||
{
|
||||
const auto x_tile = load_tile(x_window);
|
||||
const auto x_tile = load_tile(x_window);
|
||||
const auto& in_tensor_padded_ref =
|
||||
in_tensor_padded; // structured bindings cannot be captured prior to cpp20
|
||||
auto index_calculator = [&](const auto& x_indices) {
|
||||
// Get global coordinates in the 2D matrix space (M, N)
|
||||
const auto global_M = x_indices.at(number<0>{}) + iM;
|
||||
const auto global_N = (k_tile * S::Block_N) + x_indices.at(number<1>{});
|
||||
return in_tensor_padded.get_tensor_descriptor().calculate_offset(
|
||||
return in_tensor_padded_ref.get_tensor_descriptor().calculate_offset(
|
||||
make_tuple(global_M, global_N));
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user