[CK Tile] enable building examples by default (#3259)

* remove EXCLUDE_FROM_ALL from the ck-tile examples
-> adds 15 min of build time with 64 threads for a single arch

* fix C++17 compile error in the ck-tile examples

---------

Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
This commit is contained in:
Max Podkorytov
2025-11-26 16:24:44 -08:00
committed by GitHub
parent 40d7217ac7
commit 79aae7c7f7
39 changed files with 175 additions and 174 deletions

View File

@@ -436,12 +436,14 @@ struct PoolKernel
// Main reduction loop - with index tracking
for(int k_tile = amd_wave_read_first_lane(0); k_tile < num_k_tiles; ++k_tile)
{
const auto x_tile = load_tile(x_window);
const auto x_tile = load_tile(x_window);
const auto& in_tensor_padded_ref =
in_tensor_padded; // lambdas cannot capture structured bindings before C++20, so capture this named reference instead
auto index_calculator = [&](const auto& x_indices) {
// Get global coordinates in the 2D matrix space (M, N)
const auto global_M = x_indices.at(number<0>{}) + iM;
const auto global_N = (k_tile * S::Block_N) + x_indices.at(number<1>{});
return in_tensor_padded.get_tensor_descriptor().calculate_offset(
return in_tensor_padded_ref.get_tensor_descriptor().calculate_offset(
make_tuple(global_M, global_N));
};