[CK Tile] enable building examples by default (#3259)

* remove EXCLUDE_FROM_ALL from ck-tile examples
-> +15 min build time w/ 64 threads for a single arch

* fix cpp17 compile error in the ck-tile examples

---------

Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
This commit is contained in:
Max Podkorytov
2025-11-26 16:24:44 -08:00
committed by GitHub
parent 40d7217ac7
commit 79aae7c7f7
39 changed files with 175 additions and 174 deletions

View File

@@ -1,4 +1,4 @@
add_executable(tile_tutorial_copy_kernel EXCLUDE_FROM_ALL copy_basic.cpp)
add_executable(tile_tutorial_copy_kernel copy_basic.cpp)
# Impact: This flag ensures that the compiler doesn't make
# assumptions about memory aliasing that could interfere with Composable Kernel's explicit memory access patterns.

View File

@@ -1,4 +1,4 @@
add_executable(tile_tutorial_naive_gemm EXCLUDE_FROM_ALL practice_gemm.cpp)
add_executable(tile_tutorial_naive_gemm practice_gemm.cpp)
target_compile_options(tile_tutorial_naive_gemm PRIVATE
-mllvm -enable-noalias-to-md-conversion=0

View File

@@ -28,9 +28,9 @@ struct PracticeGemmHostPipeline
{
// Size of the entire problem
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
const auto N = c_dram.get_tensor_descriptor().get_length(number<1>{}); // M x N
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
const auto N = c_dram_ref.get_tensor_descriptor().get_length(number<1>{}); // M x N
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
// Size of the block tile
const auto MPerBlock = BlockTile::at(number<0>{});
@@ -83,7 +83,7 @@ struct PracticeGemmHostPipeline
__shared__ char p_smem_char[block_gemm_pipeline.GetStaticLDSSize()];
const auto c_block_tile =
block_gemm_pipeline(a_block_window, b_block_window, num_loops_k, p_smem_char);
auto c_window = make_tile_window(c_dram,
auto c_window = make_tile_window(c_dram_ref,
make_tuple(number<MPerBlock>{}, number<NPerBlock>{}),
{tile_origin_m, tile_origin_n});
store_tile(c_window, c_block_tile);