mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[CK Tile] enable building examples by default (#3259)
* Remove EXCLUDE_FROM_ALL from the CK Tile examples so they are built by default; this adds about 15 minutes of build time with 64 threads for a single architecture. * Fix a C++17 compile error in the CK Tile examples. --------- Co-authored-by: khuagarw <khuagarw@amd.com> Co-authored-by: Ding, Yi <yi.ding@amd.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
add_executable(tile_tutorial_copy_kernel EXCLUDE_FROM_ALL copy_basic.cpp)
|
||||
add_executable(tile_tutorial_copy_kernel copy_basic.cpp)
|
||||
|
||||
# Impact: This flag ensures that the compiler doesn't make
|
||||
# assumptions about memory aliasing that could interfere with Composable Kernel's explicit memory access patterns.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
add_executable(tile_tutorial_naive_gemm EXCLUDE_FROM_ALL practice_gemm.cpp)
|
||||
add_executable(tile_tutorial_naive_gemm practice_gemm.cpp)
|
||||
|
||||
target_compile_options(tile_tutorial_naive_gemm PRIVATE
|
||||
-mllvm -enable-noalias-to-md-conversion=0
|
||||
|
||||
@@ -28,9 +28,9 @@ struct PracticeGemmHostPipeline
|
||||
{
|
||||
|
||||
// Size of the entire problem
|
||||
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
|
||||
const auto N = c_dram.get_tensor_descriptor().get_length(number<1>{}); // M x N
|
||||
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
|
||||
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
|
||||
const auto N = c_dram_ref.get_tensor_descriptor().get_length(number<1>{}); // M x N
|
||||
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
|
||||
|
||||
// Size of the block tile
|
||||
const auto MPerBlock = BlockTile::at(number<0>{});
|
||||
@@ -83,7 +83,7 @@ struct PracticeGemmHostPipeline
|
||||
__shared__ char p_smem_char[block_gemm_pipeline.GetStaticLDSSize()];
|
||||
const auto c_block_tile =
|
||||
block_gemm_pipeline(a_block_window, b_block_window, num_loops_k, p_smem_char);
|
||||
auto c_window = make_tile_window(c_dram,
|
||||
auto c_window = make_tile_window(c_dram_ref,
|
||||
make_tuple(number<MPerBlock>{}, number<NPerBlock>{}),
|
||||
{tile_origin_m, tile_origin_n});
|
||||
store_tile(c_window, c_block_tile);
|
||||
|
||||
Reference in New Issue
Block a user