Merge commit '79aae7c7f71404bdb80d6db52bc6401e0e221d42' into develop

This commit is contained in:
assistant-librarian[bot]
2025-11-27 00:36:02 +00:00
parent 2044d0dd35
commit e7c7922385
41 changed files with 360 additions and 174 deletions

View File

@@ -1,4 +1,4 @@
add_executable(tile_tutorial_copy_kernel EXCLUDE_FROM_ALL copy_basic.cpp)
add_executable(tile_tutorial_copy_kernel copy_basic.cpp)
# Impact: This flag ensures that the compiler doesn't make
# assumptions about memory aliasing that could interfere with Composable Kernel's explicit memory access patterns.

View File

@@ -1,4 +1,4 @@
add_executable(tile_tutorial_naive_gemm EXCLUDE_FROM_ALL practice_gemm.cpp)
add_executable(tile_tutorial_naive_gemm practice_gemm.cpp)
target_compile_options(tile_tutorial_naive_gemm PRIVATE
-mllvm -enable-noalias-to-md-conversion=0

View File

@@ -28,9 +28,9 @@ struct PracticeGemmHostPipeline
{
// Size of the entire problem
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
const auto N = c_dram.get_tensor_descriptor().get_length(number<1>{}); // M x N
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
const auto M = a_dram.get_tensor_descriptor().get_length(number<0>{}); // M x K
const auto N = c_dram_ref.get_tensor_descriptor().get_length(number<1>{}); // M x N
const auto K = a_dram.get_tensor_descriptor().get_length(number<1>{}); // M x K
// Size of the block tile
const auto MPerBlock = BlockTile::at(number<0>{});
@@ -83,7 +83,7 @@ struct PracticeGemmHostPipeline
__shared__ char p_smem_char[block_gemm_pipeline.GetStaticLDSSize()];
const auto c_block_tile =
block_gemm_pipeline(a_block_window, b_block_window, num_loops_k, p_smem_char);
auto c_window = make_tile_window(c_dram,
auto c_window = make_tile_window(c_dram_ref,
make_tuple(number<MPerBlock>{}, number<NPerBlock>{}),
{tile_origin_m, tile_origin_n});
store_tile(c_window, c_block_tile);