fix the flatmm (#2349)

This commit is contained in:
Thomas Ning
2025-06-16 02:17:53 -07:00
committed by GitHub
parent b34c234f51
commit d996bc78be
4 changed files with 8 additions and 4 deletions

View File

@@ -447,6 +447,7 @@ struct FlatmmKernel
// Run GEMM cooperatively by whole workgroup.
const auto& a_block_window = gemm_tile_windows.at(I0);
const auto& b_flat_block_window = gemm_tile_windows.at(I1);
+const auto& d_block_window = gemm_tile_windows.at(I2);
const auto& c_block_tile = FlatmmPipeline{}.template operator()(
a_block_window, b_flat_block_window, num_loop, smem_ptr);
@@ -454,7 +455,7 @@ struct FlatmmKernel
auto& c_block_window = gemm_tile_windows.at(I2);
EpiloguePipeline{}.template operator()<decltype(c_block_window), decltype(c_block_tile)>(
-c_block_window, c_block_tile, smem_ptr);
+c_block_window, c_block_tile, d_block_window, smem_ptr);
}
CK_TILE_DEVICE void operator()(FlatmmKernelArgs kargs) const