From 81a43e6d92cdd8c20d22392f9579604ed5f710a1 Mon Sep 17 00:00:00 2001 From: Tri Dao Date: Mon, 21 Apr 2025 00:02:51 -0400 Subject: [PATCH] Set EpiTile correctly when TileN is not divisible by 32 (#2220) If TileN is not divisible by 32 (e.g., 208), by default EpiTile would be set to 128 x 32, which does not compile because EpiTileN is required to divide TileN. --- include/cutlass/epilogue/collective/builders/sm90_builder.inl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cutlass/epilogue/collective/builders/sm90_builder.inl b/include/cutlass/epilogue/collective/builders/sm90_builder.inl index 50a5420b6..9cb03fdc2 100644 --- a/include/cutlass/epilogue/collective/builders/sm90_builder.inl +++ b/include/cutlass/epilogue/collective/builders/sm90_builder.inl @@ -116,13 +116,13 @@ sm90_compute_tile_shape_or_override() { auto epi_tile = [&] () { if constexpr (detail::sm90_is_cooperative_v) { auto tile_m = cute::min(_128{}, size<0>(TileShape_MNK{})); - auto tile_n = cute::min(_32{}, size<1>(TileShape_MNK{})); + auto tile_n = cute::gcd(cute::min(_32{}, size<1>(TileShape_MNK{})), size<1>(TileShape_MNK{})); return make_shape(tile_m, tile_n); } else if constexpr (detail::sm90_is_warp_specialized_v) { constexpr int N_perf = sizeof_bits_v == 8 ? 64 : 32; auto tile_m = cute::min(_64{}, size<0>(TileShape_MNK{})); - auto tile_n = cute::min(Int{}, size<1>(TileShape_MNK{})); + auto tile_n = cute::gcd(cute::min(Int{}, size<1>(TileShape_MNK{})), size<1>(TileShape_MNK{})); return make_shape(tile_m, tile_n); } else {