From 697126019ed5dd59b58bb2f081dd49595192caa5 Mon Sep 17 00:00:00 2001
From: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
Date: Tue, 29 Apr 2025 11:54:20 -0400
Subject: [PATCH] fix blackwell grouped groupwise hang (#2267)

---
 include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp b/include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp
index d896c0698..bf02e5faf 100644
--- a/include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp
+++ b/include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp
@@ -66,18 +66,18 @@ public:
     int32_t M_idx = 0;
     int32_t N_idx = 0;
     int32_t L_idx = 0;
-    bool is_valid_tile = false;
+    int32_t is_valid_tile = 0;
 
     CUTLASS_HOST_DEVICE
     bool
     is_valid() const {
-      return is_valid_tile;
+      return is_valid_tile != 0;
     }
 
     CUTLASS_HOST_DEVICE
     static WorkTileInfo
     invalid_work_tile() {
-      return {-1, -1, -1, false};
+      return {-1, -1, -1, 0};
     }
 
     CUTLASS_HOST_DEVICE
@@ -304,7 +304,7 @@ public:
       int32_t log_swizzle_size,
       RasterOrder raster_order) {
 
-    bool valid_tile = true;
+    int32_t valid_tile = 1;
 
     // Use a warp to "speculatively" check if the work tile maps to the next 32 groups
     int lane_idx = canonical_lane_idx();