From 9135a9bb6dbbe55e6d251eaed76ca3e7a95aa32a Mon Sep 17 00:00:00 2001
From: Vrushtee
Date: Fri, 24 Apr 2026 20:43:38 +0530
Subject: [PATCH] Replace std::min with cute::min in sm120 blockwise scaling device functions (#3055)

---
 .../gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp | 4 ++--
 .../gemm/collective/sm120_mma_tma_blockwise_scaling.hpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp b/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
index 29547938c..b10d803c1 100644
--- a/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
+++ b/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
@@ -540,8 +540,8 @@ struct CollectiveMma<
     Tensor tApA_SFA = make_tensor(shape(tAsA_SFA(_,_,0)));
     Tensor tBpB_SFB = make_tensor(shape(tBsB_SFB(_,_,0)));
 
-    auto scale_m_lim = std::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
-    auto scale_n_lim = std::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
+    auto scale_m_lim = cute::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
+    auto scale_n_lim = cute::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
 
     CUTLASS_PRAGMA_UNROLL
     for (int i = 0; i < size(tApA_SFA); ++i)
diff --git a/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp b/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
index b140998b9..8150d17e1 100644
--- a/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
+++ b/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
@@ -472,8 +472,8 @@ struct CollectiveMma<
     Tensor tApA_SFA = make_tensor(shape(tAsA_SFA(_,_,0)));
     Tensor tBpB_SFB = make_tensor(shape(tBsB_SFB(_,_,0)));
 
-    auto scale_m_lim = std::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
-    auto scale_n_lim = std::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
+    auto scale_m_lim = cute::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
+    auto scale_n_lim = cute::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
 
     CUTLASS_PRAGMA_UNROLL
     for (int i = 0; i < size(tApA_SFA); ++i)