From 9135a9bb6dbbe55e6d251eaed76ca3e7a95aa32a Mon Sep 17 00:00:00 2001
From: Vrushtee <vrushteegaikwad@gmail.com>
Date: Fri, 24 Apr 2026 20:43:38 +0530
Subject: [PATCH] Replace std::min with cute::min in sm120 blockwise scaling
 device functions (#3055)

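---
Note: std::min is a host-side standard-library function with no __device__
annotation, so calling it from device code only builds when nvcc is given
--expt-relaxed-constexpr; cute::min is usable from both host and device code.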
 .../gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp | 4 ++--
 .../gemm/collective/sm120_mma_tma_blockwise_scaling.hpp       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp b/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
index 29547938c..b10d803c1 100644
--- a/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
+++ b/include/cutlass/gemm/collective/sm120_mma_array_tma_blockwise_scaling.hpp
@@ -540,8 +540,8 @@ struct CollectiveMma<
       Tensor tApA_SFA = make_tensor<bool>(shape(tAsA_SFA(_,_,0)));
       Tensor tBpB_SFB = make_tensor<bool>(shape(tBsB_SFB(_,_,0)));
 
-      auto scale_m_lim = std::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
-      auto scale_n_lim = std::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
+      auto scale_m_lim = cute::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
+      auto scale_n_lim = cute::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
 
       CUTLASS_PRAGMA_UNROLL
       for (int i = 0; i < size(tApA_SFA); ++i)
diff --git a/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp b/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
index b140998b9..8150d17e1 100644
--- a/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
+++ b/include/cutlass/gemm/collective/sm120_mma_tma_blockwise_scaling.hpp
@@ -472,8 +472,8 @@ struct CollectiveMma<
     Tensor tApA_SFA = make_tensor<bool>(shape(tAsA_SFA(_,_,0)));
     Tensor tBpB_SFB = make_tensor<bool>(shape(tBsB_SFB(_,_,0)));
 
-    auto scale_m_lim = std::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
-    auto scale_n_lim = std::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
+    auto scale_m_lim = cute::min(scales_m, (m_coord + 1) * ScaleMsPerTile);
+    auto scale_n_lim = cute::min(scales_n, (n_coord + 1) * ScaleNsPerTile);
 
     CUTLASS_PRAGMA_UNROLL
     for (int i = 0; i < size(tApA_SFA); ++i)