Update nvvm API call from nvvm enum to str (#2985)

2026-04-20 06:48:59 +00:00 · 2026-01-27 17:28:29 +08:00
parent 7a14467776
commit acb45938e9
29 changed files with 132 additions and 327 deletions
--- a/python/CuTeDSL/cutlass/utils/dynamic_persistent_tile_scheduler.py
+++ b/python/CuTeDSL/cutlass/utils/dynamic_persistent_tile_scheduler.py
@@ -243,10 +243,7 @@ class ClcDynamicPersistentTileScheduler:
        result_addr: 16-byte response data (simulating shared memory access)
        """
        m_idx, n_idx, l_idx, vld = cute.arch.clc_response(result_addr, loc=loc, ip=ip)
-        cute.arch.fence_proxy(
-            cute.arch.ProxyKind.async_shared,
-            space=cute.arch.SharedSpace.shared_cta,
-        )
+        cute.arch.fence_proxy("async.shared", space="cta")
        cta_idx_in_cluster, cta_idy_in_cluster, _ = self.cta_id_in_cluster
        cur_tile_coord = (m_idx + cta_idx_in_cluster, n_idx + cta_idy_in_cluster, l_idx)
        return WorkTileInfo(cur_tile_coord, vld)
--- a/python/CuTeDSL/cutlass/utils/gemm/sm100.py
+++ b/python/CuTeDSL/cutlass/utils/gemm/sm100.py
@@ -280,10 +280,7 @@ def epilogue_tma_store(
            c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
            cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
            # Fence and barrier to make sure shared memory store is visible to TMA store
-            cute.arch.fence_proxy(
-                cute.arch.ProxyKind.async_shared,
-                space=cute.arch.SharedSpace.shared_cta,
-            )
+            cute.arch.fence_proxy("async.shared", space="cta")
            epilog_sync_barrier.arrive_and_wait()

            #
@@ -675,10 +672,7 @@ def epilogue_tma_store_release_flag(
            c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
            cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
            # Fence and barrier to make sure shared memory store is visible to TMA store
-            cute.arch.fence_proxy(
-                cute.arch.ProxyKind.async_shared,
-                space=cute.arch.SharedSpace.shared_cta,
-            )
+            cute.arch.fence_proxy("async.shared", space="cta")
            epilog_sync_barrier.arrive_and_wait()

            #