Update nvvm API call from nvvm enum to str (#2985)

This commit is contained in:
Xiao Song
2026-01-27 17:28:29 +08:00
committed by GitHub
parent 7a14467776
commit acb45938e9
29 changed files with 132 additions and 327 deletions

View File

@@ -1298,10 +1298,7 @@ class PersistentDenseGemmKernel:
c_buffer = (num_prev_subtiles + subtile_idx) % self.num_c_stage
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
# Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
epilog_sync_barrier.arrive_and_wait()
#

View File

@@ -1377,10 +1377,7 @@ class PersistentDenseGemmKernel:
c_buffer = (num_prev_subtiles + subtile_idx) % self.num_c_stage
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
# Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
epilogue_sync_barrier.arrive_and_wait()
#

View File

@@ -1224,10 +1224,7 @@ class PersistentDenseGemmKernel:
tRS_sC[(None, None, None, c_buffer)],
)
# Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
epilog_threads = 32 * len(self.epilog_warp_id)
cute.arch.barrier(
barrier_id=self.epilog_sync_bar_id,