mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-05-11 17:00:05 +00:00
Update nvvm API call from nvvm enum to str (#2985)
This commit is contained in:
@@ -1298,10 +1298,7 @@ class PersistentDenseGemmKernel:
 c_buffer = (num_prev_subtiles + subtile_idx) % self.num_c_stage
 cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
 # Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
 epilog_sync_barrier.arrive_and_wait()
 
 #
@@ -1377,10 +1377,7 @@ class PersistentDenseGemmKernel:
 c_buffer = (num_prev_subtiles + subtile_idx) % self.num_c_stage
 cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
 # Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
 epilogue_sync_barrier.arrive_and_wait()
 
 #
@@ -1224,10 +1224,7 @@ class PersistentDenseGemmKernel:
 tRS_sC[(None, None, None, c_buffer)],
 )
 # Fence and barrier to make sure shared memory store is visible to TMA store
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
 epilog_threads = 32 * len(self.epilog_warp_id)
 cute.arch.barrier(
 barrier_id=self.epilog_sync_bar_id,
Reference in New Issue
Block a user