mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-04-20 06:48:59 +00:00
Replace the fence proxy call with the latest routine in examples/distributed/all_reduce_tma.py (#3027)
This commit is contained in:
@@ -415,10 +415,7 @@ class AllReduceTmaKernel:
 # ======================================================================
 if warp_idx == 0:
 # Fence to ensure SMEM writes are visible
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")

 smem_tile_out = cute.slice_(staged_smem_tensor, (None, 0))

Reference in New Issue
Block a user