Update the fence_proxy call to the latest API in examples/distributed/all_reduce_tma.py (#3027)

This commit is contained in:
aragorn-guan
2026-02-14 17:51:20 +08:00
committed by GitHub
parent ec7e6cb17b
commit f9a5f76b7a

View File

@@ -415,10 +415,7 @@ class AllReduceTmaKernel:
# ======================================================================
if warp_idx == 0:
# Fence to ensure SMEM writes are visible
-cute.arch.fence_proxy(
-cute.arch.ProxyKind.async_shared,
-space=cute.arch.SharedSpace.shared_cta,
-)
+cute.arch.fence_proxy("async.shared", space="cta")
smem_tile_out = cute.slice_(staged_smem_tensor, (None, 0))