Update NVVM API calls to pass string arguments instead of NVVM enum values (#2985)

This commit is contained in:
Xiao Song
2026-01-27 17:28:29 +08:00
committed by GitHub
parent 7a14467776
commit acb45938e9
29 changed files with 132 additions and 327 deletions

View File

@@ -243,10 +243,7 @@ class ClcDynamicPersistentTileScheduler:
result_addr: 16-byte response data (simulating shared memory access)
"""
m_idx, n_idx, l_idx, vld = cute.arch.clc_response(result_addr, loc=loc, ip=ip)
cute.arch.fence_proxy(
cute.arch.ProxyKind.async_shared,
space=cute.arch.SharedSpace.shared_cta,
)
cute.arch.fence_proxy("async.shared", space="cta")
cta_idx_in_cluster, cta_idy_in_cluster, _ = self.cta_id_in_cluster
cur_tile_coord = (m_idx + cta_idx_in_cluster, n_idx + cta_idy_in_cluster, l_idx)
return WorkTileInfo(cur_tile_coord, vld)

View File

@@ -280,10 +280,7 @@ def epilogue_tma_store(
c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
# Fence and barrier to make sure shared memory store is visible to TMA store
cute.arch.fence_proxy(
cute.arch.ProxyKind.async_shared,
space=cute.arch.SharedSpace.shared_cta,
)
cute.arch.fence_proxy("async.shared", space="cta")
epilog_sync_barrier.arrive_and_wait()
#
@@ -675,10 +672,7 @@ def epilogue_tma_store_release_flag(
c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
# Fence and barrier to make sure shared memory store is visible to TMA store
cute.arch.fence_proxy(
cute.arch.ProxyKind.async_shared,
space=cute.arch.SharedSpace.shared_cta,
)
cute.arch.fence_proxy("async.shared", space="cta")
epilog_sync_barrier.arrive_and_wait()
#