mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-04-20 06:48:59 +00:00
Update nvvm API call from nvvm enum to str (#2985)
This commit is contained in:
@@ -243,10 +243,7 @@ class ClcDynamicPersistentTileScheduler:
|
||||
result_addr: 16-byte response data (simulating shared memory access)
|
||||
"""
|
||||
m_idx, n_idx, l_idx, vld = cute.arch.clc_response(result_addr, loc=loc, ip=ip)
|
||||
- cute.arch.fence_proxy(
|
||||
- cute.arch.ProxyKind.async_shared,
|
||||
- space=cute.arch.SharedSpace.shared_cta,
|
||||
- )
|
||||
+ cute.arch.fence_proxy("async.shared", space="cta")
|
||||
cta_idx_in_cluster, cta_idy_in_cluster, _ = self.cta_id_in_cluster
|
||||
cur_tile_coord = (m_idx + cta_idx_in_cluster, n_idx + cta_idy_in_cluster, l_idx)
|
||||
return WorkTileInfo(cur_tile_coord, vld)
|
||||
|
||||
@@ -280,10 +280,7 @@ def epilogue_tma_store(
|
||||
c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
|
||||
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
|
||||
# Fence and barrier to make sure shared memory store is visible to TMA store
|
||||
- cute.arch.fence_proxy(
|
||||
- cute.arch.ProxyKind.async_shared,
|
||||
- space=cute.arch.SharedSpace.shared_cta,
|
||||
- )
|
||||
+ cute.arch.fence_proxy("async.shared", space="cta")
|
||||
epilog_sync_barrier.arrive_and_wait()
|
||||
|
||||
#
|
||||
@@ -675,10 +672,7 @@ def epilogue_tma_store_release_flag(
|
||||
c_buffer = (num_prev_subtiles + subtile_idx) % gemm_kernel.num_c_stage
|
||||
cute.copy(tiled_copy_r2s, tRS_rC, tRS_sC[(None, None, None, c_buffer)])
|
||||
# Fence and barrier to make sure shared memory store is visible to TMA store
|
||||
- cute.arch.fence_proxy(
|
||||
- cute.arch.ProxyKind.async_shared,
|
||||
- space=cute.arch.SharedSpace.shared_cta,
|
||||
- )
|
||||
+ cute.arch.fence_proxy("async.shared", space="cta")
|
||||
epilog_sync_barrier.arrive_and_wait()
|
||||
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user