Fix multiple test failures with staging compiler. (#3103)

* fix sync issues with staging compiler

* fix codegen

* use separate sync for gfx11

[ROCm/composable_kernel commit: 331273b474]
This commit is contained in:
Illia Silin
2025-10-28 08:07:19 -07:00
committed by GitHub
parent 8eb813de42
commit 97c2fb582a
2 changed files with 9 additions and 2 deletions

View File

@@ -52,7 +52,7 @@ struct kernel
// Member of struct kernel: returns a callable that packs its own arguments
// (xs...) into a std::vector<kernel_argument> and forwards them, together with
// stream, global, local and zs..., to another launch overload.
// NOTE(review): this span is a rendered diff hunk without +/- markers. The two
// consecutive `return [=...` lines look like the removed and the added side of
// one statement, so the text is not compilable as shown. Confirm against the
// actual file before editing.
template <class... Ts>
auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const
{
return [=](auto&&... xs) {
// Presumably the replacement line: it names `this` explicitly in the capture
// list, which the unqualified member call launch(...) below relies on, instead
// of depending on implicit capture through [=] (deprecated since C++20).
return [=, this](auto&&... xs) {
launch(stream, global, local, std::vector<kernel_argument>{xs...}, zs...);
};
}

View File

@@ -16,10 +16,17 @@ __device__ void llvm_amdgcn_s_wait_dscnt(short cnt) __asm("llvm.amdgcn.s.wait.ds
__device__ void block_sync_lds()
{
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
#ifdef __gfx12__
#if defined(__gfx12__)
llvm_amdgcn_s_wait_dscnt(0);
asm volatile("s_barrier_signal -1\n\t"
"s_barrier_wait -1");
#elif defined(__gfx11__)
// asm volatile("\
// s_waitcnt lgkmcnt(0) \n \
// s_barrier \
// " ::);
__builtin_amdgcn_s_waitcnt(0xfc07);
__builtin_amdgcn_s_barrier();
#else
// asm volatile("\
// s_waitcnt lgkmcnt(0) \n \