diff --git a/codegen/test/rtc/include/rtc/kernel.hpp b/codegen/test/rtc/include/rtc/kernel.hpp index b1ee729f77..96337fe2c1 100644 --- a/codegen/test/rtc/include/rtc/kernel.hpp +++ b/codegen/test/rtc/include/rtc/kernel.hpp @@ -52,7 +52,7 @@ struct kernel template auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const { - return [=](auto&&... xs) { + return [=, this](auto&&... xs) { launch(stream, global, local, std::vector{xs...}, zs...); }; } diff --git a/include/ck/utility/synchronization.hpp b/include/ck/utility/synchronization.hpp index 672fc8c31b..54391e7e86 100644 --- a/include/ck/utility/synchronization.hpp +++ b/include/ck/utility/synchronization.hpp @@ -16,10 +16,17 @@ __device__ void llvm_amdgcn_s_wait_dscnt(short cnt) __asm("llvm.amdgcn.s.wait.ds __device__ void block_sync_lds() { #if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM -#ifdef __gfx12__ +#if defined(__gfx12__) llvm_amdgcn_s_wait_dscnt(0); asm volatile("s_barrier_signal -1\n\t" "s_barrier_wait -1"); +#elif defined(__gfx11__) + // asm volatile("\ + // s_waitcnt lgkmcnt(0) \n \ + // s_barrier \ + // " ::); + __builtin_amdgcn_s_waitcnt(0xfc07); + __builtin_amdgcn_s_barrier(); #else // asm volatile("\ // s_waitcnt lgkmcnt(0) \n \