WA for rocm-6.2+ "s" constraint for buffer resource (#1346)

* WA for rocm-6.2+ "s" constraint for buffer resource

* add missing memory clobber
This commit is contained in:
carlushuang
2024-06-22 00:00:13 +08:00
committed by GitHub
parent 510325a468
commit fa129c1a5d
2 changed files with 10 additions and 3 deletions

View File

@@ -991,7 +991,8 @@ __device__ void amd_direct_load_global_to_lds(const T* global_base_ptr,
asm volatile("s_mov_b32 m0, %0; \n\t"
"buffer_load_dword %1, %2, 0 offen lds;\n\t" ::"s"(lds_ptr_sgpr),
"v"(global_offset_bytes),
"s"(src_resource));
"s"(src_resource)
: "memory");
#else
// LDS pointer must be attributed with the LDS address space.
__attribute__((address_space(3))) uint32_t* lds_ptr =