diff --git a/include/ck_tile/core/arch/amd_buffer_addressing.hpp b/include/ck_tile/core/arch/amd_buffer_addressing.hpp index a524d04c57..a7940837b6 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing.hpp @@ -2831,7 +2831,7 @@ CK_TILE_DEVICE void amd_direct_load_global_to_lds(const T* global_base_ptr, #if defined(__gfx950__) template -__device__ auto amd_transpose_load_to_vgpr(const T* in_ptr) +__device__ auto amd_transpose_load_to_vgpr(const T* __restrict__ in_ptr) { static_assert(__has_builtin(__builtin_amdgcn_raw_buffer_load_b32), diff --git a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp index d929837891..55a9a18c8e 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp @@ -2601,7 +2601,7 @@ CK_TILE_DEVICE void amd_direct_load_global_to_lds(const T* global_base_ptr, #if defined(__gfx950__) template -__device__ auto amd_transpose_load_to_vgpr(const T* in_ptr) +__device__ auto amd_transpose_load_to_vgpr(const T* __restrict__ in_ptr) { static_assert(__has_builtin(__builtin_amdgcn_raw_buffer_load_b32),