From fe63a646a459498e5677efd213fa3f8b714387c8 Mon Sep 17 00:00:00 2001 From: aska-0096 Date: Wed, 6 Aug 2025 05:58:43 +0000 Subject: [PATCH] add __restrict__ to tr load --- include/ck_tile/core/arch/amd_buffer_addressing.hpp | 2 +- include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck_tile/core/arch/amd_buffer_addressing.hpp b/include/ck_tile/core/arch/amd_buffer_addressing.hpp index a524d04c57..a7940837b6 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing.hpp @@ -2831,7 +2831,7 @@ CK_TILE_DEVICE void amd_direct_load_global_to_lds(const T* global_base_ptr, #if defined(__gfx950__) template -__device__ auto amd_transpose_load_to_vgpr(const T* in_ptr) +__device__ auto amd_transpose_load_to_vgpr(const T* __restrict__ in_ptr) { static_assert(__has_builtin(__builtin_amdgcn_raw_buffer_load_b32), diff --git a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp index d929837891..55a9a18c8e 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp @@ -2601,7 +2601,7 @@ CK_TILE_DEVICE void amd_direct_load_global_to_lds(const T* global_base_ptr, #if defined(__gfx950__) template -__device__ auto amd_transpose_load_to_vgpr(const T* in_ptr) +__device__ auto amd_transpose_load_to_vgpr(const T* __restrict__ in_ptr) { static_assert(__has_builtin(__builtin_amdgcn_raw_buffer_load_b32),