diff --git a/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp index 783fc661ce..d3e4a42946 100644 --- a/include/ck/utility/amd_buffer_addressing.hpp +++ b/include/ck/utility/amd_buffer_addressing.hpp @@ -20,7 +20,7 @@ union BufferResource }; template -__device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_size) +__device__ int32x4_t make_wave_buffer_resource(T* __restrict__ p_wave, index_t element_space_size) { BufferResource wave_buffer_resource; @@ -35,7 +35,7 @@ __device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_ } template -__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* p_wave) +__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* __restrict__ p_wave) { BufferResource wave_buffer_resource; @@ -892,7 +892,7 @@ template __device__ void amd_buffer_store(const typename vector_type_maker::type::type src_thread_data, - T* p_dst_wave, + T* __restrict__ p_dst_wave, const index_t dst_thread_element_offset, const bool dst_thread_element_valid, const index_t dst_element_space_size) diff --git a/include/ck/utility/c_style_pointer_cast.hpp b/include/ck/utility/c_style_pointer_cast.hpp index 610e393a77..00ba550314 100644 --- a/include/ck/utility/c_style_pointer_cast.hpp +++ b/include/ck/utility/c_style_pointer_cast.hpp @@ -12,7 +12,7 @@ namespace ck { template && is_pointer_v, bool>::type = false> -__host__ __device__ PY c_style_pointer_cast(PX p_x) +__host__ __device__ PY c_style_pointer_cast(PX __restrict__ p_x) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" diff --git a/include/ck_tile/core/tensor/buffer_view.hpp b/include/ck_tile/core/tensor/buffer_view.hpp index ca314a6abe..d1e770ef42 100644 --- a/include/ck_tile/core/tensor/buffer_view.hpp +++ b/include/ck_tile/core/tensor/buffer_view.hpp @@ -62,12 +62,12 @@ struct buffer_view -CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* p, BufferSizeType buffer_size) +CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size) { return buffer_view{p, buffer_size}; } @@ -1266,7 +1266,7 @@ template , remove_cvref_t>::value, bool>::type = false> CK_TILE_HOST_DEVICE constexpr auto -make_buffer_view(T* p, BufferSizeType buffer_size, X invalid_element_value) +make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size, X invalid_element_value) { return buffer_view{ p, buffer_size, invalid_element_value}; diff --git a/include/ck_tile/core/tensor/tensor_view.hpp b/include/ck_tile/core/tensor/tensor_view.hpp index 269465fae6..33355930df 100644 --- a/include/ck_tile/core/tensor/tensor_view.hpp +++ b/include/ck_tile/core/tensor/tensor_view.hpp @@ -448,7 +448,7 @@ template -CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* p, +CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* __restrict__ p, const tensor_descriptor& desc) { auto buffer_view = @@ -467,7 +467,7 @@ template ::type = false> CK_TILE_HOST_DEVICE constexpr auto -make_naive_tensor_view(DataType* p, +make_naive_tensor_view(DataType* __restrict__ p, const tuple& lengths, const tuple& strides, number = number<-1>{}, @@ -490,7 +490,7 @@ template CK_TILE_HOST_DEVICE constexpr auto -make_naive_tensor_view_packed(DataType* p, +make_naive_tensor_view_packed(DataType* __restrict__ p, const tuple& lengths, number = number<-1>{}) {