Add `__restrict__` to pointer parameters of applicable functions

This commit is contained in:
Kevin Choi
2025-08-18 19:36:38 +00:00
parent b85daba2a3
commit 380aa8f311
4 changed files with 17 additions and 17 deletions

View File

@@ -20,7 +20,7 @@ union BufferResource
};
template <typename T>
__device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_size)
__device__ int32x4_t make_wave_buffer_resource(T* __restrict__ p_wave, index_t element_space_size)
{
BufferResource<T> wave_buffer_resource;
@@ -35,7 +35,7 @@ __device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_
}
template <typename T>
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* p_wave)
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* __restrict__ p_wave)
{
BufferResource<T> wave_buffer_resource;
@@ -892,7 +892,7 @@ template <typename T,
index_t N,
AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence>
__device__ void amd_buffer_store(const typename vector_type_maker<T, N>::type::type src_thread_data,
T* p_dst_wave,
T* __restrict__ p_dst_wave,
const index_t dst_thread_element_offset,
const bool dst_thread_element_valid,
const index_t dst_element_space_size)

View File

@@ -12,7 +12,7 @@ namespace ck {
template <typename PY,
typename PX,
typename enable_if<is_pointer_v<PY> && is_pointer_v<PX>, bool>::type = false>
__host__ __device__ PY c_style_pointer_cast(PX p_x)
__host__ __device__ PY c_style_pointer_cast(PX __restrict__ p_x)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"

View File

@@ -62,12 +62,12 @@ struct buffer_view<address_space_enum::generic,
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
BufferSizeType buffer_size,
T invalid_element_value)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
@@ -243,7 +243,7 @@ struct buffer_view<address_space_enum::global,
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
: p_data_{p_data},
buffer_size_{buffer_size / PackedSize},
cached_buf_res_{0},
@@ -251,7 +251,7 @@ struct buffer_view<address_space_enum::global,
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
BufferSizeType buffer_size,
T invalid_element_value)
: p_data_{p_data},
@@ -762,12 +762,12 @@ struct buffer_view<address_space_enum::lds,
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
BufferSizeType buffer_size,
T invalid_element_value)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
@@ -1121,12 +1121,12 @@ struct buffer_view<address_space_enum::vgpr,
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
{
}
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
BufferSizeType buffer_size,
T invalid_element_value)
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
@@ -1253,7 +1253,7 @@ template <address_space_enum BufferAddressSpace,
amd_buffer_coherence_enum Coherence = amd_buffer_coherence_enum::coherence_default,
typename T,
typename BufferSizeType>
CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* p, BufferSizeType buffer_size)
CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size)
{
return buffer_view<BufferAddressSpace, T, BufferSizeType, true, Coherence>{p, buffer_size};
}
@@ -1266,7 +1266,7 @@ template <address_space_enum BufferAddressSpace,
typename std::enable_if<std::is_same<remove_cvref_t<T>, remove_cvref_t<X>>::value,
bool>::type = false>
CK_TILE_HOST_DEVICE constexpr auto
make_buffer_view(T* p, BufferSizeType buffer_size, X invalid_element_value)
make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size, X invalid_element_value)
{
return buffer_view<BufferAddressSpace, T, BufferSizeType, false, Coherence>{
p, buffer_size, invalid_element_value};

View File

@@ -448,7 +448,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
amd_buffer_coherence_enum Coherence = amd_buffer_coherence_enum::coherence_default,
typename DataType,
typename... Ts>
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* p,
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* __restrict__ p,
const tensor_descriptor<Ts...>& desc)
{
auto buffer_view =
@@ -467,7 +467,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
index_t GuaranteedLastDimensionVectorStride = -1,
typename std::enable_if<sizeof...(Lengths) == sizeof...(Strides), bool>::type = false>
CK_TILE_HOST_DEVICE constexpr auto
make_naive_tensor_view(DataType* p,
make_naive_tensor_view(DataType* __restrict__ p,
const tuple<Lengths...>& lengths,
const tuple<Strides...>& strides,
number<GuaranteedLastDimensionVectorLength> = number<-1>{},
@@ -490,7 +490,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
typename... Lengths,
index_t GuaranteedLastDimensionVectorLength = -1>
CK_TILE_HOST_DEVICE constexpr auto
make_naive_tensor_view_packed(DataType* p,
make_naive_tensor_view_packed(DataType* __restrict__ p,
const tuple<Lengths...>& lengths,
number<GuaranteedLastDimensionVectorLength> = number<-1>{})
{