mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-08 15:30:23 +00:00
add restrict to applicable functions
This commit is contained in:
@@ -20,7 +20,7 @@ union BufferResource
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_size)
|
||||
__device__ int32x4_t make_wave_buffer_resource(T* __restrict__ p_wave, index_t element_space_size)
|
||||
{
|
||||
BufferResource<T> wave_buffer_resource;
|
||||
|
||||
@@ -35,7 +35,7 @@ __device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* p_wave)
|
||||
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* __restrict__ p_wave)
|
||||
{
|
||||
BufferResource<T> wave_buffer_resource;
|
||||
|
||||
@@ -892,7 +892,7 @@ template <typename T,
|
||||
index_t N,
|
||||
AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence>
|
||||
__device__ void amd_buffer_store(const typename vector_type_maker<T, N>::type::type src_thread_data,
|
||||
T* p_dst_wave,
|
||||
T* __restrict__ p_dst_wave,
|
||||
const index_t dst_thread_element_offset,
|
||||
const bool dst_thread_element_valid,
|
||||
const index_t dst_element_space_size)
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace ck {
|
||||
template <typename PY,
|
||||
typename PX,
|
||||
typename enable_if<is_pointer_v<PY> && is_pointer_v<PX>, bool>::type = false>
|
||||
__host__ __device__ PY c_style_pointer_cast(PX p_x)
|
||||
__host__ __device__ PY c_style_pointer_cast(PX __restrict__ p_x)
|
||||
{
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
|
||||
@@ -62,12 +62,12 @@ struct buffer_view<address_space_enum::generic,
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
|
||||
BufferSizeType buffer_size,
|
||||
T invalid_element_value)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
|
||||
@@ -243,7 +243,7 @@ struct buffer_view<address_space_enum::global,
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
|
||||
: p_data_{p_data},
|
||||
buffer_size_{buffer_size / PackedSize},
|
||||
cached_buf_res_{0},
|
||||
@@ -251,7 +251,7 @@ struct buffer_view<address_space_enum::global,
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
|
||||
BufferSizeType buffer_size,
|
||||
T invalid_element_value)
|
||||
: p_data_{p_data},
|
||||
@@ -762,12 +762,12 @@ struct buffer_view<address_space_enum::lds,
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
|
||||
BufferSizeType buffer_size,
|
||||
T invalid_element_value)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
|
||||
@@ -1121,12 +1121,12 @@ struct buffer_view<address_space_enum::vgpr,
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data, BufferSizeType buffer_size)
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data, BufferSizeType buffer_size)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
|
||||
{
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* p_data,
|
||||
CK_TILE_HOST_DEVICE constexpr buffer_view(T* __restrict__ p_data,
|
||||
BufferSizeType buffer_size,
|
||||
T invalid_element_value)
|
||||
: p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
|
||||
@@ -1253,7 +1253,7 @@ template <address_space_enum BufferAddressSpace,
|
||||
amd_buffer_coherence_enum Coherence = amd_buffer_coherence_enum::coherence_default,
|
||||
typename T,
|
||||
typename BufferSizeType>
|
||||
CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* p, BufferSizeType buffer_size)
|
||||
CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size)
|
||||
{
|
||||
return buffer_view<BufferAddressSpace, T, BufferSizeType, true, Coherence>{p, buffer_size};
|
||||
}
|
||||
@@ -1266,7 +1266,7 @@ template <address_space_enum BufferAddressSpace,
|
||||
typename std::enable_if<std::is_same<remove_cvref_t<T>, remove_cvref_t<X>>::value,
|
||||
bool>::type = false>
|
||||
CK_TILE_HOST_DEVICE constexpr auto
|
||||
make_buffer_view(T* p, BufferSizeType buffer_size, X invalid_element_value)
|
||||
make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size, X invalid_element_value)
|
||||
{
|
||||
return buffer_view<BufferAddressSpace, T, BufferSizeType, false, Coherence>{
|
||||
p, buffer_size, invalid_element_value};
|
||||
|
||||
@@ -448,7 +448,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
|
||||
amd_buffer_coherence_enum Coherence = amd_buffer_coherence_enum::coherence_default,
|
||||
typename DataType,
|
||||
typename... Ts>
|
||||
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* p,
|
||||
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType* __restrict__ p,
|
||||
const tensor_descriptor<Ts...>& desc)
|
||||
{
|
||||
auto buffer_view =
|
||||
@@ -467,7 +467,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
|
||||
index_t GuaranteedLastDimensionVectorStride = -1,
|
||||
typename std::enable_if<sizeof...(Lengths) == sizeof...(Strides), bool>::type = false>
|
||||
CK_TILE_HOST_DEVICE constexpr auto
|
||||
make_naive_tensor_view(DataType* p,
|
||||
make_naive_tensor_view(DataType* __restrict__ p,
|
||||
const tuple<Lengths...>& lengths,
|
||||
const tuple<Strides...>& strides,
|
||||
number<GuaranteedLastDimensionVectorLength> = number<-1>{},
|
||||
@@ -490,7 +490,7 @@ template <address_space_enum BufferAddressSpace = address_space_enum::generic,
|
||||
typename... Lengths,
|
||||
index_t GuaranteedLastDimensionVectorLength = -1>
|
||||
CK_TILE_HOST_DEVICE constexpr auto
|
||||
make_naive_tensor_view_packed(DataType* p,
|
||||
make_naive_tensor_view_packed(DataType* __restrict__ p,
|
||||
const tuple<Lengths...>& lengths,
|
||||
number<GuaranteedLastDimensionVectorLength> = number<-1>{})
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user