From 4393383736dbcfe4539fb17a196d30c8d67172c5 Mon Sep 17 00:00:00 2001 From: carlushuang Date: Thu, 9 Mar 2023 22:07:24 +0800 Subject: [PATCH] fix a bug with non-dword-aligned offset when OOB, in case crash (#616) Co-authored-by: zjing14 [ROCm/composable_kernel commit: 76fcdc60e9ef848d23527079ad59ca6f4040ca59] --- include/ck/utility/amd_buffer_addressing.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp index 79295356df..bdfb4f2758 100644 --- a/include/ck/utility/amd_buffer_addressing.hpp +++ b/include/ck/utility/amd_buffer_addressing.hpp @@ -1030,7 +1030,7 @@ amd_buffer_load_invalid_element_return_zero(const T* p_src_wave, constexpr index_t vector_size = scalar_type::vector_size; #if CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK - uint32_t src_addr_shift = src_thread_element_valid ? 0 : 0x7fffffff; + uint32_t src_addr_shift = src_thread_element_valid ? 0 : 0x80000000; return amd_buffer_load_impl( src_wave_buffer_resource, src_addr_shift + src_thread_addr_offset, 0); @@ -1091,7 +1091,7 @@ __device__ void amd_buffer_store(const typename vector_type_maker::type::t constexpr index_t vector_size = scalar_type::vector_size; #if CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK - uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x7fffffff; + uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000; amd_buffer_store_impl( src_thread_data, dst_wave_buffer_resource, dst_addr_shift + dst_thread_addr_offset, 0); @@ -1126,7 +1126,7 @@ amd_buffer_atomic_add(const typename vector_type_maker::type::type src_thr constexpr index_t vector_size = scalar_type::vector_size; #if CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK - uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x7fffffff; + uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000; amd_buffer_atomic_add_impl( src_thread_data, dst_wave_buffer_resource, dst_addr_shift + dst_thread_addr_offset, 0); @@ -1161,7 +1161,7 @@ amd_buffer_atomic_max(const typename vector_type_maker::type::type src_thr constexpr index_t vector_size = scalar_type::vector_size; #if CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK - uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x7fffffff; + uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000; amd_buffer_atomic_max_impl( src_thread_data, dst_wave_buffer_resource, dst_addr_shift + dst_thread_addr_offset, 0);