From 24eed120ab718104faf6e0794cf9b739949a4566 Mon Sep 17 00:00:00 2001 From: ThomasNing Date: Thu, 12 Jun 2025 02:44:19 -0500 Subject: [PATCH] intermediate develop need to switch out --- .../core/arch/amd_buffer_addressing.hpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/include/ck_tile/core/arch/amd_buffer_addressing.hpp b/include/ck_tile/core/arch/amd_buffer_addressing.hpp index 7111eed596..92d50e7c90 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing.hpp @@ -13,6 +13,7 @@ #include "ck_tile/core/utility/type_traits.hpp" #include "ck_tile/core/utility/bit_cast.hpp" #include "ck_tile/core/utility/functional.hpp" +#include "ck_tile/core/utility/ignore.hpp" // This attribute gives a hint to the compiler that a branch is likely to be taken. // Then, the compiler should remove if possible the associated s_cbranch_execz branch that would @@ -1749,7 +1750,7 @@ template -CK_TILE_DEVICE void amd_async_buffer_load_impl(T* smem, +CK_TILE_DEVICE void amd_async_buffer_load_impl(CK_TILE_LDS_ADDR T* smem, int32x4_t src_wave_buffer_resource, index_t src_thread_addr_offset, index_t src_wave_addr_offset, @@ -1779,17 +1780,25 @@ CK_TILE_DEVICE void amd_async_buffer_load(CK_TILE_LDS_ADDR T* smem, index_t flag = 0, bool_constant = {}) { + constexpr index_t bytes = sizeof(T) * N; +#if defined(__gfx950__) + static_assert(bytes == 4 || bytes == 12 || bytes == 16, + "wrong! only support in dword, dwordx3, dwordx4"); +#else static_assert(sizeof(T) * N == 4, "wrong! not implemented vector size"); - +#endif + ignore = src_wave_addr_offset; + ignore = src_immediate_addr_offset; if constexpr(oob_conditional_check) { index_t v_offset = flag ? src_thread_addr_offset : src_wave_buffer_resource[2]; llvm_amdgcn_raw_buffer_load_lds(src_wave_buffer_resource, - smem, + reinterpret_cast<__attribute__((address_space(3))) uint32_t*>( + reinterpret_cast(smem)), sizeof(uint32_t), v_offset, - src_wave_addr_offset, - src_immediate_addr_offset, + 0, + 0, static_cast(coherence)); } else