From ee397d0ab2b02166b95c1c9dce00d4c1f2117045 Mon Sep 17 00:00:00 2001 From: carlushuang Date: Fri, 15 Mar 2024 22:56:41 +0000 Subject: [PATCH] temp fix buffer_store spill --- include/ck_tile/core/arch/amd_buffer_addressing.hpp | 3 ++- include/ck_tile/core/numeric/half.hpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/ck_tile/core/arch/amd_buffer_addressing.hpp b/include/ck_tile/core/arch/amd_buffer_addressing.hpp index 5c54f6cda2..c37af77ad4 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing.hpp @@ -413,7 +413,8 @@ struct buffer_store_if<8> { static_assert(sizeof(T) == 8); auto save_exec = __builtin_amdgcn_read_exec(); - using mbuf_t = fp32x2_t; + // TODO: ugly. rocm-6.0/6.1 seems to need bit_cast to same base type to avoid scratch + using mbuf_t = ext_vector_t; asm volatile("v_cmpx_le_u32 exec, 1, %5\n" "buffer_store_dwordx2 %0, %1, %2, %3 offen offset:%4\n" "s_mov_b64 exec %6" diff --git a/include/ck_tile/core/numeric/half.hpp b/include/ck_tile/core/numeric/half.hpp index 4a01a5a985..60ef6c978e 100644 --- a/include/ck_tile/core/numeric/half.hpp +++ b/include/ck_tile/core/numeric/half.hpp @@ -108,8 +108,8 @@ double fp16_to_double_hip(const fp16_hip_t& x) { return static_cast(fp16 CK_TILE_HOST_DEVICE fp16_hip_t float_to_fp16_hip(const float& x) { - // return __float2half(x); - return static_cast(x); + return __float2half(x); + // return static_cast(x); } CK_TILE_HOST_DEVICE