From 572df7d4d1ce95ee18dc257038f05bfb4d395cea Mon Sep 17 00:00:00 2001
From: "assistant-librarian[bot]"
Date: Mon, 1 Dec 2025 07:15:08 +0000
Subject: [PATCH] Merge commit '9ed9539ddfcdd8de4180fb992b718b57e1cadfae' into develop

---
 include/ck_tile/core/tensor/tile_elementwise.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/ck_tile/core/tensor/tile_elementwise.hpp b/include/ck_tile/core/tensor/tile_elementwise.hpp
index 076e13d358..bc6d7d2f5a 100644
--- a/include/ck_tile/core/tensor/tile_elementwise.hpp
+++ b/include/ck_tile/core/tensor/tile_elementwise.hpp
@@ -360,10 +360,12 @@ CK_TILE_DEVICE auto cast_tile(const SrcTensor& src_tensor)
                       (SrcTensor::get_thread_buffer_size() % 2 == 0))
         return impl::cast_tile_pkrtz_fp16_fp32(src_tensor);
 #endif
+#if 0 // currently it causes extra spills in qr_async_vr pipeline of fmha_fwd
     else if constexpr((std::is_same_v || std::is_same_v) &&
                       std::is_same_v &&
                       (SrcTensor::get_thread_buffer_size() % 2 == 0))
         return impl::cast_tile_pk_fp16bf16_fp32(src_tensor);
+#endif
 #if CK_TILE_USE_SUBDWORD_TILE_CAST
     else if constexpr(sizeof(DstType) < 4 || sizeof(typename SrcTensor::DataType) < 4)
         return impl::cast_tile_opt_subdword(src_tensor);