From 2688602697122ecc0fcb274efd48a85c992e4ceb Mon Sep 17 00:00:00 2001
From: Yi DING
Date: Mon, 1 Dec 2025 14:48:22 +0800
Subject: [PATCH] [CK_TILE] Disable cast_tile_pk_fp16bf16_fp32 as It Causes Extra spills on Recent Compilers (#3327)

[ROCm/composable_kernel commit: 9ed9539ddfcdd8de4180fb992b718b57e1cadfae]
---
 include/ck_tile/core/tensor/tile_elementwise.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/ck_tile/core/tensor/tile_elementwise.hpp b/include/ck_tile/core/tensor/tile_elementwise.hpp
index 076e13d358..bc6d7d2f5a 100644
--- a/include/ck_tile/core/tensor/tile_elementwise.hpp
+++ b/include/ck_tile/core/tensor/tile_elementwise.hpp
@@ -360,10 +360,12 @@ CK_TILE_DEVICE auto cast_tile(const SrcTensor& src_tensor)
                       (SrcTensor::get_thread_buffer_size() % 2 == 0))
         return impl::cast_tile_pkrtz_fp16_fp32(src_tensor);
 #endif
+#if 0 // currently it causes extra spills in qr_async_vr pipeline of fmha_fwd
     else if constexpr((std::is_same_v || std::is_same_v) &&
                       std::is_same_v &&
                       (SrcTensor::get_thread_buffer_size() % 2 == 0))
         return impl::cast_tile_pk_fp16bf16_fp32(src_tensor);
+#endif
 #if CK_TILE_USE_SUBDWORD_TILE_CAST
     else if constexpr(sizeof(DstType) < 4 || sizeof(typename SrcTensor::DataType) < 4)
         return impl::cast_tile_opt_subdword(src_tensor);