diff --git a/include/ck_tile/core/tensor/tile_elementwise.hpp b/include/ck_tile/core/tensor/tile_elementwise.hpp index 076e13d358..bc6d7d2f5a 100644 --- a/include/ck_tile/core/tensor/tile_elementwise.hpp +++ b/include/ck_tile/core/tensor/tile_elementwise.hpp @@ -360,10 +360,12 @@ CK_TILE_DEVICE auto cast_tile(const SrcTensor& src_tensor) (SrcTensor::get_thread_buffer_size() % 2 == 0)) return impl::cast_tile_pkrtz_fp16_fp32(src_tensor); #endif +#if 0 // currently it causes extra spills in qr_async_vr pipeline of fmha_fwd else if constexpr((std::is_same_v || std::is_same_v) && std::is_same_v && (SrcTensor::get_thread_buffer_size() % 2 == 0)) return impl::cast_tile_pk_fp16bf16_fp32(src_tensor); +#endif #if CK_TILE_USE_SUBDWORD_TILE_CAST else if constexpr(sizeof(DstType) < 4 || sizeof(typename SrcTensor::DataType) < 4) return impl::cast_tile_opt_subdword(src_tensor);