From 50f95d7bf32e6c0c67dc36716ff6a52653814929 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 5 Nov 2025 10:58:12 +0200
Subject: [PATCH] Disable CUDA fusion by default for now (#903)

Co-authored-by: Iwan Kawrakow
---
 ggml/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 6ba18d92..b5e2447e 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -133,7 +133,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
-set (GGML_CUDA_FUSION "1" CACHE STRING "ggml: enable/disable fusion")
+set (GGML_CUDA_FUSION "0" CACHE STRING "ggml: enable/disable fusion")
 option(GGML_IQK_FLASH_ATTENTION "ggml: enable the IQK FlashAttention CPU kernels" ON)
 option(GGML_IQK_FA_ALL_QUANTS "ggml: compile all quants for IQK FlashAttention" OFF)
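
Note: because GGML_CUDA_FUSION is a CACHE STRING, this patch only changes the default; users can still opt back in at configure time. A minimal sketch of such an invocation (the build directory name and the GGML_CUDA=ON flag are illustrative assumptions, not part of this patch):

    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_FUSION=1
    cmake --build build --config Release

An already-configured build tree keeps its cached value, so re-running cmake with the new flag (or clearing the build directory) is needed for the changed default to take effect.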