From 50f95d7bf32e6c0c67dc36716ff6a52653814929 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 5 Nov 2025 10:58:12 +0200
Subject: [PATCH] Disable CUDA fusion by default for now (#903)

Co-authored-by: Iwan Kawrakow
---
 ggml/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 6ba18d92..b5e2447e 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -133,7 +133,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
-set (GGML_CUDA_FUSION "1" CACHE STRING "ggml: enable/disable fusion")
+set (GGML_CUDA_FUSION "0" CACHE STRING "ggml: enable/disable fusion")
 option(GGML_IQK_FLASH_ATTENTION "ggml: enable the IQK FlashAttention CPU kernels" ON)
 option(GGML_IQK_FA_ALL_QUANTS "ggml: compile all quants for IQK FlashAttention" OFF)
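
Note: because GGML_CUDA_FUSION is a CACHE STRING, this patch only changes the default; users can still opt back in at configure time. A minimal sketch of such an invocation (the build directory name and the GGML_CUDA=ON flag are illustrative assumptions, not part of this patch):

    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_FUSION=1
    cmake --build build --config Release

An already-configured build tree keeps its cached value, so re-running cmake with the new flag (or clearing the build directory) is needed for the changed default to take effect.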