From 0d97b9c0bf879fb037ddd14ded60078e8d519455 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Tue, 11 Nov 2025 10:35:48 +0200
Subject: [PATCH] Enable fusion by default (#939)

Co-authored-by: Iwan Kawrakow
---
 ggml/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index b5e2447e..6ba18d92 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -133,7 +133,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
-set (GGML_CUDA_FUSION "0" CACHE STRING "ggml: enable/disable fusion")
+set (GGML_CUDA_FUSION "1" CACHE STRING "ggml: enable/disable fusion")
 
 option(GGML_IQK_FLASH_ATTENTION "ggml: enable the IQK FlashAttention CPU kernels" ON)
 option(GGML_IQK_FA_ALL_QUANTS "ggml: compile all quants for IQK FlashAttention" OFF)