ggml: enable CUDA fusion by default

Changes the default of the GGML_CUDA_FUSION cache string in
ggml/CMakeLists.txt from "0" to "1". No other line is modified.

The variable is declared with set(... CACHE STRING ...) and no FORCE, so a
build tree that already stores GGML_CUDA_FUSION in its CMakeCache.txt keeps
the stored value; only fresh configurations pick up the new default.

NOTE(review): the hunk header declares 7 lines per side; the blank context
line after the changed line was reconstructed to satisfy that count. Confirm
its position against the target file.
NOTE(review): column-alignment whitespace inside the context lines appears to
have been collapsed; applying may need whitespace-insensitive matching, e.g.
"git apply --ignore-whitespace". Verify before relying on it.

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index b5e2447e..6ba18d92 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -133,7 +133,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
-set (GGML_CUDA_FUSION "0" CACHE STRING "ggml: enable/disable fusion")
+set (GGML_CUDA_FUSION "1" CACHE STRING "ggml: enable/disable fusion")
 
 option(GGML_IQK_FLASH_ATTENTION "ggml: enable the IQK FlashAttention CPU kernels" ON)
 option(GGML_IQK_FA_ALL_QUANTS "ggml: compile all quants for IQK FlashAttention" OFF)