ggml: add optional CUDA link-time optimization (GGML_CUDA_LTO)

Adds a GGML_CUDA_LTO option (default OFF). When it is ON in a non-MUSA
CUDA build, ggml/src/CMakeLists.txt:
  * sets CMAKE_CUDA_SEPARABLE_COMPILATION and
    CMAKE_INTERPROCEDURAL_OPTIMIZATION before find_package(CUDAToolkit);
  * appends -dlto to CMAKE_CUDA_FLAGS right after enable_language(CUDA).

NOTE(review): CMAKE_INTERPROCEDURAL_OPTIMIZATION is not CUDA-specific;
presumably it also turns on IPO for C/C++ targets created after this
point - confirm that this is intended.
NOTE(review): indentation and column alignment of the context lines were
reconstructed from the block nesting; if a hunk does not apply cleanly,
retry with `git apply --ignore-whitespace`.

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 69b081d6..ff44bb99 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -115,6 +115,7 @@ option(GGML_LLAMAFILE "ggml: use LLAMAFILE"
 option(GGML_IQK_MUL_MAT "ggml: use optimized iqk matrix multiplications" ON)
 
 option(GGML_CUDA "ggml: use CUDA" OFF)
+option(GGML_CUDA_LTO "ggml: use CUDA LTO" OFF)
 option(GGML_MUSA "ggml: use MUSA" OFF)
 option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
 option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index aab44d7c..78da50f4 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -315,6 +315,10 @@ if (GGML_CUDA)
         find_package(MUSAToolkit)
         set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
     else()
+        if (GGML_CUDA_LTO)
+            set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+            set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
+        endif()
         find_package(CUDAToolkit)
         set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
         set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 0)
@@ -344,6 +348,9 @@ if (GGML_CUDA)
             set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
         else()
             enable_language(CUDA)
+            if (GGML_CUDA_LTO)
+                set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -dlto")
+            endif()
         endif()
 
         file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh")