nccl: initial setup (cmake find if installed, initialize)

This commit is contained in:
Kawrakow
2025-12-19 07:10:02 +00:00
parent ecabd6acf7
commit ea798acd10
4 changed files with 126 additions and 0 deletions

View File

@@ -462,6 +462,19 @@ if (GGML_CUDA)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
endif()
endif()
# Optional NCCL support: probe for NCCL and, when it is found, append its
# libraries and include directories to GGML_EXTRA_LIBS / GGML_EXTRA_INCLUDES and
# define GGML_USE_NCCL. The package is not REQUIRED, so configuration continues
# without NCCL when it is missing.
find_package(NCCL)
if (NCCL_FOUND)
    # STATUS keeps these lines in the regular configure log instead of emitting
    # them as notices on stderr.
    message(STATUS "==================== NCCL found!")
    message(STATUS "NCCL_LIBRARIES = ${NCCL_LIBRARIES}")
    message(STATUS "NCCL_INCLUDE_DIRS = ${NCCL_INCLUDE_DIRS}")
    list(APPEND GGML_EXTRA_LIBS ${NCCL_LIBRARIES})
    list(APPEND GGML_EXTRA_INCLUDES ${NCCL_INCLUDE_DIRS})
    add_compile_definitions(GGML_USE_NCCL)
else()
    message(STATUS "==================== NCCL NOT found")
endif()
if (NOT GGML_MUSA)
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 0)

View File

@@ -246,6 +246,20 @@ static ggml_cuda_device_info ggml_cuda_init() {
// configure logging to stdout
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
#ifdef GGML_USE_NCCL
    // Try to create one NCCL communicator per CUDA device (devices 0..device_count-1).
    // NCCL is only attempted when more than one device is present; info.have_nccl
    // records whether the communicators were created successfully.
    info.have_nccl = false;
    if (info.device_count > 1) {
        int gpu_list[GGML_CUDA_MAX_DEVICES];
        for (int i = 0; i < info.device_count; ++i) {
            gpu_list[i] = i;
        }
        const ncclResult_t status = ncclCommInitAll(info.nccl_coms, info.device_count, gpu_list);
        if (status == ncclSuccess) {
            printf("=============================== NCCL initialized\n");
            info.have_nccl = true;
        } else {
            // Not fatal: have_nccl stays false. Report the numeric code together with
            // NCCL's own description of it, on stderr.
            fprintf(stderr, "=============================== NCCL initialization failed with status %d (%s)\n",
                    int(status), ncclGetErrorString(status));
        }
    }
#endif
return info;
}

View File

@@ -34,6 +34,10 @@
#include "vendors/cuda.h"
#endif // defined(GGML_USE_HIPBLAS)
#ifdef GGML_USE_NCCL
#include <nccl.h>
#endif
// Two-step stringification: STRINGIZE forwards its arguments to STRINGIZE_IMPL so
// that any macros among them are expanded first; STRINGIZE_IMPL then applies the
// preprocessor `#` operator to turn the resulting tokens into a string literal.
#define STRINGIZE_IMPL(...) #__VA_ARGS__
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
@@ -754,6 +758,11 @@ struct ggml_cuda_device_info {
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
std::array<float, GGML_CUDA_MAX_DEVICES> default_tensor_split = {};
#ifdef GGML_USE_NCCL
    // NCCL communicator handles, one slot per possible device. Filled by
    // ncclCommInitAll() in ggml_cuda_init(); value-initialized so the slots are
    // never indeterminate, matching the other members of this struct.
    ncclComm_t nccl_coms[GGML_CUDA_MAX_DEVICES] = {};
    // True only when the NCCL communicators were created successfully.
    bool have_nccl = false;
#endif
};
const ggml_cuda_device_info & ggml_cuda_info();