From 9ddb510787d33c987a6a8a1d2de14bbc0be65975 Mon Sep 17 00:00:00 2001 From: Zhekun Hu Date: Thu, 7 May 2026 02:58:58 -0700 Subject: [PATCH] Add Turing and Ampere (A100) GGML to docker build file (#1691) * Add Turing and Ampere (A100) GGML to docker build file At the moment, the docker file for image builds does not build for CUDA architectures below 8.6, and ik_llama.cpp specifies support for architectures Turing and above; this PR sets the CUDA architecture list to include the architectures for Turing (7.5) and A100 (8.0) * Remove 80 because few people have A100s and it does seem like many CUDA arches cause issues for build * switch to 86-real and 89-real with 75, 80, 90 using virtual ptx jit * nvm, even adding 90-virtual causes linker error --------- Co-authored-by: Codex --- docker/ik_llama-cuda.Containerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/ik_llama-cuda.Containerfile b/docker/ik_llama-cuda.Containerfile index 7a382a05..8b2d8d73 100644 --- a/docker/ik_llama-cuda.Containerfile +++ b/docker/ik_llama-cuda.Containerfile @@ -7,7 +7,7 @@ ARG BASE_CUDA_RUN_CONTAINER=docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu FROM ${BASE_CUDA_DEV_CONTAINER} AS build # Build arguments -ARG CUDA_DOCKER_ARCH="86;90" +ARG CUDA_DOCKER_ARCH="75-virtual;80-virtual;86-real;89-real" ARG GGML_NATIVE=ON ARG USE_CCACHE=true @@ -93,4 +93,4 @@ RUN curl -sSL "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama- COPY --from=build /app/docker/ik_llama-cuda-swap.config.yaml /app/config.yaml HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD [ "curl", "-f", "http://localhost:8080"] -ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ] \ No newline at end of file +ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]