From 9ddb510787d33c987a6a8a1d2de14bbc0be65975 Mon Sep 17 00:00:00 2001 From: Zhekun Hu Date: Thu, 7 May 2026 02:58:58 -0700 Subject: [PATCH] Add Turing and Ampere (A100) GGML to docker build file (#1691) * Add Turing and Ampere (A100) GGML to docker build file At the moment, the docker file for image builds does not build for CUDA architectures below 8.6, and ik_llama.cpp specifies support for architectures Turing and above; this PR sets the CUDA architecture list to include the architectures for Turing (7.5) and A100 (8.0) * Remove 80 because few people have A100s and it does seem like many CUDA arches cause issues for build * switch to 86-real and 89-real with 75, 80, 90 using virtual ptx jit * nvm, even adding 90-virtual causes linker error --------- Co-authored-by: Codex --- docker/ik_llama-cuda.Containerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/ik_llama-cuda.Containerfile b/docker/ik_llama-cuda.Containerfile index 7a382a05..8b2d8d73 100644 --- a/docker/ik_llama-cuda.Containerfile +++ b/docker/ik_llama-cuda.Containerfile @@ -7,7 +7,7 @@ ARG BASE_CUDA_RUN_CONTAINER=docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu FROM ${BASE_CUDA_DEV_CONTAINER} AS build # Build arguments -ARG CUDA_DOCKER_ARCH="86;90" +ARG CUDA_DOCKER_ARCH="75-virtual;80-virtual;86-real;89-real" ARG GGML_NATIVE=ON ARG USE_CCACHE=true @@ -93,4 +93,4 @@ RUN curl -sSL "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama- COPY --from=build /app/docker/ik_llama-cuda-swap.config.yaml /app/config.yaml HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD [ "curl", "-f", "http://localhost:8080"] -ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ] \ No newline at end of file +ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]