# Patch for docker/ik_llama-cuda.Containerfile (two hunks):
#   1. Changes the default value of the CUDA_DOCKER_ARCH build argument from
#      "86;90" to "75-virtual;80-virtual;86-real;89-real".
#   2. Rewrites the final ENTRYPOINT line so that it ends with a newline; the
#      old file ended without one.
# NOTE(review): this patch had been flattened onto a single line. Line breaks
# are restored below. The hunk counts require two blank context lines in the
# first hunk; their positions, and the indentation of the CMD continuation
# line in the second hunk, are assumed. Confirm against the target file
# before applying.
diff --git a/docker/ik_llama-cuda.Containerfile b/docker/ik_llama-cuda.Containerfile
index 7a382a05..8b2d8d73 100644
--- a/docker/ik_llama-cuda.Containerfile
+++ b/docker/ik_llama-cuda.Containerfile
@@ -7,7 +7,7 @@ ARG BASE_CUDA_RUN_CONTAINER=docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Build arguments
-ARG CUDA_DOCKER_ARCH="86;90"
+ARG CUDA_DOCKER_ARCH="75-virtual;80-virtual;86-real;89-real"
 ARG GGML_NATIVE=ON
 ARG USE_CCACHE=true
 
@@ -93,4 +93,4 @@ RUN curl -sSL "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-
 COPY --from=build /app/docker/ik_llama-cuda-swap.config.yaml /app/config.yaml
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
     CMD [ "curl", "-f", "http://localhost:8080"]
-ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]
\ No newline at end of file
+ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]