update docker build (#1872)

This commit is contained in:
Jianwei Dong
2026-02-28 10:34:35 +08:00
committed by GitHub
parent 20262b2743
commit 19887e4363
4 changed files with 76 additions and 44 deletions

View File

@@ -201,31 +201,38 @@ ARG GITHUB_ARTIFACTORY
 ARG KTRANSFORMERS_VERSION
 ARG KTRANSFORMERS_WHEEL
 ARG FLASH_ATTN_WHEEL
+ARG FUNCTIONALITY=sft
 WORKDIR /workspace
-# Create two conda environments with Python 3.12
+# Create conda environments (fine-tune only needed for sft mode)
 RUN conda create -n serve python=3.12 -y \
-    && conda create -n fine-tune python=3.12 -y
+    && if [ "$FUNCTIONALITY" = "sft" ]; then conda create -n fine-tune python=3.12 -y; fi
-# Set pip mirror for both conda envs
+# Set pip mirror for conda envs
 RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
-    && /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \
+    fi
 # Clone repositories
 # Use kvcache-ai/sglang fork with kimi_k2 branch
 RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
     && cd /workspace/sglang && git checkout kimi_k2
-RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
-    && git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
-    && cd /workspace/ktransformers && git submodule update --init --recursive
+RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
+    && cd /workspace/ktransformers && git submodule update --init --recursive \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \
+    fi
-# Download ktransformers wheel and flash_attn wheel for fine-tune env
-RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
-    && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
+# Download ktransformers wheel and flash_attn wheel for fine-tune env (sft mode only)
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
+        https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
+        && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
+        https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}; \
+    fi
########################################################
# Environment 1: serve (sglang + kt-kernel)
@@ -318,47 +325,59 @@ RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
     && CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build
 ########################################################
-# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
+# Environment 2: fine-tune (LLaMA-Factory + ktransformers) - sft mode only
 ########################################################
 # Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
-RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
-    && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
+        && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime; \
+    fi
 # Install PyTorch 2.8 in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    case "$CUDA_VERSION" in \
-        12.6.1) CUINDEX=126 ;; \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        13.0.1) CUINDEX=130 ;; \
-    esac \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install \
-        torch==2.8.0 \
-        torchvision \
-        torchaudio \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        case "$CUDA_VERSION" in \
+            12.6.1) CUINDEX=126 ;; \
+            12.8.1) CUINDEX=128 ;; \
+            12.9.1) CUINDEX=129 ;; \
+            13.0.1) CUINDEX=130 ;; \
+        esac \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install \
+            torch==2.8.0 \
+            torchvision \
+            torchaudio \
+            --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}; \
+    fi
 # Install LLaMA-Factory in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    cd /workspace/LLaMA-Factory \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        cd /workspace/LLaMA-Factory \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation; \
+    fi
 # Install ktransformers wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}; \
+    fi
 # Install flash_attn wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}; \
+    fi
 # Install NCCL for fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
-    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
-    fi
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
+        elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+        fi; \
+    fi
########################################################
@@ -366,13 +385,18 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 ########################################################
 # Clean up downloaded wheels
-RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}; \
+    fi
 # Initialize conda for bash
 RUN /opt/miniconda3/bin/conda init bash
 # Create shell aliases for convenience
-RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc
+RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"' >> /root/.bashrc \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        echo 'alias finetune="conda activate fine-tune"' >> /root/.bashrc; \
+    fi
########################################################
# Extract version information for image naming
@@ -392,12 +416,17 @@ RUN set -x && \
     echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
     echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
     \
-    # LLaMA-Factory version (from fine-tune environment)
-    . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
-    cd /workspace/LLaMA-Factory && \
-    LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
-    echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
-    echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
+    # LLaMA-Factory version (from fine-tune environment, sft mode only)
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
+        cd /workspace/LLaMA-Factory && \
+        LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
+        echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
+        echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION"; \
+    else \
+        echo "LLAMAFACTORY_VERSION=none" >> /workspace/versions.env && \
+        echo "LLaMA-Factory not installed (infer mode)"; \
+    fi && \
     \
     # Display all versions
     echo "=== Version Summary ===" && \

View File

@@ -261,6 +261,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then

View File

@@ -183,7 +183,7 @@ generate_image_name() {
     llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2)
     # Validate versions were extracted
-    if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then
+    if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ]; then
         log_error "Failed to parse versions from input"
         return 1
     fi

View File

@@ -313,6 +313,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then
@@ -884,6 +885,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then