mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
300 lines
10 KiB
YAML
300 lines
10 KiB
YAML
name: Release Docker Runtime Images
#
# This workflow builds and publishes runtime Docker images (production-optimized, ~50% smaller):
#   - lmsysorg/sglang:v{version}-runtime, lmsysorg/sglang:latest-runtime
#   - lmsysorg/sglang:v{version}-cu130-runtime, lmsysorg/sglang:latest-cu130-runtime
#
# Flow: the publish-x86 and publish-arm64 jobs each push per-architecture
# images by digest (CUDA 12.9 and CUDA 13); the create-manifests job then
# combines those digests into the multi-arch tags listed above.
#
on:
  push:
    tags:
      # GitHub filter pattern (glob-like, not a regex): "v", digits, ".", anything.
      - "v[0-9]+.*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.7)"
        required: true

# Least privilege for GITHUB_TOKEN: the jobs only read the repository
# (actions/checkout). Registry pushes authenticate with the Docker Hub
# secrets, not with GITHUB_TOKEN.
permissions:
  contents: read
|
|
|
|
jobs:
|
|
publish-x86:
  # Builds the linux/amd64 runtime images (CUDA 12.9 from the matrix, CUDA 13
  # hard-coded) and pushes them by digest. The two digests are exported as job
  # outputs so that create-manifests can assemble the multi-arch tags.
  # Only runs in the upstream repository, so forks/mirrors never try to publish.
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  outputs:
    digest-cu129: ${{ steps.build-cu129.outputs.digest }}
    digest-cu130: ${{ steps.build-cu130.outputs.digest }}
  strategy:
    matrix:
      variant:
        - cuda_version: "12.9.1"
          build_type: "all"
          grace_blackwell: 0
  runs-on: x64-docker-build-node
  steps:
    - name: Delete huge unnecessary tools folder
      run: rm -rf /opt/hostedtoolcache

    - name: Cleanup workspace (remove root-owned files from prior runs)
      run: sudo rm -rf "$GITHUB_WORKSPACE"/* || true

    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Free disk space
      # Pinned to a release tag instead of the mutable `main` branch: this job
      # holds Docker Hub credentials, so third-party code must not change
      # underneath it. Prefer a full commit SHA if one is available.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        tool-cache: false
        docker-images: false
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      # v3 takes the same inputs as v2; the other actions in this job
      # (checkout@v4, setup-buildx-action@v3) are already on current majors.
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so the input can never be parsed as shell code.
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi

        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # The version becomes part of a Docker tag (v<version>-runtime), so
        # reject any character that is not valid in a tag before building.
        case "$VERSION" in
          *[!0-9A-Za-z._-]*)
            echo "::error::Invalid version format: $VERSION (only [0-9A-Za-z._-] allowed)"
            exit 1
            ;;
        esac
        if ! printf '%s\n' "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi

        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

    - name: Build and Push AMD64 Runtime
      id: build-cu129
      env:
        SGL_VERSION: ${{ steps.version.outputs.version }}
      run: |
        # push-by-digest: the image is pushed untagged; the digest is read
        # back from the buildx metadata file and exported as a step output.
        docker buildx build \
          --target runtime \
          --platform linux/amd64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg SGL_VERSION="${SGL_VERSION}" \
          --metadata-file /tmp/metadata-cu129-runtime.json \
          --no-cache \
          .

        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

    - name: Build and Push AMD64 Runtime (CUDA 13)
      id: build-cu130
      env:
        SGL_VERSION: ${{ steps.version.outputs.version }}
      run: |
        # Same build as above, but with the CUDA version and GRACE_BLACKWELL
        # hard-coded instead of taken from the matrix.
        docker buildx build \
          --target runtime \
          --platform linux/amd64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=13.0.1 \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg GRACE_BLACKWELL=0 \
          --build-arg SGL_VERSION="${SGL_VERSION}" \
          --metadata-file /tmp/metadata-cu130-runtime.json \
          --no-cache \
          .

        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
|
|
|
|
publish-arm64:
  # Builds the linux/arm64 runtime images (CUDA 12.9 from the matrix, CUDA 13
  # hard-coded) with GRACE_BLACKWELL=1 and pushes them by digest. The two
  # digests are exported as job outputs for the create-manifests job.
  # Only runs in the upstream repository, so forks/mirrors never try to publish.
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  outputs:
    digest-cu129: ${{ steps.build-cu129.outputs.digest }}
    digest-cu130: ${{ steps.build-cu130.outputs.digest }}
  strategy:
    matrix:
      variant:
        - cuda_version: "12.9.1"
          build_type: "all"
          grace_blackwell: 1
  runs-on: arm-docker-build-node
  steps:
    - name: Delete huge unnecessary tools folder
      run: rm -rf /opt/hostedtoolcache

    - name: Cleanup workspace (remove root-owned files from prior runs)
      run: sudo rm -rf "$GITHUB_WORKSPACE"/* || true

    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      # v3 takes the same inputs as v2; the other actions in this job
      # (checkout@v4, setup-buildx-action@v3) are already on current majors.
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so the input can never be parsed as shell code.
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi

        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # The version becomes part of a Docker tag (v<version>-runtime), so
        # reject any character that is not valid in a tag before building.
        case "$VERSION" in
          *[!0-9A-Za-z._-]*)
            echo "::error::Invalid version format: $VERSION (only [0-9A-Za-z._-] allowed)"
            exit 1
            ;;
        esac
        if ! printf '%s\n' "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi

        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

    - name: Build and Push ARM64 Runtime
      id: build-cu129
      env:
        SGL_VERSION: ${{ steps.version.outputs.version }}
      run: |
        # push-by-digest: the image is pushed untagged; the digest is read
        # back from the buildx metadata file and exported as a step output.
        docker buildx build \
          --target runtime \
          --platform linux/arm64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg SGL_VERSION="${SGL_VERSION}" \
          --metadata-file /tmp/metadata-cu129-runtime.json \
          --no-cache \
          .

        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

    # NOTE(review): this is the only build in the workflow that does not pass
    # INSTALL_FLASHINFER_JIT_CACHE=1 (the CUDA 12.9 ARM64 build and both AMD64
    # builds do). Confirm the omission is intentional for CUDA 13 on arm64.
    - name: Build and Push ARM64 Runtime (CUDA 13)
      id: build-cu130
      env:
        SGL_VERSION: ${{ steps.version.outputs.version }}
      run: |
        # Same build as above, but with the CUDA version and GRACE_BLACKWELL
        # hard-coded instead of taken from the matrix.
        docker buildx build \
          --target runtime \
          --platform linux/arm64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=13.0.1 \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg GRACE_BLACKWELL=1 \
          --build-arg SGL_VERSION="${SGL_VERSION}" \
          --metadata-file /tmp/metadata-cu130-runtime.json \
          --no-cache \
          .

        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
|
|
|
|
create-manifests:
  # Combines the per-architecture digests exported by publish-x86 and
  # publish-arm64 into four multi-arch tags: v<version>-runtime,
  # latest-runtime, v<version>-cu130-runtime and latest-cu130-runtime.
  runs-on: ubuntu-22.04
  needs: [publish-x86, publish-arm64]
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      # v3 takes the same inputs as v2; the other actions in this job
      # (checkout@v4, setup-buildx-action@v3) are already on current majors.
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so the input can never be parsed as shell code.
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi

        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # The version becomes part of a Docker tag (v<version>-runtime), so
        # reject any character that is not valid in a tag.
        case "$VERSION" in
          *[!0-9A-Za-z._-]*)
            echo "::error::Invalid version format: $VERSION (only [0-9A-Za-z._-] allowed)"
            exit 1
            ;;
        esac
        if ! printf '%s\n' "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi

        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

    - name: Create multi-arch manifests
      env:
        # All dynamic values reach the script as environment variables so
        # they are expanded by the shell as data, never parsed as code.
        VERSION: ${{ steps.version.outputs.version }}
        CU129_AMD64_RT: ${{ needs.publish-x86.outputs.digest-cu129 }}
        CU130_AMD64_RT: ${{ needs.publish-x86.outputs.digest-cu130 }}
        CU129_ARM64_RT: ${{ needs.publish-arm64.outputs.digest-cu129 }}
        CU130_ARM64_RT: ${{ needs.publish-arm64.outputs.digest-cu130 }}
      run: |
        # Fail with a clear message if an upstream job exported no digest,
        # instead of handing "lmsysorg/sglang@" to imagetools.
        # $1 = human-readable label, $2 = digest value
        require_digest() {
          if [ -z "$2" ]; then
            echo "::error::Missing image digest for $1"
            exit 1
          fi
        }
        require_digest "CUDA 12.9 amd64" "$CU129_AMD64_RT"
        require_digest "CUDA 13 amd64" "$CU130_AMD64_RT"
        require_digest "CUDA 12.9 arm64" "$CU129_ARM64_RT"
        require_digest "CUDA 13 arm64" "$CU130_ARM64_RT"

        # Publish one multi-arch tag from an amd64 and an arm64 digest.
        # $1 = tag, $2 = amd64 digest, $3 = arm64 digest
        create_manifest() {
          docker buildx imagetools create \
            -t "lmsysorg/sglang:$1" \
            "lmsysorg/sglang@$2" \
            "lmsysorg/sglang@$3"
        }

        # Create versioned runtime manifest
        create_manifest "v${VERSION}-runtime" "$CU129_AMD64_RT" "$CU129_ARM64_RT"

        # Create latest runtime manifest
        create_manifest "latest-runtime" "$CU129_AMD64_RT" "$CU129_ARM64_RT"

        # Create versioned CUDA 13 runtime manifest
        create_manifest "v${VERSION}-cu130-runtime" "$CU130_AMD64_RT" "$CU130_ARM64_RT"

        # Create latest CUDA 13 runtime manifest
        create_manifest "latest-cu130-runtime" "$CU130_AMD64_RT" "$CU130_ARM64_RT"
|