mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
Co-authored-by: Alison Shao <alison.shao@mac.lan> Co-authored-by: Alison Shao <alison.shao@MacBook-Pro-D2W773R9CD.local> Co-authored-by: hnyls2002 <lsyincs@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
360 lines
12 KiB
YAML
360 lines
12 KiB
YAML
name: PR Test (SMG)

# Triggers: pushes and pull requests targeting main that touch the gateway
# sources, plus manual dispatch. `labeled` is listed so that adding a label
# to a pull request produces an event the job-level `if:` gates can react to.
on:
  push:
    branches:
      - main
    paths:
      - "sgl-model-gateway/**"
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    paths:
      - "sgl-model-gateway/**"
  workflow_dispatch:

# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: gateway-tests-${{ github.ref }}
  cancel-in-progress: true

# Environment shared by every job. Values are quoted because the runner
# exports them as strings.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"
  SGLANG_IS_IN_CI: "true"

jobs:
  # Builds the Python binding wheel with maturin, uploads it as the
  # `smg-wheel` artifact, then smoke-tests that the wheel installs and imports.
  build-wheel:
    # Runs unconditionally for non-pull-request events. On pull requests it
    # runs only when the `run-ci` label is already present, or is the label
    # that was just added.
    if: >-
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: 4-gpu-a10
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.12.0"
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-model-gateway/bindings/python
          python3 -m pip install --upgrade pip maturin
          maturin build --profile ci --features vendored-openssl --out dist

      - name: List built wheel
        run: |
          ls -lh sgl-model-gateway/bindings/python/dist/

      # The artifact is only needed by later jobs of the same run, hence the
      # one-day retention.
      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: smg-wheel
          path: sgl-model-gateway/bindings/python/dist/*.whl
          retention-days: 1

      # Smoke test: the package imports, the Rust extension loads, and the
      # launcher entry point responds to --help.
      - name: Test wheel install
        run: |
          pip install sgl-model-gateway/bindings/python/dist/*.whl
          python3 -c "import sglang_router; print('Python package: OK')"
          python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"

# ---- job: python-unit-tests ----
  # Installs the wheel published by build-wheel and runs the Python binding
  # test suite with a coverage floor of 80%.
  python-unit-tests:
    needs: build-wheel
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Hoist the gateway sources to the workspace root and discard the rest
      # of the checkout. Note that the `*` glob does not match top-level
      # dotfiles.
      - name: Move sgl-model-gateway folder to root
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: smg-wheel
          path: dist/

      - name: Install wheel
        run: |
          pip install dist/*.whl

      - name: Run Python unit tests
        run: |
          cd bindings/python
          python3 -m pip install pytest pytest-cov pytest-xdist
          pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80

# ---- job: unit-tests ----
  # Lints (clippy), format-checks (nightly rustfmt) and tests the Rust
  # gateway crate, after generating the vision golden fixtures.
  unit-tests:
    # Runs unconditionally for non-pull-request events. On pull requests it
    # runs only when the `run-ci` label is already present, or is the label
    # that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.12.0"
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          rustup component add clippy
          cargo clippy --all-targets --all-features -- -D warnings

      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          # Install the nightly toolchain first: a component can only be
          # added to a toolchain that already exists.
          rustup toolchain install nightly --profile minimal
          rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
          cargo +nightly fmt -- --check

      - name: Generate vision golden fixtures
        run: |
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu

          pip install transformers pillow numpy scipy
          cd sgl-model-gateway/
          python scripts/generate_vision_golden.py

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          cargo test

      # Reported even when an earlier step failed.
      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

# ---- job: gateway-e2e ----
  # Runs the end-to-end suites against the wheel published by build-wheel.
  # Each matrix entry selects test directories, extra pip packages,
  # environment variables, rerun and parallelism options, and optional
  # service setup (Oracle, Brave MCP).
  gateway-e2e:
    name: ${{ matrix.name }}
    needs: build-wheel
    # Runs unconditionally for non-pull-request events. On pull requests it
    # runs only when the `run-ci` label is already present, or is the label
    # that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: benchmarks
            timeout: 32
            test_dirs: "e2e_test/benchmarks"
            extra_deps: "genai-bench==0.0.3"
            env_vars: ""
            reruns: ""
            upload_benchmarks: true
            parallel_opts: ""  # No parallel for benchmarks (performance measurement)
          - name: responses
            timeout: 45
            test_dirs: "e2e_test/responses"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            setup_oracle: true
            setup_brave: true
            parallel_opts: ""  # Cloud backend tests not compatible with parallel execution
          - name: e2e
            timeout: 45
            test_dirs: "e2e_test/router e2e_test/embeddings"
            extra_deps: "pytest-parallel py"  # py is required for pytest-parallel with newer pytest
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: "--workers 1 --tests-per-worker 4"  # Thread-based parallelism
          - name: chat-completions
            timeout: 45
            test_dirs: "e2e_test/chat_completions"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: ""
    runs-on: 4-gpu-a10
    timeout-minutes: ${{ matrix.timeout }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/cuda/ci_install_dependency.sh

      # Matrix entries that do not define setup_oracle / setup_brave skip the
      # corresponding service steps.
      - name: Setup Oracle Instant Client
        if: matrix.setup_oracle
        run: |
          sudo apt-get install -y unzip
          INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
          INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"

          if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
            echo "Downloading Oracle Instant Client..."
            mkdir -p "$INSTANT_CLIENT_DIR"
            cd "$INSTANT_CLIENT_DIR"
            wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
            unzip $INSTANT_CLIENT_ZIP
            rm $INSTANT_CLIENT_ZIP
          else
            echo "Oracle Instant Client already exists, skipping download"
          fi

          # Entries written to GITHUB_ENV are not shell-expanded when later
          # steps load them, so the current LD_LIBRARY_PATH is expanded here.
          # The ${VAR:+...} form avoids a trailing ':' when it is unset.
          echo "LD_LIBRARY_PATH=/home/ubuntu/instant-client/instantclient_23_9${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Start Oracle Database
        if: matrix.setup_oracle
        run: |
          docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
          echo "Starting Oracle DB..."

          # Export Oracle connection environment variables
          echo "ATP_USER=system" >> "$GITHUB_ENV"
          echo "ATP_PASSWORD=oracle" >> "$GITHUB_ENV"
          echo "ATP_DSN=localhost:1521/XEPDB1" >> "$GITHUB_ENV"

      # `-e BRAVE_API_KEY` forwards the variable from the step's environment.
      # The curl probe is best-effort: either outcome only prints a message.
      - name: Start Brave MCP Server
        if: matrix.setup_brave
        run: |
          docker run -d --rm \
            -p 8001:8080 \
            -e BRAVE_API_KEY \
            --name brave-search-server \
            shoofio/brave-search-mcp-sse:1.0.10
          echo "Starting Brave MCP Server..."
          sleep 2
          curl -f --max-time 1 http://localhost:8001/sse > /dev/null 2>&1 && echo "Brave MCP Server is healthy!" || echo "Brave MCP Server responded"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: smg-wheel
          path: wheel/

      # Remove any previously installed router package before installing the
      # freshly built wheel.
      - name: Install wheel
        run: |
          pip uninstall -y sglang-router || true
          pip install wheel/*.whl

      - name: Install e2e test dependencies
        run: |
          python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
          if [ -n "${{ matrix.extra_deps }}" ]; then
            python3 -m pip --no-cache-dir install --upgrade ${{ matrix.extra_deps }}
          fi

      - name: Run E2E tests
        run: |
          python3 python/sglang/cli/killall.py
          cd sgl-model-gateway
          ${{ matrix.env_vars }} ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest ${{ matrix.reruns }} ${{ matrix.parallel_opts }} ${{ matrix.test_dirs }} -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Upload benchmark results
        if: matrix.upload_benchmarks && success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-model-gateway/benchmark_**/

      # Cleanup steps run regardless of the test outcome and tolerate
      # containers that are already gone.
      - name: Cleanup Brave MCP Server
        if: always() && matrix.setup_brave
        run: |
          docker stop brave-search-server || true
          docker rm brave-search-server || true

      - name: Cleanup Oracle Database
        if: always() && matrix.setup_oracle
        run: |
          docker stop oracle-db || true
          docker rm oracle-db || true

# ---- job: docker-build-test ----
  # Verifies that docker/gateway.Dockerfile still builds. The image is tagged
  # locally and never pushed.
  docker-build-test:
    # Runs unconditionally for non-pull-request events. On pull requests it
    # runs only when the `run-ci` label is already present, or is the label
    # that was just added.
    if: >-
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Layer cache is read from and written to the GitHub Actions cache.
      - name: Build Docker image (no push)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/gateway.Dockerfile
          push: false
          tags: sgl-model-gateway:test
          cache-from: type=gha
          cache-to: type=gha,mode=max

# ---- job: finish ----
  # Aggregation job: depends on every other test job so that a single check
  # reflects completion of the whole workflow.
  finish:
    needs:
      - build-wheel
      - python-unit-tests
      - unit-tests
      - gateway-e2e
      - docker-build-test
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."

# ---- job: summarize-benchmarks ----
  # Downloads the benchmark artifact uploaded by gateway-e2e and runs the
  # summary script over it. Only runs when gateway-e2e succeeded.
  summarize-benchmarks:
    needs: gateway-e2e
    if: success()
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # No `path` is given, so the artifact is extracted into the workspace
      # root, which is the directory passed to the script below.
      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: Create benchmark summary
        run: |
          python3 sgl-model-gateway/e2e_test/benchmarks/summarize.py .