mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
1448 lines
57 KiB
YAML
1448 lines
57 KiB
YAML
name: PR Test

# run-name is only customised for /rerun-stage dispatches so the run can be located by URL lookup:
#   target_stage + pr_head_sha (fork PRs) -> "[stage-name] sha"
#   target_stage only (non-fork)          -> "[stage-name]"
#   no target_stage                       -> empty string, so GitHub falls back to its default name
run-name: >-
  ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }}
|
|
|
|
on:
  # Three runs per day at 01:00, 09:00 and 17:00 UTC
  # (6pm / 2am / 10am Pacific while PDT, UTC-7, is in effect).
  schedule:
    - cron: "0 1,9,17 * * *"

  pull_request:
    branches:
      - main

  # Manual / bot-triggered runs (e.g. /rerun-stage).
  workflow_dispatch:
    inputs:
      target_stage:
        description: 'Specific stage to run (optional, for quick testing)'
        required: false
        type: string
        default: ''
      force_continue_on_error:
        description: 'Force continue-on-error (test scheduled CI behavior)'
        required: false
        type: boolean
        default: false
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-stage on fork PRs)'
        required: false
        type: string
        default: ''
      test_parallel_dispatch:
        description: 'Test parallel dispatch behavior (simulates scheduled run)'
        required: false
        type: boolean
        default: false

  # Entry point for other workflows that reuse this one.
  workflow_call:
    inputs:
      git_ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: 'Run all tests (for releasing or testing purpose)'
        required: false
        type: boolean
        default: false
      skip_stage_health_check:
        description: 'Skip stage health check fast-fail (e.g. for release branch cuts)'
        required: false
        type: boolean
        default: false
|
|
|
|
concurrency:
  # Concurrency group structure: pr-test-{event}-{branch}-{pr_sha}-{stage}
  # - event_name prevents scheduled runs from colliding with fork PRs whose branch is named 'main'
  #   (without it, both resolve the branch segment to 'main' and block each other)
  # - github.head_ref (pull_request) or github.ref_name (workflow_dispatch) normalizes to branch name
  # - pr_head_sha isolates /rerun-stage from main branch runs
  # - target_stage allows parallel stage dispatches to run independently
  group: pr-test-${{ github.event_name }}-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.git_ref || 'all' }}
  # Do not cancel in-progress runs that were started through workflow_call.
  # Inside a reusable workflow github.event_name is inherited from the caller, so it is never
  # 'workflow_call' and comparing against it always enabled cancellation. The call is detected
  # through the workflow_call-only inputs instead; both are empty for schedule, pull_request and
  # workflow_dispatch runs, which therefore keep cancel-in-progress enabled.
  # NOTE(review): a workflow_call that passes neither git_ref nor run_all_tests is still
  # indistinguishable from a direct run and will keep cancelling.
  cancel-in-progress: ${{ !inputs.git_ref && !inputs.run_all_tests }}
|
|
|
|
# Workflow-wide environment. Every value reaches the jobs as a string, so the
# boolean-looking flags are quoted to make that explicit.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
  # 'true' only when the skip_stage_health_check input is the boolean true.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
  # 'true' only when the run's ref is refs/heads/main (schedule, dispatch on main, or
  # workflow_call from main); pull_request events run on refs/pull/*/merge and get 'false'.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  USE_VENV: "false"
|
|
|
|
# Token scopes for every job in this workflow: `actions` is the only scope
# granted write access, the remaining scopes are read-only.
permissions:
  actions: write
  contents: read
  issues: read
  pull-requests: read
|
|
|
|
jobs:
|
|
# =============================================== check changes ====================================================
|
|
check-changes:
  runs-on: ubuntu-latest
  outputs:
    # Component flags. Precedence: API-based detection (filter-api, used in target_stage mode),
    # then dorny/paths-filter (filter), then the run-all-tests switch.
    main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    # sgl_kernel is forced to false whenever target_stage is set: sgl-kernel-build-wheels does not
    # run in that mode, so CUSTOM_BUILD_SGL_KERNEL=true would point at wheel artifacts that do not
    # exist. A PR that has kernel changes AND sets target_stage is rejected by the
    # "Validate target_stage with kernel changes" step.
    sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }}
    # sgl_kernel as detected, before the target_stage override (used for validation).
    sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
    jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    # Matrix max-parallel limits, one per runner type.
    max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
    max_parallel_small: ${{ steps.set-parallel.outputs.max_parallel_small }}
    max_parallel_2gpu: ${{ steps.set-parallel.outputs.max_parallel_2gpu }}
    # Runner label and behaviour switches consumed by the downstream jobs.
    b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
    enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
    continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
|
|
steps:
|
|
# Check out the explicitly requested commit if there is one: the /rerun-stage SHA first,
# then the workflow_call ref, otherwise the commit that triggered the run.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

# Repository-local composite action; needs the checkout above to be available.
- uses: ./.github/actions/check-maintenance
|
|
|
|
# Emits run_all_tests=true for scheduled runs and for callers that set the run_all_tests input;
# every other trigger gets run_all_tests=false (filtered mode).
# github.event_name is inherited from the caller in a workflow_call, so it cannot be used to
# recognise reusable-workflow invocations here.
- name: Determine run mode
  id: run-mode
  env:
    EVENT_NAME: ${{ github.event_name }}
    IS_SCHEDULE: ${{ github.event_name == 'schedule' }}
    RUN_ALL_TESTS: ${{ inputs.run_all_tests }}
  run: |
    if [[ "$EVENT_NAME" == "schedule" || "$RUN_ALL_TESTS" == "true" ]]; then
      echo "run_all_tests=true" >> $GITHUB_OUTPUT
      echo "Run mode: ALL TESTS (schedule=${IS_SCHEDULE}, run_all_tests=${RUN_ALL_TESTS})"
    else
      echo "run_all_tests=false" >> $GITHUB_OUTPUT
      echo "Run mode: FILTERED (triggered by ${EVENT_NAME})"
    fi
|
|
|
|
# Path-based change detection with dorny/paths-filter.
# Skipped when all tests run anyway, and skipped in target_stage mode, where the
# "Detect file changes via API" step is used instead.
- name: Detect file changes
  id: filter
  if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
  uses: dorny/paths-filter@v3
  with:
    # One output per component; each is 'true' when any listed pattern matches a changed file.
    filters: |
      main_package:
        - ".github/workflows/pr-test.yml"
        - ".github/workflows/pr-gate.yml"
        - ".github/actions/**"
        - "python/pyproject.toml"
        - "python/sglang/!(multimodal_gen|jit_kernel/diffusion|jit_kernel/tests/diffusion|jit_kernel/benchmark/diffusion|cli)/**/!(*.md)"
        - "scripts/ci/cuda/*"
        - "scripts/ci/utils/*"
        - "test/**/!(*.md)"
      multimodal_gen:
        - ".github/workflows/pr-test.yml"
        - ".github/workflows/pr-test-multimodal-gen.yml"
        - "python/pyproject.toml"
        - "python/sglang/multimodal_gen/**/!(*.md|*.ipynb)"
        - "python/sglang/jit_kernel/diffusion/**"
        - "python/sglang/jit_kernel/tests/diffusion/**"
        - "python/sglang/jit_kernel/benchmark/diffusion/**"
        - "python/sglang/cli/**"
      jit_kernel:
        - ".github/workflows/pr-test.yml"
        - ".github/workflows/pr-test-jit-kernel.yml"
        - "python/pyproject.toml"
        - "python/sglang/jit_kernel/**"
      sgl_kernel:
        - ".github/workflows/pr-test-sgl-kernel.yml"
        - "sgl-kernel/**/!(*.md|THIRDPARTYNOTICES.txt|LICENSE)"
|
|
|
|
# For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
# correctly because it falls back to "last commit" detection which breaks for merge commits.
# Instead, the GitHub compare API is used to diff the PR commit against main.
#
# Outputs (always all four, each 'true' or 'false'): sgl_kernel, main_package, jit_kernel,
# multimodal_gen. When the API call fails or returns no files, all four are 'false'.
- name: Detect file changes via API (for target_stage)
  id: filter-api
  if: inputs.target_stage && inputs.pr_head_sha
  env:
    GH_TOKEN: ${{ github.token }}
    # Caller-supplied values are handed over as environment variables instead of being
    # interpolated into the script text, so they can never be parsed as shell code.
    REPO: ${{ github.repository }}
    PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
  run: |
    echo "Detecting file changes via GitHub API for target_stage mode..."
    echo "PR head SHA: ${PR_HEAD_SHA}"

    # Get the list of changed files by comparing the PR commit against main.
    # This correctly handles merge commits by looking at the actual PR diff.
    # stderr is left visible so the reason for an API failure shows up in the log.
    # NOTE(review): the compare endpoint caps the number of files it returns; very large PRs
    # may be detected incompletely - confirm against the REST API docs.
    CHANGED_FILES=$(gh api "repos/${REPO}/compare/main...${PR_HEAD_SHA}" \
      --jq '.files[].filename' || echo "")

    if [ -z "$CHANGED_FILES" ]; then
      # Best effort: keep going with "no changes" but surface it as a workflow annotation.
      echo "::warning::Could not fetch changed files from API, assuming no changes"
      echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
      echo "main_package=false" >> "$GITHUB_OUTPUT"
      echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
      echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    echo "Changed files:"
    echo "$CHANGED_FILES" | head -20
    echo "..."

    # Check for sgl-kernel changes.
    # Markdown files, THIRDPARTYNOTICES.txt and LICENSE under sgl-kernel/ are ignored, matching
    # the paths-filter pattern "sgl-kernel/**/!(*.md|THIRDPARTYNOTICES.txt|LICENSE)". Without
    # this, a docs-only kernel change would make the validate step reject /rerun-stage.
    SGL_KERNEL_FILES=$(echo "$CHANGED_FILES" \
      | grep -E "^(sgl-kernel/|\.github/workflows/pr-test-sgl-kernel\.yml)" \
      | grep -v -E "^sgl-kernel/(.*/)?([^/]*\.md|THIRDPARTYNOTICES\.txt|LICENSE)$" || true)
    if [ -n "$SGL_KERNEL_FILES" ]; then
      echo "sgl_kernel=true" >> "$GITHUB_OUTPUT"
      echo "Detected sgl-kernel changes"
    else
      echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
    fi

    # Check for main_package changes (excluding multimodal_gen, jit_kernel/diffusion,
    # jit_kernel/tests/diffusion, jit_kernel/benchmark/diffusion, cli).
    # The excluded paths must be filtered out before testing for emptiness; piping into
    # `grep -q` would only report whether the first grep matched.
    MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml|\.github/workflows/pr-gate\.yml|\.github/actions/)" | grep -v -E "^(python/sglang/multimodal_gen/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/sglang/cli/)" || true)
    if [ -n "$MAIN_PKG_FILES" ]; then
      echo "main_package=true" >> "$GITHUB_OUTPUT"
      echo "Detected main_package changes"
    else
      echo "main_package=false" >> "$GITHUB_OUTPUT"
    fi

    # Check for jit_kernel changes
    if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-jit-kernel\.yml)"; then
      echo "jit_kernel=true" >> "$GITHUB_OUTPUT"
      echo "Detected jit_kernel changes"
    else
      echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
    fi

    # Check for multimodal_gen changes, including diffusion-specific jit_kernel coverage
    if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-multimodal-gen\.yml)"; then
      echo "multimodal_gen=true" >> "$GITHUB_OUTPUT"
      echo "Detected multimodal_gen changes"
    else
      echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
    fi
|
|
|
|
# Chooses the matrix max-parallel limits for the GPU stages.
# Full parallelism is granted to scheduled runs and to PRs labelled "high priority";
# everything else gets the reduced limits.
# Outputs: max_parallel, max_parallel_small, max_parallel_2gpu, parallel_level (full|low).
- name: Set max-parallel based on run type
  id: set-parallel
  env:
    GH_TOKEN: ${{ github.token }}
    # Context values are passed as environment variables rather than interpolated into the
    # script: inputs and branch names are caller-controlled text and must not be parsed as
    # shell code.
    EVENT_NAME: ${{ github.event_name }}
    HAS_HIGH_PRIORITY_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}
    TARGET_STAGE: ${{ inputs.target_stage }}
    PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
    REF_NAME: ${{ github.ref_name }}
    REPO: ${{ github.repository }}
  run: |
    # Determine if this run gets full parallelism (scheduled / high priority)
    FULL=false
    if [[ "$EVENT_NAME" == "schedule" ]]; then
      FULL=true
      echo "Scheduled run detected, using full parallelism"
    elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_HIGH_PRIORITY_LABEL" == "true" ]]; then
      FULL=true
      echo "High priority PR detected, using full parallelism"
    elif [[ -n "$TARGET_STAGE" ]]; then
      # /rerun-stage (workflow_dispatch): the event carries no PR payload, so the labels are
      # queried via the GitHub API. Try SHA lookup first (fork PRs), then fall back to the
      # branch name (non-fork PRs). Lookup failures are deliberately ignored (best effort).
      LABELS=""
      if [[ -n "$PR_HEAD_SHA" ]]; then
        LABELS=$(gh api "repos/${REPO}/commits/${PR_HEAD_SHA}/pulls" \
          --jq '.[0].labels[].name' 2>/dev/null || true)
      fi
      if [[ -z "$LABELS" ]]; then
        LABELS=$(gh pr list --head "$REF_NAME" --repo "$REPO" \
          --json labels --jq '.[0].labels[].name' 2>/dev/null || true)
      fi
      echo "PR labels: ${LABELS:-"(none)"}"
      # -Fx: the label must match the whole line literally.
      if echo "$LABELS" | grep -Fxq "high priority"; then
        FULL=true
        echo "High priority PR detected via API (/rerun-stage), using full parallelism"
      fi
    fi

    # Set max-parallel for each runner type
    # 1-gpu-h100: 14 partitions, 1-gpu-5090: 8 partitions, 2-gpu-h100: 4 partitions
    if [[ "$FULL" == "true" ]]; then
      LEVEL=full
      echo "max_parallel=14" >> "$GITHUB_OUTPUT"
      echo "max_parallel_small=8" >> "$GITHUB_OUTPUT"
      echo "max_parallel_2gpu=4" >> "$GITHUB_OUTPUT"
    else
      LEVEL=low
      echo "max_parallel=3" >> "$GITHUB_OUTPUT"
      echo "max_parallel_small=3" >> "$GITHUB_OUTPUT"
      echo "max_parallel_2gpu=2" >> "$GITHUB_OUTPUT"
    fi
    echo "parallel_level=$LEVEL" >> "$GITHUB_OUTPUT"
    echo "Parallelism level: $LEVEL"
|
|
|
|
# Selects the runner label for B200 jobs and publishes it as the b200_runner output.
# The kernel-build runner (4-gpu-b200-kernel) is used only when sgl_kernel changes were
# detected AND the run is not in target_stage mode (target_stage skips the wheel builds, so
# custom kernels are not available); otherwise the regular 4-gpu-b200 runner is used.
- name: Set B200 runner tag
  id: set-runner
  env:
    # API-based detection (filter-api) wins in target_stage mode, otherwise dorny/paths-filter.
    SGL_KERNEL: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
    # Passed through the environment so the caller-supplied text is never parsed as shell code.
    TARGET_STAGE: ${{ inputs.target_stage }}
  run: |
    if [[ "$SGL_KERNEL" == "true" && -z "$TARGET_STAGE" ]]; then
      echo "b200_runner=4-gpu-b200-kernel" >> "$GITHUB_OUTPUT"
    else
      echo "b200_runner=4-gpu-b200" >> "$GITHUB_OUTPUT"
    fi
|
|
|
|
# Unconditionally publishes enable_retry=true for the downstream jobs.
- name: Enable retry for CI
  id: set-retry
  run: |
    echo "enable_retry=true" >> "$GITHUB_OUTPUT"
    echo "Retry logic enabled for CI"
|
|
|
|
# Publishes continue_on_error=true when all tests are being run or when the
# force_continue_on_error input is set; filtered runs get continue_on_error=false.
- name: Set continue-on-error for full test runs
  id: set-continue-on-error
  env:
    RUN_ALL_TESTS: ${{ steps.run-mode.outputs.run_all_tests }}
    FORCE_CONTINUE: ${{ inputs.force_continue_on_error }}
  run: |
    if [[ "$RUN_ALL_TESTS" != "true" && "$FORCE_CONTINUE" != "true" ]]; then
      echo "continue_on_error=false" >> $GITHUB_OUTPUT
      echo "Filtered run, continue-on-error disabled"
    else
      echo "continue_on_error=true" >> $GITHUB_OUTPUT
      echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
    fi
|
|
|
|
# Hard-fails the job when /rerun-stage (target_stage) is used on a PR with sgl-kernel changes.
# The detection result comes from filter-api (target_stage mode) or from paths-filter.
- name: Validate target_stage with kernel changes
  if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true')
  run: |
    # Workflow annotations first, then the same explanation as plain log text.
    echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes."
    echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files."
    echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds."
    cat <<'EOF'

    ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes.

    This PR modifies files in sgl-kernel/, which requires building custom kernel wheels.
    The /rerun-stage command skips the wheel build job, so the test would run against
    the wrong (PyPI) version of sgl-kernel instead of your changes.

    To properly test your kernel changes, use one of these commands instead:
     /tag-and-rerun-ci - Re-run the full workflow including kernel builds
     /rerun-ci - Re-run the full workflow

    EOF
    exit 1
|
|
|
|
# Appends a Markdown table with every decision taken by this job to the run summary.
- name: Show filter results in summary (table)
  env:
    # Passed through the environment so the caller-supplied text is never parsed as shell code.
    TARGET_STAGE: ${{ inputs.target_stage || '(none)' }}
    # Report the detector that actually ran. The API step needs target_stage AND pr_head_sha,
    # and both detectors are skipped in run-all mode, so target_stage alone is not a reliable
    # indicator of the method used.
    DETECTION_METHOD: ${{ steps.filter-api.outcome == 'success' && 'GitHub API' || steps.filter.outcome == 'success' && 'dorny/paths-filter' || 'none (detection skipped)' }}
  run: |
    {
      echo "## Change Detection"
      echo ""
      echo "| Component | Changed |"
      echo "|-------------------|---------|"
      echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
      echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |"
      echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |"
      echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
      echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
      echo "| target_stage | ${TARGET_STAGE} |"
      echo "| detection_method | ${DETECTION_METHOD} |"
      echo "| max_parallel | ${{ steps.set-parallel.outputs.parallel_level }} (h100=${{ steps.set-parallel.outputs.max_parallel }}, 5090=${{ steps.set-parallel.outputs.max_parallel_small }}, 2gpu=${{ steps.set-parallel.outputs.max_parallel_2gpu }}) |"
      echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |"
      echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |"
      echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
    } >> "$GITHUB_STEP_SUMMARY"
|
|
|
|
# =============================================== Wait Jobs for Sequential PR Execution ====================================================
|
|
# These jobs poll GitHub API to wait for previous stages to complete.
|
|
# For PR runs: wait jobs run and enforce sequential execution via polling.
|
|
# For scheduled runs: wait jobs are skipped, enabling parallel execution for easier retry.
|
|
|
|
# Polls until the stage-a jobs have finished. Runs only for pull_request events outside
# target_stage / test_parallel_dispatch mode, when main_package or sgl_kernel changed and the
# gate either passed or was skipped.
wait-for-stage-a:
  needs:
    - check-changes
    - call-gate
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_a_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4

    - uses: ./.github/actions/check-maintenance

    # Jobs to wait for: the single 5090 job plus the four stage-a-test-cpu partitions.
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-a
        jobs: '["stage-a-test-1-gpu-small", {"prefix": "stage-a-test-cpu", "expected_count": 4}]'
        max-wait-minutes: "240"
|
|
|
|
# Polls until the stage-b jobs have finished. Same gating as wait-for-stage-a, and additionally
# requires wait-for-stage-a to have succeeded or been skipped.
wait-for-stage-b:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_b_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4

    - uses: ./.github/actions/check-maintenance

    # expected_count mirrors the matrix sizes of the stage-b jobs in this file
    # (8 small, 14 large and 4 two-GPU partitions); the b200 entry expects a single job.
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-b
        jobs: |
          [
            {"prefix": "stage-b-test-1-gpu-small", "expected_count": 8},
            {"prefix": "stage-b-test-1-gpu-large", "expected_count": 14},
            {"prefix": "stage-b-test-2-gpu-large", "expected_count": 4},
            {"prefix": "stage-b-test-4-gpu-b200", "expected_count": 1}
          ]
        max-wait-minutes: "480"
|
|
|
|
# =============================================== PR Gate ====================================================
|
|
# Runs the PR gate workflow. Skipped for scheduled runs (they run all tests), for
# test_parallel_dispatch runs and when target_stage is specified; otherwise it runs as soon as
# any of the four tracked components changed.
call-gate:
  needs: check-changes
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    (
      needs.check-changes.outputs.main_package == 'true' ||
      needs.check-changes.outputs.sgl_kernel == 'true' ||
      needs.check-changes.outputs.jit_kernel == 'true' ||
      needs.check-changes.outputs.multimodal_gen == 'true'
    )
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
|
|
|
|
# =============================================== sgl-kernel ====================================================
|
|
|
|
# Builds the x86 sgl-kernel wheel and uploads it as the artifact
# wheel-python<py>-cuda<cuda>, which the test stages download.
sgl-kernel-build-wheels:
  needs: [check-changes, call-gate]
  # Skip for scheduled runs (they run stages independently) and when target_stage is set
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: x64-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        # Versions are quoted so YAML does not read "3.10" as the float 3.1.
        - python-version: "3.10"
          cuda-version: "13.0"
  name: Build Wheel
  steps:
    - name: Cleanup
      run: |
        # Same guarded form as the ARM build job: the path is quoted and only expanded when the
        # workspace directory exists, so an empty or unset GITHUB_WORKSPACE can never turn this
        # into `sudo rm -rf /*`.
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        submodules: "recursive"
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      env:
        USE_CCACHE: 1

    # The second ls exits non-zero, failing the step, when no .whl file was produced.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
        path: sgl-kernel/dist/*
        if-no-files-found: error
|
|
|
|
# aarch64 counterpart of sgl-kernel-build-wheels; uploads the wheel with an
# "-aarch64" artifact-name suffix.
sgl-kernel-build-wheels-arm:
  name: Build Wheel Arm
  needs:
    - check-changes
    - call-gate
  # Not run for scheduled runs (they run stages independently), for test_parallel_dispatch,
  # or when target_stage is set; requires detected sgl_kernel changes.
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: arm-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "13.0"
  steps:
    # Empty the workspace left behind by a previous run, if there is one.
    - name: Cleanup
      run: |
        if [ ! -d "$GITHUB_WORKSPACE" ]; then
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        else
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        fi

    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
        submodules: recursive

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      env:
        USE_CCACHE: "1"
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    # Listing the *.whl glob fails the step when no wheel was produced.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
        path: sgl-kernel/dist/*
        if-no-files-found: error
|
|
|
|
# Delegates the sgl-kernel test suite to its own reusable workflow once the x86 wheel build has
# run. Gated exactly like the wheel build: not on schedule, not in test_parallel_dispatch or
# target_stage mode, and only when sgl_kernel changes were detected.
call-sgl-kernel-tests:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  uses: ./.github/workflows/pr-test-sgl-kernel.yml
  secrets: inherit
  with:
    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    # Empty strings stand in for inputs that are absent on the current trigger.
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
|
|
|
|
# =============================================== jit-kernel ====================================================
|
|
|
|
# Delegates the jit-kernel test suite to its own reusable workflow whenever jit_kernel is
# flagged by check-changes.
call-jit-kernel-tests:
  needs: [check-changes, call-gate]
  # call-gate is skipped for scheduled runs, test_parallel_dispatch and target_stage mode.
  # Without a status-check function a job is skipped as soon as one of its `needs` is skipped,
  # which would make the target_stage / test_parallel_dispatch inputs forwarded below
  # unreachable. `!failure() && !cancelled()` (the same idiom the stage jobs in this file use)
  # tolerates a skipped gate while still refusing to run after a failed or cancelled dependency.
  if: |
    !failure() &&
    !cancelled() &&
    needs.check-changes.outputs.jit_kernel == 'true'
  uses: ./.github/workflows/pr-test-jit-kernel.yml
  with:
    jit_kernel: ${{ needs.check-changes.outputs.jit_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    target_stage: ${{ inputs.target_stage || '' }}
    # Forwarded as the string 'true' / 'false'.
    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
  secrets: inherit
|
|
|
|
# =============================================== primary ====================================================
|
|
|
|
# Stage A, single GPU. Runs on 5090 (32GB, SM120).
# Selected either explicitly through target_stage, or - without target_stage - when
# main_package or sgl_kernel changed. Scheduled and test_parallel_dispatch runs ignore upstream
# failures; other runs require that nothing upstream failed or was cancelled.
stage-a-test-1-gpu-small:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-a-test-1-gpu-small' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: 1-gpu-5090
  timeout-minutes: 240
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Fetch the freshly built x86 wheel only when sgl_kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 10
      working-directory: test/
      env:
        # Expands to the runner flag, or to nothing when continue-on-error is off.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG

    - if: failure()
      uses: ./.github/actions/upload-cuda-coredumps

    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
# Stage A, CPU-only suite, split into four partitions on GitHub-hosted runners.
# Selected explicitly through target_stage, or - without target_stage - when main_package
# changed. Scheduled and test_parallel_dispatch runs ignore upstream failures.
stage-a-test-cpu:
  needs:
    - check-changes
    - call-gate
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-a-test-cpu' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.main_package == 'true'
      )
    )
  runs-on: ubuntu-latest
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      partition: [0, 1, 2, 3]
  steps:
    # Remove large preinstalled toolchains, then print the remaining disk usage.
    - name: Free disk space
      run: |
        sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
        df -h

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    - name: Install uv
      uses: astral-sh/setup-uv@v5

    # protoc and Rust are required by setuptools-rust to build the bundled native gRPC
    # extension (rust/sglang-grpc) when the main `sglang` wheel is installed from source.
    - name: Install protoc
      run: sudo bash scripts/ci/utils/install_protoc.sh

    - name: Install Rust toolchain
      run: bash scripts/ci/utils/install_rustup.sh

    # uv pip targets a virtualenv by default and setup-python does not create one, so
    # UV_SYSTEM_PYTHON makes uv install into the setup-python interpreter
    # (see https://docs.astral.sh/uv/guides/integration/github/).
    - name: Install dependencies
      timeout-minutes: 20
      env:
        UV_SYSTEM_PYTHON: "1"
      run: uv pip install -e "python[dev]" --index-strategy unsafe-best-match --prerelease allow

    - name: Run test
      timeout-minutes: 10
      working-directory: test/
      env:
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: python3 run_suite.py --hw cpu --suite stage-a-test-cpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
|
|
|
|
# Stage B, single GPU, eight partitions. Runs on 5090 (32GB, SM120).
# For PR runs it starts after wait-for-stage-a; selection rules match the stage-a GPU job.
stage-b-test-1-gpu-small:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-1-gpu-small' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: 1-gpu-5090
  timeout-minutes: 240
  strategy:
    fail-fast: false
    # Limit computed by check-changes (set-parallel step); the output is a string, hence fromJson.
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_small) }}
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Fetch the freshly built x86 wheel only when sgl_kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    # Both scripted steps first source the runner's /etc/profile.d/sglang-ci.sh.
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        source /etc/profile.d/sglang-ci.sh
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      env:
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        source /etc/profile.d/sglang-ci.sh
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG

    # The partition index is passed as the artifact suffix.
    - if: failure()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}

    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
# Stage B, single GPU, fourteen partitions. Runs on H100 (80GB, SM90) and hosts the tests
# that don't pass on 5090 (FA3, FP8, high VRAM, etc.).
stage-b-test-1-gpu-large:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-1-gpu-large' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  strategy:
    fail-fast: false
    # Limit computed by check-changes (set-parallel step); the output is a string, hence fromJson.
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Fetch the freshly built x86 wheel only when sgl_kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      working-directory: test/
      env:
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

    # The partition index is passed as the artifact suffix.
    - if: failure()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}

    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-b-test-2-gpu-large:
  # Stage B, two-GPU "large" suite: 4 matrix partitions on the
  # 2-gpu-h100 runner label.
  runs-on: 2-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-b-test-2-gpu-large' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Let every partition finish even if one of them fails.
    fail-fast: false
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_2gpu) }}
    matrix:
      # Keep the number of entries equal to --auto-partition-size below.
      partition: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; suffix is the partition id.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()
      with:
        artifact-suffix: ${{ matrix.partition }}

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-b-test-4-gpu-b200:
  # Stage B suite for the B200 runner; the runner label is chosen by the
  # check-changes job. Single instance (no matrix).
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-b-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # NOTE(review): no matrix is declared for this job, so fail-fast has no
    # sibling jobs to cancel — possibly vestigial; confirm before removing.
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    # NOTE(review): pinned to @v6 here while the H100/H200 jobs in this
    # file use @v4 — confirm the difference is deliberate.
    - name: Download artifacts
      uses: actions/download-artifact@v6
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG

    # Invoked from the workspace root: the `cd test` above does not carry
    # over, because each step starts its own shell.
    - name: Run FA4 jit_kernel tests (SM100+)
      timeout-minutes: 10
      run: python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py

    # Only on failure of an earlier step; no suffix since there is no matrix.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
call-multimodal-gen-tests:
  # Delegates the multimodal-gen stages to a reusable workflow, forwarding
  # the relevant check-changes outputs and caller inputs.
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  # Never runs once the workflow has been cancelled. Otherwise it runs when
  # target_stage names one of the multimodal-gen stages listed below, or —
  # with no target_stage — when multimodal_gen changed and either the run is
  # scheduled / test_parallel_dispatch is set, or nothing upstream failed.
  if: >-
    always() &&
    !cancelled() &&
    (
      inputs.target_stage == 'multimodal-gen-test-1-gpu' ||
      inputs.target_stage == 'multimodal-gen-test-2-gpu' ||
      inputs.target_stage == 'multimodal-gen-component-accuracy' ||
      inputs.target_stage == 'multimodal-gen-component-accuracy-1-gpu' ||
      inputs.target_stage == 'multimodal-gen-component-accuracy-2-gpu' ||
      inputs.target_stage == 'multimodal-gen-test-1-b200' ||
      inputs.target_stage == 'multimodal-gen-unit-test' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  uses: ./.github/workflows/pr-test-multimodal-gen.yml
  secrets: inherit
  with:
    # Outputs of check-changes, passed through unchanged.
    multimodal_gen: ${{ needs.check-changes.outputs.multimodal_gen }}
    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    continue_on_error: ${{ needs.check-changes.outputs.continue_on_error }}
    # Caller inputs; unset values are normalised to an empty string.
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    target_stage: ${{ inputs.target_stage || '' }}
    # Flags are forwarded as the literal strings 'true' / 'false'.
    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
    # 'true' when any job in `needs` ended with result 'failure'.
    caller_needs_failure: ${{ (needs.call-gate.result == 'failure' || needs.sgl-kernel-build-wheels.result == 'failure' || needs.check-changes.result == 'failure') && 'true' || 'false' }}
|
|
|
|
stage-c-test-4-gpu-h100:
  # Stage C suite on the 4-gpu-h100 runner label, split into 3 parts.
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-h100' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Let every part finish even if one of them fails.
    fail-fast: false
    matrix:
      # Keep the number of entries equal to --auto-partition-size below.
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; suffix is the part id.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()
      with:
        artifact-suffix: ${{ matrix.part }}

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-c-test-8-gpu-h200:
  # Stage C suite on the 8-gpu-h200 runner label, split into 4 parts, with
  # two warmup steps ahead of the test run.
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Let every part finish even if one of them fails.
    fail-fast: false
    matrix:
      # Keep the number of entries equal to --auto-partition-size below.
      part: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        # Activate venv if available (GITHUB_ENV may have failed to propagate)
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
          deepseek-ai/DeepSeek-V3-0324:8 \
          deepseek-ai/DeepSeek-V3.2-Exp:8

    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        # Source the venv activation and env files only when they exist.
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_server.py \
          deepseek-ai/DeepSeek-V3-0324:8 \
          inclusionAI/Ring-2.5-1T:8

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; suffix is the part id.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()
      with:
        artifact-suffix: ${{ matrix.part }}

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-c-test-8-gpu-h20:
  # Stage C suite on the 8-gpu-h20 runner label, split into 2 parts.
  # Dependencies are installed through ci_install_deepep.sh.
  runs-on: 8-gpu-h20
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h20' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  # Job-level environment, visible to every step below.
  env:
    SGLANG_CI_RDMA_ALL_DEVICES: 'mlx5_1,mlx5_2,mlx5_3,mlx5_4'
    # NOTE(review): cu129 here, while the wheel pattern downloaded below is
    # tagged cuda13.0 — confirm the two are meant to differ.
    CU_VERSION: cu129
  strategy:
    # Let every part finish even if one of them fails.
    fail-fast: false
    matrix:
      # Keep the number of entries equal to --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; suffix is the part id.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()
      with:
        artifact-suffix: ${{ matrix.part }}

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-c-test-deepep-4-gpu-h100:
  # Stage C DeepEP suite on the 4-gpu-h100 runner label. Single instance
  # (no matrix); two warmup steps run ahead of the tests.
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-4-gpu-h100' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        # Activate venv if available (GITHUB_ENV may have failed to propagate)
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
          lmsys/sglang-ci-dsv3-test:4

    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        # Source the venv activation and env files only when they exist.
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_server.py \
          lmsys/sglang-ci-dsv3-test:4

    - name: Run test
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; no suffix since there is no matrix.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-c-test-deepep-8-gpu-h200:
  # Stage C DeepEP suite on the 8-gpu-h200-deepep runner label. Single
  # instance (no matrix); two warmup steps run ahead of the tests.
  runs-on: 8-gpu-h200-deepep
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    - name: Download artifacts
      uses: actions/download-artifact@v4
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        # Activate venv if available (GITHUB_ENV may have failed to propagate)
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
          deepseek-ai/DeepSeek-V3-0324:8 \
          deepseek-ai/DeepSeek-V3.2-Exp:8

    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        # Source the venv activation and env files only when they exist.
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
        [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
        python3 scripts/ci/cuda/warmup_server.py \
          deepseek-ai/DeepSeek-V3-0324:8

    - name: Run test
      # This step is allowed 45 minutes.
      timeout-minutes: 45
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; no suffix since there is no matrix.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
stage-c-test-4-gpu-b200:
  # Stage C suite for the B200 runner, split into 6 parts; the runner label
  # is chosen by the check-changes job.
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # always() keeps GitHub from auto-skipping the job when a needed job did
  # not succeed. The job runs when it is the explicit target_stage, or —
  # with no target_stage — when main_package or sgl_kernel changed and
  # either the run is scheduled / test_parallel_dispatch is set, or nothing
  # upstream failed or was cancelled.
  if: >-
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Let every part finish even if one of them fails.
    fail-fast: false
    matrix:
      # Keep the number of entries equal to --auto-partition-size below.
      part: [0, 1, 2, 3, 4, 5]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # PR head SHA wins, then the caller's git ref, then the event SHA.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Local actions are loaded from the workspace, hence after checkout.
    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Skipped unless sgl-kernel changed; files land in sgl-kernel/dist/.
    # NOTE(review): pinned to @v6 here while the H100/H200 jobs in this
    # file use @v4 — confirm the difference is deliberate.
    - name: Download artifacts
      uses: actions/download-artifact@v6
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      with:
        pattern: wheel-python3.10-cuda13.0
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      # NOTE(review): if --timeout-per-file is expressed in seconds, 1800
      # equals this 30-minute step timeout, so the step limit would fire
      # first — confirm this is intended.
      timeout-minutes: 30
      env:
        # Either '--continue-on-error' or empty, decided by check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 6 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

    # Only on failure of an earlier step; suffix is the part id.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()
      with:
        artifact-suffix: ${{ matrix.part }}

    # Unconditional cleanup.
    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
|
|
|
|
# NOTE: GB200 stage temporarily disabled — no company-owned GB200 runner available yet.
|
|
# Re-enable when a 4-gpu-gb200 runner is provisioned.
|
|
# stage-c-test-4-gpu-gb200:
|
|
# needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm]
|
|
# if: |
|
|
# always() &&
|
|
# (
|
|
# (inputs.target_stage == 'stage-c-test-4-gpu-gb200') ||
|
|
# (
|
|
# !inputs.target_stage &&
|
|
# ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
|
|
# ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
|
# )
|
|
# )
|
|
# runs-on: 4-gpu-gb200
|
|
# timeout-minutes: 240
|
|
# strategy:
|
|
# fail-fast: false
|
|
# steps:
|
|
# - uses: ./.github/actions/check-maintenance
|
|
# with:
|
|
# github-token: ${{ github.token }}
|
|
#
|
|
# - name: Checkout code
|
|
# uses: actions/checkout@v4
|
|
# with:
|
|
# ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
|
|
#
|
|
# - name: Download artifacts
|
|
# if: needs.check-changes.outputs.sgl_kernel == 'true'
|
|
# uses: actions/download-artifact@v4
|
|
# with:
|
|
# path: sgl-kernel/dist/
|
|
# merge-multiple: true
|
|
# pattern: wheel-python3.10-cuda13.0-aarch64
|
|
#
|
|
# - name: Install dependencies
|
|
# timeout-minutes: 20
|
|
# run: |
|
|
# CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh
|
|
#
|
|
# - name: Run test
|
|
# timeout-minutes: 45
|
|
# env:
|
|
# CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
|
|
# run: |
|
|
# cd test
|
|
# python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG
|
|
#
|
|
# - uses: ./.github/actions/upload-cuda-coredumps
|
|
# if: failure()
|
|
|
|
pr-test-finish:
  # Final aggregation job: depends on every build/test job of this workflow
  # and fails if any of them reported `failure` or `cancelled`. `skipped`
  # and `success` results are both accepted.
  needs:
    - call-gate
    - check-changes

    - sgl-kernel-build-wheels
    - sgl-kernel-build-wheels-arm
    - call-sgl-kernel-tests

    - wait-for-stage-a
    - wait-for-stage-b

    - call-jit-kernel-tests

    - call-multimodal-gen-tests

    - stage-a-test-1-gpu-small
    - stage-a-test-cpu
    - stage-b-test-1-gpu-small
    - stage-b-test-1-gpu-large
    - stage-b-test-2-gpu-large
    - stage-b-test-4-gpu-b200
    - stage-c-test-4-gpu-h100
    - stage-c-test-8-gpu-h20
    - stage-c-test-8-gpu-h200
    - stage-c-test-deepep-4-gpu-h100
    - stage-c-test-deepep-8-gpu-h200
    - stage-c-test-4-gpu-b200
    # - stage-c-test-4-gpu-gb200  # Temporarily disabled — no GB200 runner
  # Must run even when needed jobs failed, were cancelled or were skipped,
  # otherwise the aggregate status would never be reported.
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # The `needs` context is handed to the script through the
        # environment rather than spliced into the script text: job outputs
        # are part of this JSON, and a single quote inside any of them would
        # otherwise terminate the shell string and be executed as code.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Job names are the top-level keys of the `needs` JSON object
        job_names=$(jq -r 'keys_unsorted[]' <<<"$NEEDS_JSON")

        for job in $job_names; do
          # For each job, extract its result
          result=$(jq -r --arg j "$job" '.[$j].result' <<<"$NEEDS_JSON")

          # Print the job name and its result
          echo "$job: $result"

          # Stop at the first failed or cancelled job
          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
            echo "The above jobs failed."
            exit 1
          fi
        done
        # If the loop completes, no job failed or was cancelled
        echo "All jobs completed successfully"
        exit 0
|