try multi-pipeline

This commit is contained in:
empyreus
2026-04-07 17:29:47 +00:00
parent 8fb751470b
commit 0bf599837d
2 changed files with 57 additions and 66 deletions

View File

@@ -25,6 +25,13 @@ pr:
- docs/**
- '**/*.md'
parameters:
- name: hostEntries
type: string
default: |
10.0.0.10 mscclit-000000
10.0.0.11 mscclit-000001
jobs:
- job: SGlangTest
displayName: SGLANG Test
@@ -55,4 +62,11 @@ jobs:
pool:
name: mscclpp-it
container:
image: $(containerImage)
image: $(containerImage)
steps:
- template: templates/sglang-test.yml
parameters:
subscription: msccl-it
vmssName: mscclit-vmss
resourceGroup: msccl-IT

View File

@@ -11,9 +11,19 @@ trigger:
- docs/**
- '**/*.md'
# Do not run multi-nodes-test for PR, we can trigger it manually
pr: none
pr:
branches:
include:
- main
- release/*
drafts: false
paths:
exclude:
- .devcontainer/**
- .github/**
- docker/**
- docs/**
- '**/*.md'
parameters:
- name: hostEntries
@@ -23,73 +33,40 @@ parameters:
10.0.0.11 mscclit-000001
jobs:
- job: MultiNodesTest
displayName: Multi nodes test
- job: SGlangTest
displayName: SGLANG Test
strategy:
matrix:
cuda11:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
cuda12:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
sglang:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64
pool:
name: msccl-ci-h100
container:
image: $(containerImage)
steps:
- template: templates/sglang-test.yml
parameters:
subscription: mscclpp-ci-h100
vmssName: mscclpp-h100-ci
gpuArch: '90'
- job: SGlangMultiNodeTest
displayName: SGLANG Multi-Node Test
strategy:
matrix:
sglang:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64
pool:
name: mscclpp-it
container:
image: $[ variables['containerImage'] ]
image: $(containerImage)
steps:
- task: Bash@3
displayName: Add HostEntry
inputs:
targetType: 'inline'
script: |
ENTRY="${{ parameters.hostEntries }}"
if ! grep -qxF "$ENTRY" /etc/hosts; then
echo "Adding to /etc/hosts"
echo "$ENTRY" | sudo tee -a /etc/hosts
else
echo "Entry already exists, nothing to do."
fi
- template: templates/deploy.yml
parameters:
steps:
- template: templates/sglang-test.yml
parameters:
subscription: msccl-it
vmssName: mscclit-vmss
resourceGroup: msccl-IT
- template: templates/run-remote-task.yml
parameters:
name: RunMscclppTest
displayName: Run multi-nodes mscclpp-test
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
remoteScript: |
bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test
- template: templates/run-remote-task.yml
parameters:
name: RunMultiNodeUnitTest
displayName: Run multi-nodes unit tests
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
remoteScript: |
bash /root/mscclpp/test/deploy/run_tests.sh mp-ut
- template: templates/run-remote-task.yml
parameters:
name: RunMultiNodePythonTests
displayName: Run multi-nodes python tests
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
remoteScript: |
bash /root/mscclpp/test/deploy/run_tests.sh pytests
- template: templates/run-remote-task.yml
parameters:
name: RunMultiNodePythonBenchmark
displayName: Run multi-nodes python benchmark
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
remoteScript: |
bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark
- template: templates/stop.yml
parameters:
subscription: msccl-it
vmssName: mscclit-vmss
resourceGroup: msccl-IT
resourceGroup: msccl-IT