mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
try multi-pipeline
This commit is contained in:
@@ -25,6 +25,13 @@ pr:
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
parameters:
|
||||
- name: hostEntries
|
||||
type: string
|
||||
default: |
|
||||
10.0.0.10 mscclit-000000
|
||||
10.0.0.11 mscclit-000001
|
||||
|
||||
jobs:
|
||||
- job: SGlangTest
|
||||
displayName: SGLANG Test
|
||||
@@ -55,4 +62,11 @@ jobs:
|
||||
pool:
|
||||
name: mscclpp-it
|
||||
container:
|
||||
image: $(containerImage)
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/sglang-test.yml
|
||||
parameters:
|
||||
subscription: msccl-it
|
||||
vmssName: mscclit-vmss
|
||||
resourceGroup: msccl-IT
|
||||
@@ -11,9 +11,19 @@ trigger:
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
# Do not run multi-nodes-test for PR, we can trigger it manually
|
||||
pr: none
|
||||
|
||||
pr:
|
||||
branches:
|
||||
include:
|
||||
- main
|
||||
- release/*
|
||||
drafts: false
|
||||
paths:
|
||||
exclude:
|
||||
- .devcontainer/**
|
||||
- .github/**
|
||||
- docker/**
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
parameters:
|
||||
- name: hostEntries
|
||||
@@ -23,73 +33,40 @@ parameters:
|
||||
10.0.0.11 mscclit-000001
|
||||
|
||||
jobs:
|
||||
- job: MultiNodesTest
|
||||
displayName: Multi nodes test
|
||||
- job: SGlangTest
|
||||
displayName: SGLANG Test
|
||||
strategy:
|
||||
matrix:
|
||||
cuda11:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
sglang:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64
|
||||
|
||||
pool:
|
||||
name: msccl-ci-h100
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/sglang-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
gpuArch: '90'
|
||||
|
||||
- job: SGlangMultiNodeTest
|
||||
displayName: SGLANG Multi-Node Test
|
||||
strategy:
|
||||
matrix:
|
||||
sglang:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64
|
||||
|
||||
pool:
|
||||
name: mscclpp-it
|
||||
container:
|
||||
image: $[ variables['containerImage'] ]
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Add HostEntry
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
ENTRY="${{ parameters.hostEntries }}"
|
||||
if ! grep -qxF "$ENTRY" /etc/hosts; then
|
||||
echo "Adding to /etc/hosts"
|
||||
echo "$ENTRY" | sudo tee -a /etc/hosts
|
||||
else
|
||||
echo "Entry already exists, nothing to do."
|
||||
fi
|
||||
|
||||
- template: templates/deploy.yml
|
||||
parameters:
|
||||
steps:
|
||||
- template: templates/sglang-test.yml
|
||||
parameters:
|
||||
subscription: msccl-it
|
||||
vmssName: mscclit-vmss
|
||||
resourceGroup: msccl-IT
|
||||
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMscclppTest
|
||||
displayName: Run multi-nodes mscclpp-test
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test
|
||||
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodeUnitTest
|
||||
displayName: Run multi-nodes unit tests
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh mp-ut
|
||||
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodePythonTests
|
||||
displayName: Run multi-nodes python tests
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh pytests
|
||||
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodePythonBenchmark
|
||||
displayName: Run multi-nodes python benchmark
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark
|
||||
|
||||
- template: templates/stop.yml
|
||||
parameters:
|
||||
subscription: msccl-it
|
||||
vmssName: mscclit-vmss
|
||||
resourceGroup: msccl-IT
|
||||
resourceGroup: msccl-IT
|
||||
Reference in New Issue
Block a user