From 0bf599837d2f9702f2feaf363a5fce4dedac0dc8 Mon Sep 17 00:00:00 2001 From: empyreus Date: Tue, 7 Apr 2026 17:29:47 +0000 Subject: [PATCH] try multi-pipeline --- .azure-pipelines/integration-test.yml | 16 +++- .azure-pipelines/multi-nodes-test.yml | 107 ++++++++++---------------- 2 files changed, 57 insertions(+), 66 deletions(-) diff --git a/.azure-pipelines/integration-test.yml b/.azure-pipelines/integration-test.yml index 84d4e6c8..40fa09f2 100644 --- a/.azure-pipelines/integration-test.yml +++ b/.azure-pipelines/integration-test.yml @@ -25,6 +25,13 @@ pr: - docs/** - '**/*.md' +parameters: +- name: hostEntries + type: string + default: | + 10.0.0.10 mscclit-000000 + 10.0.0.11 mscclit-000001 + jobs: - job: SGlangTest displayName: SGLANG Test @@ -55,4 +62,11 @@ jobs: pool: name: mscclpp-it container: - image: $(containerImage) \ No newline at end of file + image: $(containerImage) + +steps: +- template: templates/sglang-test.yml + parameters: + subscription: msccl-it + vmssName: mscclit-vmss + resourceGroup: msccl-IT \ No newline at end of file diff --git a/.azure-pipelines/multi-nodes-test.yml b/.azure-pipelines/multi-nodes-test.yml index d4924879..40fa09f2 100644 --- a/.azure-pipelines/multi-nodes-test.yml +++ b/.azure-pipelines/multi-nodes-test.yml @@ -11,9 +11,19 @@ trigger: - docs/** - '**/*.md' -# Do not run multi-nodes-test for PR, we can trigger it manually -pr: none - +pr: + branches: + include: + - main + - release/* + drafts: false + paths: + exclude: + - .devcontainer/** + - .github/** + - docker/** + - docs/** + - '**/*.md' parameters: - name: hostEntries @@ -23,73 +33,40 @@ parameters: 10.0.0.11 mscclit-000001 jobs: -- job: MultiNodesTest - displayName: Multi nodes test +- job: SGlangTest + displayName: SGLANG Test strategy: matrix: - cuda11: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8 - cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 + sglang: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64 + + pool: + name: msccl-ci-h100 + container: + image: $(containerImage) + + steps: + - template: templates/sglang-test.yml + parameters: + subscription: mscclpp-ci-h100 + vmssName: mscclpp-h100-ci + gpuArch: '90' + +- job: SGlangMultiNodeTest + displayName: SGLANG Multi-Node Test + strategy: + matrix: + sglang: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-sglang-x86_64 + pool: name: mscclpp-it container: - image: $[ variables['containerImage'] ] + image: $(containerImage) - steps: - - task: Bash@3 - displayName: Add HostEntry - inputs: - targetType: 'inline' - script: | - ENTRY="${{ parameters.hostEntries }}" - if ! grep -qxF "$ENTRY" /etc/hosts; then - echo "Adding to /etc/hosts" - echo "$ENTRY" | sudo tee -a /etc/hosts - else - echo "Entry already exists, nothing to do." - fi - - - template: templates/deploy.yml - parameters: +steps: +- template: templates/sglang-test.yml + parameters: subscription: msccl-it vmssName: mscclit-vmss - resourceGroup: msccl-IT - - - template: templates/run-remote-task.yml - parameters: - name: RunMscclppTest - displayName: Run multi-nodes mscclpp-test - runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' - remoteScript: | - bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test - - - template: templates/run-remote-task.yml - parameters: - name: RunMultiNodeUnitTest - displayName: Run multi-nodes unit tests - runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' - remoteScript: | - bash /root/mscclpp/test/deploy/run_tests.sh mp-ut - - - template: templates/run-remote-task.yml - parameters: - name: RunMultiNodePythonTests - displayName: Run multi-nodes python tests - runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' - remoteScript: | - bash /root/mscclpp/test/deploy/run_tests.sh pytests - - - template: templates/run-remote-task.yml - parameters: - name: RunMultiNodePythonBenchmark - displayName: Run multi-nodes python benchmark - runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' - remoteScript: | - bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark - - - template: templates/stop.yml - parameters: - subscription: msccl-it - vmssName: mscclit-vmss - resourceGroup: msccl-IT + resourceGroup: msccl-IT \ No newline at end of file