mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-04-19 22:39:11 +00:00
Remove GTest dependency, add code coverage, and refactor unit tests and CI pipelines (#744)
- Removes the GTest dependency, replacing it with a minimal custom framework (`test/framework.*`) that covers only what the tests actually use — a unified `TEST()` macro with SFINAE-based fixture auto-detection, `EXPECT_*`/`ASSERT_*` assertions, environments, and setup/teardown. - `--exclude-perf-tests` flag and substring-based negative filtering - `MSCCLPP_ENABLE_COVERAGE` CMake option with gcov/lcov; CI uploads to Codecov - Merges standalone `test/perf/` into main test targets - Refactors Azure pipelines to reduce redundancies & make more readable --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
This commit is contained in:
93
.azure-pipelines/codecov.yml
Normal file
93
.azure-pipelines/codecov.yml
Normal file
@@ -0,0 +1,93 @@
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- main
|
||||
- release/*
|
||||
paths:
|
||||
exclude:
|
||||
- .devcontainer/**
|
||||
- .github/**
|
||||
- apps/**
|
||||
- docker/**
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
pr:
|
||||
branches:
|
||||
include:
|
||||
- main
|
||||
- release/*
|
||||
drafts: false
|
||||
paths:
|
||||
exclude:
|
||||
- .devcontainer/**
|
||||
- .github/**
|
||||
- apps/**
|
||||
- docker/**
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
jobs:
|
||||
- job: CodeCoverageA100
|
||||
timeoutInMinutes: 40
|
||||
pool:
|
||||
name: msccl-ci
|
||||
variables:
|
||||
- group: mscclpp
|
||||
strategy:
|
||||
matrix:
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/codecov.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci
|
||||
vmssName: mscclpp-ci
|
||||
gpuArch: '80'
|
||||
|
||||
- job: CodeCoverageH100
|
||||
timeoutInMinutes: 40
|
||||
pool:
|
||||
name: msccl-ci-h100
|
||||
variables:
|
||||
- group: mscclpp
|
||||
strategy:
|
||||
matrix:
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/codecov.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
gpuArch: '90'
|
||||
|
||||
- job: CodeCoverageMI300X
|
||||
timeoutInMinutes: 40
|
||||
pool:
|
||||
name: msccl-ci-mi300x
|
||||
variables:
|
||||
- group: mscclpp
|
||||
strategy:
|
||||
matrix:
|
||||
rocm6_2:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/codecov.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-mi300x
|
||||
vmssName: mscclpp-mi300x-ci
|
||||
platform: rocm
|
||||
gpuArch: gfx942
|
||||
@@ -41,11 +41,10 @@ jobs:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/integration-test.yaml
|
||||
- template: templates/integration-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci
|
||||
vmssName: mscclpp-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '80'
|
||||
|
||||
- job: IntegrationTestH100
|
||||
@@ -61,10 +60,9 @@ jobs:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/integration-test.yaml
|
||||
- template: templates/integration-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
perfBaselineFile: test/deploy/perf_ndmv5.jsonl
|
||||
gpuArch: '90'
|
||||
|
||||
@@ -37,33 +37,6 @@ jobs:
|
||||
image: $[ variables['containerImage'] ]
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: mscclpp-ssh.key
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
- task: Bash@3
|
||||
displayName: Add HostEntry
|
||||
inputs:
|
||||
@@ -77,107 +50,46 @@ jobs:
|
||||
echo "Entry already exists, nothing to do."
|
||||
fi
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: msccl-it
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name mscclit-vmss --resource-group msccl-IT
|
||||
- template: templates/deploy.yml
|
||||
parameters:
|
||||
subscription: msccl-it
|
||||
vmssName: mscclit-vmss
|
||||
resourceGroup: msccl-IT
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMscclppTest
|
||||
displayName: Run multi-nodes mscclpp-test
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test
|
||||
|
||||
- task: Bash@3
|
||||
name: RunMscclppTest
|
||||
displayName: Run multi-nodes mscclpp-test
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
rm -rf output/*
|
||||
mkdir -p output
|
||||
touch output/mscclit-000000
|
||||
tail -f output/mscclit-000000 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test'
|
||||
kill $CHILD_PID
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodeUnitTest
|
||||
displayName: Run multi-nodes unit tests
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh mp-ut
|
||||
|
||||
- task: Bash@3
|
||||
name: RunMultiNodeUnitTest
|
||||
displayName: Run multi-nodes unit tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
rm -rf output/*
|
||||
mkdir -p output
|
||||
touch output/mscclit-000000
|
||||
tail -f output/mscclit-000000 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mp-ut'
|
||||
kill $CHILD_PID
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodePythonTests
|
||||
displayName: Run multi-nodes python tests
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh pytests
|
||||
|
||||
- task: Bash@3
|
||||
name: RunMultiNodePythonTests
|
||||
displayName: Run multi-nodes python tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
rm -rf output/*
|
||||
mkdir -p output
|
||||
touch output/mscclit-000000
|
||||
tail -f output/mscclit-000000 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh pytests'
|
||||
kill $CHILD_PID
|
||||
- template: templates/run-remote-task.yml
|
||||
parameters:
|
||||
name: RunMultiNodePythonBenchmark
|
||||
displayName: Run multi-nodes python benchmark
|
||||
runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser'
|
||||
remoteScript: |
|
||||
bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark
|
||||
|
||||
- task: Bash@3
|
||||
name: RunMultiNodePythonBenchmark
|
||||
displayName: Run multi-nodes python benchmark
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
rm -rf output/*
|
||||
mkdir -p output
|
||||
touch output/mscclit-000000
|
||||
tail -f output/mscclit-000000 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark'
|
||||
kill $CHILD_PID
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: msccl-it
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name mscclit-vmss --resource-group msccl-IT
|
||||
- template: templates/stop.yml
|
||||
parameters:
|
||||
subscription: msccl-it
|
||||
vmssName: mscclit-vmss
|
||||
resourceGroup: msccl-IT
|
||||
|
||||
@@ -40,11 +40,10 @@ jobs:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/nccl-test.yaml
|
||||
- template: templates/nccl-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci
|
||||
vmssName: mscclpp-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
nvccGencode: "-gencode=arch=compute_80,code=sm_80"
|
||||
|
||||
- job: NcclTestH100
|
||||
@@ -61,9 +60,8 @@ jobs:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/nccl-test.yaml
|
||||
- template: templates/nccl-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
nvccGencode: "-gencode=arch=compute_90,code=sm_90"
|
||||
@@ -40,9 +40,8 @@ jobs:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/rccl-test.yaml
|
||||
- template: templates/rccl-test.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-mi300x
|
||||
vmssName: mscclpp-mi300x-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: gfx942
|
||||
|
||||
110
.azure-pipelines/templates/codecov.yml
Normal file
110
.azure-pipelines/templates/codecov.yml
Normal file
@@ -0,0 +1,110 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: platform
|
||||
type: string
|
||||
default: 'cuda'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
platform: ${{ parameters.platform }}
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
buildType: Debug
|
||||
cmakeArgs: '-DMSCCLPP_ENABLE_COVERAGE=ON'
|
||||
buildDisplayName: 'Build with coverage'
|
||||
buildName: BuildCoverage
|
||||
deployArgs: 'single-node-test true ${{ parameters.platform }}'
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: TestsCoverageNonPerf
|
||||
displayName: Run unit_tests + mp_unit_tests (non-perf) with coverage
|
||||
remoteScript: |
|
||||
BUILD_PREFIX=$(cat build/BUILD_PREFIX)
|
||||
STRIP_COUNT=$(echo $BUILD_PREFIX | tr -cd / | wc -c)
|
||||
export GCOV_PREFIX=/root/mscclpp
|
||||
export GCOV_PREFIX_STRIP=$STRIP_COUNT
|
||||
|
||||
echo "Running unit_tests..."
|
||||
./build/bin/unit_tests
|
||||
echo "unit_tests: PASSED"
|
||||
|
||||
echo "Running mp_unit_tests -np 2..."
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --exclude-perf-tests
|
||||
echo "mp_unit_tests -np 2: PASSED"
|
||||
|
||||
echo "Running mp_unit_tests -np 4..."
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests --exclude-perf-tests
|
||||
echo "mp_unit_tests -np 4: PASSED"
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: CaptureCoverage
|
||||
displayName: Capture coverage data with lcov
|
||||
remoteScript: |
|
||||
BUILD_PREFIX=$(cat build/BUILD_PREFIX)
|
||||
|
||||
GCOV_TOOL_ARG=""
|
||||
if [ "${{ parameters.platform }}" = "rocm" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq llvm 2>/dev/null | tail -1
|
||||
GCOV_WRAPPER=$(mktemp)
|
||||
printf '#!/bin/sh\nexec llvm-cov gcov "$@"\n' > "$GCOV_WRAPPER"
|
||||
chmod +x "$GCOV_WRAPPER"
|
||||
GCOV_TOOL_ARG="--gcov-tool ${GCOV_WRAPPER}"
|
||||
fi
|
||||
|
||||
lcov --version
|
||||
LCOV_CAPTURE_ARGS=""
|
||||
if lcov --help 2>&1 | grep -q "inconsistent"; then
|
||||
LCOV_CAPTURE_ARGS="--ignore-errors inconsistent"
|
||||
fi
|
||||
|
||||
lcov ${GCOV_TOOL_ARG} --directory . --capture --output-file coverage.info ${LCOV_CAPTURE_ARGS}
|
||||
if [ ! -s coverage.info ]; then
|
||||
echo "ERROR: coverage.info was not generated."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
lcov ${GCOV_TOOL_ARG} --extract coverage.info "${BUILD_PREFIX}/src/*" "${BUILD_PREFIX}/include/mscclpp/*" --output-file coverage.info
|
||||
lcov --list coverage.info
|
||||
ls -la coverage.info
|
||||
|
||||
- task: Bash@3
|
||||
name: FetchCoverage
|
||||
displayName: Fetch coverage data from remote VM
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
HOST=$(head -1 ${HOSTFILE})
|
||||
ssh -i ${KeyFilePath} -o ${SSH_OPTION} ${HOST} \
|
||||
'sudo docker cp mscclpp-test:/root/mscclpp/coverage.info /tmp/coverage.info'
|
||||
scp -i ${KeyFilePath} -o ${SSH_OPTION} ${HOST}:/tmp/coverage.info $(System.DefaultWorkingDirectory)/coverage.info
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: UploadCodecov
|
||||
displayName: Upload coverage to Codecov
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
curl -Os https://cli.codecov.io/latest/linux/codecov
|
||||
chmod +x codecov
|
||||
./codecov upload-process --disable-search -t $(CODECOV_TOKEN) -f coverage.info --flag ${{ parameters.platform }}-${{ parameters.gpuArch }}
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
131
.azure-pipelines/templates/deploy.yml
Normal file
131
.azure-pipelines/templates/deploy.yml
Normal file
@@ -0,0 +1,131 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: resourceGroup
|
||||
type: string
|
||||
default: mscclpp
|
||||
# Build parameters
|
||||
- name: platform
|
||||
type: string
|
||||
default: 'cuda'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildType
|
||||
type: string
|
||||
default: 'Release'
|
||||
- name: buildTests
|
||||
type: string
|
||||
default: 'true'
|
||||
- name: cmakeArgs
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildName
|
||||
type: string
|
||||
default: 'Build'
|
||||
- name: buildDisplayName
|
||||
type: string
|
||||
default: 'Build'
|
||||
# Deploy parameters
|
||||
- name: deployArgs
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
steps:
|
||||
# 0. Ensure Azure CLI exists before running AzureCLI@2 tasks.
|
||||
- task: Bash@3
|
||||
name: EnsureAzureCLI
|
||||
displayName: Ensure Azure CLI Installed
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
if command -v az >/dev/null 2>&1; then
|
||||
az version >/dev/null
|
||||
exit 0
|
||||
fi
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
# 1. Build
|
||||
- task: Bash@3
|
||||
name: ${{ parameters.buildName }}
|
||||
displayName: ${{ parameters.buildDisplayName }}
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
rm -rf build
|
||||
mkdir -p build && cd build
|
||||
BUILD_TESTS_ARG=""
|
||||
if [ "${{ parameters.buildTests }}" = "true" ]; then
|
||||
BUILD_TESTS_ARG="-DMSCCLPP_BUILD_TESTS=ON"
|
||||
fi
|
||||
|
||||
GPU_ARCH_ARG=""
|
||||
if [ -n "${{ parameters.gpuArch }}" ]; then
|
||||
GPU_ARCH_ARG="-DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }}"
|
||||
fi
|
||||
|
||||
CMAKE_EXTRA_ARGS='${{ parameters.cmakeArgs }}'
|
||||
if [ "${{ parameters.platform }}" = "rocm" ]; then
|
||||
eval CXX=/opt/rocm/bin/hipcc cmake \
|
||||
-DCMAKE_BUILD_TYPE=${{ parameters.buildType }} \
|
||||
-DMSCCLPP_BYPASS_GPU_CHECK=ON \
|
||||
-DMSCCLPP_USE_ROCM=ON \
|
||||
${BUILD_TESTS_ARG} \
|
||||
${GPU_ARCH_ARG} \
|
||||
${CMAKE_EXTRA_ARGS} ..
|
||||
else
|
||||
eval cmake \
|
||||
-DCMAKE_BUILD_TYPE=${{ parameters.buildType }} \
|
||||
-DMSCCLPP_BYPASS_GPU_CHECK=ON \
|
||||
-DMSCCLPP_USE_CUDA=ON \
|
||||
${BUILD_TESTS_ARG} \
|
||||
${GPU_ARCH_ARG} \
|
||||
${CMAKE_EXTRA_ARGS} ..
|
||||
fi
|
||||
make -j
|
||||
cd ..
|
||||
pwd > build/BUILD_PREFIX
|
||||
echo "=== Build artifacts ==="
|
||||
ls -la build/bin/ || echo "ERROR: build/bin/ missing after build"
|
||||
du -sh build/bin/* 2>/dev/null || true
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# 2. Download SSH key + install packages + start VMSS
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: mscclpp.pem
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group ${{ parameters.resourceGroup }}
|
||||
|
||||
# 3. Deploy test environment
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: ${{ parameters.deployArgs }}
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
@@ -1,242 +0,0 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: perfBaselineFile
|
||||
type: string
|
||||
default: 'test/deploy/perf_ndmv4.jsonl'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: "single-node-test"
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: AllGatherTest
|
||||
displayName: Run mscclpp AllGather test
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
set -e; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: SendRecvTest
|
||||
displayName: Run mscclpp SendRecv test
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
set -e; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: AllReduceTest
|
||||
displayName: Run mscclpp AllReduce test
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
set -e; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: AllToAll
|
||||
displayName: Run mscclpp AllToAll test
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
set -e; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: CheckPerfNumber
|
||||
displayName: Check collective primitives performance
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
set -e; \
|
||||
cd /root/mscclpp; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file ${{ parameters.perfBaselineFile }}"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: PythonAllReduceBenchmark
|
||||
displayName: Python Allreduce Benchmark
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
set -e; \
|
||||
cd /root/mscclpp; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
python3 -m pip install .; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 ./python/mscclpp_benchmark/allreduce_bench.py"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: FifoPerfBenchmark
|
||||
displayName: FIFO Performance Benchmark
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
set -e; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
./build/bin/perf/fifo_test"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
76
.azure-pipelines/templates/integration-test.yml
Normal file
76
.azure-pipelines/templates/integration-test.yml
Normal file
@@ -0,0 +1,76 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: perfBaselineFile
|
||||
type: string
|
||||
default: 'test/deploy/perf_ndmv4.jsonl'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
deployArgs: 'single-node-test'
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: AllGatherTest
|
||||
displayName: Run mscclpp AllGather test
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: SendRecvTest
|
||||
displayName: Run mscclpp SendRecv test
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: AllReduceTest
|
||||
displayName: Run mscclpp AllReduce test
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: AllToAll
|
||||
displayName: Run mscclpp AllToAll test
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: CheckPerfNumber
|
||||
displayName: Check collective primitives performance
|
||||
remoteScript: |
|
||||
python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file ${{ parameters.perfBaselineFile }}
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: PythonAllReduceBenchmark
|
||||
displayName: Python Allreduce Benchmark
|
||||
remoteScript: |
|
||||
python3 -m pip install .
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 ./python/mscclpp_benchmark/allreduce_bench.py
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
@@ -1,282 +0,0 @@
|
||||
# .azure-pipelines/templates/nccl-test.yaml
|
||||
# ----------------------------------------
|
||||
# A step‐template that runs the entire MSCCLPP→NCCL test suite on one pool/container.
|
||||
#
|
||||
# Parameters:
|
||||
# subscription – Azure subscription to use for VMSS start/stop
|
||||
# sshKeySecureFile – the secureFile name for your SSH key
|
||||
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: nvccGencode
|
||||
type: string
|
||||
default: "-gencode=arch=compute_80,code=sm_80"
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
- checkout: git://One/msccl-users
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)/mscclpp'
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: mscclpp/test/deploy/deploy.sh
|
||||
arguments: nccltest-single-node
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)/mscclpp
|
||||
|
||||
- task: Bash@3
|
||||
name: CopyMscclUsers
|
||||
displayName: Copy msccl-users
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/msccl-users
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
DST_DIR="/tmp/mscclpp/msccl-users"
|
||||
parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# - task: Bash@3
|
||||
# name: GenerateExecutionFile
|
||||
# displayName: Generate execution file
|
||||
# inputs:
|
||||
# targetType: 'inline'
|
||||
# script: |
|
||||
# set -e
|
||||
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
# SSH_OPTION="StrictHostKeyChecking=no"
|
||||
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
# cd /root/mscclpp/msccl-users; \
|
||||
# mkdir -p execution-files; \
|
||||
# cd /root/mscclpp/msccl-users; \
|
||||
# bash algos/mscclpp_a100/generate_execution_plan.sh"'
|
||||
# workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallNcclTests
|
||||
displayName: Install NCCL Tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd; git clone https://github.com/NVIDIA/nccl-tests.git; \
|
||||
cd nccl-tests; \
|
||||
MPI=1 MPI_HOME=/usr/local/mpi make -j"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# - task: Bash@3
|
||||
# name: RunNcclAllReduceTest
|
||||
# displayName: Run NCCL AllReduce Test
|
||||
# inputs:
|
||||
# targetType: inline
|
||||
# script: |
|
||||
# set -e
|
||||
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
# SSH_OPTION="StrictHostKeyChecking=no"
|
||||
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
# cd /root/mscclpp; \
|
||||
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
# workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# - task: Bash@3
|
||||
# name: RunNcclAllGatherTest
|
||||
# displayName: Run NCCL AllGather Test
|
||||
# inputs:
|
||||
# targetType: inline
|
||||
# script: |
|
||||
# set -e
|
||||
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
# SSH_OPTION="StrictHostKeyChecking=no"
|
||||
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
# cd /root/mscclpp; \
|
||||
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
# workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# - task: Bash@3
|
||||
# name: RunNcclReduceScatterTest
|
||||
# displayName: Run NCCL Reduce Scatter Test
|
||||
# inputs:
|
||||
# targetType: inline
|
||||
# script: |
|
||||
# set -e
|
||||
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
# SSH_OPTION="StrictHostKeyChecking=no"
|
||||
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
# cd /root/mscclpp; \
|
||||
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
# workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallNccl
|
||||
displayName: Install NCCL
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
LATEST_TAG=\$(curl -fsSL https://api.github.com/repos/NVIDIA/nccl/releases/latest | grep tag_name | cut -d\\\" -f4); \
|
||||
if [ -z \"\$LATEST_TAG\" ]; then echo \"Failed to fetch latest NCCL tag\"; exit 1; fi; \
|
||||
cd; git clone --branch \$LATEST_TAG --depth 1 https://github.com/NVIDIA/nccl.git; \
|
||||
cd nccl; \
|
||||
make -j src.build NVCC_GENCODE=${{ parameters.nvccGencode }}"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: RunNcclAllGatherFallbaclkToNcclTest
|
||||
displayName: Run NCCL AllGather Test with or without Fallback to NCCL operation
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allgather\" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: RunNcclAllReduceFallbaclkToNcclTest
|
||||
displayName: Run NCCL AllReduce Test with or without Fallback to NCCL operation
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allgather\" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: RunNcclBroadcastFallbaclkToNcclTest
|
||||
displayName: Run NCCL Broadcast Test with or without Fallback to NCCL operation
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# - task: Bash@3
|
||||
# name: RunNcclReduceScatterFallbaclkToNcclTest
|
||||
# displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
|
||||
# inputs:
|
||||
# targetType: 'inline'
|
||||
# script: |
|
||||
# set -e
|
||||
# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
|
||||
# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
|
||||
# SSH_OPTION="StrictHostKeyChecking=no"
|
||||
# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
# cd /root/mscclpp; \
|
||||
# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
|
||||
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
|
||||
# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
# workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
76
.azure-pipelines/templates/nccl-test.yml
Normal file
76
.azure-pipelines/templates/nccl-test.yml
Normal file
@@ -0,0 +1,76 @@
|
||||
# .azure-pipelines/templates/nccl-test.yml
|
||||
# ----------------------------------------
|
||||
# A step‐template that runs the entire MSCCLPP→NCCL test suite on one pool/container.
|
||||
#
|
||||
# Parameters:
|
||||
# subscription – Azure subscription to use for VMSS start/stop
|
||||
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: nvccGencode
|
||||
type: string
|
||||
default: "-gencode=arch=compute_80,code=sm_80"
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
deployArgs: 'nccltest-single-node'
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: InstallNcclTests
|
||||
displayName: Install NCCL Tests
|
||||
remoteScript: |
|
||||
cd
|
||||
git clone https://github.com/NVIDIA/nccl-tests.git
|
||||
cd nccl-tests
|
||||
MPI=1 MPI_HOME=/usr/local/mpi make -j
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: InstallNccl
|
||||
displayName: Install NCCL
|
||||
remoteScript: |
|
||||
LATEST_TAG=$(curl -fsSL https://api.github.com/repos/NVIDIA/nccl/releases/latest | grep tag_name | cut -d\" -f4)
|
||||
if [ -z "$LATEST_TAG" ]; then
|
||||
echo "Failed to fetch latest NCCL tag"
|
||||
exit 1
|
||||
fi
|
||||
cd
|
||||
git clone --branch $LATEST_TAG --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl
|
||||
make -j src.build NVCC_GENCODE=${{ parameters.nvccGencode }}
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: RunNcclAllGatherFallbaclkToNcclTest
|
||||
displayName: Run NCCL AllGather Test with or without Fallback to NCCL operation
|
||||
remoteScript: |
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: RunNcclAllReduceFallbaclkToNcclTest
|
||||
displayName: Run NCCL AllReduce Test with or without Fallback to NCCL operation
|
||||
remoteScript: |
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: RunNcclBroadcastFallbaclkToNcclTest
|
||||
displayName: Run NCCL Broadcast Test with or without Fallback to NCCL operation
|
||||
remoteScript: |
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
@@ -1,142 +0,0 @@
|
||||
# .azure-pipelines/templates/rccl-test.yaml
|
||||
# ------------------------------------------------
|
||||
# A step-template that runs the entire MSCCLPP→RCCL test suite on one pool/container.
|
||||
#
|
||||
# Parameters:
|
||||
# subscription – Azure subscription to use for VMSS start/stop
|
||||
# vmssName – VMSS name to start/stop
|
||||
# sshKeySecureFile – the secureFile name for your SSH key
|
||||
# gpuArch – GPU architecture (e.g. gfx942)
|
||||
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
default: "gfx942"
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: "single-node-test true rocm"
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallRcclTests
|
||||
displayName: Install RCCL Tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd; \
|
||||
git clone --filter=blob:none --no-checkout https://github.com/ROCm/rocm-systems.git; \
|
||||
cd rocm-systems; \
|
||||
git sparse-checkout init --cone; \
|
||||
git sparse-checkout set projects/rccl-tests; \
|
||||
git checkout; \
|
||||
cd projects/rccl-tests; \
|
||||
MPI=1 MPI_HOME=/usr/local/mpi make -j"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: RunRcclAllGatherTest
|
||||
displayName: Run RCCL AllGather Test with or without MSCCLPP Lib
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: RunRcclAllReduceTest
|
||||
displayName: Run RCCL AllReduce Test with or without MSCCLPP Lib
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
ROOT_DIR=$(System.DefaultWorkingDirectory)
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
|
||||
echo \"mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
63
.azure-pipelines/templates/rccl-test.yml
Normal file
63
.azure-pipelines/templates/rccl-test.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
# .azure-pipelines/templates/rccl-test.yml
|
||||
# ------------------------------------------------
|
||||
# A step-template that runs the entire MSCCLPP→RCCL test suite on one pool/container.
|
||||
#
|
||||
# Parameters:
|
||||
# subscription – Azure subscription to use for VMSS start/stop
|
||||
# vmssName – VMSS name to start/stop
|
||||
# gpuArch – GPU architecture (e.g. gfx942)
|
||||
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
default: "gfx942"
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
platform: rocm
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
buildTests: false
|
||||
deployArgs: 'single-node-test true rocm'
|
||||
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: InstallRcclTests
|
||||
displayName: Install RCCL Tests
|
||||
remoteScript: |
|
||||
cd
|
||||
git clone --filter=blob:none --no-checkout https://github.com/ROCm/rocm-systems.git
|
||||
cd rocm-systems
|
||||
git sparse-checkout init --cone
|
||||
git sparse-checkout set projects/rccl-tests
|
||||
git checkout
|
||||
cd projects/rccl-tests
|
||||
MPI=1 MPI_HOME=/usr/local/mpi make -j
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: RunRcclAllGatherTest
|
||||
displayName: Run RCCL AllGather Test with or without MSCCLPP Lib
|
||||
remoteScript: |
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: RunRcclAllReduceTest
|
||||
displayName: Run RCCL AllReduce Test with or without MSCCLPP Lib
|
||||
remoteScript: |
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
27
.azure-pipelines/templates/run-remote-task.yml
Normal file
27
.azure-pipelines/templates/run-remote-task.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
parameters:
|
||||
- name: name
|
||||
type: string
|
||||
default: ''
|
||||
- name: displayName
|
||||
type: string
|
||||
- name: runRemoteArgs
|
||||
type: string
|
||||
default: ''
|
||||
- name: remoteScript
|
||||
type: string
|
||||
- name: workingDirectory
|
||||
type: string
|
||||
default: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
${{ if ne(parameters.name, '') }}:
|
||||
name: ${{ parameters.name }}
|
||||
displayName: ${{ parameters.displayName }}
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
test/deploy/run-remote.sh ${{ parameters.runRemoteArgs }} <<'REMOTE_CMD'
|
||||
${{ parameters.remoteScript }}
|
||||
REMOTE_CMD
|
||||
workingDirectory: ${{ parameters.workingDirectory }}
|
||||
20
.azure-pipelines/templates/stop.yml
Normal file
20
.azure-pipelines/templates/stop.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: resourceGroup
|
||||
type: string
|
||||
default: mscclpp
|
||||
|
||||
steps:
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group ${{ parameters.resourceGroup }}
|
||||
@@ -1,191 +0,0 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_USE_IB=OFF -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: single-node-test false
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
|
||||
- task: Bash@3
|
||||
name: UnitTests
|
||||
displayName: Run mscclpp unit tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd /root/mscclpp; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
./build/bin/unit_tests"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
cd /root/mscclpp; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests; \
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests; \
|
||||
mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: StopContainer
|
||||
displayName: Stop existing container
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION \
|
||||
"sudo docker stop mscclpp-test || true; sudo docker rm mscclpp-test || true"
|
||||
rm -f $(System.DefaultWorkingDirectory)/sshkey $(System.DefaultWorkingDirectory)/sshkey.pub
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: BuildWithIb
|
||||
displayName: Rebuild with IB
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
rm -rf build && mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnvWithIb
|
||||
displayName: Deploy Test Env (with IB build)
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: single-node-test false
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
|
||||
- task: Bash@3
|
||||
name: PyTestsWithIbBuildDisableIb
|
||||
displayName: Run pytests (IB build, IB tests disabled)
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
95
.azure-pipelines/templates/ut-no-ib-env.yml
Normal file
95
.azure-pipelines/templates/ut-no-ib-env.yml
Normal file
@@ -0,0 +1,95 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
cmakeArgs: '-DMSCCLPP_USE_IB=OFF'
|
||||
deployArgs: 'single-node-test false'
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: UnitTests
|
||||
displayName: Run mscclpp unit tests
|
||||
remoteScript: |
|
||||
./build/bin/unit_tests
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: StopContainer
|
||||
displayName: Stop existing container
|
||||
runRemoteArgs: '--no-docker --no-log'
|
||||
remoteScript: |
|
||||
sudo docker stop mscclpp-test || true
|
||||
sudo docker rm mscclpp-test || true
|
||||
|
||||
- task: Bash@3
|
||||
displayName: Remove generated SSH key files
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
rm -f $(System.DefaultWorkingDirectory)/sshkey $(System.DefaultWorkingDirectory)/sshkey.pub
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: BuildWithIb
|
||||
displayName: Rebuild with IB
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
rm -rf build
|
||||
mkdir -p build && cd build
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DMSCCLPP_BYPASS_GPU_CHECK=ON \
|
||||
-DMSCCLPP_USE_CUDA=ON \
|
||||
-DMSCCLPP_BUILD_TESTS=ON \
|
||||
-DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnvWithIb
|
||||
displayName: Deploy Test Env (with IB build)
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: single-node-test false
|
||||
workingDirectory: $(System.DefaultWorkingDirectory)
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: PyTestsWithIbBuildDisableIb
|
||||
displayName: Run pytests (IB build, IB tests disabled)
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
@@ -1,145 +0,0 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
|
||||
steps:
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: "single-node-test"
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
set -e; \
|
||||
cd /root/mscclpp; \
|
||||
mkdir -p build && cd build; \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} -DMSCCLPP_NPKIT_FLAGS=\"-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT\" ..; \
|
||||
make -j"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd /root/mscclpp; \
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --gtest_filter=\"ExecutorTest.TwoNodesAllreduce\"; \
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
# set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd /root/mscclpp; \
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce.json'; \
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce_packet.json'; \
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_COPY_PACKET_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKET_ENTRY ./npkit_output/npkit_event_trace.json; \
|
||||
grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKET_ENTRY ./npkit_output/npkit_event_trace.json"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
57
.azure-pipelines/templates/ut-npkit.yml
Normal file
57
.azure-pipelines/templates/ut-npkit.yml
Normal file
@@ -0,0 +1,57 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
cmakeArgs: '-DMSCCLPP_NPKIT_FLAGS="-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT"'
|
||||
deployArgs: 'single-node-test'
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
remoteScript: |
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output
|
||||
export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --filter="ExecutorTest.TwoNodesAllreduce"
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
remoteScript: |
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output
|
||||
export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce.json'
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce_packet.json'
|
||||
python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output
|
||||
grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_COPY_PACKET_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKET_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKET_ENTRY ./npkit_output/npkit_event_trace.json
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
@@ -1,142 +0,0 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: sshKeySecureFile
|
||||
type: string
|
||||
- name: platform
|
||||
type: string
|
||||
default: 'cuda'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
name: Build
|
||||
displayName: Build
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
if [ "${{ parameters.platform }}" == "rocm" ]; then
|
||||
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
else
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} ..
|
||||
fi
|
||||
make -j
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: DownloadSecureFile@1
|
||||
name: SshKeyFile
|
||||
displayName: Download key file
|
||||
inputs:
|
||||
secureFile: ${{ parameters.sshKeySecureFile }}
|
||||
|
||||
- task: Bash@3
|
||||
name: InstallPackages
|
||||
displayName: Install Packages
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install pssh -y
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StartVMSS
|
||||
displayName: Start VMSS
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
|
||||
- task: Bash@3
|
||||
name: DeployTestEnv
|
||||
displayName: Deploy Test Env
|
||||
inputs:
|
||||
targetType: filePath
|
||||
filePath: test/deploy/deploy.sh
|
||||
arguments: "single-node-test true ${{ parameters.platform }}"
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
|
||||
- task: Bash@3
|
||||
name: UnitTests
|
||||
displayName: Run mscclpp unit tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
cd /root/mscclpp; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
./build/bin/unit_tests"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH; \
|
||||
cd /root/mscclpp; \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests; \
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests; \
|
||||
mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: Bash@3
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
set -e
|
||||
HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
|
||||
: > azureuser@10.0.0.4
|
||||
tail -f azureuser@10.0.0.4 &
|
||||
CHILD_PID=$!
|
||||
parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \
|
||||
-O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \
|
||||
export PATH=/usr/local/mpi/bin:\$PATH \
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \
|
||||
cd /root/mscclpp; \
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x GPU_MAX_HW_QUEUES=8 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"'
|
||||
kill $CHILD_PID
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
- task: AzureCLI@2
|
||||
name: StopVMSS
|
||||
displayName: Deallocate VMSS
|
||||
condition: always()
|
||||
inputs:
|
||||
azureSubscription: ${{ parameters.subscription }}
|
||||
scriptType: bash
|
||||
scriptLocation: inlineScript
|
||||
inlineScript: |
|
||||
az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp
|
||||
48
.azure-pipelines/templates/ut.yml
Normal file
48
.azure-pipelines/templates/ut.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
parameters:
|
||||
- name: subscription
|
||||
type: string
|
||||
- name: vmssName
|
||||
type: string
|
||||
- name: platform
|
||||
type: string
|
||||
default: 'cuda'
|
||||
- name: gpuArch
|
||||
type: string
|
||||
|
||||
steps:
|
||||
- template: deploy.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
platform: ${{ parameters.platform }}
|
||||
gpuArch: ${{ parameters.gpuArch }}
|
||||
deployArgs: 'single-node-test true ${{ parameters.platform }}'
|
||||
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: UnitTests
|
||||
displayName: Run mscclpp unit tests
|
||||
remoteScript: |
|
||||
./build/bin/unit_tests
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: MpUnitTests
|
||||
displayName: Run mscclpp multi-process unit tests
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests
|
||||
|
||||
- template: run-remote-task.yml
|
||||
parameters:
|
||||
name: PyTests
|
||||
displayName: Run pytests
|
||||
remoteScript: |
|
||||
mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x GPU_MAX_HW_QUEUES=8 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x
|
||||
|
||||
- template: stop.yml
|
||||
parameters:
|
||||
subscription: ${{ parameters.subscription }}
|
||||
vmssName: ${{ parameters.vmssName }}
|
||||
@@ -1,50 +0,0 @@
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- main
|
||||
- release/*
|
||||
paths:
|
||||
exclude:
|
||||
- .devcontainer/**
|
||||
- .github/**
|
||||
- apps/**
|
||||
- docker/**
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
pr:
|
||||
branches:
|
||||
include:
|
||||
- main
|
||||
- release/*
|
||||
drafts: false
|
||||
paths:
|
||||
exclude:
|
||||
- .devcontainer/**
|
||||
- .github/**
|
||||
- apps/**
|
||||
- docker/**
|
||||
- docs/**
|
||||
- '**/*.md'
|
||||
|
||||
jobs:
|
||||
- job: UnitTestMI300X
|
||||
timeoutInMinutes: 40
|
||||
pool:
|
||||
name: msccl-ci-mi300x
|
||||
strategy:
|
||||
matrix:
|
||||
rocm6_2:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut.yaml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-mi300x
|
||||
vmssName: mscclpp-mi300x-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
platform: rocm
|
||||
gpuArch: gfx942
|
||||
@@ -37,17 +37,16 @@ jobs:
|
||||
cuda11:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut.yaml
|
||||
- template: templates/ut.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci
|
||||
vmssName: mscclpp-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '80'
|
||||
|
||||
- job: UnitTestWithNpKitA100
|
||||
@@ -59,17 +58,16 @@ jobs:
|
||||
cuda11:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut-npkit.yaml
|
||||
- template: templates/ut-npkit.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci
|
||||
vmssName: mscclpp-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '80'
|
||||
|
||||
- job: UnitTestH100
|
||||
@@ -79,17 +77,16 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut.yaml
|
||||
- template: templates/ut.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '90'
|
||||
|
||||
- job: UnitTestWithNpKitH100
|
||||
@@ -99,17 +96,16 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut-npkit.yaml
|
||||
- template: templates/ut-npkit.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '90'
|
||||
|
||||
- job: UnitTestNoIBEnv
|
||||
@@ -121,15 +117,34 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
cuda12:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut-no-ib-env.yaml
|
||||
- template: templates/ut-no-ib-env.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-h100
|
||||
vmssName: mscclpp-h100-ci
|
||||
sshKeySecureFile: mscclpp.pem
|
||||
gpuArch: '90'
|
||||
|
||||
- job: UnitTestMI300X
|
||||
timeoutInMinutes: 40
|
||||
pool:
|
||||
name: msccl-ci-mi300x
|
||||
strategy:
|
||||
matrix:
|
||||
rocm6_2:
|
||||
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2
|
||||
|
||||
container:
|
||||
image: $(containerImage)
|
||||
|
||||
steps:
|
||||
- template: templates/ut.yml
|
||||
parameters:
|
||||
subscription: mscclpp-ci-mi300x
|
||||
vmssName: mscclpp-mi300x-ci
|
||||
platform: rocm
|
||||
gpuArch: gfx942
|
||||
|
||||
24
.codecov.yml
Normal file
24
.codecov.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
codecov:
|
||||
require_ci_to_pass: yes
|
||||
|
||||
coverage:
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
target: 68%
|
||||
threshold: 1%
|
||||
patch:
|
||||
default:
|
||||
target: 80%
|
||||
|
||||
flag_management:
|
||||
default_rules:
|
||||
carryforward: true
|
||||
|
||||
ignore:
|
||||
- "test/"
|
||||
- "examples/"
|
||||
- "python/"
|
||||
- "tools/"
|
||||
- "docs/"
|
||||
- "docker/"
|
||||
6
.github/workflows/codeql-analysis.yml
vendored
6
.github/workflows/codeql-analysis.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'cpp', 'python' ]
|
||||
version: [ 'cuda11.8', 'cuda12.8' ]
|
||||
version: [ 'cuda11.8', 'cuda12.9' ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
rm -rf build && mkdir build && cd build
|
||||
cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
|
||||
cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=OFF ..
|
||||
make -j4
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
@@ -107,7 +107,7 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
rm -rf build && mkdir build && cd build
|
||||
CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON ..
|
||||
CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_BUILD_TESTS=OFF ..
|
||||
make -j4
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
|
||||
69
.github/workflows/integration-test-backup.yml
vendored
69
.github/workflows/integration-test-backup.yml
vendored
@@ -1,69 +0,0 @@
|
||||
name: IntegrationTest
|
||||
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
IntegrationTest:
|
||||
runs-on: [ self-hosted, A100 ]
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
cuda: [ cuda11.8, cuda12.2 ]
|
||||
|
||||
container:
|
||||
image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}"
|
||||
options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
make -j
|
||||
|
||||
- name: Lock GPU clock frequency
|
||||
run: |
|
||||
sudo nvidia-smi -pm 1
|
||||
for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do
|
||||
sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i
|
||||
done
|
||||
|
||||
- name: Run mscclpp AllGather test
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
|
||||
|
||||
- name: Run mscclpp SendRecv test
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
|
||||
- name: Run mscclpp AllReduce test
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl
|
||||
|
||||
- name: Run mscclpp AllToAll test
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl
|
||||
mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
|
||||
|
||||
- name: Check collective primitives performance
|
||||
run: |
|
||||
set -e
|
||||
python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file test/deploy/perf_ndmv4.jsonl
|
||||
2
.github/workflows/mscclpp-lang.yml
vendored
2
.github/workflows/mscclpp-lang.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: [ 'cuda11.8', 'cuda12.8' ]
|
||||
version: [ 'cuda11.8', 'cuda12.9' ]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
52
.github/workflows/ut-backup.yml
vendored
52
.github/workflows/ut-backup.yml
vendored
@@ -1,52 +0,0 @@
|
||||
name: UnitTest
|
||||
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
UnitTest:
|
||||
runs-on: [ self-hosted, A100 ]
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
cuda: [ cuda11.8, cuda12.2 ]
|
||||
|
||||
container:
|
||||
image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}"
|
||||
options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
make -j
|
||||
working-directory: ${{ github.workspace }}
|
||||
|
||||
- name: LockGPUClock
|
||||
run: |
|
||||
sudo nvidia-smi -pm 1
|
||||
for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do
|
||||
sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i
|
||||
done
|
||||
|
||||
- name: UnitTests
|
||||
run: |
|
||||
./build/bin/unit_tests
|
||||
|
||||
- name: MpUnitTests
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests
|
||||
mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests
|
||||
|
||||
- name: PyTests
|
||||
run: |
|
||||
set -e
|
||||
mpirun --allow-run-as-root -tag-output -np 8 $(which pytest) ./python/test/test_mscclpp.py -x
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
.vscode/
|
||||
build/
|
||||
build_coverage/
|
||||
__pycache__
|
||||
.*.swp
|
||||
*.so
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
cmake_minimum_required(VERSION 3.25)
|
||||
project(mscclpp LANGUAGES CXX)
|
||||
@@ -56,6 +56,7 @@ option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF)
|
||||
option(MSCCLPP_USE_IB "Use InfiniBand." ON)
|
||||
option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF)
|
||||
option(MSCCLPP_NPKIT_FLAGS "Set NPKIT flags" OFF)
|
||||
option(MSCCLPP_ENABLE_COVERAGE "Enable code coverage" OFF)
|
||||
option(MSCCLPP_DISABLE_NB_LEAK_WARNINGS "Disable Nanobind leak warnings" ON)
|
||||
set(MSCCLPP_GPU_ARCHS "" CACHE STRING "Specify GPU architectures with delimiters (comma, space, or semicolon).")
|
||||
|
||||
@@ -99,6 +100,62 @@ else()
|
||||
message(FATAL_ERROR "No compatible GPU found. Set MSCCLPP_USE_CUDA or MSCCLPP_USE_ROCM to ON.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Code coverage setup
|
||||
if(MSCCLPP_ENABLE_COVERAGE)
|
||||
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
message(WARNING "Code coverage results with an optimized (non-Debug) build may be misleading")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
||||
message(STATUS "Code coverage enabled")
|
||||
|
||||
# Add coverage flags to C++ targets only (not CUDA)
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:--coverage>)
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-O0>)
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-g>)
|
||||
add_link_options($<$<LINK_LANGUAGE:CXX>:--coverage>)
|
||||
|
||||
# Find lcov
|
||||
find_program(LCOV_PATH lcov)
|
||||
|
||||
if(NOT LCOV_PATH)
|
||||
message(WARNING "lcov not found. Install lcov to generate coverage reports.")
|
||||
endif()
|
||||
|
||||
if(LCOV_PATH)
|
||||
# Add coverage target
|
||||
add_custom_target(coverage
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Removing old coverage data..."
|
||||
COMMAND ${LCOV_PATH} --directory . --zerocounters
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Running tests..."
|
||||
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Collecting coverage data..."
|
||||
COMMAND ${LCOV_PATH} --directory . --capture --output-file coverage.info
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Filtering coverage data..."
|
||||
COMMAND ${LCOV_PATH} --remove coverage.info '/usr/*' '*/test/*' '*/build/*' --output-file coverage.info
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Coverage report generated in coverage.info"
|
||||
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Generating code coverage report"
|
||||
)
|
||||
|
||||
# Add coverage clean target
|
||||
add_custom_target(coverage-clean
|
||||
COMMAND ${CMAKE_COMMAND} -E remove coverage.info
|
||||
COMMAND ${LCOV_PATH} --directory . --zerocounters
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Cleaning coverage data"
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "Code coverage is only supported with GCC or Clang compilers")
|
||||
endif()
|
||||
endif()
|
||||
if(MSCCLPP_GPU_ARCHS)
|
||||
string(STRIP "${MSCCLPP_GPU_ARCHS}" MSCCLPP_GPU_ARCHS)
|
||||
string(REPLACE " " ";" MSCCLPP_GPU_ARCHS "${MSCCLPP_GPU_ARCHS}")
|
||||
|
||||
13
README.md
13
README.md
@@ -3,15 +3,16 @@
|
||||
[](https://github.com/microsoft/mscclpp/releases/latest)
|
||||
[](LICENSE)
|
||||
[](https://github.com/microsoft/mscclpp/actions/workflows/codeql-analysis.yml)
|
||||
[](https://microsoft.github.io/mscclpp/)
|
||||
[](https://microsoft.github.io/mscclpp/)
|
||||
[](https://codecov.io/gh/microsoft/mscclpp)
|
||||
|
||||
| Testing Pipelines | Build Status |
|
||||
|--------------------------|-------------------|
|
||||
| Unit Tests (CUDA) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) |
|
||||
| Integration Tests (CUDA) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=398479&branchName=main) |
|
||||
| Unit Tests (ROCm) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=399295&branchName=main) |
|
||||
| NCCL Tests | [](https://dev.azure.com/msazure/One/_build/latest?definitionId=320665&branchName=main) |
|
||||
| RCCL Tests | [](https://dev.azure.com/msazure/One/_build/latest?definitionId=448013&branchName=main) |
|
||||
| Unit Tests (CUDA) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) |
|
||||
| Unit Tests (ROCm) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) |
|
||||
| Integration Tests (CUDA) | [](https://msazure.visualstudio.com/One/_build/latest?definitionId=398479&branchName=main) |
|
||||
| NCCL Tests | [)](https://msazure.visualstudio.com/One/_build/latest?definitionId=320665&repoName=microsoft%2Fmscclpp&branchName=main) |
|
||||
| RCCL Tests | [)](https://msazure.visualstudio.com/One/_build/latest?definitionId=448013&branchName=main) |
|
||||
|
||||
A GPU-driven communication stack for scalable AI applications.
|
||||
|
||||
|
||||
@@ -7,13 +7,38 @@ LABEL org.opencontainers.image.source=https://github.com/microsoft/mscclpp
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
htop \
|
||||
lcov \
|
||||
vim \
|
||||
&& \
|
||||
apt-get autoremove -y && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
|
||||
# Install lcov 2.2
|
||||
RUN LCOV_VERSION="2.2" && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cpanminus \
|
||||
gcc \
|
||||
make \
|
||||
perl \
|
||||
&& \
|
||||
cpanm --notest \
|
||||
Capture::Tiny \
|
||||
DateTime \
|
||||
JSON::XS \
|
||||
Memory::Process \
|
||||
TimeDate \
|
||||
&& \
|
||||
cd /tmp && \
|
||||
curl -L https://github.com/linux-test-project/lcov/releases/download/v${LCOV_VERSION}/lcov-${LCOV_VERSION}.tar.gz -o lcov.tar.gz && \
|
||||
tar xzf lcov.tar.gz && \
|
||||
cd lcov-${LCOV_VERSION} && \
|
||||
make install && \
|
||||
cd / && rm -rf /tmp/lcov* && \
|
||||
apt-get autoremove -y && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
|
||||
# Install CMake 3.26.4
|
||||
RUN OS_ARCH=$(uname -m) && \
|
||||
CMAKE_VERSION="3.26.4" && \
|
||||
@@ -47,7 +72,8 @@ RUN target_type=$(echo $TARGET | sed 's/\.[0-9]*$//') && \
|
||||
export CUPY_INSTALL_USE_HIP=1 && export ROCM_HOME=/opt/rocm; \
|
||||
fi && \
|
||||
pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r python/requirements_${target_type}.txt
|
||||
pip install --no-cache-dir -r python/requirements_${target_type}.txt && \
|
||||
pip install --no-cache-dir coverage xlsxwriter
|
||||
|
||||
# Cleanup
|
||||
RUN rm -rf /tmp/mscclpp
|
||||
|
||||
@@ -4,27 +4,27 @@ set -e
|
||||
|
||||
declare -A baseImageTable
|
||||
baseImageTable=(
|
||||
["cuda11.8"]="nvidia/cuda:11.8.0-devel-ubuntu20.04"
|
||||
["cuda12.1"]="nvidia/cuda:12.1.1-devel-ubuntu20.04"
|
||||
["cuda12.2"]="nvidia/cuda:12.2.2-devel-ubuntu20.04"
|
||||
["cuda12.3"]="nvidia/cuda:12.3.2-devel-ubuntu20.04"
|
||||
["cuda11.8"]="nvidia/cuda:11.8.0-devel-ubuntu22.04"
|
||||
["cuda12.4"]="nvidia/cuda:12.4.1-devel-ubuntu22.04"
|
||||
["cuda12.8"]="nvidia/cuda:12.8.1-devel-ubuntu22.04"
|
||||
["cuda12.9"]="nvidia/cuda:12.9.1-devel-ubuntu22.04"
|
||||
["cuda12.9"]="nvidia/cuda:12.9.1-devel-ubuntu24.04"
|
||||
["cuda13.0"]="nvidia/cuda:13.0.2-devel-ubuntu24.04"
|
||||
["rocm6.2"]="rocm/dev-ubuntu-22.04:6.2.2"
|
||||
)
|
||||
|
||||
declare -A extraLdPathTable
|
||||
extraLdPathTable=(
|
||||
["cuda12.1"]="/usr/local/cuda-12.1/compat:/usr/local/cuda-12.1/lib64"
|
||||
["cuda12.2"]="/usr/local/cuda-12.2/compat:/usr/local/cuda-12.2/lib64"
|
||||
["cuda12.3"]="/usr/local/cuda-12.3/compat:/usr/local/cuda-12.3/lib64"
|
||||
["cuda11.8"]="/usr/local/cuda-11.8/compat"
|
||||
["cuda12.4"]="/usr/local/cuda-12.4/compat"
|
||||
["cuda12.8"]="/usr/local/cuda-12.8/compat"
|
||||
["cuda12.9"]="/usr/local/cuda-12.9/compat"
|
||||
["cuda13.0"]="/usr/local/cuda-13.0/compat"
|
||||
["rocm6.2"]="/opt/rocm/lib"
|
||||
)
|
||||
|
||||
declare -A ofedVersionTable
|
||||
ofedVersionTable=(
|
||||
["cuda11.8"]="23.07-0.5.1.2"
|
||||
["cuda12.4"]="23.07-0.5.1.2"
|
||||
["cuda12.8"]="24.10-1.1.4.0"
|
||||
["cuda12.9"]="24.10-1.1.4.0"
|
||||
@@ -36,7 +36,7 @@ TARGET=${1}
|
||||
OS_ARCH=$(uname -m)
|
||||
|
||||
print_usage() {
|
||||
echo "Usage: $0 [cuda11.8|cuda12.1|cuda12.2|cuda12.3|cuda12.4|cuda12.8|cuda12.9|cuda13.0|rocm6.2]"
|
||||
echo "Usage: $0 [cuda11.8|cuda12.4|cuda12.8|cuda12.9|cuda13.0|rocm6.2]"
|
||||
}
|
||||
|
||||
if [[ ! -v "baseImageTable[${TARGET}]" ]]; then
|
||||
|
||||
@@ -42,7 +42,7 @@ We provide docker images which package all prerequisites for MSCCL++. You can se
|
||||
|
||||
```bash
|
||||
# For NVIDIA platforms
|
||||
$ docker run -it --privileged --net=host --ipc=host --gpus all --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.8 bash
|
||||
$ docker run -it --privileged --net=host --ipc=host --gpus all --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 bash
|
||||
# For AMD platforms
|
||||
$ docker run -it --privileged --net=host --ipc=host --security-opt=seccomp=unconfined --group-add=video --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2 bash
|
||||
```
|
||||
@@ -171,7 +171,6 @@ We implement [NCCL](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/ap
|
||||
For example, you can run [nccl-tests](https://github.com/NVIDIA/nccl-tests) using `libmscclpp_nccl.so` as follows, where `MSCCLPP_BUILD` is your MSCCL++ build directory.
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH;
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so ./build/all_reduce_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50
|
||||
```
|
||||
|
||||
@@ -189,13 +188,11 @@ By default, if the parameter `MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION` is not spec
|
||||
|
||||
Example 1, Allreduce will fallback to NCCL ncclAllReduce since allreduce is in the fallback list.
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH;
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=$NCCL_BUILD/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce,allgather" ./build/all_reduce_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50
|
||||
```
|
||||
|
||||
Example 2, ReduceScatter will still use msccl++ implementation since reducescatter is not in the fallbacklist.
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH;
|
||||
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=$NCCL_BUILD/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" ./build/reduce_scatter_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50
|
||||
```
|
||||
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
find_package(MPI)
|
||||
find_package(MPI REQUIRED)
|
||||
|
||||
set(TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads)
|
||||
if(MSCCLPP_USE_IB)
|
||||
list(APPEND TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
|
||||
endif()
|
||||
set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main)
|
||||
set(TEST_INC_COMMON PRIVATE ${PROJECT_SOURCE_DIR}/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
|
||||
set(TEST_INC_INTERNAL PRIVATE ${PROJECT_SOURCE_DIR}/src/core/include)
|
||||
|
||||
@@ -17,6 +16,7 @@ if(MSCCLPP_USE_ROCM)
|
||||
foreach(arch ${MSCCLPP_GPU_ARCHS})
|
||||
add_compile_options(--offload-arch=${arch})
|
||||
endforeach()
|
||||
add_compile_definitions(__HIP_PLATFORM_AMD__)
|
||||
endif()
|
||||
|
||||
function(add_test_executable name sources)
|
||||
@@ -38,28 +38,25 @@ add_test_executable(executor_test executor_test.cc)
|
||||
configure_file(run_mpi_test.sh.in run_mpi_test.sh)
|
||||
|
||||
include(CTest)
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(googletest URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip)
|
||||
option(INSTALL_GTEST OFF)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
include(GoogleTest)
|
||||
|
||||
# Build test framework library
|
||||
add_library(test_framework STATIC framework.cc)
|
||||
target_include_directories(test_framework PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${TEST_INC_COMMON})
|
||||
target_link_libraries(test_framework PUBLIC MPI::MPI_CXX)
|
||||
|
||||
# Unit tests
|
||||
add_executable(unit_tests)
|
||||
target_link_libraries(unit_tests ${TEST_LIBS_COMMON} ${TEST_LIBS_GTEST})
|
||||
target_link_libraries(unit_tests ${TEST_LIBS_COMMON} test_framework)
|
||||
target_include_directories(unit_tests ${TEST_INC_COMMON} ${TEST_INC_INTERNAL})
|
||||
add_subdirectory(unit)
|
||||
gtest_discover_tests(unit_tests DISCOVERY_MODE PRE_TEST)
|
||||
add_test(NAME unit_tests COMMAND unit_tests)
|
||||
|
||||
# Multi-process unit tests
|
||||
add_executable(mp_unit_tests)
|
||||
target_link_libraries(mp_unit_tests ${TEST_LIBS_COMMON} ${TEST_LIBS_GTEST} MPI::MPI_CXX)
|
||||
target_link_libraries(mp_unit_tests ${TEST_LIBS_COMMON} test_framework MPI::MPI_CXX)
|
||||
target_include_directories(mp_unit_tests ${TEST_INC_COMMON} ${TEST_INC_INTERNAL})
|
||||
add_subdirectory(mp_unit)
|
||||
gtest_discover_tests(mp_unit_tests DISCOVERY_MODE PRE_TEST)
|
||||
add_test(NAME mp_unit_tests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/run_mpi_test.sh mp_unit_tests 2)
|
||||
|
||||
# mscclpp-test
|
||||
add_subdirectory(mscclpp-test)
|
||||
|
||||
# Performance tests
|
||||
add_subdirectory(perf)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
TEST_NAME=$1
|
||||
IB_ENVIRONMENT="${2:-true}"
|
||||
|
||||
107
test/deploy/run-remote.sh
Executable file
107
test/deploy/run-remote.sh
Executable file
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
|
||||
# Run a command on remote CI VMs via parallel-ssh.
|
||||
# By default, runs inside the mscclpp-test docker container.
|
||||
#
|
||||
# Usage:
|
||||
# run-remote.sh [OPTIONS] < <command_script>
|
||||
#
|
||||
# Options:
|
||||
# --no-docker Run command directly on the host, not inside docker
|
||||
# --no-log Don't tail the log file in the background
|
||||
# --hostfile Override hostfile path (default: test/deploy/hostfile_ci)
|
||||
# --host Run command on a single host (uses parallel-ssh -H)
|
||||
# --user SSH user when using --host or custom hostfile
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HOSTFILE="${SCRIPT_DIR}/hostfile_ci"
|
||||
SSH_OPTION="StrictHostKeyChecking=no"
|
||||
KeyFilePath="${SSHKEYFILE_SECUREFILEPATH}"
|
||||
|
||||
USE_DOCKER=true
|
||||
USE_LOG=true
|
||||
TARGET_HOST=""
|
||||
REMOTE_USER=""
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [--no-docker] [--no-log] [--hostfile <path>] [--host <name>] [--user <name>] < <command_script>" >&2
|
||||
}
|
||||
|
||||
require_value() {
|
||||
local opt="$1"
|
||||
local val="$2"
|
||||
if [ -z "$val" ]; then
|
||||
echo "Missing value for ${opt}" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
while [[ "$1" == --* ]]; do
|
||||
case "$1" in
|
||||
--no-docker) USE_DOCKER=false; shift ;;
|
||||
--no-log) USE_LOG=false; shift ;;
|
||||
--hostfile)
|
||||
require_value "--hostfile" "${2-}"
|
||||
HOSTFILE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--host)
|
||||
require_value "--host" "${2-}"
|
||||
TARGET_HOST="$2"
|
||||
shift 2
|
||||
;;
|
||||
--user)
|
||||
require_value "--user" "${2-}"
|
||||
REMOTE_USER="$2"
|
||||
shift 2
|
||||
;;
|
||||
*) echo "Unknown option: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# -ne 0 ] || [ -t 0 ]; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CMD=$(cat)
|
||||
if [ -z "$CMD" ]; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
CMD_B64=$(printf '%s' "$CMD" | base64 | tr -d '\n')
|
||||
|
||||
PSSH_TARGET_ARGS=()
|
||||
if [ -n "$TARGET_HOST" ]; then
|
||||
PSSH_TARGET_ARGS=(-H "$TARGET_HOST")
|
||||
else
|
||||
PSSH_TARGET_ARGS=(-h "$HOSTFILE")
|
||||
fi
|
||||
|
||||
PSSH_USER_ARGS=()
|
||||
if [ -n "$REMOTE_USER" ]; then
|
||||
PSSH_USER_ARGS=(-l "$REMOTE_USER")
|
||||
fi
|
||||
|
||||
PSSH_COMMON=(
|
||||
-t 0
|
||||
"${PSSH_TARGET_ARGS[@]}"
|
||||
"${PSSH_USER_ARGS[@]}"
|
||||
-x "-i ${KeyFilePath}"
|
||||
-O "$SSH_OPTION"
|
||||
)
|
||||
|
||||
if $USE_DOCKER; then
|
||||
INNER="set -euxo pipefail;"
|
||||
INNER+=" cd /root/mscclpp;"
|
||||
INNER+=" export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\\\$LD_LIBRARY_PATH;"
|
||||
INNER+=" CMD_B64='${CMD_B64}';"
|
||||
INNER+=" printf '%s' \\\"\\\$CMD_B64\\\" | base64 -d | bash -euxo pipefail"
|
||||
|
||||
parallel-ssh -i "${PSSH_COMMON[@]}" \
|
||||
"sudo docker exec mscclpp-test bash -c \"${INNER}\""
|
||||
else
|
||||
parallel-ssh -i "${PSSH_COMMON[@]}" \
|
||||
"set -euxo pipefail; CMD_B64='${CMD_B64}'; printf '%s' \"\$CMD_B64\" | base64 -d | bash -euxo pipefail"
|
||||
fi
|
||||
@@ -1,6 +1,5 @@
|
||||
set -e
|
||||
HOSTFILE=/root/mscclpp/test/deploy/hostfile_mpi
|
||||
export PATH=/usr/local/mpi/bin:$PATH
|
||||
|
||||
function run_mscclpp_test()
|
||||
{
|
||||
|
||||
@@ -93,11 +93,8 @@ double benchTime(int rank, std::shared_ptr<mscclpp::Bootstrap> bootstrap, std::s
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 5 && argc != 6) {
|
||||
std::cerr << "Usage: " << argv[0] << " <buffer size>"
|
||||
<< " <execution plan path>"
|
||||
<< " <number of iterations>"
|
||||
<< " <number of graph iterations>"
|
||||
<< " (optional) <packet type>" << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <buffer size> <execution plan path>"
|
||||
<< " <number of iterations> <number of graph iterations> (optional) <packet type>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -142,7 +139,8 @@ int main(int argc, char* argv[]) {
|
||||
NpKit::Shutdown();
|
||||
}
|
||||
|
||||
std::cout << "Rank " << rank << ": " << bufferSize << " bytes " << deltaSec * 1.e6 << " us" << std::endl;
|
||||
double latencyUs = deltaSec * 1.e6;
|
||||
std::cout << "Rank " << rank << ": " << bufferSize << " bytes " << latencyUs << " us" << std::endl;
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
323
test/framework.cc
Normal file
323
test/framework.cc
Normal file
@@ -0,0 +1,323 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "framework.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace mscclpp {
|
||||
namespace test {
|
||||
|
||||
// Global state
|
||||
static int gMpiRank = 0;
|
||||
static int gMpiSize = 1;
|
||||
static bool gMpiInitialized = false;
|
||||
static bool gCurrentTestPassed = true;
|
||||
static std::string gCurrentTestFailureMessage;
|
||||
static std::string gCurrentTestName;
|
||||
|
||||
std::string currentTestName() { return gCurrentTestName; }
|
||||
|
||||
namespace utils {
|
||||
|
||||
void initializeMPI(int argc, char* argv[]) {
|
||||
if (gMpiInitialized) return;
|
||||
|
||||
int initialized = 0;
|
||||
MPI_Initialized(&initialized);
|
||||
if (!initialized) {
|
||||
MPI_Init(&argc, &argv);
|
||||
}
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &gMpiRank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &gMpiSize);
|
||||
gMpiInitialized = true;
|
||||
}
|
||||
|
||||
static void finalizeMPI() {
|
||||
if (!gMpiInitialized) return;
|
||||
|
||||
MPI_Finalize();
|
||||
gMpiInitialized = false;
|
||||
}
|
||||
|
||||
bool isMainRank() { return gMpiRank == 0; }
|
||||
|
||||
int getMPIRank() { return gMpiRank; }
|
||||
|
||||
int getMPISize() { return gMpiSize; }
|
||||
|
||||
void cleanupMPI() { finalizeMPI(); }
|
||||
|
||||
void reportFailure(const char* file, int line, const std::string& message) {
|
||||
gCurrentTestPassed = false;
|
||||
std::ostringstream oss;
|
||||
oss << file << ":" << line << ": " << message;
|
||||
if (!gCurrentTestFailureMessage.empty()) {
|
||||
gCurrentTestFailureMessage += "\n";
|
||||
}
|
||||
gCurrentTestFailureMessage += oss.str();
|
||||
std::cerr << oss.str() << std::endl;
|
||||
}
|
||||
|
||||
void reportSuccess() {
|
||||
gCurrentTestPassed = true;
|
||||
gCurrentTestFailureMessage.clear();
|
||||
}
|
||||
|
||||
// Timer implementation
|
||||
Timer::Timer() : isRunning_(false) {}
|
||||
|
||||
void Timer::start() {
|
||||
startTime_ = std::chrono::high_resolution_clock::now();
|
||||
isRunning_ = true;
|
||||
}
|
||||
|
||||
void Timer::stop() {
|
||||
endTime_ = std::chrono::high_resolution_clock::now();
|
||||
isRunning_ = false;
|
||||
}
|
||||
|
||||
double Timer::elapsedMicroseconds() const {
|
||||
if (isRunning_) {
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(now - startTime_).count();
|
||||
}
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(endTime_ - startTime_).count();
|
||||
}
|
||||
|
||||
double Timer::elapsedMilliseconds() const { return elapsedMicroseconds() / 1000.0; }
|
||||
|
||||
double Timer::elapsedSeconds() const { return elapsedMicroseconds() / 1000000.0; }
|
||||
|
||||
void cudaCheck(cudaError_t err, const char* file, int line) {
|
||||
if (err != cudaSuccess) {
|
||||
std::string msg =
|
||||
std::string("CUDA error at ") + file + ":" + std::to_string(line) + " - " + cudaGetErrorString(err);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
|
||||
// TestRegistry implementation
|
||||
TestRegistry& TestRegistry::instance() {
|
||||
static TestRegistry registry;
|
||||
return registry;
|
||||
}
|
||||
|
||||
void TestRegistry::registerTest(const std::string& suiteName, const std::string& testName, TestFactory factory,
|
||||
bool isPerfTest) {
|
||||
tests_.push_back({suiteName, testName, std::move(factory), isPerfTest});
|
||||
}
|
||||
|
||||
void TestRegistry::addEnvironment(Environment* env) { environments_.push_back(env); }
|
||||
|
||||
// Returns true if the test should run given the filter string.
|
||||
// Filter syntax:
|
||||
// "" -> run all
|
||||
// "Pattern" -> run only tests whose full name contains Pattern
|
||||
// "-Pattern" -> run all tests EXCEPT those whose full name contains Pattern
|
||||
static bool matchesFilter(const std::string& fullName, const std::string& filter) {
|
||||
if (filter.empty()) return true;
|
||||
if (filter[0] == '-') {
|
||||
// Negative filter: exclude tests matching any comma-separated pattern
|
||||
std::string patterns = filter.substr(1);
|
||||
size_t pos = 0;
|
||||
while (pos < patterns.size()) {
|
||||
size_t comma = patterns.find(',', pos);
|
||||
std::string pattern = (comma == std::string::npos) ? patterns.substr(pos) : patterns.substr(pos, comma - pos);
|
||||
if (!pattern.empty() && fullName.find(pattern) != std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
pos = (comma == std::string::npos) ? patterns.size() : comma + 1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Positive filter: include only matching tests
|
||||
return fullName.find(filter) != std::string::npos;
|
||||
}
|
||||
|
||||
int TestRegistry::runAllTests(int argc, char* argv[]) {
|
||||
// Initialize MPI if not already initialized
|
||||
if (!gMpiInitialized) {
|
||||
utils::initializeMPI(argc, argv);
|
||||
}
|
||||
|
||||
// Parse command line arguments
|
||||
std::string filter;
|
||||
bool excludePerfTests = false;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
std::string arg = argv[i];
|
||||
if (arg.find("--filter=") == 0) {
|
||||
filter = arg.substr(9); // Length of "--filter="
|
||||
} else if (arg == "--filter" && i + 1 < argc) {
|
||||
filter = argv[i + 1];
|
||||
++i;
|
||||
} else if (arg == "--exclude-perf-tests") {
|
||||
excludePerfTests = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Set up global test environments
|
||||
for (auto* env : environments_) {
|
||||
try {
|
||||
env->SetUp();
|
||||
} catch (const std::exception& e) {
|
||||
if (gMpiRank == 0) {
|
||||
std::cerr << "Failed to set up test environment: " << e.what() << std::endl;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int passed = 0;
|
||||
int failed = 0;
|
||||
int skipped = 0;
|
||||
|
||||
// Count tests to run
|
||||
int totalToRun = 0;
|
||||
int skippedByFilter = 0;
|
||||
for (const auto& entry : tests_) {
|
||||
std::string fullName = entry.suiteName + "." + entry.testName;
|
||||
if (excludePerfTests && entry.isPerfTest) {
|
||||
skippedByFilter++;
|
||||
continue;
|
||||
}
|
||||
if (!matchesFilter(fullName, filter)) {
|
||||
skippedByFilter++;
|
||||
continue;
|
||||
}
|
||||
totalToRun++;
|
||||
}
|
||||
|
||||
if (gMpiRank == 0) {
|
||||
std::cout << "[==========] Running " << totalToRun << " tests";
|
||||
if (skippedByFilter > 0) {
|
||||
std::cout << " (" << skippedByFilter << " skipped by filter)";
|
||||
}
|
||||
std::cout << ".\n";
|
||||
}
|
||||
|
||||
for (const auto& entry : tests_) {
|
||||
std::string fullName = entry.suiteName + "." + entry.testName;
|
||||
|
||||
if (excludePerfTests && entry.isPerfTest) continue;
|
||||
if (!matchesFilter(fullName, filter)) continue;
|
||||
|
||||
gCurrentTestPassed = true;
|
||||
gCurrentTestFailureMessage.clear();
|
||||
gCurrentTestName = fullName;
|
||||
|
||||
if (gMpiRank == 0) {
|
||||
std::cout << "[ RUN ] " << fullName << std::endl;
|
||||
}
|
||||
|
||||
TestCase* testCase = nullptr;
|
||||
bool testSkipped = false;
|
||||
bool setUpSucceeded = false;
|
||||
try {
|
||||
testCase = entry.factory();
|
||||
testCase->SetUp();
|
||||
setUpSucceeded = true;
|
||||
testCase->TestBody();
|
||||
} catch (const SkipException& e) {
|
||||
gCurrentTestPassed = true;
|
||||
testSkipped = true;
|
||||
if (gMpiRank == 0) {
|
||||
std::cout << "[ SKIPPED ] " << fullName << ": " << e.what() << std::endl;
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
gCurrentTestPassed = false;
|
||||
if (gCurrentTestFailureMessage.empty()) {
|
||||
gCurrentTestFailureMessage = e.what();
|
||||
}
|
||||
} catch (...) {
|
||||
gCurrentTestPassed = false;
|
||||
if (gCurrentTestFailureMessage.empty()) {
|
||||
gCurrentTestFailureMessage = "Unknown exception";
|
||||
}
|
||||
}
|
||||
|
||||
// Always call TearDown() if SetUp() succeeded, even if TestBody() threw
|
||||
if (setUpSucceeded && testCase != nullptr) {
|
||||
try {
|
||||
testCase->TearDown();
|
||||
} catch (const std::exception& e) {
|
||||
// If test already failed, keep original failure message
|
||||
if (gCurrentTestPassed) {
|
||||
gCurrentTestPassed = false;
|
||||
gCurrentTestFailureMessage = std::string("TearDown() failed: ") + e.what();
|
||||
}
|
||||
} catch (...) {
|
||||
if (gCurrentTestPassed) {
|
||||
gCurrentTestPassed = false;
|
||||
gCurrentTestFailureMessage = "TearDown() failed with unknown exception";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete testCase;
|
||||
gCurrentTestName.clear();
|
||||
|
||||
if (testSkipped) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Synchronize test status across all MPI processes
|
||||
int localPassed = gCurrentTestPassed ? 1 : 0;
|
||||
int globalPassed = 1;
|
||||
if (gMpiInitialized) {
|
||||
MPI_Allreduce(&localPassed, &globalPassed, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
|
||||
} else {
|
||||
globalPassed = localPassed;
|
||||
}
|
||||
|
||||
if (gMpiRank == 0) {
|
||||
if (globalPassed) {
|
||||
std::cout << "[ OK ] " << fullName << std::endl;
|
||||
passed++;
|
||||
} else {
|
||||
std::cout << "[ FAILED ] " << fullName << std::endl;
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (gMpiRank == 0) {
|
||||
std::cout << "[==========] " << totalToRun << " tests ran.\n";
|
||||
if (passed > 0) {
|
||||
std::cout << "[ PASSED ] " << passed << " tests.\n";
|
||||
}
|
||||
if (skipped > 0) {
|
||||
std::cout << "[ SKIPPED ] " << skipped << " tests.\n";
|
||||
}
|
||||
if (failed > 0) {
|
||||
std::cout << "[ FAILED ] " << failed << " tests.\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Tear down global test environments (in reverse order)
|
||||
for (auto it = environments_.rbegin(); it != environments_.rend(); ++it) {
|
||||
try {
|
||||
(*it)->TearDown();
|
||||
delete *it;
|
||||
} catch (const std::exception& e) {
|
||||
if (gMpiRank == 0) {
|
||||
std::cerr << "Failed to tear down test environment: " << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
environments_.clear();
|
||||
|
||||
return failed > 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace mscclpp
|
||||
405
test/framework.hpp
Normal file
405
test/framework.hpp
Normal file
@@ -0,0 +1,405 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#ifndef MSCCLPP_TEST_FRAMEWORK_HPP_
|
||||
#define MSCCLPP_TEST_FRAMEWORK_HPP_
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mscclpp/gpu.hpp>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace mscclpp {
|
||||
namespace test {
|
||||
|
||||
// Test case base class
|
||||
class TestCase {
|
||||
public:
|
||||
virtual ~TestCase() = default;
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {}
|
||||
virtual void TestBody() = 0;
|
||||
};
|
||||
|
||||
// Environment base class (for global test setup/teardown)
|
||||
class Environment {
|
||||
public:
|
||||
virtual ~Environment() = default;
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {}
|
||||
};
|
||||
|
||||
// Test registry and runner
|
||||
class TestRegistry {
|
||||
public:
|
||||
using TestFactory = std::function<TestCase*()>;
|
||||
|
||||
static TestRegistry& instance();
|
||||
|
||||
void registerTest(const std::string& suiteName, const std::string& testName, TestFactory factory,
|
||||
bool isPerfTest = false);
|
||||
void addEnvironment(Environment* env);
|
||||
int runAllTests(int argc, char* argv[]);
|
||||
|
||||
private:
|
||||
TestRegistry() = default;
|
||||
struct TestEntry {
|
||||
std::string suiteName;
|
||||
std::string testName;
|
||||
TestFactory factory;
|
||||
bool isPerfTest;
|
||||
};
|
||||
std::vector<TestEntry> tests_;
|
||||
std::vector<Environment*> environments_;
|
||||
};
|
||||
|
||||
// Returns "Suite.Name" for the currently running test, or "" if none.
|
||||
std::string currentTestName();
|
||||
|
||||
// Utility functions
|
||||
namespace utils {
|
||||
|
||||
// MPI management
|
||||
void initializeMPI(int argc, char* argv[]);
|
||||
void cleanupMPI();
|
||||
bool isMainRank();
|
||||
int getMPIRank();
|
||||
int getMPISize();
|
||||
|
||||
// Timing utilities
|
||||
class Timer {
|
||||
public:
|
||||
Timer();
|
||||
void start();
|
||||
void stop();
|
||||
double elapsedMicroseconds() const;
|
||||
double elapsedMilliseconds() const;
|
||||
double elapsedSeconds() const;
|
||||
|
||||
private:
|
||||
std::chrono::high_resolution_clock::time_point startTime_;
|
||||
std::chrono::high_resolution_clock::time_point endTime_;
|
||||
bool isRunning_;
|
||||
};
|
||||
|
||||
// CUDA utilities
|
||||
void cudaCheck(cudaError_t err, const char* file, int line);
|
||||
#define CUDA_CHECK(call) mscclpp::test::utils::cudaCheck(call, __FILE__, __LINE__)
|
||||
|
||||
// Test assertion helpers
|
||||
void reportFailure(const char* file, int line, const std::string& message);
|
||||
void reportSuccess();
|
||||
|
||||
} // namespace utils
|
||||
|
||||
// Exception for test skips
|
||||
class SkipException : public std::runtime_error {
|
||||
public:
|
||||
explicit SkipException(const std::string& message) : std::runtime_error(message) {}
|
||||
};
|
||||
|
||||
// Helper class for FAIL() macro — supports message streaming via operator<<
|
||||
class FailHelper {
|
||||
public:
|
||||
explicit FailHelper(const char* file, int line) : file_(file), line_(line) {}
|
||||
template <typename T>
|
||||
FailHelper& operator<<(const T& value) {
|
||||
message_ << value;
|
||||
return *this;
|
||||
}
|
||||
~FailHelper() noexcept(false) {
|
||||
std::string msg = message_.str();
|
||||
if (!msg.empty()) {
|
||||
::mscclpp::test::utils::reportFailure(file_, line_, "Test failed: " + msg);
|
||||
} else {
|
||||
::mscclpp::test::utils::reportFailure(file_, line_, "Test failed");
|
||||
}
|
||||
throw std::runtime_error("Test failed");
|
||||
}
|
||||
|
||||
private:
|
||||
const char* file_;
|
||||
int line_;
|
||||
std::ostringstream message_;
|
||||
};
|
||||
|
||||
// Helper class for SKIP_TEST() macro — supports message streaming via operator<<
|
||||
// Usage: SKIP_TEST() << "Reason for skipping";
|
||||
class SkipHelper {
|
||||
public:
|
||||
explicit SkipHelper(const char* file, int line) : file_(file), line_(line) {}
|
||||
template <typename T>
|
||||
SkipHelper& operator<<(const T& value) {
|
||||
message_ << value;
|
||||
return *this;
|
||||
}
|
||||
~SkipHelper() noexcept(false) {
|
||||
std::string msg = message_.str();
|
||||
if (!msg.empty()) {
|
||||
throw SkipException("Test skipped: " + msg);
|
||||
} else {
|
||||
throw SkipException("Test skipped");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const char* file_;
|
||||
int line_;
|
||||
std::ostringstream message_;
|
||||
};
|
||||
|
||||
// SFINAE helper: resolves to T if T is a complete type (user-defined fixture),
|
||||
// otherwise falls back to TestCase. This lets TEST() work with or without a fixture class.
|
||||
namespace detail {
|
||||
template <typename...>
|
||||
using void_t = void;
|
||||
|
||||
template <typename T, typename = void_t<>>
|
||||
struct FixtureOf {
|
||||
using type = TestCase;
|
||||
};
|
||||
template <typename T>
|
||||
struct FixtureOf<T, void_t<decltype(sizeof(T))>> {
|
||||
using type = T;
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
} // namespace test
|
||||
} // namespace mscclpp
|
||||
|
||||
// --- Test registration macros ---
|
||||
// TEST(Suite, Name): if Suite is a previously-defined class, the test inherits from it (fixture).
|
||||
// Otherwise, the test inherits from TestCase (no fixture needed).
|
||||
|
||||
#define TEST(test_fixture, test_name) \
|
||||
class test_fixture; \
|
||||
class test_fixture##_##test_name##_Test : public ::mscclpp::test::detail::FixtureOf<test_fixture>::type { \
|
||||
public: \
|
||||
void TestBody() override; \
|
||||
}; \
|
||||
static bool test_fixture##_##test_name##_registered = []() { \
|
||||
::mscclpp::test::TestRegistry::instance().registerTest( \
|
||||
#test_fixture, #test_name, \
|
||||
[]() -> ::mscclpp::test::TestCase* { return new test_fixture##_##test_name##_Test(); }); \
|
||||
return true; \
|
||||
}(); \
|
||||
void test_fixture##_##test_name##_Test::TestBody()
|
||||
|
||||
#define PERF_TEST(test_fixture, test_name) \
|
||||
class test_fixture; \
|
||||
class test_fixture##_##test_name##_Test : public ::mscclpp::test::detail::FixtureOf<test_fixture>::type { \
|
||||
public: \
|
||||
void TestBody() override; \
|
||||
}; \
|
||||
static bool test_fixture##_##test_name##_registered = []() { \
|
||||
::mscclpp::test::TestRegistry::instance().registerTest( \
|
||||
#test_fixture, #test_name, \
|
||||
[]() -> ::mscclpp::test::TestCase* { return new test_fixture##_##test_name##_Test(); }, true); \
|
||||
return true; \
|
||||
}(); \
|
||||
void test_fixture##_##test_name##_Test::TestBody()
|
||||
|
||||
// --- Test runner macro ---
|
||||
#define RUN_ALL_TESTS() ::mscclpp::test::TestRegistry::instance().runAllTests(argc, argv)
|
||||
|
||||
// Assertion macros
|
||||
#define EXPECT_TRUE(condition) \
|
||||
do { \
|
||||
if (!(condition)) { \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be true"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_FALSE(condition) \
|
||||
do { \
|
||||
if (condition) { \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be false"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_EQ(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 == v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " == " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_NE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 != v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " != " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_LT(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 < v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " < " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_LE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 <= v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " <= " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_GT(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 > v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " > " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define EXPECT_GE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 >= v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " >= " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_TRUE(condition) \
|
||||
do { \
|
||||
if (!(condition)) { \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be true"); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_FALSE(condition) \
|
||||
do { \
|
||||
if (condition) { \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be false"); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_EQ(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 == v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " == " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_NE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 != v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " != " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_LT(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 < v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " < " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_LE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 <= v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " <= " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_GT(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 > v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " > " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_GE(val1, val2) \
|
||||
do { \
|
||||
auto v1 = (val1); \
|
||||
auto v2 = (val2); \
|
||||
if (!(v1 >= v2)) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #val1 " >= " #val2 << "\n Actual: " << v1 << " vs " << v2; \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_NO_THROW(statement) \
|
||||
do { \
|
||||
try { \
|
||||
statement; \
|
||||
} catch (const std::exception& e) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "Expected: " #statement " not to throw\n Actual: threw " << e.what(); \
|
||||
::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} catch (...) { \
|
||||
::mscclpp::test::utils::reportFailure( \
|
||||
__FILE__, __LINE__, "Expected: " #statement " not to throw\n Actual: threw unknown exception"); \
|
||||
throw std::runtime_error("Test assertion failed"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// --- Test control macros ---
|
||||
|
||||
// Fail the current test immediately. Usage: FAIL() << "reason";
|
||||
#define FAIL() ::mscclpp::test::FailHelper(__FILE__, __LINE__)
|
||||
|
||||
// Skip the current test. Usage: SKIP_TEST() << "reason";
|
||||
#define SKIP_TEST() ::mscclpp::test::SkipHelper(__FILE__, __LINE__)
|
||||
|
||||
#endif // MSCCLPP_TEST_FRAMEWORK_HPP_
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
@@ -48,7 +48,7 @@ void BootstrapTest::bootstrapTestAll(std::shared_ptr<mscclpp::Bootstrap> bootstr
|
||||
bootstrapTestSendRecv(bootstrap);
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, WithId) {
|
||||
TEST(BootstrapTest, WithId) {
|
||||
auto bootstrap = std::make_shared<mscclpp::TcpBootstrap>(gEnv->rank, gEnv->worldSize);
|
||||
mscclpp::UniqueId id;
|
||||
if (bootstrap->getRank() == 0) id = bootstrap->createUniqueId();
|
||||
@@ -57,13 +57,13 @@ TEST_F(BootstrapTest, WithId) {
|
||||
bootstrapTestAll(bootstrap);
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, WithIpPortPair) {
|
||||
TEST(BootstrapTest, WithIpPortPair) {
|
||||
auto bootstrap = std::make_shared<mscclpp::TcpBootstrap>(gEnv->rank, gEnv->worldSize);
|
||||
bootstrap->initialize(gEnv->args["ip_port"]);
|
||||
bootstrapTestAll(bootstrap);
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, ResumeWithId) {
|
||||
TEST(BootstrapTest, ResumeWithId) {
|
||||
// This test may take a few minutes.
|
||||
bootstrapTestTimer.set(300);
|
||||
|
||||
@@ -76,19 +76,19 @@ TEST_F(BootstrapTest, ResumeWithId) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, ResumeWithIpPortPair) {
|
||||
TEST(BootstrapTest, ResumeWithIpPortPair) {
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
auto bootstrap = std::make_shared<mscclpp::TcpBootstrap>(gEnv->rank, gEnv->worldSize);
|
||||
bootstrap->initialize(gEnv->args["ip_port"]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, ExitBeforeConnect) {
|
||||
TEST(BootstrapTest, ExitBeforeConnect) {
|
||||
auto bootstrap = std::make_shared<mscclpp::TcpBootstrap>(gEnv->rank, gEnv->worldSize);
|
||||
bootstrap->createUniqueId();
|
||||
}
|
||||
|
||||
TEST_F(BootstrapTest, TimeoutWithId) {
|
||||
TEST(BootstrapTest, TimeoutWithId) {
|
||||
mscclpp::Timer timer;
|
||||
|
||||
// All ranks initialize a bootstrap with their own id (will hang)
|
||||
@@ -99,7 +99,7 @@ TEST_F(BootstrapTest, TimeoutWithId) {
|
||||
// Set bootstrap timeout to 1 second
|
||||
bootstrap->initialize(id, 1);
|
||||
} catch (const mscclpp::Error& e) {
|
||||
ASSERT_EQ(e.getErrorCode(), mscclpp::ErrorCode::Timeout);
|
||||
ASSERT_TRUE(e.getErrorCode() == mscclpp::ErrorCode::Timeout);
|
||||
}
|
||||
|
||||
// Timeout should be sligtly greater than 1 second
|
||||
@@ -139,7 +139,7 @@ class MPIBootstrap : public mscclpp::Bootstrap {
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(BootstrapTest, MPIBootstrap) {
|
||||
TEST(BootstrapTest, MPIBootstrap) {
|
||||
auto bootstrap = std::make_shared<MPIBootstrap>();
|
||||
bootstrapTestAll(bootstrap);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
@@ -185,7 +185,7 @@ bool CommunicatorTest::testWriteCorrectness(bool skipLocal) {
|
||||
return true;
|
||||
}
|
||||
|
||||
TEST_F(CommunicatorTest, BasicWrite) {
|
||||
TEST(CommunicatorTest, BasicWrite) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
deviceBufferInit();
|
||||
@@ -215,7 +215,7 @@ __global__ void kernelWaitSemaphores(mscclpp::Host2DeviceSemaphore::DeviceHandle
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CommunicatorTest, WriteWithDeviceSemaphores) {
|
||||
TEST(CommunicatorTest, WriteWithDeviceSemaphores) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
std::unordered_map<int, std::shared_ptr<mscclpp::Host2DeviceSemaphore>> semaphores;
|
||||
@@ -254,7 +254,7 @@ TEST_F(CommunicatorTest, WriteWithDeviceSemaphores) {
|
||||
communicator->bootstrap()->barrier();
|
||||
}
|
||||
|
||||
TEST_F(CommunicatorTest, WriteWithHostSemaphores) {
|
||||
TEST(CommunicatorTest, WriteWithHostSemaphores) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
std::unordered_map<int, std::shared_ptr<mscclpp::Host2HostSemaphore>> semaphores;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mpi.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <mscclpp/env.hpp>
|
||||
@@ -22,7 +23,7 @@ std::string getExecutablePath() {
|
||||
|
||||
void ExecutorTest::SetUp() {
|
||||
if (gEnv->worldSize != 2 || gEnv->nRanksPerNode != 2) {
|
||||
GTEST_SKIP() << "This test requires world size to be 2 and ranks per node to be 2";
|
||||
SKIP_TEST() << "This test requires world size to be 2 and ranks per node to be 2";
|
||||
}
|
||||
MultiProcessTest::SetUp();
|
||||
|
||||
@@ -49,7 +50,7 @@ void ExecutorTest::TearDown() {
|
||||
MultiProcessTest::TearDown();
|
||||
}
|
||||
|
||||
TEST_F(ExecutorTest, TwoNodesAllreduce) {
|
||||
TEST(ExecutorTest, TwoNodesAllreduce) {
|
||||
std::string executablePath = getExecutablePath();
|
||||
std::filesystem::path path = executablePath;
|
||||
std::filesystem::path executionFilesPath =
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
@@ -18,9 +18,7 @@ void IbTestBase::SetUp() {
|
||||
}
|
||||
|
||||
void IbPeerToPeerTest::SetUp() {
|
||||
#if !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
REQUIRE_IBVERBS;
|
||||
|
||||
IbTestBase::SetUp();
|
||||
|
||||
@@ -80,7 +78,7 @@ void IbPeerToPeerTest::stageSendWriteWithImm(uint32_t size, uint64_t wrId, uint6
|
||||
qp->stageSendWriteWithImm(mr.get(), remoteMrInfo, size, wrId, srcOffset, dstOffset, signaled, immData);
|
||||
}
|
||||
|
||||
TEST_F(IbPeerToPeerTest, SimpleSendRecv) {
|
||||
TEST(IbPeerToPeerTest, SimpleSendRecv) {
|
||||
if (gEnv->rank >= 2) {
|
||||
// This test needs only two ranks
|
||||
return;
|
||||
@@ -195,7 +193,7 @@ __global__ void kernelMemoryConsistency(uint64_t* data, volatile uint64_t* curIt
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(IbPeerToPeerTest, MemoryConsistency) {
|
||||
TEST(IbPeerToPeerTest, MemoryConsistency) {
|
||||
if (gEnv->rank >= 2) {
|
||||
// This test needs only two ranks
|
||||
return;
|
||||
@@ -303,7 +301,7 @@ TEST_F(IbPeerToPeerTest, MemoryConsistency) {
|
||||
EXPECT_EQ(res, 0);
|
||||
}
|
||||
|
||||
TEST_F(IbPeerToPeerTest, SimpleAtomicAdd) {
|
||||
TEST(IbPeerToPeerTest, SimpleAtomicAdd) {
|
||||
if (gEnv->rank >= 2) {
|
||||
// This test needs only two ranks
|
||||
return;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
void MemoryChannelOneToOneTest::SetUp() {
|
||||
// Need at least two ranks within a node
|
||||
if (gEnv->nRanksPerNode < 2) {
|
||||
GTEST_SKIP();
|
||||
SKIP_TEST();
|
||||
}
|
||||
// Use only two ranks
|
||||
setNumRanksToUse(2);
|
||||
@@ -88,27 +88,12 @@ void MemoryChannelOneToOneTest::packetPingPongTest(const std::string testName,
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
// The least nelem is 2 for packet ping pong
|
||||
kernelWrapper(buff.get(), gEnv->rank, 2, ret.get(), defaultNTries);
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
*ret = 0;
|
||||
|
||||
kernelWrapper(buff.get(), gEnv->rank, 1024, ret.get(), defaultNTries);
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelWrapper(buff.get(), gEnv->rank, 1024 * 1024, ret.get(), defaultNTries);
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelWrapper(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get(), defaultNTries);
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
for (int nElem : {2, 1024, 1024 * 1024, 4 * 1024 * 1024}) {
|
||||
*ret = 0;
|
||||
kernelWrapper(buff.get(), gEnv->rank, nElem, ret.get(), defaultNTries);
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
EXPECT_EQ(*ret, 0);
|
||||
}
|
||||
|
||||
int nTries = 1000000;
|
||||
communicator->bootstrap()->barrier();
|
||||
@@ -169,7 +154,7 @@ __global__ void kernelMemPutPingPong(int* buff, int rank, int nElem, int* ret) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MemoryChannelOneToOneTest, PutPingPong) {
|
||||
TEST(MemoryChannelOneToOneTest, PutPingPong) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
const int nElem = 4 * 1024 * 1024;
|
||||
@@ -187,28 +172,12 @@ TEST_F(MemoryChannelOneToOneTest, PutPingPong) {
|
||||
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) {
|
||||
*ret = 0;
|
||||
kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
EXPECT_EQ(*ret, 0);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void kernelMemGetPingPong(int* buff, int rank, int nElem, int* ret) {
|
||||
@@ -248,7 +217,7 @@ __global__ void kernelMemGetPingPong(int* buff, int rank, int nElem, int* ret) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MemoryChannelOneToOneTest, GetPingPong) {
|
||||
TEST(MemoryChannelOneToOneTest, GetPingPong) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
const int nElem = 4 * 1024 * 1024;
|
||||
@@ -266,28 +235,12 @@ TEST_F(MemoryChannelOneToOneTest, GetPingPong) {
|
||||
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) {
|
||||
*ret = 0;
|
||||
kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
EXPECT_EQ(*ret, 0);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void kernelMemLL8PacketPingPong(int* buff, int rank, int nElem, int* ret, int nTries) {
|
||||
@@ -371,14 +324,14 @@ __global__ void kernelMemLL16PacketPingPong(int* buff, int rank, int nElem, int*
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MemoryChannelOneToOneTest, LL8PacketPingPong) {
|
||||
TEST(MemoryChannelOneToOneTest, LL8PacketPingPong) {
|
||||
auto kernelMemLL8PacketPingPongWrapper = [](int* buff, int rank, int nElem, int* ret, int nTries) {
|
||||
kernelMemLL8PacketPingPong<<<1, 1024>>>(buff, rank, nElem, ret, nTries);
|
||||
};
|
||||
packetPingPongTest("memoryLL8PacketPingPong", kernelMemLL8PacketPingPongWrapper);
|
||||
}
|
||||
|
||||
TEST_F(MemoryChannelOneToOneTest, LL16PacketPingPong) {
|
||||
TEST(MemoryChannelOneToOneTest, LL16PacketPingPong) {
|
||||
auto kernelMemLL16PacketPingPongWrapper = [](int* buff, int rank, int nElem, int* ret, int nTries) {
|
||||
kernelMemLL16PacketPingPong<<<1, 1024>>>(buff, rank, nElem, ret, nTries);
|
||||
};
|
||||
|
||||
@@ -98,14 +98,18 @@ static std::unordered_map<std::string, std::string> parseArgs(int argc, const ch
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unrecognized positional token: ignore to keep parser permissive for gtest/MPI extras
|
||||
// Unrecognized positional token: ignore
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
void MultiProcessTestEnv::SetUp() {
|
||||
MPI_Init(NULL, NULL);
|
||||
int initialized = 0;
|
||||
MPI_Initialized(&initialized);
|
||||
if (!initialized) {
|
||||
MPI_Init(NULL, NULL);
|
||||
}
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &worldSize);
|
||||
// get the local number of nodes with MPI
|
||||
@@ -128,18 +132,17 @@ void MultiProcessTest::TearDown() {
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
gEnv = new MultiProcessTestEnv(argc, (const char**)argv);
|
||||
::testing::AddGlobalTestEnvironment(gEnv);
|
||||
::mscclpp::test::TestRegistry::instance().addEnvironment(gEnv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
TEST_F(MultiProcessTest, Prelim) {
|
||||
TEST(MultiProcessTest, Prelim) {
|
||||
// Test to make sure the MPI environment is set up correctly
|
||||
ASSERT_GE(gEnv->worldSize, 2);
|
||||
}
|
||||
|
||||
TEST_F(MultiProcessTest, HostName) {
|
||||
TEST(MultiProcessTest, HostName) {
|
||||
const size_t maxNameLen = 1024;
|
||||
std::vector<char> buffer(gEnv->worldSize * maxNameLen, '\0');
|
||||
std::string hostName = mscclpp::getHostName(maxNameLen, '\0');
|
||||
@@ -159,7 +162,7 @@ TEST_F(MultiProcessTest, HostName) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MultiProcessTest, HostHash) {
|
||||
TEST(MultiProcessTest, HostHash) {
|
||||
std::vector<uint64_t> buffer(gEnv->worldSize, 0);
|
||||
uint64_t hostHash = mscclpp::getHostHash();
|
||||
buffer[gEnv->rank] = hostHash;
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
#ifndef MSCCLPP_MP_UNIT_TESTS_HPP_
|
||||
#define MSCCLPP_MP_UNIT_TESTS_HPP_
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <mscclpp/core.hpp>
|
||||
#include <mscclpp/executor.hpp>
|
||||
#include <mscclpp/memory_channel.hpp>
|
||||
@@ -13,10 +11,18 @@
|
||||
#include <mscclpp/port_channel.hpp>
|
||||
#include <mscclpp/utils.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
#include "ib.hpp"
|
||||
#include "utils_internal.hpp"
|
||||
|
||||
class MultiProcessTestEnv : public ::testing::Environment {
|
||||
// Skip the current test if IBVerbs is not available in this build
|
||||
#if defined(USE_IBVERBS)
|
||||
#define REQUIRE_IBVERBS
|
||||
#else
|
||||
#define REQUIRE_IBVERBS SKIP_TEST() << "This test requires IBVerbs that the current build does not support."
|
||||
#endif
|
||||
|
||||
class MultiProcessTestEnv : public ::mscclpp::test::Environment {
|
||||
public:
|
||||
MultiProcessTestEnv(int argc, const char** argv);
|
||||
|
||||
@@ -37,7 +43,7 @@ mscclpp::Transport ibIdToTransport(int id);
|
||||
int rankToLocalRank(int rank);
|
||||
int rankToNode(int rank);
|
||||
|
||||
class MultiProcessTest : public ::testing::Test {
|
||||
class MultiProcessTest : public ::mscclpp::test::TestCase {
|
||||
protected:
|
||||
void TearDown() override;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <cstdint>
|
||||
#include <mscclpp/concurrency_device.hpp>
|
||||
@@ -178,26 +178,12 @@ void PortChannelOneToOneTest::testPingPong(PingPongTestParams params) {
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
const int nTries = 1000;
|
||||
|
||||
kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, params.waitWithPoll, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
|
||||
kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, params.waitWithPoll, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
|
||||
kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, params.waitWithPoll, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
|
||||
kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, params.waitWithPoll, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) {
|
||||
*ret = 0;
|
||||
kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, params.waitWithPoll, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
EXPECT_EQ(*ret, 0);
|
||||
}
|
||||
|
||||
proxyService->stopProxy();
|
||||
}
|
||||
@@ -223,8 +209,7 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) {
|
||||
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info();
|
||||
const std::string testName = std::string(testInfo->test_suite_name()) + "." + std::string(testInfo->name());
|
||||
const std::string testName = ::mscclpp::test::currentTestName();
|
||||
const int nTries = 1000;
|
||||
|
||||
// Warm-up
|
||||
@@ -247,63 +232,51 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) {
|
||||
proxyService->stopProxy();
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPong) {
|
||||
TEST(PortChannelOneToOneTest, PingPong) {
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Default});
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongIbHostMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PingPongIbHostMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Host});
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongEthernet) {
|
||||
TEST(PortChannelOneToOneTest, PingPongEthernet) {
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false, .ibMode = IbMode::Default});
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongWithPoll) {
|
||||
TEST(PortChannelOneToOneTest, PingPongWithPoll) {
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = true, .ibMode = IbMode::Default});
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongIbHostModeWithPoll) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PingPongIbHostModeWithPoll) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = true, .ibMode = IbMode::Host});
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongPerf) {
|
||||
TEST(PortChannelOneToOneTest, PingPongPerf) {
|
||||
testPingPongPerf(PingPongTestParams{
|
||||
.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Default});
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongPerfIbHostMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PingPongPerfIbHostMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPingPongPerf(PingPongTestParams{
|
||||
.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Host});
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongPerfIbHostNoAtomicMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PingPongPerfIbHostNoAtomicMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPingPongPerf(PingPongTestParams{
|
||||
.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::HostNoAtomic});
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongPerfEthernet) {
|
||||
TEST(PortChannelOneToOneTest, PingPongPerfEthernet) {
|
||||
testPingPongPerf(PingPongTestParams{
|
||||
.useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false, .ibMode = IbMode::Default});
|
||||
}
|
||||
@@ -407,34 +380,14 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIb, IbMode ibMode) {
|
||||
std::shared_ptr<int> ret = mscclpp::detail::gpuCallocHostShared<int>();
|
||||
|
||||
const int nTries = 1000;
|
||||
|
||||
// The least nelem is 2 for packet ping pong
|
||||
kernelProxyLLPingPong<true>
|
||||
<<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, 2, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelProxyLLPingPong<true>
|
||||
<<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, 1024, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelProxyLLPingPong<true><<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank,
|
||||
1024 * 1024, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
*ret = 0;
|
||||
|
||||
kernelProxyLLPingPong<true><<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank,
|
||||
4 * 1024 * 1024, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
|
||||
EXPECT_EQ(*ret, 0);
|
||||
for (int nElem : {2, 1024, 1024 * 1024, 4 * 1024 * 1024}) {
|
||||
*ret = 0;
|
||||
kernelProxyLLPingPong<true>
|
||||
<<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, nElem, nTries, ret.get());
|
||||
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
|
||||
EXPECT_EQ(*ret, 0);
|
||||
}
|
||||
|
||||
communicator->bootstrap()->barrier();
|
||||
|
||||
@@ -471,8 +424,7 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb, IbMode ibMode)
|
||||
|
||||
proxyService->startProxy();
|
||||
|
||||
auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info();
|
||||
const std::string testName = std::string(testInfo->test_suite_name()) + "." + std::string(testInfo->name());
|
||||
const std::string testName = ::mscclpp::test::currentTestName();
|
||||
const int nTries = 1000000;
|
||||
|
||||
// Warm-up
|
||||
@@ -497,47 +449,32 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb, IbMode ibMode)
|
||||
proxyService->stopProxy();
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false, IbMode::Default); }
|
||||
TEST(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false, IbMode::Default); }
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPongIbHostMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PacketPingPongIbHostMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPacketPingPong(true, IbMode::Host);
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false, IbMode::Default); }
|
||||
TEST(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false, IbMode::Default); }
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIbHostMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PacketPingPongPerfIbHostMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPacketPingPongPerf(true, IbMode::Host);
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIbHostNoAtomicMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PacketPingPongPerfIbHostNoAtomicMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPacketPingPongPerf(true, IbMode::HostNoAtomic);
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PingPongIbHostNoAtomicMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PingPongIbHostNoAtomicMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPingPong(PingPongTestParams{
|
||||
.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::HostNoAtomic});
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
TEST_F(PortChannelOneToOneTest, PacketPingPongIbHostNoAtomicMode) {
|
||||
#if defined(USE_IBVERBS)
|
||||
TEST(PortChannelOneToOneTest, PacketPingPongIbHostNoAtomicMode) {
|
||||
REQUIRE_IBVERBS;
|
||||
testPacketPingPong(true, IbMode::HostNoAtomic);
|
||||
#else // !defined(USE_IBVERBS)
|
||||
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
|
||||
#endif // !defined(USE_IBVERBS)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <mscclpp/switch_channel.hpp>
|
||||
@@ -10,10 +10,10 @@
|
||||
void SwitchChannelTest::SetUp() {
|
||||
// Need at least two ranks within a node
|
||||
if (gEnv->nRanksPerNode < 2) {
|
||||
GTEST_SKIP();
|
||||
SKIP_TEST();
|
||||
}
|
||||
if (!mscclpp::isNvlsSupported()) {
|
||||
GTEST_SKIP();
|
||||
SKIP_TEST();
|
||||
}
|
||||
// Use only two ranks
|
||||
setNumRanksToUse(2);
|
||||
@@ -23,6 +23,8 @@ void SwitchChannelTest::SetUp() {
|
||||
void SwitchChannelTest::TearDown() { CommunicatorTestBase::TearDown(); }
|
||||
|
||||
__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan;
|
||||
__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan1;
|
||||
__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan2;
|
||||
|
||||
__global__ void kernelSwitchReduce() {
|
||||
#if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
@@ -31,7 +33,16 @@ __global__ void kernelSwitchReduce() {
|
||||
#endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
}
|
||||
|
||||
TEST_F(SwitchChannelTest, SimpleAllReduce) {
|
||||
__global__ void kernelSwitchReduceTwo() {
|
||||
#if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
auto val1 = gConstSwitchChan1.reduce<mscclpp::f32x1>(0);
|
||||
gConstSwitchChan1.broadcast(0, val1);
|
||||
auto val2 = gConstSwitchChan2.reduce<mscclpp::f32x1>(0);
|
||||
gConstSwitchChan2.broadcast(0, val2);
|
||||
#endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
}
|
||||
|
||||
TEST(SwitchChannelTest, SimpleAllReduce) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
std::vector<int> ranks;
|
||||
@@ -66,22 +77,13 @@ TEST_F(SwitchChannelTest, SimpleAllReduce) {
|
||||
for (int i = 0; i < numRanksToUse; i++) {
|
||||
expected += i + 1.0f;
|
||||
}
|
||||
ASSERT_EQ(result, expected) << "Expected " << expected << " but got " << result << " for rank " << gEnv->rank;
|
||||
if (result != expected) {
|
||||
std::cerr << "Expected " << expected << " but got " << result << " for rank " << gEnv->rank << std::endl;
|
||||
}
|
||||
ASSERT_EQ(result, expected);
|
||||
}
|
||||
|
||||
__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan1;
|
||||
__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan2;
|
||||
|
||||
__global__ void kernelSwitchReduceTwo() {
|
||||
#if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
auto val1 = gConstSwitchChan1.reduce<mscclpp::f32x1>(0);
|
||||
gConstSwitchChan1.broadcast(0, val1);
|
||||
auto val2 = gConstSwitchChan2.reduce<mscclpp::f32x1>(0);
|
||||
gConstSwitchChan2.broadcast(0, val2);
|
||||
#endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900)
|
||||
}
|
||||
|
||||
TEST_F(SwitchChannelTest, TwoChannelsSameConnection) {
|
||||
TEST(SwitchChannelTest, TwoChannelsSameConnection) {
|
||||
if (gEnv->rank >= numRanksToUse) return;
|
||||
|
||||
std::vector<int> ranks;
|
||||
@@ -97,12 +99,9 @@ TEST_F(SwitchChannelTest, TwoChannelsSameConnection) {
|
||||
MSCCLPP_CUDATHROW(cudaMemcpy(buffer1.data(), &data1, sizeof(data1), cudaMemcpyHostToDevice));
|
||||
MSCCLPP_CUDATHROW(cudaMemcpy(buffer2.data(), &data2, sizeof(data2), cudaMemcpyHostToDevice));
|
||||
|
||||
// Connection size must be large enough for two granularity-aligned buffers.
|
||||
// The multicast granularity is typically 2MB, so we need at least 2 * 2MB.
|
||||
const size_t connSize = buffer1.bytes() + buffer2.bytes();
|
||||
auto nvlsConnection = mscclpp::connectNvlsCollective(communicator, ranks, connSize);
|
||||
|
||||
// Bind two separate buffers to the same connection
|
||||
auto switchChannel1 = nvlsConnection->bindAllocatedMemory(CUdeviceptr(buffer1.data()), bufSize);
|
||||
auto switchChannel2 = nvlsConnection->bindAllocatedMemory(CUdeviceptr(buffer2.data()), bufSize);
|
||||
|
||||
@@ -132,6 +131,6 @@ TEST_F(SwitchChannelTest, TwoChannelsSameConnection) {
|
||||
expected1 += (i + 1.0f) * 1.0f;
|
||||
expected2 += (i + 1.0f) * 10.0f;
|
||||
}
|
||||
ASSERT_EQ(result1, expected1) << "Channel1: expected " << expected1 << " but got " << result1;
|
||||
ASSERT_EQ(result2, expected2) << "Channel2: expected " << expected2 << " but got " << result2;
|
||||
ASSERT_EQ(result1, expected1);
|
||||
ASSERT_EQ(result2, expected2);
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
# Find required packages
|
||||
find_package(MPI REQUIRED)
|
||||
|
||||
# Note: nlohmann_json::nlohmann_json target is already available from the main project
|
||||
|
||||
# Set up common libraries and includes for tests
|
||||
set(PERF_TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads MPI::MPI_CXX)
|
||||
if(MSCCLPP_USE_IB)
|
||||
list(APPEND PERF_TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
|
||||
endif()
|
||||
|
||||
set(PERF_TEST_INC_COMMON
|
||||
PRIVATE ${PROJECT_SOURCE_DIR}/include
|
||||
SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
|
||||
|
||||
# Function to add a test executable
|
||||
function(add_perf_test_executable name sources)
|
||||
if(MSCCLPP_USE_ROCM)
|
||||
set_source_files_properties(${sources} PROPERTIES LANGUAGE CXX)
|
||||
endif()
|
||||
add_executable(${name} ${sources})
|
||||
target_link_libraries(${name} ${PERF_TEST_LIBS_COMMON})
|
||||
|
||||
# Link nlohmann_json - use the target from main project
|
||||
target_link_libraries(${name} nlohmann_json::nlohmann_json)
|
||||
|
||||
if(MSCCLPP_USE_IB)
|
||||
target_compile_definitions(${name} PRIVATE USE_IBVERBS)
|
||||
endif()
|
||||
|
||||
target_include_directories(${name} ${PERF_TEST_INC_COMMON})
|
||||
target_compile_definitions(${name} PRIVATE MSCCLPP_USE_MPI_FOR_TESTS)
|
||||
|
||||
# Set C++ standard
|
||||
target_compile_features(${name} PRIVATE cxx_std_17)
|
||||
|
||||
set_target_properties(${name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/perf")
|
||||
endfunction()
|
||||
|
||||
# Add FIFO test
|
||||
add_perf_test_executable(fifo_test "framework.cc;fifo_test.cu")
|
||||
@@ -1,298 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mscclpp/fifo.hpp>
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
#include <mscclpp/numa.hpp>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "framework.hpp"
|
||||
|
||||
using namespace mscclpp::test;
|
||||
|
||||
// Constants for timeout and trigger calculation
|
||||
constexpr uint64_t TIMEOUT_SPINS = 1000000;
|
||||
constexpr int MIN_TRIGGERS = 1000;
|
||||
constexpr int MIN_WARMUP_TRIGGERS = 100;
|
||||
constexpr int TRIGGERS_PER_FIFO_SIZE = 10;
|
||||
constexpr int WARMUP_TRIGGERS_PER_FIFO_SIZE = 2;
|
||||
|
||||
__constant__ mscclpp::FifoDeviceHandle gFifoDeviceHandle;
|
||||
|
||||
__global__ void kernelFifoPush(size_t numTriggers) {
|
||||
mscclpp::FifoDeviceHandle& fifo = gFifoDeviceHandle;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
mscclpp::ProxyTrigger trigger;
|
||||
for (size_t i = 1; i <= numTriggers; ++i) {
|
||||
trigger.fst = i;
|
||||
trigger.snd = tid ^ i;
|
||||
fifo.push(trigger);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void kernelFifoPushSync(size_t numTriggers) {
|
||||
mscclpp::FifoDeviceHandle& fifo = gFifoDeviceHandle;
|
||||
mscclpp::ProxyTrigger trigger;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
for (size_t i = 1; i <= numTriggers; ++i) {
|
||||
trigger.fst = i;
|
||||
trigger.snd = tid ^ i;
|
||||
fifo.sync(fifo.push(trigger));
|
||||
}
|
||||
}
|
||||
|
||||
static void setupCuda(int& cudaDevice, int& numaNode) {
|
||||
utils::CUDA_CHECK(cudaGetDevice(&cudaDevice));
|
||||
numaNode = mscclpp::getDeviceNumaNode(cudaDevice);
|
||||
mscclpp::numaBind(numaNode);
|
||||
}
|
||||
|
||||
// Helper function to consume triggers from FIFO
|
||||
static bool consumeTriggers(std::unique_ptr<mscclpp::Fifo>& hostFifo, int numTriggers, int parallel) {
|
||||
int totalTriggers = numTriggers * parallel;
|
||||
std::unordered_map<int, int> triggerCounts;
|
||||
for (int i = 0; i < totalTriggers; ++i) {
|
||||
mscclpp::ProxyTrigger trigger;
|
||||
uint64_t spin = 0;
|
||||
do {
|
||||
trigger = hostFifo->poll();
|
||||
if (spin++ > TIMEOUT_SPINS) {
|
||||
return false;
|
||||
}
|
||||
} while (trigger.fst == 0 || trigger.snd == 0);
|
||||
|
||||
// Process trigger (see src/proxy.cc)
|
||||
trigger.snd ^= ((uint64_t)1 << (uint64_t)63);
|
||||
trigger.snd = trigger.snd ^ trigger.fst;
|
||||
assert(triggerCounts[trigger.snd] + 1 == trigger.fst);
|
||||
triggerCounts[trigger.snd]++;
|
||||
hostFifo->pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Helper function to run a single kernel variant and return performance metrics
|
||||
std::tuple<double, double, int, int> runSingleKernelVariant(void (*kernel)(size_t),
|
||||
std::unique_ptr<mscclpp::Fifo>& hostFifo,
|
||||
cudaStream_t stream, int numParallel) {
|
||||
// Calculate triggers based on FIFO size
|
||||
const int numTriggers = std::max(MIN_TRIGGERS, static_cast<int>(hostFifo->size() * TRIGGERS_PER_FIFO_SIZE));
|
||||
const int warmupTriggers =
|
||||
std::max(MIN_WARMUP_TRIGGERS, static_cast<int>(hostFifo->size() * WARMUP_TRIGGERS_PER_FIFO_SIZE));
|
||||
|
||||
// Warmup
|
||||
kernel<<<numParallel, 1, 0, stream>>>(warmupTriggers);
|
||||
utils::CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Process warmup triggers (note: total triggers = warmupTriggers * numParallel)
|
||||
if (!consumeTriggers(hostFifo, warmupTriggers, numParallel)) {
|
||||
return {0.0, 0.0, 0, 0}; // Return error values
|
||||
}
|
||||
utils::CUDA_CHECK(cudaStreamSynchronize(stream));
|
||||
|
||||
// Benchmark
|
||||
utils::Timer timer;
|
||||
timer.start();
|
||||
|
||||
kernel<<<numParallel, 1, 0, stream>>>(numTriggers);
|
||||
utils::CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Process all triggers
|
||||
if (!consumeTriggers(hostFifo, numTriggers, numParallel)) {
|
||||
return {0.0, 0.0, 0, 0};
|
||||
}
|
||||
utils::CUDA_CHECK(cudaStreamSynchronize(stream));
|
||||
|
||||
timer.stop();
|
||||
|
||||
const int totalTriggers = numTriggers * numParallel;
|
||||
double throughput = totalTriggers / timer.elapsedSeconds();
|
||||
double duration_us = timer.elapsedMicroseconds();
|
||||
|
||||
utils::CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
return {throughput, duration_us, totalTriggers, warmupTriggers * numParallel};
|
||||
}
|
||||
|
||||
void runFifoTestVariant(std::unique_ptr<mscclpp::Fifo>& hostFifo, cudaStream_t stream, int numParallel,
|
||||
nlohmann::ordered_json& combinedMetrics) {
|
||||
auto [pushThroughput, pushDuration, numTriggers, warmupTriggers] =
|
||||
runSingleKernelVariant(kernelFifoPush, hostFifo, stream, numParallel);
|
||||
|
||||
auto [syncThroughput, syncDuration, syncNumTriggers, syncWarmupTriggers] =
|
||||
runSingleKernelVariant(kernelFifoPushSync, hostFifo, stream, numParallel);
|
||||
|
||||
auto formatThroughput = [](double thru) {
|
||||
return double(int(thru * 10)) / 10.0; // Round to 1 decimal place
|
||||
};
|
||||
|
||||
std::string prefix = "p" + std::to_string(numParallel) + "_";
|
||||
combinedMetrics[prefix + "push_throughput"] = formatThroughput(pushThroughput);
|
||||
combinedMetrics[prefix + "push_sync_throughput"] = formatThroughput(syncThroughput);
|
||||
combinedMetrics[prefix + "push_duration_us"] = pushDuration;
|
||||
combinedMetrics[prefix + "push_sync_duration_us"] = syncDuration;
|
||||
combinedMetrics[prefix + "num_triggers"] = numTriggers;
|
||||
combinedMetrics[prefix + "warmup_triggers"] = warmupTriggers;
|
||||
}
|
||||
|
||||
struct FifoTestConfig {
|
||||
int fifoSize;
|
||||
std::vector<int> parallelismLevels;
|
||||
|
||||
// Constructor with default parallelism levels
|
||||
FifoTestConfig(int size, const std::vector<int>& parallel = {1, 2, 4, 8, 16})
|
||||
: fifoSize(size), parallelismLevels(parallel) {}
|
||||
};
|
||||
|
||||
void runFifoTest(const FifoTestConfig& config, [[maybe_unused]] int rank, [[maybe_unused]] int worldSize,
|
||||
[[maybe_unused]] int localRank) {
|
||||
if (config.fifoSize <= 0) {
|
||||
throw std::invalid_argument("FIFO size must be positive");
|
||||
}
|
||||
if (config.parallelismLevels.empty()) {
|
||||
throw std::invalid_argument("At least one parallelism level must be specified");
|
||||
}
|
||||
|
||||
int cudaDevice, numaNode;
|
||||
setupCuda(cudaDevice, numaNode);
|
||||
|
||||
auto hostFifo = std::make_unique<mscclpp::Fifo>(config.fifoSize);
|
||||
|
||||
mscclpp::FifoDeviceHandle hostHandle = hostFifo->deviceHandle();
|
||||
utils::CUDA_CHECK(cudaMemcpyToSymbol(gFifoDeviceHandle, &hostHandle, sizeof(mscclpp::FifoDeviceHandle)));
|
||||
|
||||
cudaStream_t stream;
|
||||
utils::CUDA_CHECK(cudaStreamCreate(&stream));
|
||||
|
||||
// Create test name with parallelism range
|
||||
std::string testName = "FifoTest_Size" + std::to_string(config.fifoSize) + "_Parallel";
|
||||
|
||||
// Add parallelism range to test name (e.g., "P1-16" or "P1-4-16-64")
|
||||
if (!config.parallelismLevels.empty()) {
|
||||
testName += std::to_string(config.parallelismLevels.front());
|
||||
if (config.parallelismLevels.size() > 1) {
|
||||
testName += "-" + std::to_string(config.parallelismLevels.back());
|
||||
|
||||
// If parallelism levels have non-standard steps, include more detail
|
||||
if (config.parallelismLevels.size() > 2 &&
|
||||
(config.parallelismLevels[1] != 2 * config.parallelismLevels[0] || config.parallelismLevels.size() > 3)) {
|
||||
testName = "FifoTest_Size" + std::to_string(config.fifoSize) + "_ParallelCustom";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print test configuration
|
||||
if (utils::isMainRank()) {
|
||||
std::stringstream ss;
|
||||
ss << "Running FIFO test with size=" << config.fifoSize << ", parallelism_levels=[";
|
||||
for (size_t i = 0; i < config.parallelismLevels.size(); ++i) {
|
||||
if (i > 0) ss << ",";
|
||||
ss << config.parallelismLevels[i];
|
||||
}
|
||||
ss << "]";
|
||||
std::cout << ss.str() << std::endl;
|
||||
}
|
||||
|
||||
nlohmann::ordered_json combinedMetrics;
|
||||
|
||||
for (int numParallel : config.parallelismLevels) {
|
||||
runFifoTestVariant(hostFifo, stream, numParallel, combinedMetrics);
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> testParams;
|
||||
testParams["fifo_size"] = std::to_string(static_cast<int>(hostFifo->size()));
|
||||
|
||||
// Add parallelism levels to test parameters
|
||||
std::stringstream parallelismStream;
|
||||
for (size_t i = 0; i < config.parallelismLevels.size(); ++i) {
|
||||
if (i > 0) parallelismStream << ",";
|
||||
parallelismStream << config.parallelismLevels[i];
|
||||
}
|
||||
testParams["parallelism_levels"] = parallelismStream.str();
|
||||
|
||||
utils::recordResult(testName, "fifo", combinedMetrics, testParams);
|
||||
|
||||
utils::CUDA_CHECK(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void runAllFifoTests([[maybe_unused]] int rank, [[maybe_unused]] int worldSize, [[maybe_unused]] int localRank) {
|
||||
// clang-format off
|
||||
std::vector<FifoTestConfig> configs = {
|
||||
{1, {1}},
|
||||
{128, {1, 8, 64, 128}},
|
||||
{512, {1, 8, 64, 256, 512}},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
for (const auto& config : configs) {
|
||||
runFifoTest(config, rank, worldSize, localRank);
|
||||
}
|
||||
}
|
||||
|
||||
static void printUsage(char* argv0) {
|
||||
std::stringstream ss;
|
||||
ss << "Usage: " << argv0 << " [OPTIONS]\n"
|
||||
<< "\n"
|
||||
<< "Options:\n"
|
||||
<< " -o, --output-format FORMAT Output format: human or json (default: human)\n"
|
||||
<< " -f, --output-file FILE JSON output file path (default: report.jsonl)\n"
|
||||
<< " -v, --verbose Increase verbosity\n"
|
||||
<< " -h, --help Show this help message\n";
|
||||
std::cout << ss.str();
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
std::string outputFormat = "human";
|
||||
std::string outputFile = "report.jsonl";
|
||||
bool verbose = false;
|
||||
|
||||
static struct option longOptions[] = {{"output-format", required_argument, 0, 'o'},
|
||||
{"output-file", required_argument, 0, 'f'},
|
||||
{"verbose", no_argument, 0, 'v'},
|
||||
{"help", no_argument, 0, 'h'},
|
||||
{0, 0, 0, 0}};
|
||||
|
||||
int c;
|
||||
while ((c = getopt_long(argc, argv, "o:f:vh", longOptions, nullptr)) != -1) {
|
||||
switch (c) {
|
||||
case 'o':
|
||||
outputFormat = optarg;
|
||||
break;
|
||||
case 'f':
|
||||
outputFile = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'h':
|
||||
printUsage(argv[0]);
|
||||
return 0;
|
||||
default:
|
||||
printUsage(argv[0]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::tuple<std::string, std::string, std::function<void(int, int, int)>>> tests = {
|
||||
{"AllFifoTests", "FIFO performance tests with multiple configurations", runAllFifoTests}};
|
||||
|
||||
int result = utils::runMultipleTests(argc, argv, tests);
|
||||
|
||||
if (utils::isMainRank()) {
|
||||
if (outputFormat == "json") {
|
||||
utils::writeResultsToFile(outputFile);
|
||||
} else {
|
||||
utils::printResults(verbose);
|
||||
}
|
||||
}
|
||||
|
||||
utils::cleanupMPI();
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -1,208 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include "framework.hpp"
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace mscclpp {
|
||||
namespace test {
|
||||
|
||||
// Global state for results
|
||||
static std::vector<TestResult> g_results;
|
||||
static int g_mpi_rank = 0;
|
||||
static int g_mpi_size = 1;
|
||||
static bool g_mpi_initialized = false;
|
||||
|
||||
namespace utils {
|
||||
|
||||
// Internal MPI helper functions (not exposed in header)
|
||||
void initializeMPI(int argc, char* argv[]) {
|
||||
if (g_mpi_initialized) return;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size);
|
||||
g_mpi_initialized = true;
|
||||
}
|
||||
|
||||
static void finalizeMPI() {
|
||||
if (!g_mpi_initialized) return;
|
||||
|
||||
MPI_Finalize();
|
||||
g_mpi_initialized = false;
|
||||
}
|
||||
|
||||
static int getMPIRank() { return g_mpi_rank; }
|
||||
|
||||
static int getMPISize() { return g_mpi_size; }
|
||||
|
||||
static bool isMainProcess() { return g_mpi_rank == 0; }
|
||||
|
||||
// Public utility functions for test output
|
||||
bool isMainRank() { return g_mpi_rank == 0; }
|
||||
|
||||
void cleanupMPI() { finalizeMPI(); }
|
||||
|
||||
std::string getCurrentTimestamp() {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto time_t = std::chrono::system_clock::to_time_t(now);
|
||||
std::stringstream ss;
|
||||
ss << std::put_time(std::gmtime(&time_t), "%Y-%m-%dT%H:%M:%S");
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void recordResult(const std::string& test_name, const std::string& test_category, const nlohmann::ordered_json& metrics,
|
||||
const std::map<std::string, std::string>& test_params) {
|
||||
TestResult result;
|
||||
result.test_name = test_name;
|
||||
result.test_category = test_category;
|
||||
result.test_params = test_params;
|
||||
result.metrics = metrics;
|
||||
result.num_processes = g_mpi_size;
|
||||
result.process_rank = g_mpi_rank;
|
||||
result.timestamp = getCurrentTimestamp();
|
||||
|
||||
g_results.push_back(result);
|
||||
}
|
||||
|
||||
void writeResultsToFile(const std::string& filename) {
|
||||
std::ofstream file(filename);
|
||||
if (!file) {
|
||||
throw std::runtime_error("Cannot open output file: " + filename);
|
||||
}
|
||||
|
||||
for (const auto& result : g_results) {
|
||||
nlohmann::ordered_json j;
|
||||
j["test_name"] = result.test_name;
|
||||
j["test_category"] = result.test_category;
|
||||
j["test_config"] = result.test_params;
|
||||
j["metrics"] = result.metrics;
|
||||
j["num_processes"] = result.num_processes;
|
||||
j["process_rank"] = result.process_rank;
|
||||
j["timestamp"] = result.timestamp;
|
||||
|
||||
file << j.dump() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void printResults(bool verbose) {
|
||||
if (!isMainProcess()) return;
|
||||
|
||||
std::cout << "\n=== Test Results ===" << std::endl;
|
||||
|
||||
for (const auto& result : g_results) {
|
||||
std::cout << "\nTest: " << result.test_name << " (" << result.test_category << ")" << std::endl;
|
||||
|
||||
if (verbose && !result.test_params.empty()) {
|
||||
std::cout << " Parameters:" << std::endl;
|
||||
for (const auto& param : result.test_params) {
|
||||
std::cout << " " << param.first << ": " << param.second << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << " Metrics:" << std::endl;
|
||||
for (auto it = result.metrics.begin(); it != result.metrics.end(); ++it) {
|
||||
std::cout << " " << it.key() << ": " << it.value() << std::endl;
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Timer implementation
|
||||
Timer::Timer() : is_running_(false) {}
|
||||
|
||||
void Timer::start() {
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
is_running_ = true;
|
||||
}
|
||||
|
||||
void Timer::stop() {
|
||||
end_time_ = std::chrono::high_resolution_clock::now();
|
||||
is_running_ = false;
|
||||
}
|
||||
|
||||
double Timer::elapsedMicroseconds() const {
|
||||
if (is_running_) {
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(now - start_time_).count();
|
||||
}
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(end_time_ - start_time_).count();
|
||||
}
|
||||
|
||||
double Timer::elapsedMilliseconds() const { return elapsedMicroseconds() / 1000.0; }
|
||||
|
||||
double Timer::elapsedSeconds() const { return elapsedMicroseconds() / 1000000.0; }
|
||||
|
||||
void cudaCheck(cudaError_t err, const char* file, int line) {
|
||||
if (err != cudaSuccess) {
|
||||
std::string msg =
|
||||
std::string("CUDA error at ") + file + ":" + std::to_string(line) + " - " + cudaGetErrorString(err);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int runMultipleTests(
|
||||
int argc, char* argv[],
|
||||
const std::vector<std::tuple<std::string, std::string, std::function<void(int, int, int)>>>& tests) {
|
||||
int totalResult = 0;
|
||||
|
||||
// Initialize MPI once for all tests
|
||||
initializeMPI(argc, argv);
|
||||
|
||||
try {
|
||||
// Get MPI information
|
||||
int rank = getMPIRank();
|
||||
int size = getMPISize();
|
||||
int local_rank = rank; // For simplicity, assume local_rank = rank
|
||||
|
||||
for (const auto& test : tests) {
|
||||
const std::string& testName = std::get<0>(test);
|
||||
const std::string& testDescription = std::get<1>(test);
|
||||
const std::function<void(int, int, int)>& testFunction = std::get<2>(test);
|
||||
|
||||
if (rank == 0) {
|
||||
std::cout << "Running test: " << testName << std::endl;
|
||||
if (!testDescription.empty()) {
|
||||
std::cout << " " << testDescription << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't clear results - accumulate them for all tests in the same file
|
||||
// g_results.clear(); // Commented out to accumulate results
|
||||
|
||||
try {
|
||||
// Run the individual test function with MPI information
|
||||
testFunction(rank, size, local_rank);
|
||||
|
||||
// Synchronize before moving to next test
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
if (rank == 0) {
|
||||
std::cerr << "Error in test " << testName << ": " << e.what() << std::endl;
|
||||
}
|
||||
totalResult = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't cleanup MPI here - let the caller handle it
|
||||
// finalizeMPI();
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
if (g_mpi_rank == 0) {
|
||||
std::cerr << "Error: " << e.what() << std::endl;
|
||||
}
|
||||
finalizeMPI();
|
||||
return 1;
|
||||
}
|
||||
|
||||
return totalResult;
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace test
|
||||
} // namespace mscclpp
|
||||
@@ -1,80 +0,0 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#ifndef MSCCLPP_TEST_PERF_FRAMEWORK_HPP_
|
||||
#define MSCCLPP_TEST_PERF_FRAMEWORK_HPP_
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <mscclpp/gpu.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
namespace mscclpp {
|
||||
namespace test {
|
||||
|
||||
// Test result structure
|
||||
struct TestResult {
|
||||
std::string test_name;
|
||||
std::string test_category;
|
||||
std::map<std::string, std::string> test_params;
|
||||
nlohmann::ordered_json metrics;
|
||||
int num_processes;
|
||||
int process_rank;
|
||||
std::string timestamp;
|
||||
};
|
||||
|
||||
// Simple utility functions for testing
|
||||
namespace utils {
|
||||
|
||||
// Test execution utilities
|
||||
int runMultipleTests(
|
||||
int argc, char* argv[],
|
||||
const std::vector<std::tuple<std::string, std::string, std::function<void(int, int, int)>>>& tests);
|
||||
|
||||
// MPI management
|
||||
void initializeMPI(int argc, char* argv[]);
|
||||
void cleanupMPI();
|
||||
bool isMainRank();
|
||||
|
||||
// Result recording
|
||||
void recordResult(const std::string& test_name, const std::string& test_category, const nlohmann::ordered_json& metrics,
|
||||
const std::map<std::string, std::string>& test_params = {});
|
||||
|
||||
// Output utilities
|
||||
void writeResultsToFile(const std::string& filename);
|
||||
void printResults(bool verbose = false);
|
||||
void cleanupMPI();
|
||||
|
||||
// Timing utilities
|
||||
class Timer {
|
||||
public:
|
||||
Timer();
|
||||
void start();
|
||||
void stop();
|
||||
double elapsedMicroseconds() const;
|
||||
double elapsedMilliseconds() const;
|
||||
double elapsedSeconds() const;
|
||||
|
||||
private:
|
||||
std::chrono::high_resolution_clock::time_point start_time_;
|
||||
std::chrono::high_resolution_clock::time_point end_time_;
|
||||
bool is_running_;
|
||||
};
|
||||
|
||||
// CUDA utilities
|
||||
void cudaCheck(cudaError_t err, const char* file, int line);
|
||||
#define CUDA_CHECK(call) cudaCheck(call, __FILE__, __LINE__)
|
||||
|
||||
} // namespace utils
|
||||
|
||||
} // namespace test
|
||||
} // namespace mscclpp
|
||||
|
||||
#endif // MSCCLPP_TEST_PERF_FRAMEWORK_HPP_
|
||||
@@ -1,11 +1,13 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
target_sources(unit_tests PRIVATE
|
||||
unit_tests_main.cc
|
||||
core_tests.cc
|
||||
gpu_utils_tests.cc
|
||||
errors_tests.cc
|
||||
fifo_tests.cu
|
||||
fifo_perf_tests.cu
|
||||
numa_tests.cc
|
||||
socket_tests.cc
|
||||
utils_tests.cc
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "../framework.hpp"
|
||||
|
||||
#undef NDEBUG
|
||||
#ifndef DEBUG_BUILD
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/core.hpp>
|
||||
|
||||
class LocalCommunicatorTest : public ::testing::Test {
|
||||
#include "../framework.hpp"
|
||||
|
||||
// TODO: TransportFlags needs operator<< for EXPECT_EQ to work
|
||||
// Using ASSERT_TRUE with manual comparisons as workaround
|
||||
|
||||
class LocalCommunicatorTest : public ::mscclpp::test::TestCase {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
bootstrap = std::make_shared<mscclpp::TcpBootstrap>(0, 1);
|
||||
@@ -18,15 +20,15 @@ class LocalCommunicatorTest : public ::testing::Test {
|
||||
std::shared_ptr<mscclpp::Communicator> comm;
|
||||
};
|
||||
|
||||
TEST_F(LocalCommunicatorTest, RegisterMemory) {
|
||||
TEST(LocalCommunicatorTest, RegisterMemory) {
|
||||
int dummy[42];
|
||||
auto memory = comm->registerMemory(&dummy, sizeof(dummy), mscclpp::NoTransports);
|
||||
EXPECT_EQ(memory.data(), &dummy);
|
||||
EXPECT_EQ(memory.size(), sizeof(dummy));
|
||||
EXPECT_EQ(memory.transports(), mscclpp::NoTransports);
|
||||
ASSERT_TRUE(memory.transports() == mscclpp::NoTransports);
|
||||
}
|
||||
|
||||
TEST_F(LocalCommunicatorTest, SendMemoryToSelf) {
|
||||
TEST(LocalCommunicatorTest, SendMemoryToSelf) {
|
||||
int dummy[42];
|
||||
auto memory = comm->registerMemory(&dummy, sizeof(dummy), mscclpp::NoTransports);
|
||||
comm->sendMemory(memory, 0);
|
||||
@@ -34,5 +36,5 @@ TEST_F(LocalCommunicatorTest, SendMemoryToSelf) {
|
||||
auto sameMemory = memoryFuture.get();
|
||||
EXPECT_EQ(sameMemory.data(), memory.data());
|
||||
EXPECT_EQ(sameMemory.size(), memory.size());
|
||||
EXPECT_EQ(sameMemory.transports(), memory.transports());
|
||||
ASSERT_TRUE(sameMemory.transports() == memory.transports());
|
||||
}
|
||||
|
||||
@@ -1,30 +1,33 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/errors.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
// TODO: ErrorCode needs operator<< for EXPECT_EQ to work
|
||||
// Using ASSERT_TRUE with manual comparisons as workaround
|
||||
|
||||
TEST(ErrorsTest, SystemError) {
|
||||
mscclpp::Error error("test", mscclpp::ErrorCode::SystemError);
|
||||
EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::SystemError);
|
||||
ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::SystemError);
|
||||
EXPECT_EQ(error.what(), std::string("test (mscclpp failure: SystemError)"));
|
||||
}
|
||||
|
||||
TEST(ErrorsTest, InternalError) {
|
||||
mscclpp::Error error("test", mscclpp::ErrorCode::InternalError);
|
||||
EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::InternalError);
|
||||
ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::InternalError);
|
||||
EXPECT_EQ(error.what(), std::string("test (mscclpp failure: InternalError)"));
|
||||
}
|
||||
|
||||
TEST(ErrorsTest, InvalidUsage) {
|
||||
mscclpp::Error error("test", mscclpp::ErrorCode::InvalidUsage);
|
||||
EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::InvalidUsage);
|
||||
ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::InvalidUsage);
|
||||
EXPECT_EQ(error.what(), std::string("test (mscclpp failure: InvalidUsage)"));
|
||||
}
|
||||
|
||||
TEST(ErrorsTest, Timeout) {
|
||||
mscclpp::Error error("test", mscclpp::ErrorCode::Timeout);
|
||||
EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::Timeout);
|
||||
ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::Timeout);
|
||||
EXPECT_EQ(error.what(), std::string("test (mscclpp failure: Timeout)"));
|
||||
}
|
||||
|
||||
85
test/unit/fifo_perf_tests.cu
Normal file
85
test/unit/fifo_perf_tests.cu
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <mscclpp/fifo.hpp>
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
#include <mscclpp/numa.hpp>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
// Simple FIFO performance test to be run as part of unit_tests
|
||||
// This is a performance test that can be excluded from coverage runs
|
||||
// using the --exclude-perf-tests flag.
|
||||
|
||||
constexpr uint64_t TIMEOUT_SPINS = 1000000;
|
||||
constexpr int MIN_TRIGGERS = 100; // Reduced for faster unit test execution
|
||||
|
||||
__constant__ mscclpp::FifoDeviceHandle gFifoPerfDeviceHandle;
|
||||
|
||||
__global__ void kernelFifoPerfPush(size_t numTriggers) {
|
||||
mscclpp::FifoDeviceHandle& fifo = gFifoPerfDeviceHandle;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
mscclpp::ProxyTrigger trigger;
|
||||
for (size_t i = 1; i <= numTriggers; ++i) {
|
||||
trigger.fst = i;
|
||||
trigger.snd = tid ^ i;
|
||||
fifo.push(trigger);
|
||||
}
|
||||
}
|
||||
|
||||
static bool consumePerfTriggers(std::unique_ptr<mscclpp::Fifo>& hostFifo, int numTriggers, int parallel) {
|
||||
int totalTriggers = numTriggers * parallel;
|
||||
std::unordered_map<int, int> triggerCounts;
|
||||
for (int i = 0; i < totalTriggers; ++i) {
|
||||
mscclpp::ProxyTrigger trigger;
|
||||
uint64_t spin = 0;
|
||||
do {
|
||||
trigger = hostFifo->poll();
|
||||
if (spin++ > TIMEOUT_SPINS) {
|
||||
return false;
|
||||
}
|
||||
} while (trigger.fst == 0 || trigger.snd == 0);
|
||||
|
||||
trigger.snd ^= ((uint64_t)1 << (uint64_t)63);
|
||||
trigger.snd = trigger.snd ^ trigger.fst;
|
||||
if (triggerCounts[trigger.snd] + 1 != trigger.fst) {
|
||||
return false; // Validation failed
|
||||
}
|
||||
triggerCounts[trigger.snd]++;
|
||||
hostFifo->pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
PERF_TEST(FifoPerfTest, BasicPerformance) {
|
||||
int cudaDevice, numaNode;
|
||||
CUDA_CHECK(cudaGetDevice(&cudaDevice));
|
||||
numaNode = mscclpp::getDeviceNumaNode(cudaDevice);
|
||||
mscclpp::numaBind(numaNode);
|
||||
|
||||
const int fifoSize = 128;
|
||||
const int numTriggers = MIN_TRIGGERS;
|
||||
const int numParallel = 1;
|
||||
|
||||
auto hostFifo = std::make_unique<mscclpp::Fifo>(fifoSize);
|
||||
mscclpp::FifoDeviceHandle hostHandle = hostFifo->deviceHandle();
|
||||
CUDA_CHECK(cudaMemcpyToSymbol(gFifoPerfDeviceHandle, &hostHandle, sizeof(mscclpp::FifoDeviceHandle)));
|
||||
|
||||
cudaStream_t stream;
|
||||
CUDA_CHECK(cudaStreamCreate(&stream));
|
||||
|
||||
// Run kernel
|
||||
kernelFifoPerfPush<<<numParallel, 1, 0, stream>>>(numTriggers);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Process triggers
|
||||
bool success = consumePerfTriggers(hostFifo, numTriggers, numParallel);
|
||||
ASSERT_TRUE(success);
|
||||
|
||||
CUDA_CHECK(cudaStreamSynchronize(stream));
|
||||
CUDA_CHECK(cudaStreamDestroy(stream));
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
}
|
||||
@@ -1,13 +1,12 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/fifo.hpp>
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
#include <mscclpp/numa.hpp>
|
||||
#include <mscclpp/utils.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
#include "utils_internal.hpp"
|
||||
|
||||
#define ITER 10000 // should be larger than the FIFO size for proper testing
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
TEST(GpuUtilsTest, StreamPool) {
|
||||
auto streamPool = mscclpp::gpuStreamPool();
|
||||
cudaStream_t s;
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/core.hpp>
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
#include <mscclpp/port_channel.hpp>
|
||||
#include <mscclpp/port_channel_device.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
#define MAGIC_CONST 777
|
||||
|
||||
__constant__ mscclpp::PortChannelDeviceHandle gPortChannel;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/gpu_utils.hpp>
|
||||
#include <mscclpp/numa.hpp>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
TEST(NumaTest, Basic) {
|
||||
int num;
|
||||
MSCCLPP_CUDATHROW(cudaGetDeviceCount(&num));
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/utils.hpp>
|
||||
#include <thread>
|
||||
|
||||
#include "../framework.hpp"
|
||||
#include "socket.h"
|
||||
#include "utils_internal.hpp"
|
||||
|
||||
|
||||
6
test/unit/unit_tests_main.cc
Normal file
6
test/unit/unit_tests_main.cc
Normal file
@@ -0,0 +1,6 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
int main(int argc, char** argv) { return RUN_ALL_TESTS(); }
|
||||
@@ -1,10 +1,9 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include "../framework.hpp"
|
||||
#include "utils_internal.hpp"
|
||||
|
||||
TEST(UtilsInternalTest, getHostHash) {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <mscclpp/errors.hpp>
|
||||
#include <mscclpp/utils.hpp>
|
||||
#include <thread>
|
||||
|
||||
#include "../framework.hpp"
|
||||
|
||||
TEST(UtilsTest, getHostName) {
|
||||
std::string hostname1 = mscclpp::getHostName(1024, '.');
|
||||
EXPECT_FALSE(hostname1.empty());
|
||||
|
||||
Reference in New Issue
Block a user