Files
mscclpp/test/deploy/deploy.sh
Copilot 93f6eeaa6b Remove GTest dependency, add code coverage, and refactor unit tests and CI pipelines (#744)
- Removes the GTest dependency, replacing it with a minimal custom
framework (`test/framework.*`) that covers only what the tests actually
use — a unified `TEST()` macro with SFINAE-based fixture auto-detection,
`EXPECT_*`/`ASSERT_*` assertions, environments, and setup/teardown.
- Adds an `--exclude-perf-tests` flag and substring-based negative filtering
- Adds an `MSCCLPP_ENABLE_COVERAGE` CMake option with gcov/lcov; CI uploads to
Codecov
- Merges standalone `test/perf/` into main test targets
- Refactors Azure pipelines to reduce redundancy and make them more readable

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
2026-03-24 23:34:38 -04:00

64 lines
2.4 KiB
Bash

# deploy.sh: copies the source tree to every host in a hostfile and starts the
# `mscclpp-test` container there.
#
# Usage: deploy.sh TEST_NAME [IB_ENVIRONMENT] [PLATFORM]
#   TEST_NAME       selects the hostfile; "nccltest-single-node" additionally
#                   switches the source directory to the `mscclpp` subdirectory
#   IB_ENVIRONMENT  defaults to "true"; selects the `docker run` flag set
#   PLATFORM        defaults to "cuda"; "rocm" enables the AMD-specific steps
# Environment (read): SSHKEYFILE_SECUREFILEPATH, SYSTEM_DEFAULTWORKINGDIRECTORY,
#                     CONTAINERIMAGE
set -ex

# Positional arguments, with defaults for the optional ones.
TEST_NAME=$1
IB_ENVIRONMENT="${2:-true}"
PLATFORM="${3:-cuda}"

# Fixed settings.
DST_DIR="/tmp/mscclpp"
SSH_OPTION="StrictHostKeyChecking=no"

# Private key used for every SSH/SCP connection, and the local tree to deploy.
KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
ROOT_DIR="${SYSTEM_DEFAULTWORKINGDIRECTORY}/"

# Single-node test flavours use the CI hostfile; everything else uses the
# regular one.
HOSTFILE_NAME="hostfile"
case "${TEST_NAME}" in
  nccltest-single-node)
    # For this test the repository lives in an `mscclpp` subdirectory of the
    # working directory, so both path roots are moved down one level.
    ROOT_DIR="${ROOT_DIR}/mscclpp"
    SYSTEM_DEFAULTWORKINGDIRECTORY="${SYSTEM_DEFAULTWORKINGDIRECTORY}/mscclpp"
    HOSTFILE_NAME="hostfile_ci"
    ;;
  single-node-test)
    HOSTFILE_NAME="hostfile_ci"
    ;;
esac
# Resolved after the case so it sees the possibly adjusted working directory.
HOSTFILE="${SYSTEM_DEFAULTWORKINGDIRECTORY}/test/deploy/${HOSTFILE_NAME}"
# Make the provided private key readable by its owner only; it is passed to
# ssh/scp as the identity file (`-i`) by the commands below.
chmod 400 "${KeyFilePath}"

# Generate a fresh passphrase-less RSA key pair (./sshkey and ./sshkey.pub) in
# the current directory. This script does not use the pair itself; presumably
# it is consumed by a later setup step (verify against setup.sh).
# Leftovers from a previous run are removed first: ssh-keygen asks for
# confirmation before overwriting an existing file, which makes a
# non-interactive rerun in a reused workspace fail.
rm -f -- sshkey sshkey.pub
ssh-keygen -t rsa -f sshkey -P ""
# Poll every 5 seconds until parallel-ssh reports success for a trivial
# `hostname` command on the hosts in ${HOSTFILE}. There is no retry limit:
# the loop only ends once the command succeeds.
# The command is tested directly as the loop condition, where errexit does not
# apply, so no `set +e` / `$?` bookkeeping is needed.
until parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" "hostname"; do
  echo "Waiting for sshd to start..."
  sleep 5
done
# Make sure errexit is on for the deployment steps that follow (no-op if it
# already is).
set -e
# Remove any previous copy of the source tree on every host. `:?` aborts the
# script instead of sending an operand-less `rm -rf` if DST_DIR is unset/empty.
parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" "sudo rm -rf ${DST_DIR:?}"

# Recursively copy the local source tree to DST_DIR on every host. Paths are
# quoted so a working directory containing spaces is passed as one argument.
parallel-scp -t 0 -r -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" "${ROOT_DIR}" "${DST_DIR}"

# ROCm hosts: load the amdgpu kernel module before any container is started.
if [ "${PLATFORM}" == "rocm" ]; then
  parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" "sudo modprobe amdgpu"
fi

# force to pull the latest image
parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" \
  "sudo docker pull ${CONTAINERIMAGE}"
# Start a detached `mscclpp-test` container on every host.
#
# Reads:  PLATFORM, IB_ENVIRONMENT, DST_DIR, CONTAINERIMAGE, HOSTFILE,
#         KeyFilePath, SSH_OPTION
# Writes: LAUNCH_OPTION (device flags for `docker run`)

# Device access flags: NVIDIA GPUs via --gpus by default, or the kfd/dri
# device nodes plus the `video` group on ROCm.
if [ "${PLATFORM}" == "rocm" ]; then
  LAUNCH_OPTION="--device=/dev/kfd --device=/dev/dri --group-add=video"
else
  LAUNCH_OPTION="--gpus=all"
fi

# Flags shared by both launch variants: working directory, the deployed source
# tree and /opt/microsoft mounted into the container, unlimited locked memory,
# a fixed container name and bash as the entrypoint.
CONTAINER_OPTION="-w /root -v ${DST_DIR}:/root/mscclpp -v /opt/microsoft:/opt/microsoft --ulimit memlock=-1:-1 --name=mscclpp-test --entrypoint /bin/bash ${CONTAINERIMAGE}"

# The two variants differ only in how privileges are granted: a fully
# privileged container when IB_ENVIRONMENT is "true", otherwise SYS_ADMIN plus
# an unconfined seccomp profile. The remote command strings are unchanged.
if [ "${IB_ENVIRONMENT}" == "true" ]; then
  DOCKER_RUN_CMD="sudo docker run --rm -itd --privileged --net=host --ipc=host ${LAUNCH_OPTION} ${CONTAINER_OPTION}"
else
  DOCKER_RUN_CMD="sudo docker run --rm -itd --net=host --ipc=host ${LAUNCH_OPTION} --cap-add=SYS_ADMIN --security-opt seccomp=unconfined ${CONTAINER_OPTION}"
fi

# Single invocation for both variants.
parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" "${DOCKER_RUN_CMD}"
# Run test/deploy/setup.sh from the mounted source tree as root inside the
# running `mscclpp-test` container on every host, passing the platform name as
# its only argument. Hostfile path and SSH option are quoted so each is passed
# to parallel-ssh as exactly one argument.
parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" -O "${SSH_OPTION}" \
  "sudo docker exec -t --user root mscclpp-test bash '/root/mscclpp/test/deploy/setup.sh' ${PLATFORM}"