Files
mscclpp/test/deploy/setup.sh
Binyang Li c12822a7af create CI pipeline for rocm (#718)
Create CI pipeline for AMD GPU.
2026-02-09 16:55:16 -08:00

39 lines
1.2 KiB
Bash

# Prepare a test node for the mscclpp deployment tests.
#
# Steps, in order:
#   1. Install the SSH public key and client config under /root/.ssh and
#      restrict permissions on the private key.
#   2. On the "cuda" platform, configure every NVIDIA GPU via nvidia-smi.
#   3. Build, run, and clean the peer-access test tool.
#   4. Install Python requirements matching CUDA_VERSION (11.x or 12.x).
#   5. Install the mscclpp Python package and write setuptools_scm version
#      files.
#   6. Start sshd on port 22345.
#
# Usage: setup.sh [PLATFORM]
#   PLATFORM      Defaults to "cuda". "cuda" and "rocm" trigger
#                 platform-specific steps; other values skip them.
# Environment:
#   CUDA_VERSION  Optional. Selects requirements_cuda11.txt or
#                 requirements_cuda12.txt when it contains "11." or "12.".
#
# Requires bash ([[ ]] and (( )) are used below).
set -euo pipefail

PLATFORM="${1:-cuda}"
readonly MSCCLPP_ROOT=/root/mscclpp
readonly SSH_DIR=/root/.ssh

# --- SSH credentials ---------------------------------------------------------
mkdir -p "${SSH_DIR}"
mv "${MSCCLPP_ROOT}/sshkey.pub" "${SSH_DIR}/authorized_keys"
chown root:root "${SSH_DIR}/authorized_keys"
mv "${MSCCLPP_ROOT}/test/deploy/config" "${SSH_DIR}/config"
chown root:root "${SSH_DIR}/config"
chmod 400 "${MSCCLPP_ROOT}/sshkey"
chown root:root "${MSCCLPP_ROOT}/sshkey"

# --- GPU clocks (NVIDIA only) ------------------------------------------------
if [[ "${PLATFORM}" == "cuda" ]]; then
  nvidia-smi -pm 1
  # One line of `nvidia-smi -L` output per GPU.
  gpu_count=$(nvidia-smi -L | wc -l)
  for ((i = 0; i < gpu_count; i++)); do
    # Assign first so a failing query aborts the script under `set -e`
    # instead of being hidden inside an argument-position substitution.
    max_clocks=$(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm \
      --format=csv,noheader,nounits -i "${i}")
    # Drop the first space from the queried value before handing it to -ac.
    nvidia-smi -ac "${max_clocks/ /}" -i "${i}"
  done
fi

# --- Peer-access check -------------------------------------------------------
make -C "${MSCCLPP_ROOT}/tools/peer-access-test"
"${MSCCLPP_ROOT}/tools/peer-access-test/peer_access_test"
make -C "${MSCCLPP_ROOT}/tools/peer-access-test" clean

# --- Python requirements -----------------------------------------------------
# CUDA_VERSION may be unset; default to empty so `set -u` does not abort
# and neither branch is taken.
cuda_version="${CUDA_VERSION:-}"
if [[ "${cuda_version}" == *"11."* ]]; then
  pip3 install -r "${MSCCLPP_ROOT}/python/requirements_cuda11.txt"
elif [[ "${cuda_version}" == *"12."* ]]; then
  pip3 install -r "${MSCCLPP_ROOT}/python/requirements_cuda12.txt"
fi

# --- Package install ---------------------------------------------------------
if [[ "${PLATFORM}" == "rocm" ]]; then
  export CXX=/opt/rocm/bin/hipcc
fi

# `cd` is a standalone command on purpose: inside `cd ... && pip3 ...` a
# failing `cd` is exempt from `set -e`, and the remaining steps would run
# from the wrong directory.
cd "${MSCCLPP_ROOT}"
pip3 install .
pip3 install setuptools_scm
python3 -m setuptools_scm --force-write-version-files

# --- SSH daemon --------------------------------------------------------------
mkdir -p /var/run/sshd
/usr/sbin/sshd -p 22345