Files
mscclpp/test/deploy/setup.sh
Changho Hwang a2f1279c60 Test peer accessibility after deployment (#661)
Test GPUs' peer accessibility before integration testing to distinguish
VM issues.
2025-10-24 11:09:36 -07:00

32 lines
997 B
Bash

set -e
mkdir -p /root/.ssh
mv /root/mscclpp/sshkey.pub /root/.ssh/authorized_keys
chown root:root /root/.ssh/authorized_keys
mv /root/mscclpp/test/deploy/config /root/.ssh/config
chown root:root /root/.ssh/config
chmod 400 /root/mscclpp/sshkey
chown root:root /root/mscclpp/sshkey
nvidia-smi -pm 1
for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do
nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i
done
make -C /root/mscclpp/tools/peer-access-test
/root/mscclpp/tools/peer-access-test/peer_access_test
make -C /root/mscclpp/tools/peer-access-test clean
if [[ "${CUDA_VERSION}" == *"11."* ]]; then
pip3 install -r /root/mscclpp/python/requirements_cuda11.txt
else
pip3 install -r /root/mscclpp/python/requirements_cuda12.txt
fi
cd /root/mscclpp && pip3 install .
pip3 install setuptools_scm
python3 -m setuptools_scm --force-write-version-files
mkdir -p /var/run/sshd
/usr/sbin/sshd -p 22345