mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-04-20 06:49:29 +00:00
debugging
This commit is contained in:
@@ -27,6 +27,11 @@ steps:
|
||||
name: TestsCoverageNonPerf
|
||||
displayName: Run unit_tests + mp_unit_tests (non-perf) with coverage
|
||||
remoteScript: |
|
||||
echo "=== build/bin/ contents ==="
|
||||
ls -la build/bin/ 2>&1 || echo "ERROR: build/bin/ not found"
|
||||
echo "=== build/ top-level ==="
|
||||
ls build/ 2>&1 || echo "ERROR: build/ not found"
|
||||
|
||||
BUILD_PREFIX=$(cat build/BUILD_PREFIX)
|
||||
STRIP_COUNT=$(echo $BUILD_PREFIX | tr -cd / | wc -c)
|
||||
export GCOV_PREFIX=/root/mscclpp
|
||||
|
||||
@@ -89,6 +89,9 @@ steps:
|
||||
make -j
|
||||
cd ..
|
||||
pwd > build/BUILD_PREFIX
|
||||
echo "=== Build artifacts ==="
|
||||
ls -la build/bin/ || echo "ERROR: build/bin/ missing after build"
|
||||
du -sh build/bin/* 2>/dev/null || true
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
|
||||
# 2. Download SSH key + install packages + start VMSS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
TEST_NAME=$1
|
||||
IB_ENVIRONMENT="${2:-true}"
|
||||
@@ -32,8 +32,21 @@ while true; do
|
||||
done
|
||||
|
||||
set -e
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION "sudo rm -rf ${DST_DIR}"
|
||||
parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
|
||||
# Transfer workspace to remote hosts via tar+ssh (more reliable than parallel-scp for large files)
|
||||
# Deploy the workspace to every host listed in ${HOSTFILE}, one host at a time.
#
# Reads (globals): HOSTFILE    - file with one ssh destination ([user@]host) per line
#                  KeyFilePath - ssh identity file passed via -i
#                  SSH_OPTION  - value passed to ssh via -o
#                  ROOT_DIR    - local directory to transfer
#                  DST_DIR     - directory on the remote host that receives the copy
# Writes (globals): HOST, HOST_ADDR, HOST_USER, SSH_DEST. They are intentionally
#                   not declared local, so they stay visible to the rest of the
#                   script exactly as with the original top-level loop.
#
# For each host: wipe and recreate ${DST_DIR}, stream ${ROOT_DIR} through
# tar-over-ssh, then list ${DST_DIR}/build/bin/ as a transfer check.
deploy_workspace_to_hosts() {
  # `|| [ -n "${HOST}" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r HOST || [ -n "${HOST}" ]; do
    # A blank line would otherwise run ssh with an empty destination.
    [ -n "${HOST}" ] || continue

    # Split "[user@]host"; when there is no "user@" part both expansions
    # yield the whole string, in which case the user is cleared.
    # NOTE(review): HOST_ADDR/HOST_USER are not used in the visible code.
    HOST_ADDR="${HOST##*@}"
    HOST_USER="${HOST%%@*}"
    if [ "${HOST_USER}" = "${HOST_ADDR}" ]; then
      HOST_USER=""
    fi
    SSH_DEST="${HOST}"

    echo "Deploying to ${SSH_DEST}..."
    # stdin comes from /dev/null: the loop reads ${HOSTFILE} on stdin, and an
    # ssh that inherits it would swallow the remaining host lines, ending the
    # loop after the first host.
    # SSH_OPTION is left unquoted as in the original (its shape is not visible here).
    # shellcheck disable=SC2086
    ssh -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" \
      "sudo rm -rf ${DST_DIR} && mkdir -p ${DST_DIR}" < /dev/null

    # This ssh takes its stdin from the tar pipe, so it cannot touch the host list.
    # shellcheck disable=SC2086
    tar cf - -C "$(dirname "${ROOT_DIR}")" "$(basename "${ROOT_DIR}")" | \
      ssh -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" \
        "tar xf - -C ${DST_DIR} --strip-components=1"

    echo "Verifying transfer to ${SSH_DEST}..."
    # Diagnostic only: the remote `|| echo` keeps a missing directory from
    # failing the command.
    # shellcheck disable=SC2086
    ssh -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" \
      "ls ${DST_DIR}/build/bin/ 2>&1 || echo 'ERROR: build/bin/ missing after transfer'" < /dev/null
  done < "${HOSTFILE}"
}

deploy_workspace_to_hosts
|
||||
|
||||
if [ "${PLATFORM}" == "rocm" ]; then
|
||||
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION "sudo modprobe amdgpu"
|
||||
|
||||
Reference in New Issue
Block a user