debugging

This commit is contained in:
Changho Hwang
2026-03-10 22:18:41 +00:00
parent 7a87c2c856
commit cf505d777a
3 changed files with 24 additions and 3 deletions

View File

@@ -27,6 +27,11 @@ steps:
name: TestsCoverageNonPerf
displayName: Run unit_tests + mp_unit_tests (non-perf) with coverage
remoteScript: |
echo "=== build/bin/ contents ==="
ls -la build/bin/ 2>&1 || echo "ERROR: build/bin/ not found"
echo "=== build/ top-level ==="
ls build/ 2>&1 || echo "ERROR: build/ not found"
BUILD_PREFIX=$(cat build/BUILD_PREFIX)
STRIP_COUNT=$(echo $BUILD_PREFIX | tr -cd / | wc -c)
export GCOV_PREFIX=/root/mscclpp

View File

@@ -89,6 +89,9 @@ steps:
make -j
cd ..
pwd > build/BUILD_PREFIX
echo "=== Build artifacts ==="
ls -la build/bin/ || echo "ERROR: build/bin/ missing after build"
du -sh build/bin/* 2>/dev/null || true
workingDirectory: '$(System.DefaultWorkingDirectory)'
# 2. Download SSH key + install packages + start VMSS

View File

@@ -1,4 +1,4 @@
set -e
set -ex
TEST_NAME=$1
IB_ENVIRONMENT="${2:-true}"
@@ -32,8 +32,21 @@ while true; do
done
set -e
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION "sudo rm -rf ${DST_DIR}"
parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
# Transfer workspace to remote hosts via tar+ssh (more reliable than parallel-scp for large files).
#
# The loop reads HOSTFILE on its own stdin, one host per line. Any ssh started
# inside the loop inherits that stdin, so every ssh that is not fed by a pipe is
# run with -n (stdin from /dev/null). Without it the first ssh swallows the
# remaining host lines and only the first host ever gets deployed.
# The `|| [ -n ... ]` guard keeps a final line that has no trailing newline.
while IFS= read -r HOST || [ -n "${HOST}" ]; do
  # Skip blank lines and '#' comment lines instead of passing them to ssh as a
  # destination.
  case "${HOST}" in
    '' | '#'*) continue ;;
  esac
  # Split an optional "user@" prefix off the entry. When the line has no '@',
  # both expansions return the whole line, so HOST_USER is cleared.
  # NOTE(review): HOST_ADDR/HOST_USER are not used below; kept in case later
  # parts of the script read them — confirm and drop if unused.
  HOST_ADDR="${HOST##*@}"
  HOST_USER="${HOST%%@*}"
  if [ "${HOST_USER}" = "${HOST_ADDR}" ]; then
    HOST_USER=""
  fi
  SSH_DEST="${HOST}"
  echo "Deploying to ${SSH_DEST}..."
  # SSH_OPTION is intentionally left unquoted, matching how the rest of the
  # script passes it.
  # Start from an empty destination directory on the remote host.
  ssh -n -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" "sudo rm -rf ${DST_DIR} && mkdir -p ${DST_DIR}"
  # Stream ROOT_DIR as a tar archive; --strip-components=1 drops the top-level
  # directory name so the contents land directly in DST_DIR. This ssh takes its
  # stdin from the pipe, so it must NOT use -n.
  tar cf - -C "$(dirname "${ROOT_DIR}")" "$(basename "${ROOT_DIR}")" | \
    ssh -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" "tar xf - -C ${DST_DIR} --strip-components=1"
  echo "Verifying transfer to ${SSH_DEST}..."
  # Diagnostic only: the remote command always exits 0 and just prints an ERROR
  # line when build/bin/ is absent.
  ssh -n -i "${KeyFilePath}" -o ${SSH_OPTION} "${SSH_DEST}" "ls ${DST_DIR}/build/bin/ 2>&1 || echo 'ERROR: build/bin/ missing after transfer'"
done < "${HOSTFILE}"
if [ "${PLATFORM}" == "rocm" ]; then
parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION "sudo modprobe amdgpu"