mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
## Summary

- Fix `run-remote.sh` to correctly execute multi-command scripts (e.g., multiple `mpirun` calls).
- The old approach piped the decoded script through `base64 -d | bash`, which feeds the script to bash via **stdin**. When `mpirun` (or one of its child processes) runs, it can consume the remaining stdin, so bash never sees the subsequent commands — only the first command executes.
- The fix decodes the script to a **temp file** and runs `bash -euxo pipefail "$TMP"` instead, so bash reads commands from the file and `mpirun` consuming stdin has no effect.
- Applied to both the docker path (pssh + docker exec) and the non-docker path (pssh only).

🤖 Generated with [Claude Code](https://claude.com/claude-code)
111 lines
2.8 KiB
Bash
Executable File
111 lines
2.8 KiB
Bash
Executable File
#!/bin/bash
# Run a command on remote CI VMs via parallel-ssh.
# By default, runs inside the mscclpp-test docker container.
#
# Usage:
#   run-remote.sh [OPTIONS] < <command_script>
#
# Options:
#   --no-docker   Run command directly on the host, not inside docker
#   --no-log      Don't tail the log file in the background
#   --hostfile    Override hostfile path (default: test/deploy/hostfile_ci)
#   --host        Run command on a single host (uses parallel-ssh -H)
#   --user        SSH user when using --host or custom hostfile

# Abort this wrapper as soon as any local command fails.
set -e

# Directory that contains this script; the default hostfile is looked up here.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Hostfile handed to parallel-ssh -h unless --hostfile or --host overrides it.
HOSTFILE="${SCRIPT_DIR}/hostfile_ci"
# SSH configuration option forwarded through parallel-ssh -O.
SSH_OPTION="StrictHostKeyChecking=no"
# Private key path, taken from the SSHKEYFILE_SECUREFILEPATH environment
# variable and passed to ssh as its -i argument.
KeyFilePath="${SSHKEYFILE_SECUREFILEPATH}"

# Defaults; each can be changed by the command-line options parsed below.
USE_DOCKER=true   # --no-docker sets this to false
USE_LOG=true      # --no-log sets this to false; not read anywhere else in the visible script
TARGET_HOST=""    # --host <name>
REMOTE_USER=""    # --user <name>

# Print the one-line usage synopsis to stderr.
# Arguments: none ($0 is used for the program name).
# Outputs:   usage text on stderr; nothing on stdout.
usage() {
  printf '%s\n' "Usage: $0 [--no-docker] [--no-log] [--hostfile <path>] [--host <name>] [--user <name>] < <command_script>" >&2
}

# Ensure that an option which takes an argument actually received one.
# Arguments:
#   $1 - option name, used only in the error message
#   $2 - candidate value for that option
# Outputs:   "Missing value for <option>" on stderr when the value is rejected.
# Returns:   0 when the value is usable; otherwise exits the script with 1.
require_value() {
  local opt="$1"
  local val="$2"

  # An empty value means the option was the last argument. A value that
  # itself starts with "--" is the next option, not a value, so treat
  # "--host --no-docker" as a missing value instead of a host name.
  if [[ -z "$val" || "$val" == --* ]]; then
    echo "Missing value for ${opt}" >&2
    exit 1
  fi
}

# Consume leading "--" options; the loop stops at the first argument that
# does not start with "--".
while [[ "$1" == --* ]]; do
  case "$1" in
    --no-docker) USE_DOCKER=false; shift ;;
    --no-log) USE_LOG=false; shift ;;
    --hostfile)
      # ${2-} expands to an empty string when no further argument exists,
      # which require_value reports as a missing value.
      require_value "--hostfile" "${2-}"
      HOSTFILE="$2"
      shift 2
      ;;
    --host)
      require_value "--host" "${2-}"
      TARGET_HOST="$2"
      shift 2
      ;;
    --user)
      require_value "--user" "${2-}"
      REMOTE_USER="$2"
      shift 2
      ;;
    *) echo "Unknown option: $1" >&2; exit 1 ;;
  esac
done

# No positional arguments are accepted, and the command script must be
# supplied on stdin (a terminal on fd 0 means nothing was redirected).
if [ $# -ne 0 ] || [ -t 0 ]; then
  usage
  exit 1
fi

# Read the whole command script from stdin; an empty script is a usage error.
CMD=$(cat)
if [ -z "$CMD" ]; then
  usage
  exit 1
fi
# Base64-encode the script onto a single line (base64 may wrap its output,
# hence tr) so it can be embedded in the remote command string.
CMD_B64=$(printf '%s' "$CMD" | base64 | tr -d '\n')

# Target selection: a single host (-H) when --host was given, otherwise the
# hostfile (-h).
PSSH_TARGET_ARGS=()
if [ -n "$TARGET_HOST" ]; then
  PSSH_TARGET_ARGS=(-H "$TARGET_HOST")
else
  PSSH_TARGET_ARGS=(-h "$HOSTFILE")
fi

# Optional remote login name (-l), only when --user was given.
PSSH_USER_ARGS=()
if [ -n "$REMOTE_USER" ]; then
  PSSH_USER_ARGS=(-l "$REMOTE_USER")
fi

# Identity file for ssh, only when a key path is configured. Emitting
# -x "-i " with an empty path would hand ssh a dangling -i that consumes the
# next argument; without a key path ssh falls back to its default identities.
PSSH_KEY_ARGS=()
if [ -n "$KeyFilePath" ]; then
  PSSH_KEY_ARGS=(-x "-i ${KeyFilePath}")
fi

# Arguments shared by every parallel-ssh invocation below.
PSSH_COMMON=(
  -t 0
  "${PSSH_TARGET_ARGS[@]}"
  "${PSSH_USER_ARGS[@]}"
  "${PSSH_KEY_ARGS[@]}"
  -O "$SSH_OPTION"
)

# Ship the base64-encoded script to the remote side, decode it into a temp
# file and run that file with bash. Running from a file (not stdin) keeps
# programs started by the script from swallowing the remaining commands.
# The temp file is removed by an EXIT trap so it is cleaned up even when the
# script fails and "set -e" aborts the remote shell before any later command.
if $USE_DOCKER; then
  # INNER is expanded three times: by this shell, by the remote login shell
  # (inside the double quotes of bash -c "..."), and by bash in the container.
  # \\\$ and \\\" therefore reach the container shell as plain $ and ".
  INNER="set -euxo pipefail;"
  INNER+=" cd /root/mscclpp;"
  INNER+=" export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\\\$LD_LIBRARY_PATH;"
  INNER+=" CMD_B64='${CMD_B64}';"
  INNER+=" TMP=\\\$(mktemp);"
  INNER+=" trap 'rm -f \\\"\\\$TMP\\\"' EXIT;"
  INNER+=" printf '%s' \\\"\\\$CMD_B64\\\" | base64 -d > \\\"\\\$TMP\\\";"
  INNER+=" bash -euxo pipefail \\\"\\\$TMP\\\""

  parallel-ssh -i "${PSSH_COMMON[@]}" \
    "sudo docker exec mscclpp-test bash -c \"${INNER}\""
else
  # Only one level of remote expansion here, so a single backslash suffices.
  parallel-ssh -i "${PSSH_COMMON[@]}" \
    "set -euxo pipefail; CMD_B64='${CMD_B64}'; TMP=\$(mktemp); trap 'rm -f \"\$TMP\"' EXIT; printf '%s' \"\$CMD_B64\" | base64 -d > \"\$TMP\"; bash -euxo pipefail \"\$TMP\""
fi