diff --git a/.azure-pipelines/templates/deploy.yml b/.azure-pipelines/templates/deploy.yml index fc116acf..2f642f1d 100644 --- a/.azure-pipelines/templates/deploy.yml +++ b/.azure-pipelines/templates/deploy.yml @@ -94,7 +94,27 @@ steps: du -sh build/bin/* 2>/dev/null || true workingDirectory: '$(System.DefaultWorkingDirectory)' -# 2. Download SSH key + install packages + start VMSS +# 2. Write CMake args for pip install on remote VMs +- task: Bash@3 + name: WritePipCmakeArgs + displayName: Write pip CMake args + inputs: + targetType: 'inline' + script: | + set -e + PIP_CMAKE_ARGS="" + if [ -n "${{ parameters.gpuArch }}" ]; then + PIP_CMAKE_ARGS="-DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }}" + fi + CMAKE_EXTRA_ARGS='${{ parameters.cmakeArgs }}' + if [ -n "${CMAKE_EXTRA_ARGS}" ]; then + PIP_CMAKE_ARGS="${PIP_CMAKE_ARGS} ${CMAKE_EXTRA_ARGS}" + fi + echo "${PIP_CMAKE_ARGS}" > pip_cmake_args.txt + echo "pip CMake args: $(cat pip_cmake_args.txt)" + workingDirectory: '$(System.DefaultWorkingDirectory)' + +# 3. Download SSH key + install packages + start VMSS - task: DownloadSecureFile@1 name: SshKeyFile displayName: Download key file @@ -120,7 +140,7 @@ steps: inlineScript: | az vmss start --name ${{ parameters.vmssName }} --resource-group ${{ parameters.resourceGroup }} -# 3. Deploy test environment +# 4. Deploy test environment - task: Bash@3 name: DeployTestEnv displayName: Deploy Test Env diff --git a/.azure-pipelines/templates/ut-npkit.yml b/.azure-pipelines/templates/ut-npkit.yml index e53b5cf5..1bd89caf 100644 --- a/.azure-pipelines/templates/ut-npkit.yml +++ b/.azure-pipelines/templates/ut-npkit.yml @@ -28,7 +28,7 @@ steps: grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json - grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_SEND_ENTRY ./npkit_output/npkit_event_trace.json - template: run-remote-task.yml parameters: @@ -42,14 +42,14 @@ steps: grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json - grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_SEND_ENTRY ./npkit_output/npkit_event_trace.json rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce_packet.json' python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json - grep -q NPKIT_EVENT_EXECUTOR_COPY_PACKET_ENTRY ./npkit_output/npkit_event_trace.json - grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKET_ENTRY ./npkit_output/npkit_event_trace.json - grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKET_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKETS_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKETS_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_UNPACK_PACKETS_ENTRY ./npkit_output/npkit_event_trace.json - template: stop.yml parameters: diff --git a/test/deploy/setup.sh b/test/deploy/setup.sh index 80cd10b1..d4996cc2 100644 --- a/test/deploy/setup.sh +++ b/test/deploy/setup.sh @@ -30,6 +30,12 @@ fi if [ "${PLATFORM}" == "rocm" ]; then export CXX=/opt/rocm/bin/hipcc fi + +PIP_CMAKE_ARGS_FILE="/root/mscclpp/pip_cmake_args.txt" +if [ -f "${PIP_CMAKE_ARGS_FILE}" ]; then + export CMAKE_ARGS="$(cat ${PIP_CMAKE_ARGS_FILE})" + echo "Using CMAKE_ARGS: ${CMAKE_ARGS}" +fi cd /root/mscclpp && pip3 install . pip3 install setuptools_scm python3 -m setuptools_scm --force-write-version-files diff --git a/tools/npkit/npkit_trace_generator.py b/tools/npkit/npkit_trace_generator.py index c5ed6191..294516e6 100644 --- a/tools/npkit/npkit_trace_generator.py +++ b/tools/npkit/npkit_trace_generator.py @@ -14,25 +14,25 @@ def parse_npkit_event_header(npkit_event_header_path): "NOP", "BARRIER", "PUT", - "PUT_PACKET", - "READ_PUT_PACKET", + "PUT_PACKETS", + "READ_PUT_PACKETS", "PUT_WITH_SIGNAL", "PUT_WITH_SIGNAL_AND_FLUSH", "GET", "COPY", - "COPY_PACKET", - "TRANSFORM_TO_PACKET", + "COPY_PACKETS", + "UNPACK_PACKETS", "SIGNAL", "WAIT", "FLUSH", "REDUCE", - "REDUCE_PACKET", + "REDUCE_PACKETS", "REDUCE_COPY_PACKETS", "REDUCE_SEND", - "REDUCE_SEND_PACKET", + "REDUCE_SEND_PACKETS", "REDUCE_COPY_SEND_PACKETS", - "READ_REDUCE_COPY", - "READ_REDUCE_COPY_SEND", + "READ_REDUCE", + "READ_REDUCE_SEND", "MULTI_LOAD_REDUCE_STORE", "RELAXED_SIGNAL", "RELAXED_WAIT",