add sglang all_reduce

This commit is contained in:
empyreus
2026-03-31 15:47:36 +00:00
parent f938f60505
commit 48a6a2e441
2 changed files with 44 additions and 29 deletions

View File

@@ -44,37 +44,57 @@ steps:
pip install --upgrade pip
pip install -e "python"
- template: run-remote-task.yml
parameters:
name: RunSGLangTestBatchSize1
displayName: Run SGLang Test Batch Size 1
runRemoteArgs: '--container mscclpp-sglang-test'
remoteScript: |
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 1 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
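# NOTE(review): the four bench_one_batch steps below (batch sizes 1, 2, 32, 64)
# are commented out rather than removed - confirm whether they should be
# re-enabled or deleted before this lands.
# - template: run-remote-task.yml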
# parameters:
# name: RunSGLangTestBatchSize1
# displayName: Run SGLang Test Batch Size 1
# runRemoteArgs: '--container mscclpp-sglang-test'
# remoteScript: |
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 1 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangTestBatchSize2
# displayName: Run SGLang Test Batch Size 2
# runRemoteArgs: '--container mscclpp-sglang-test'
# remoteScript: |
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 2 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangTestBatchSize32
# displayName: Run SGLang Test Batch Size 32
# runRemoteArgs: '--container mscclpp-sglang-test'
# remoteScript: |
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 32 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangTestBatchSize64
# displayName: Run SGLang Test Batch Size 64
# runRemoteArgs: '--container mscclpp-sglang-test'
# remoteScript: |
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 64 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
- template: run-remote-task.yml
parameters:
name: RunSGLangTestBatchSize2
displayName: Run SGLang Test Batch Size 2
name: RunSGLangTestAllReduce
displayName: Run SGLang Test All Reduce
runRemoteArgs: '--container mscclpp-sglang-test'
remoteScript: |
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 2 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Values consumed by the torchrun invocation in this script: WORLD_SIZE is
# passed as --nnodes, RANK as --node_rank, and MASTER_ADDR / MASTER_PORT as
# --master_addr / --master_port.
export WORLD_SIZE=1
export RANK=0
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=12345
- template: run-remote-task.yml
parameters:
name: RunSGLangTestBatchSize32
displayName: Run SGLang Test Batch Size 32
runRemoteArgs: '--container mscclpp-sglang-test'
remoteScript: |
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 32 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
cd sglang
- template: run-remote-task.yml
parameters:
name: RunSGLangTestBatchSize64
displayName: Run SGLang Test Batch Size 64
runRemoteArgs: '--container mscclpp-sglang-test'
remoteScript: |
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 64 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Run benchmark/kernels/all_reduce/benchmark_mscclpp.py through torchrun; the
# node count, node rank and master address/port are taken from the
# WORLD_SIZE, RANK, MASTER_ADDR and MASTER_PORT variables exported earlier.
torchrun --nproc_per_node gpu \
--nnodes $WORLD_SIZE \
--node_rank $RANK \
--master_addr $MASTER_ADDR \
--master_port $MASTER_PORT benchmark/kernels/all_reduce/benchmark_mscclpp.py
- template: stop.yml
parameters:

View File

@@ -30,9 +30,4 @@ RUN python3 -m venv /root/venv && \
echo 'source /root/venv/bin/activate' >> /root/.bashrc
ENV PATH="/root/venv/bin:${PATH}"
# Install SGLang
RUN pip install --upgrade pip && \
pip install uv && \
uv pip install sglang
WORKDIR /