Disable FlashInfer version check

This commit is contained in:
empyreus
2026-05-06 03:12:16 +00:00
parent 1ca7b65db7
commit 3b96b5ab6e

View File

@@ -49,41 +49,41 @@ steps:
pip install --upgrade pip
pip install -e .
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangBenchOneBatch1
# displayName: Run SGLang Bench One Batch - 1
# runRemoteArgs: '--container sglang-mscclpp-test'
# remoteScript: |
# export FLASHINFER_DISABLE_VERSION_CHECK=1
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 1 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Benchmark step (batch size 1): invokes the run-remote-task.yml template with
# '--container sglang-mscclpp-test' as runRemoteArgs. The remote script runs
# sglang.bench_one_batch on Qwen/Qwen3-8B with --batch 1, 256-token input and
# output lengths, --tp-size 8, --disable-custom-all-reduce and --enable-mscclpp.
# FLASHINFER_DISABLE_VERSION_CHECK=1 is exported before the benchmark;
# presumably this skips FlashInfer's version check -- TODO confirm.
- template: run-remote-task.yml
parameters:
name: RunSGLangBenchOneBatch1
displayName: Run SGLang Bench One Batch - 1
runRemoteArgs: '--container sglang-mscclpp-test'
remoteScript: |
export FLASHINFER_DISABLE_VERSION_CHECK=1
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 1 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangBenchOneBatch2
# displayName: Run SGLang Bench One Batch - 2
# runRemoteArgs: '--container sglang-mscclpp-test'
# remoteScript: |
# export FLASHINFER_DISABLE_VERSION_CHECK=1
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 2 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Benchmark step (batch size 2): invokes the run-remote-task.yml template with
# '--container sglang-mscclpp-test' as runRemoteArgs. The remote script runs
# sglang.bench_one_batch on Qwen/Qwen3-8B with --batch 2, 256-token input and
# output lengths, --tp-size 8, --disable-custom-all-reduce and --enable-mscclpp.
# FLASHINFER_DISABLE_VERSION_CHECK=1 is exported before the benchmark;
# presumably this skips FlashInfer's version check -- TODO confirm.
- template: run-remote-task.yml
parameters:
name: RunSGLangBenchOneBatch2
displayName: Run SGLang Bench One Batch - 2
runRemoteArgs: '--container sglang-mscclpp-test'
remoteScript: |
export FLASHINFER_DISABLE_VERSION_CHECK=1
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 2 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangBenchOneBatch32
# displayName: Run SGLang Bench One Batch - 32
# runRemoteArgs: '--container sglang-mscclpp-test'
# remoteScript: |
# export FLASHINFER_DISABLE_VERSION_CHECK=1
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 32 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Benchmark step (batch size 32): invokes the run-remote-task.yml template with
# '--container sglang-mscclpp-test' as runRemoteArgs. The remote script runs
# sglang.bench_one_batch on Qwen/Qwen3-8B with --batch 32, 256-token input and
# output lengths, --tp-size 8, --disable-custom-all-reduce and --enable-mscclpp.
# FLASHINFER_DISABLE_VERSION_CHECK=1 is exported before the benchmark;
# presumably this skips FlashInfer's version check -- TODO confirm.
- template: run-remote-task.yml
parameters:
name: RunSGLangBenchOneBatch32
displayName: Run SGLang Bench One Batch - 32
runRemoteArgs: '--container sglang-mscclpp-test'
remoteScript: |
export FLASHINFER_DISABLE_VERSION_CHECK=1
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 32 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# - template: run-remote-task.yml
# parameters:
# name: RunSGLangBenchOneBatch64
# displayName: Run SGLang Bench One Batch - 64
# runRemoteArgs: '--container sglang-mscclpp-test'
# remoteScript: |
# export FLASHINFER_DISABLE_VERSION_CHECK=1
# python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 64 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
# Benchmark step (batch size 64): invokes the run-remote-task.yml template with
# '--container sglang-mscclpp-test' as runRemoteArgs. The remote script runs
# sglang.bench_one_batch on Qwen/Qwen3-8B with --batch 64, 256-token input and
# output lengths, --tp-size 8, --disable-custom-all-reduce and --enable-mscclpp.
# FLASHINFER_DISABLE_VERSION_CHECK=1 is exported before the benchmark;
# presumably this skips FlashInfer's version check -- TODO confirm.
- template: run-remote-task.yml
parameters:
name: RunSGLangBenchOneBatch64
displayName: Run SGLang Bench One Batch - 64
runRemoteArgs: '--container sglang-mscclpp-test'
remoteScript: |
export FLASHINFER_DISABLE_VERSION_CHECK=1
python -m sglang.bench_one_batch --model-path Qwen/Qwen3-8B --batch 64 --input-len 256 --output-len 256 --tp-size 8 --disable-custom-all-reduce --enable-mscclpp
- template: run-remote-task.yml
parameters:
@@ -173,6 +173,7 @@ steps:
displayName: Run SGLang Test All Reduce
runRemoteArgs: '--container sglang-mscclpp-test'
remoteScript: |
export FLASHINFER_DISABLE_VERSION_CHECK=1
export WORLD_SIZE=1
export RANK=0
export MASTER_ADDR=127.0.0.1