From de244e528b17478121dd43a3f93bc251223f283f Mon Sep 17 00:00:00 2001
From: empyreus
Date: Mon, 4 May 2026 18:04:30 +0000
Subject: [PATCH] update sglang bench

---
# Review notes (free text between the `---` marker and the first `diff --git` line):
# - sglang-multi-test.yml: NODE_RANK selection now compares the output of
#   `hostname` against "${VMSS}000000" / "${VMSS}000001", where VMSS is filled
#   from the `vmssName` template parameter, instead of the hard-coded literals
#   "mscclit-000000" / "mscclit-000001".
#   NOTE(review): the old literals had a hyphen before the index and the new
#   comparison adds none -- confirm the vmssName values yield matching hostnames.
# - sglang-test.yml: the server is launched with `python3 -m sglang serve`
#   instead of `python3 -m sglang.launch_server`, and the
#   `--disable-custom-all-reduce` flag is dropped while `--enable-mscclpp` stays.
#   NOTE(review): confirm `python3 -m sglang serve` is runnable with the sglang
#   version installed in the test container.
 .../templates/sglang-multi-test.yml        |  5 +++--
 .azure-pipelines/templates/sglang-test.yml | 22 ++++++++++---------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/.azure-pipelines/templates/sglang-multi-test.yml b/.azure-pipelines/templates/sglang-multi-test.yml
index fea20fd9..bc5d7a91 100644
--- a/.azure-pipelines/templates/sglang-multi-test.yml
+++ b/.azure-pipelines/templates/sglang-multi-test.yml
@@ -70,10 +70,11 @@ steps:
     runRemoteArgs: '--container sglang-mscclpp-test --hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --user azureuser'
     remoteScript: |
       export FLASHINFER_DISABLE_VERSION_CHECK=1
+      VMSS="${{ parameters.vmssName }}"
       HOSTNAME=$(hostname)
-      if [ "$HOSTNAME" = "mscclit-000000" ]; then
+      if [ "$HOSTNAME" = "${VMSS}000000" ]; then
         NODE_RANK=0
-      elif [ "$HOSTNAME" = "mscclit-000001" ]; then
+      elif [ "$HOSTNAME" = "${VMSS}000001" ]; then
         NODE_RANK=1
       else
         echo "Unknown hostname: $HOSTNAME"
diff --git a/.azure-pipelines/templates/sglang-test.yml b/.azure-pipelines/templates/sglang-test.yml
index 37a4fe5f..eb7c07ab 100644
--- a/.azure-pipelines/templates/sglang-test.yml
+++ b/.azure-pipelines/templates/sglang-test.yml
@@ -97,7 +97,7 @@ steps:
       echo "Directory: $(pwd)"
 
       # Launch sglang server in the background
-      python3 -m sglang.launch_server \
+      python3 -m sglang serve \
         --model-path Qwen/Qwen3-8B \
         --port 30000 \
         --host 0.0.0.0 \
@@ -110,7 +110,6 @@ steps:
         --attention-backend triton \
         --grammar-backend outlines \
         --schedule-policy fcfs \
-        --disable-custom-all-reduce \
         --enable-mscclpp \
         > /tmp/sglang_server.log 2>&1 &
       SERVER_PID=$!
@@ -140,17 +139,20 @@ steps:
       mkdir -p "$RESULTS_DIR"
 
-      # Run the benchmark
-      python3 ./test/bench_sglang.py \
-        --tokenizer Qwen/Qwen3-8B \
+      # Run the benchmark; JSON results go to run.json, console output is mirrored to bench_serving.log
+      python3 -m sglang.bench_serving \
+        --backend sglang \
         --host 127.0.0.1 \
         --port 30000 \
-        --request-rate 20 \
-        --num-prompts 1729 \
+        --dataset-name random \
+        --random-input-len 512 \
+        --random-output-len 8129 \
         --max-concurrency 512 \
-        --dataset ./test/single_turn_completions_with_si_fixed.jsonl \
         --output-file "$RESULTS_DIR/run.json" \
-        --flush-cache \
-        --num-warmup 50
+        --disable-stream \
+        --num-prompts 10 \
+        --seed 42 \
+        --request-rate inf \
+        2>&1 | tee "$RESULTS_DIR/bench_serving.log"
 
       echo "Benchmark completed. Results:"
       cat "$RESULTS_DIR/run.json"
@@ -158,6 +160,7 @@ steps:
       # Shut down the server
       kill $SERVER_PID 2>/dev/null || true
       wait $SERVER_PID 2>/dev/null || true
+
 - template: run-remote-task.yml
   parameters:
     name: RunSGLangTestAllReduce