[Misc] Normalize --host parameter to use plain hostname without scheme (#19309)

Co-authored-by: 墨楼 <huangzhilin.hzl@antgroup.com>
Co-authored-by: Liangsheng Yin <lsyincs@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
This commit is contained in:
Julian Huang
2026-02-25 16:37:24 +08:00
committed by GitHub
parent f75abb4521
commit a55f658835
8 changed files with 52 additions and 25 deletions

View File

@@ -271,7 +271,7 @@ Then we can benchmark the accuracy and latency by accessing the first node's exp
```bash
# bench accuracy
python3 benchmark/gsm8k/bench_sglang.py --num-questions 1319 --host http://10.0.0.1 --port 30000
python3 benchmark/gsm8k/bench_sglang.py --num-questions 1319 --host 10.0.0.1 --port 30000
# bench latency
python3 -m sglang.bench_one_batch_server --model None --base-url http://10.0.0.1:30000 --batch-size 1 --input-len 128 --output-len 128

View File

@@ -26,7 +26,7 @@ python -m sglang.test.run_eval \
```bash
python -m sglang.test.few_shot_gsm8k \
--host http://127.0.0.1 \
--host 127.0.0.1 \
--port 30000 \
--num-questions 200 \
--num-shots 5
@@ -36,7 +36,7 @@ python -m sglang.test.few_shot_gsm8k \
```bash
python benchmark/hellaswag/bench_sglang.py \
--host http://127.0.0.1 \
--host 127.0.0.1 \
--port 30000 \
--num-questions 200 \
--num-shots 20

View File

@@ -21,6 +21,8 @@ import aiohttp
import requests
from llava.conversation import conv_llava_llama_3
from sglang.utils import normalize_base_url
async def send_request(url, data, delay=0):
await asyncio.sleep(delay)
@@ -31,7 +33,7 @@ async def send_request(url, data, delay=0):
async def test_concurrent(args):
url = f"{args.host}:{args.port}"
url = normalize_base_url(args.host, args.port)
prompt = "<image>\nPlease generate caption towards this image."
conv_template = copy.deepcopy(conv_llava_llama_3)
@@ -64,7 +66,7 @@ async def test_concurrent(args):
def test_streaming(args):
url = f"{args.host}:{args.port}"
url = normalize_base_url(args.host, args.port)
prompt = "<image>\nPlease generate caption towards this image."
conv_template = copy.deepcopy(conv_llava_llama_3)
conv_template.append_message(role=conv_template.roles[0], message=prompt)
@@ -104,7 +106,7 @@ def test_streaming(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=30000)
args = parser.parse_args()
asyncio.run(test_concurrent(args))

View File

@@ -19,6 +19,8 @@ import json
import aiohttp
import requests
from sglang.utils import normalize_base_url
IMAGE_TOKEN_SEP = "\n[IMG]"
ROUTE = "/generate"
@@ -32,7 +34,7 @@ async def send_request(url, data, delay=0):
async def test_concurrent(args):
url = f"{args.host}:{args.port}{ROUTE}"
url = f"{normalize_base_url(args.host, args.port)}{ROUTE}"
# Single image test
if args.single_image:
@@ -69,7 +71,7 @@ async def test_concurrent(args):
def test_streaming(args):
url = f"{args.host}:{args.port}/generate"
url = f"{normalize_base_url(args.host, args.port)}/generate"
# Single image test
if args.single_image:
@@ -112,7 +114,7 @@ def test_streaming(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=30000)
parser.add_argument(
"--single-image",

View File

@@ -21,6 +21,8 @@ import aiohttp
import requests
from llava.conversation import conv_qwen
from sglang.utils import normalize_base_url
async def send_request(url, data, delay=0):
await asyncio.sleep(delay)
@@ -31,7 +33,7 @@ async def send_request(url, data, delay=0):
async def test_concurrent(args):
url = f"{args.host}:{args.port}"
url = normalize_base_url(args.host, args.port)
prompt = "<image>\nPlease generate caption towards this image."
conv_template = copy.deepcopy(conv_qwen)
@@ -64,7 +66,7 @@ async def test_concurrent(args):
def test_streaming(args):
url = f"{args.host}:{args.port}"
url = normalize_base_url(args.host, args.port)
prompt = "<image>\nPlease generate caption towards this image."
conv_template = copy.deepcopy(conv_qwen)
conv_template.append_message(role=conv_template.roles[0], message=prompt)
@@ -104,7 +106,7 @@ def test_streaming(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=30000)
args = parser.parse_args()
asyncio.run(test_concurrent(args))

View File

@@ -14,7 +14,12 @@ import numpy as np
from sglang.lang.api import set_default_backend
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
from sglang.utils import (
download_and_cache_file,
dump_state_text,
normalize_base_url,
read_jsonl,
)
INVALID = -9999999
@@ -46,7 +51,7 @@ def get_answer_value(answer_str):
def run_eval(args):
# Select backend
set_default_backend(RuntimeEndpoint(f"{args.host}:{args.port}"))
set_default_backend(RuntimeEndpoint(normalize_base_url(args.host, args.port)))
if args.data_path is None:
# Read data
@@ -142,7 +147,7 @@ if __name__ == "__main__":
parser.add_argument("--num-questions", type=int, default=200)
parser.add_argument("--max-new-tokens", type=int, default=512)
parser.add_argument("--parallel", type=int, default=128)
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=30000)
parser.add_argument("--temperature", type=float, default=0.0)
args = parser.parse_args()

View File

@@ -44,7 +44,7 @@ from sglang.srt.utils import (
retry,
)
from sglang.test.run_eval import run_eval
from sglang.utils import get_exception_traceback
from sglang.utils import get_exception_traceback, normalize_base_url
# General test models
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
@@ -377,7 +377,7 @@ def call_select_guidance(context, choices, model=None):
def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
parser.add_argument("--parallel", type=int, default=64)
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=None)
parser.add_argument(
"--backend",
@@ -426,7 +426,7 @@ def auto_config_device() -> str:
def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
parser.add_argument("--parallel", type=int, default=64)
parser.add_argument("--host", type=str, default="http://127.0.0.1")
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--port", type=int, default=30000)
parser.add_argument("--backend", type=str, default="srt")
parser.add_argument(
@@ -450,7 +450,7 @@ def select_sglang_backend(args: argparse.Namespace):
if args.backend.startswith("srt"):
if args.backend == "srt-no-parallel":
global_config.enable_parallel_encoding = False
backend = RuntimeEndpoint(f"{args.host}:{args.port}")
backend = RuntimeEndpoint(normalize_base_url(args.host, args.port))
elif args.backend.startswith("gpt-"):
backend = OpenAI(args.backend)
else:
@@ -459,14 +459,15 @@ def select_sglang_backend(args: argparse.Namespace):
def _get_call_generate(args: argparse.Namespace):
base_url = normalize_base_url(args.host, args.port)
if args.backend == "lightllm":
return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
return partial(call_generate_lightllm, url=f"{base_url}/generate")
elif args.backend == "vllm":
return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate")
return partial(call_generate_vllm, url=f"{base_url}/generate")
elif args.backend == "srt-raw":
return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate")
return partial(call_generate_srt_raw, url=f"{base_url}/generate")
elif args.backend == "outlines":
return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate")
return partial(call_generate_outlines, url=f"{base_url}/generate")
elif args.backend == "guidance":
from guidance import models
@@ -479,10 +480,11 @@ def _get_call_generate(args: argparse.Namespace):
def _get_call_select(args: argparse.Namespace):
base_url = normalize_base_url(args.host, args.port)
if args.backend == "lightllm":
return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
return partial(call_select_lightllm, url=f"{base_url}/generate")
elif args.backend == "vllm":
return partial(call_select_vllm, url=f"{args.host}:{args.port}/generate")
return partial(call_select_vllm, url=f"{base_url}/generate")
elif args.backend == "guidance":
from guidance import models

View File

@@ -12,6 +12,7 @@ import sys
import time
import traceback
import urllib.request
import warnings
import weakref
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
@@ -122,6 +123,19 @@ def dump_state_text(filename: str, states: list, mode: str = "w"):
)
def normalize_base_url(host: str, port: int) -> str:
    """Build a base URL of the form ``scheme://host:port``.

    Plain hostnames (e.g. ``"127.0.0.1"``) get an ``http://`` scheme
    prepended. Hosts that already carry a scheme are accepted for backward
    compatibility but emit a ``DeprecationWarning``.

    Args:
        host: Hostname or IP address, optionally prefixed with
            ``http://`` or ``https://`` (deprecated form).
        port: TCP port to append.

    Returns:
        The normalized base URL string, e.g. ``"http://127.0.0.1:30000"``.
    """
    # str.startswith accepts a tuple — one call covers both schemes.
    if host.startswith(("http://", "https://")):
        warnings.warn(
            f"Including the scheme in --host ('{host}') is deprecated. "
            f"Pass just the hostname (e.g. '127.0.0.1') instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Strip trailing slashes so "http://1.2.3.4/" does not produce the
        # malformed URL "http://1.2.3.4/:30000".
        host = host.rstrip("/")
    else:
        host = f"http://{host}"
    return f"{host}:{port}"
class HttpResponse:
def __init__(self, resp):
self.resp = resp