mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-04-20 14:29:32 +00:00
[Misc] Normalize --host parameter to use plain hostname without scheme (#19309)
Co-authored-by: 墨楼 <huangzhilin.hzl@antgroup.com> Co-authored-by: Liangsheng Yin <lsyincs@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
This commit is contained in:
@@ -271,7 +271,7 @@ Then we can benchmark the accuracy and latency by accessing the first node's exp
|
||||
|
||||
```bash
|
||||
# bench accuracy
|
||||
python3 benchmark/gsm8k/bench_sglang.py --num-questions 1319 --host http://10.0.0.1 --port 30000
|
||||
python3 benchmark/gsm8k/bench_sglang.py --num-questions 1319 --host 10.0.0.1 --port 30000
|
||||
|
||||
# bench latency
|
||||
python3 -m sglang.bench_one_batch_server --model None --base-url http://10.0.0.1:30000 --batch-size 1 --input-len 128 --output-len 128
|
||||
|
||||
@@ -26,7 +26,7 @@ python -m sglang.test.run_eval \
|
||||
|
||||
```bash
|
||||
python -m sglang.test.few_shot_gsm8k \
|
||||
--host http://127.0.0.1 \
|
||||
--host 127.0.0.1 \
|
||||
--port 30000 \
|
||||
--num-questions 200 \
|
||||
--num-shots 5
|
||||
@@ -36,7 +36,7 @@ python -m sglang.test.few_shot_gsm8k \
|
||||
|
||||
```bash
|
||||
python benchmark/hellaswag/bench_sglang.py \
|
||||
--host http://127.0.0.1 \
|
||||
--host 127.0.0.1 \
|
||||
--port 30000 \
|
||||
--num-questions 200 \
|
||||
--num-shots 20
|
||||
|
||||
@@ -21,6 +21,8 @@ import aiohttp
|
||||
import requests
|
||||
from llava.conversation import conv_llava_llama_3
|
||||
|
||||
from sglang.utils import normalize_base_url
|
||||
|
||||
|
||||
async def send_request(url, data, delay=0):
|
||||
await asyncio.sleep(delay)
|
||||
@@ -31,7 +33,7 @@ async def send_request(url, data, delay=0):
|
||||
|
||||
|
||||
async def test_concurrent(args):
|
||||
url = f"{args.host}:{args.port}"
|
||||
url = normalize_base_url(args.host, args.port)
|
||||
|
||||
prompt = "<image>\nPlease generate caption towards this image."
|
||||
conv_template = copy.deepcopy(conv_llava_llama_3)
|
||||
@@ -64,7 +66,7 @@ async def test_concurrent(args):
|
||||
|
||||
|
||||
def test_streaming(args):
|
||||
url = f"{args.host}:{args.port}"
|
||||
url = normalize_base_url(args.host, args.port)
|
||||
prompt = "<image>\nPlease generate caption towards this image."
|
||||
conv_template = copy.deepcopy(conv_llava_llama_3)
|
||||
conv_template.append_message(role=conv_template.roles[0], message=prompt)
|
||||
@@ -104,7 +106,7 @@ def test_streaming(args):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=30000)
|
||||
args = parser.parse_args()
|
||||
asyncio.run(test_concurrent(args))
|
||||
|
||||
@@ -19,6 +19,8 @@ import json
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
from sglang.utils import normalize_base_url
|
||||
|
||||
IMAGE_TOKEN_SEP = "\n[IMG]"
|
||||
ROUTE = "/generate"
|
||||
|
||||
@@ -32,7 +34,7 @@ async def send_request(url, data, delay=0):
|
||||
|
||||
|
||||
async def test_concurrent(args):
|
||||
url = f"{args.host}:{args.port}{ROUTE}"
|
||||
url = f"{normalize_base_url(args.host, args.port)}{ROUTE}"
|
||||
|
||||
# Single image test
|
||||
if args.single_image:
|
||||
@@ -69,7 +71,7 @@ async def test_concurrent(args):
|
||||
|
||||
|
||||
def test_streaming(args):
|
||||
url = f"{args.host}:{args.port}/generate"
|
||||
url = f"{normalize_base_url(args.host, args.port)}/generate"
|
||||
|
||||
# Single image test
|
||||
if args.single_image:
|
||||
@@ -112,7 +114,7 @@ def test_streaming(args):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=30000)
|
||||
parser.add_argument(
|
||||
"--single-image",
|
||||
|
||||
@@ -21,6 +21,8 @@ import aiohttp
|
||||
import requests
|
||||
from llava.conversation import conv_qwen
|
||||
|
||||
from sglang.utils import normalize_base_url
|
||||
|
||||
|
||||
async def send_request(url, data, delay=0):
|
||||
await asyncio.sleep(delay)
|
||||
@@ -31,7 +33,7 @@ async def send_request(url, data, delay=0):
|
||||
|
||||
|
||||
async def test_concurrent(args):
|
||||
url = f"{args.host}:{args.port}"
|
||||
url = normalize_base_url(args.host, args.port)
|
||||
|
||||
prompt = "<image>\nPlease generate caption towards this image."
|
||||
conv_template = copy.deepcopy(conv_qwen)
|
||||
@@ -64,7 +66,7 @@ async def test_concurrent(args):
|
||||
|
||||
|
||||
def test_streaming(args):
|
||||
url = f"{args.host}:{args.port}"
|
||||
url = normalize_base_url(args.host, args.port)
|
||||
prompt = "<image>\nPlease generate caption towards this image."
|
||||
conv_template = copy.deepcopy(conv_qwen)
|
||||
conv_template.append_message(role=conv_template.roles[0], message=prompt)
|
||||
@@ -104,7 +106,7 @@ def test_streaming(args):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=30000)
|
||||
args = parser.parse_args()
|
||||
asyncio.run(test_concurrent(args))
|
||||
|
||||
@@ -14,7 +14,12 @@ import numpy as np
|
||||
|
||||
from sglang.lang.api import set_default_backend
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||
from sglang.utils import (
|
||||
download_and_cache_file,
|
||||
dump_state_text,
|
||||
normalize_base_url,
|
||||
read_jsonl,
|
||||
)
|
||||
|
||||
INVALID = -9999999
|
||||
|
||||
@@ -46,7 +51,7 @@ def get_answer_value(answer_str):
|
||||
|
||||
def run_eval(args):
|
||||
# Select backend
|
||||
set_default_backend(RuntimeEndpoint(f"{args.host}:{args.port}"))
|
||||
set_default_backend(RuntimeEndpoint(normalize_base_url(args.host, args.port)))
|
||||
|
||||
if args.data_path is None:
|
||||
# Read data
|
||||
@@ -142,7 +147,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--num-questions", type=int, default=200)
|
||||
parser.add_argument("--max-new-tokens", type=int, default=512)
|
||||
parser.add_argument("--parallel", type=int, default=128)
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=30000)
|
||||
parser.add_argument("--temperature", type=float, default=0.0)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -44,7 +44,7 @@ from sglang.srt.utils import (
|
||||
retry,
|
||||
)
|
||||
from sglang.test.run_eval import run_eval
|
||||
from sglang.utils import get_exception_traceback
|
||||
from sglang.utils import get_exception_traceback, normalize_base_url
|
||||
|
||||
# General test models
|
||||
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
|
||||
@@ -377,7 +377,7 @@ def call_select_guidance(context, choices, model=None):
|
||||
|
||||
def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
|
||||
parser.add_argument("--parallel", type=int, default=64)
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=None)
|
||||
parser.add_argument(
|
||||
"--backend",
|
||||
@@ -426,7 +426,7 @@ def auto_config_device() -> str:
|
||||
|
||||
def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
|
||||
parser.add_argument("--parallel", type=int, default=64)
|
||||
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=30000)
|
||||
parser.add_argument("--backend", type=str, default="srt")
|
||||
parser.add_argument(
|
||||
@@ -450,7 +450,7 @@ def select_sglang_backend(args: argparse.Namespace):
|
||||
if args.backend.startswith("srt"):
|
||||
if args.backend == "srt-no-parallel":
|
||||
global_config.enable_parallel_encoding = False
|
||||
backend = RuntimeEndpoint(f"{args.host}:{args.port}")
|
||||
backend = RuntimeEndpoint(normalize_base_url(args.host, args.port))
|
||||
elif args.backend.startswith("gpt-"):
|
||||
backend = OpenAI(args.backend)
|
||||
else:
|
||||
@@ -459,14 +459,15 @@ def select_sglang_backend(args: argparse.Namespace):
|
||||
|
||||
|
||||
def _get_call_generate(args: argparse.Namespace):
|
||||
base_url = normalize_base_url(args.host, args.port)
|
||||
if args.backend == "lightllm":
|
||||
return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_generate_lightllm, url=f"{base_url}/generate")
|
||||
elif args.backend == "vllm":
|
||||
return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_generate_vllm, url=f"{base_url}/generate")
|
||||
elif args.backend == "srt-raw":
|
||||
return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_generate_srt_raw, url=f"{base_url}/generate")
|
||||
elif args.backend == "outlines":
|
||||
return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_generate_outlines, url=f"{base_url}/generate")
|
||||
elif args.backend == "guidance":
|
||||
from guidance import models
|
||||
|
||||
@@ -479,10 +480,11 @@ def _get_call_generate(args: argparse.Namespace):
|
||||
|
||||
|
||||
def _get_call_select(args: argparse.Namespace):
|
||||
base_url = normalize_base_url(args.host, args.port)
|
||||
if args.backend == "lightllm":
|
||||
return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_select_lightllm, url=f"{base_url}/generate")
|
||||
elif args.backend == "vllm":
|
||||
return partial(call_select_vllm, url=f"{args.host}:{args.port}/generate")
|
||||
return partial(call_select_vllm, url=f"{base_url}/generate")
|
||||
elif args.backend == "guidance":
|
||||
from guidance import models
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import sys
|
||||
import time
|
||||
import traceback
|
||||
import urllib.request
|
||||
import warnings
|
||||
import weakref
|
||||
from collections import OrderedDict
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -122,6 +123,19 @@ def dump_state_text(filename: str, states: list, mode: str = "w"):
|
||||
)
|
||||
|
||||
|
||||
def normalize_base_url(host: str, port: int) -> str:
    """Build a ``http://host:port`` base URL from a host string and port.

    Accepts a plain hostname (preferred) or a legacy scheme-prefixed host;
    the latter triggers a deprecation warning but is still honored.
    """
    # Hosts that already carry a scheme are deprecated but tolerated.
    if host.startswith(("http://", "https://")):
        warnings.warn(
            f"Including the scheme in --host ('{host}') is deprecated. "
            f"Pass just the hostname (e.g. '127.0.0.1') instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        base = host
    else:
        base = f"http://{host}"
    return f"{base}:{port}"
|
||||
|
||||
|
||||
class HttpResponse:
|
||||
def __init__(self, resp):
|
||||
self.resp = resp
|
||||
|
||||
Reference in New Issue
Block a user