mirror of
https://github.com/kvcache-ai/sglang.git
synced 2026-06-30 19:57:52 +00:00
Co-authored-by: AdityaVKochar <adityavardhankochar@gmail.com> Co-authored-by: mintlify[bot] <109931778+mintlify[bot]@users.noreply.github.com> Co-authored-by: adhyan-jain <adhyanjain2006@gmail.com> Co-authored-by: Adhyan Jain <71976554+adhyan-jain@users.noreply.github.com> Co-authored-by: Maitri-shah29 <maitrirajivshah@gmail.com> Co-authored-by: Adarsh Shirawalmath <114558126+adarshxs@users.noreply.github.com> Co-authored-by: Maitri Shah <shah29maitri@gmail.com> Co-authored-by: Aditya Vardhan Kochar <80113212+AdityaVKochar@users.noreply.github.com> Co-authored-by: Rishit Shivam <164783543+pokymono@users.noreply.github.com> Co-authored-by: Rishitshivam <164783543+Rishitshivam@users.noreply.github.com> Co-authored-by: IshhanKheria <ishhankheria06@gmail.com> Co-authored-by: Ishita Joshi <ishitata.joshi@gmail.com> Co-authored-by: Richard Chen <104477092+Richardczl98@users.noreply.github.com> Co-authored-by: longGGGGGG <553746008@qq.com> Co-authored-by: Richard <richardchen@radixark.ai> Co-authored-by: Nakul Sinha <nakul.new4socials@gmail.com> Co-authored-by: Divyam Agrawal <ludicrouslytrue@gmail.com> Co-authored-by: Richardczl98 <Zhenlinc@stanford.edu> Co-authored-by: Krishang Zinzuwadia <krishangzinzuwadia@gmail.com> Co-authored-by: nimeshas <nimesha.s106@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Jignas Paturu <86356085+JignasP@users.noreply.github.com> Co-authored-by: zijiexia <37504505+zijiexia@users.noreply.github.com>
190 lines
4.9 KiB
Plaintext
190 lines
4.9 KiB
Plaintext
---
|
||
title: "OpenAI APIs - Vision"
|
||
metatags:
|
||
description: "This tutorial covers the vision APIs for vision language models."
|
||
---
|
||
SGLang provides OpenAI-compatible APIs to enable a smooth transition from OpenAI services to self-hosted local models.
|
||
A complete reference for the request format is available in the [OpenAI Vision guide](https://platform.openai.com/docs/guides/vision).
|
||
This tutorial covers the vision APIs for vision language models.
|
||
|
||
SGLang supports various vision language models, such as Llama 3.2, LLaVA-OneVision, Qwen2.5-VL, Gemma 3, and [more](../supported-models).
|
||
|
||
As an alternative to the OpenAI API, you can also use the [SGLang offline engine](https://github.com/sgl-project/sglang/blob/main/examples/runtime/engine/offline_batch_inference_vlm.py).
|
||
|
||
|
||
## Launch A Server
|
||
|
||
Launch the server in your terminal and wait for it to initialize.
|
||
|
||
|
||
|
||
```python Example
|
||
from sglang.test.doc_patch import launch_server_cmd
|
||
from sglang.utils import wait_for_server, print_highlight, terminate_process
|
||
|
||
vision_process, port = launch_server_cmd(
|
||
"""
|
||
python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct --log-level warning
|
||
"""
|
||
)
|
||
|
||
wait_for_server(f"http://localhost:{port}")
|
||
```
|
||
|
||
## Using cURL
|
||
|
||
Once the server is up, you can send test requests using cURL or the Python `requests` library.
|
||
|
||
|
||
|
||
```python Example
|
||
import subprocess
|
||
|
||
curl_command = f"""
|
||
curl -s http://localhost:{port}/v1/chat/completions \\
|
||
-H "Content-Type: application/json" \\
|
||
-d '{{
|
||
"model": "Qwen/Qwen2.5-VL-7B-Instruct",
|
||
"messages": [
|
||
{{
|
||
"role": "user",
|
||
"content": [
|
||
{{
|
||
"type": "text",
|
||
"text": "What’s in this image?"
|
||
}},
|
||
{{
|
||
"type": "image_url",
|
||
"image_url": {{
|
||
"url": "https://github.com/sgl-project/sglang/blob/main/examples/assets/example_image.png?raw=true"
|
||
}}
|
||
}}
|
||
]
|
||
}}
|
||
],
|
||
"max_tokens": 300
|
||
}}'
|
||
"""
|
||
|
||
response = subprocess.check_output(curl_command, shell=True).decode()
|
||
print_highlight(response)
|
||
|
||
|
||
response = subprocess.check_output(curl_command, shell=True).decode()
|
||
print_highlight(response)
|
||
```
|
||
|
||
## Using Python Requests
|
||
|
||
|
||
|
||
```python Example
|
||
import requests
|
||
|
||
url = f"http://localhost:{port}/v1/chat/completions"
|
||
|
||
data = {
|
||
"model": "Qwen/Qwen2.5-VL-7B-Instruct",
|
||
"messages": [
|
||
{
|
||
"role": "user",
|
||
"content": [
|
||
{"type": "text", "text": "What’s in this image?"},
|
||
{
|
||
"type": "image_url",
|
||
"image_url": {
|
||
"url": "https://github.com/sgl-project/sglang/blob/main/examples/assets/example_image.png?raw=true"
|
||
},
|
||
},
|
||
],
|
||
}
|
||
],
|
||
"max_tokens": 300,
|
||
}
|
||
|
||
response = requests.post(url, json=data)
|
||
print_highlight(response.text)
|
||
```
|
||
|
||
## Using OpenAI Python Client
|
||
|
||
|
||
|
||
```python Example
|
||
from openai import OpenAI
|
||
|
||
client = OpenAI(base_url=f"http://localhost:{port}/v1", api_key="None")
|
||
|
||
response = client.chat.completions.create(
|
||
model="Qwen/Qwen2.5-VL-7B-Instruct",
|
||
messages=[
|
||
{
|
||
"role": "user",
|
||
"content": [
|
||
{
|
||
"type": "text",
|
||
"text": "What is in this image?",
|
||
},
|
||
{
|
||
"type": "image_url",
|
||
"image_url": {
|
||
"url": "https://github.com/sgl-project/sglang/blob/main/examples/assets/example_image.png?raw=true"
|
||
},
|
||
},
|
||
],
|
||
}
|
||
],
|
||
max_tokens=300,
|
||
)
|
||
|
||
print_highlight(response.choices[0].message.content)
|
||
```
|
||
|
||
## Multiple-Image Inputs
|
||
|
||
The server also supports multiple images, as well as interleaved text and images, provided the model supports them.
|
||
|
||
|
||
|
||
```python Example
|
||
from openai import OpenAI
|
||
|
||
client = OpenAI(base_url=f"http://localhost:{port}/v1", api_key="None")
|
||
|
||
response = client.chat.completions.create(
|
||
model="Qwen/Qwen2.5-VL-7B-Instruct",
|
||
messages=[
|
||
{
|
||
"role": "user",
|
||
"content": [
|
||
{
|
||
"type": "image_url",
|
||
"image_url": {
|
||
"url": "https://github.com/sgl-project/sglang/blob/main/examples/assets/example_image.png?raw=true",
|
||
},
|
||
},
|
||
{
|
||
"type": "image_url",
|
||
"image_url": {
|
||
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png",
|
||
},
|
||
},
|
||
{
|
||
"type": "text",
|
||
"text": "I have two very different images. They are not related at all. "
|
||
"Please describe the first image in one sentence, and then describe the second image in another sentence.",
|
||
},
|
||
],
|
||
}
|
||
],
|
||
temperature=0,
|
||
)
|
||
|
||
print_highlight(response.choices[0].message.content)
|
||
```
|
||
|
||
|
||
```python Example
|
||
terminate_process(vision_process)
|
||
```
|