{
"$schema": "https://mintlify.com/docs.json",
"theme": "aspen",
"name": "SGLang Documentation",
"seo": {
"metatags": {
"google-site-verification": "bX3ofyYQhraIpAYf4DpyZQXZO_G4xLR_RqeBAKnJA7g"
}
},
"redirects": [
{
"source": "/docs/references/learn_more",
"destination": "/"
},
{
"source": "/index.html",
"destination": "/"
},
{
"source": "/advanced_features/adaptive_speculative_decoding.html",
"destination": "/docs/advanced_features/speculative_decoding"
},
{
"source": "/advanced_features/attention_backend.html",
"destination": "/docs/advanced_features/attention_backend"
},
{
"source": "/advanced_features/breakable_cuda_graph.html",
"destination": "/docs/advanced_features/breakable_cuda_graph"
},
{
"source": "/advanced_features/checkpoint_engine.html",
"destination": "/docs/advanced_features/checkpoint_engine"
},
{
"source": "/advanced_features/cuda_graph_for_multi_modal_encoder.html",
"destination": "/docs/advanced_features/cuda_graph_for_multi_modal_encoder"
},
{
"source": "/advanced_features/deterministic_inference.html",
"destination": "/docs/advanced_features/deterministic_inference"
},
{
"source": "/advanced_features/dp_dpa_smg_guide.html",
"destination": "/docs/advanced_features/dp_dpa_smg_guide"
},
{
"source": "/advanced_features/dp_for_multi_modal_encoder.html",
"destination": "/docs/advanced_features/dp_for_multi_modal_encoder"
},
{
"source": "/advanced_features/epd_disaggregation.html",
"destination": "/docs/advanced_features/epd_disaggregation"
},
{
"source": "/advanced_features/expert_parallelism.html",
"destination": "/docs/advanced_features/expert_parallelism"
},
{
"source": "/advanced_features/forward_hooks.html",
"destination": "/docs/advanced_features/forward_hooks"
},
{
"source": "/advanced_features/hicache.html",
"destination": "/docs/advanced_features/hicache"
},
{
"source": "/advanced_features/hicache_best_practices.html",
"destination": "/docs/advanced_features/hicache_best_practices"
},
{
"source": "/advanced_features/hicache_design.html",
"destination": "/docs/advanced_features/hicache_design"
},
{
"source": "/advanced_features/hicache_storage_runtime_attach_detach.html",
"destination": "/docs/advanced_features/hicache_storage_runtime_attach_detach"
},
{
"source": "/advanced_features/hisparse_guide.html",
"destination": "/docs/advanced_features/overview"
},
{
"source": "/advanced_features/hyperparameter_tuning.html",
"destination": "/docs/advanced_features/hyperparameter_tuning"
},
{
"source": "/advanced_features/lora.html",
"destination": "/docs/advanced_features/lora"
},
{
"source": "/advanced_features/object_storage.html",
"destination": "/docs/advanced_features/object_storage"
},
{
"source": "/advanced_features/observability.html",
"destination": "/docs/advanced_features/observability"
},
{
"source": "/advanced_features/pd_disaggregation.html",
"destination": "/docs/advanced_features/pd_disaggregation"
},
{
"source": "/advanced_features/piecewise_cuda_graph.html",
"destination": "/docs/advanced_features/piecewise_cuda_graph"
},
{
"source": "/advanced_features/pipeline_parallelism.html",
"destination": "/docs/advanced_features/pipeline_parallelism"
},
{
"source": "/advanced_features/quantization.html",
"destination": "/docs/advanced_features/quantization"
},
{
"source": "/advanced_features/quantized_kv_cache.html",
"destination": "/docs/advanced_features/quantized_kv_cache"
},
{
"source": "/advanced_features/rfork.html",
"destination": "/docs/advanced_features/rfork"
},
{
"source": "/advanced_features/separate_reasoning.html",
"destination": "/docs/advanced_features/separate_reasoning"
},
{
"source": "/advanced_features/server_arguments.html",
"destination": "/docs/advanced_features/server_arguments"
},
{
"source": "/advanced_features/sgl_model_gateway.html",
"destination": "/docs/advanced_features/sgl_model_gateway"
},
{
"source": "/advanced_features/sglang_for_rl.html",
"destination": "/docs/advanced_features/sglang_for_rl"
},
{
"source": "/advanced_features/speculative_decoding.html",
"destination": "/docs/advanced_features/speculative_decoding"
},
{
"source": "/advanced_features/structured_outputs.html",
"destination": "/docs/advanced_features/structured_outputs"
},
{
"source": "/advanced_features/structured_outputs_for_reasoning_models.html",
"destination": "/docs/advanced_features/structured_outputs_for_reasoning_models"
},
{
"source": "/advanced_features/tool_parser.html",
"destination": "/docs/advanced_features/tool_parser"
},
{
"source": "/advanced_features/vlm_query.html",
"destination": "/docs/advanced_features/vlm_query"
},
{
"source": "/basic_usage/deepseek_ocr.html",
"destination": "/docs/basic_usage/overview"
},
{
"source": "/basic_usage/deepseek_v3.html",
"destination": "/docs/basic_usage/deepseek_v3"
},
{
"source": "/basic_usage/deepseek_v32.html",
"destination": "/docs/basic_usage/deepseek_v32"
},
{
"source": "/basic_usage/glm45.html",
"destination": "/docs/basic_usage/glm45"
},
{
"source": "/basic_usage/glmv.html",
"destination": "/docs/basic_usage/glmv"
},
{
"source": "/basic_usage/gpt_oss.html",
"destination": "/docs/basic_usage/gpt_oss"
},
{
"source": "/basic_usage/llama4.html",
"destination": "/docs/basic_usage/llama4"
},
{
"source": "/basic_usage/minimax_m2.html",
"destination": "/docs/basic_usage/minimax_m2"
},
{
"source": "/basic_usage/native_api.html",
"destination": "/docs/basic_usage/native_api"
},
{
"source": "/basic_usage/offline_engine_api.html",
"destination": "/docs/basic_usage/offline_engine_api"
},
{
"source": "/basic_usage/ollama_api.html",
"destination": "/docs/basic_usage/ollama_api"
},
{
"source": "/basic_usage/openai_api.html",
"destination": "/docs/basic_usage/openai_api"
},
{
"source": "/basic_usage/openai_api_completions.html",
"destination": "/docs/basic_usage/openai_api_completions"
},
{
"source": "/basic_usage/openai_api_embeddings.html",
"destination": "/docs/basic_usage/openai_api_embeddings"
},
{
"source": "/basic_usage/openai_api_vision.html",
"destination": "/docs/basic_usage/openai_api_vision"
},
{
"source": "/basic_usage/popular_model_usage.html",
"destination": "/docs/basic_usage/popular_model_usage"
},
{
"source": "/basic_usage/qwen3.html",
"destination": "/docs/basic_usage/qwen3"
},
{
"source": "/basic_usage/qwen3_5.html",
"destination": "/docs/basic_usage/qwen3"
},
{
"source": "/basic_usage/qwen3_vl.html",
"destination": "/docs/basic_usage/qwen3_vl"
},
{
"source": "/basic_usage/sampling_params.html",
"destination": "/docs/basic_usage/sampling_params"
},
{
"source": "/basic_usage/send_request.html",
"destination": "/docs/basic_usage/send_request"
},
{
"source": "/developer_guide/bench_serving.html",
"destination": "/docs/developer_guide/bench_serving"
},
{
"source": "/developer_guide/benchmark_and_profiling.html",
"destination": "/docs/developer_guide/benchmark_and_profiling"
},
{
"source": "/developer_guide/contribution_guide.html",
"destination": "/docs/developer_guide/contribution_guide"
},
{
"source": "/developer_guide/development_guide_using_docker.html",
"destination": "/docs/developer_guide/development_guide_using_docker"
},
{
"source": "/developer_guide/development_jit_kernel_guide.html",
"destination": "/docs/developer_guide/JIT_kernels"
},
{
"source": "/developer_guide/evaluating_new_models.html",
"destination": "/docs/developer_guide/evaluating_new_models"
},
{
"source": "/developer_guide/release_process.html",
"destination": "/docs/developer_guide/release_process"
},
{
"source": "/developer_guide/setup_github_runner.html",
"destination": "/docs/developer_guide/setup_github_runner"
},
{
"source": "/diffusion/api/cli.html",
"destination": "/docs/sglang-diffusion/api/cli"
},
{
"source": "/diffusion/api/openai_api.html",
"destination": "/docs/sglang-diffusion/api/openai-api"
},
{
"source": "/diffusion/api/post_processing.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/ci_perf.html",
"destination": "/docs/sglang-diffusion/ci-performance"
},
{
"source": "/diffusion/compatibility_matrix.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/contributing.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/development.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/disaggregation.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/environment_variables.html",
"destination": "/docs/sglang-diffusion/environment-variables"
},
{
"source": "/diffusion/index.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/installation.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/performance/attention_backends.html",
"destination": "/docs/sglang-diffusion/attention-backends"
},
{
"source": "/diffusion/performance/cache/cache_dit.html",
"destination": "/docs/sglang-diffusion/cache-dit"
},
{
"source": "/diffusion/performance/cache/index.html",
"destination": "/docs/sglang-diffusion/caching-acceleration"
},
{
"source": "/diffusion/performance/cache/teacache.html",
"destination": "/docs/sglang-diffusion/tea-cache"
},
{
"source": "/diffusion/performance/index.html",
"destination": "/docs/sglang-diffusion/performance-optimization"
},
{
"source": "/diffusion/performance/profiling.html",
"destination": "/docs/sglang-diffusion/profiling"
},
{
"source": "/diffusion/performance/ring_sp_performance.html",
"destination": "/docs/sglang-diffusion/performance-optimization"
},
{
"source": "/diffusion/quantization.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/reference.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/support_new_models.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/diffusion/usage.html",
"destination": "/docs/sglang-diffusion/installation"
},
{
"source": "/get_started/install.html",
"destination": "/docs/get-started/installation"
},
{
"source": "/platforms/amd_gpu.html",
"destination": "/docs/hardware-platforms/amd-gpus"
},
{
"source": "/platforms/apple_metal.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/ascend/ascend_contribution_guide.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/ascend/ascend_npu.html",
"destination": "/docs/hardware-platforms/ascend-npus/SGLang-installation-with-NPUs-support"
},
{
"source": "/platforms/ascend/ascend_npu_best_practice.html",
"destination": "/docs/hardware-platforms/ascend-npus/Best-Practice-on-Ascend-NPU"
},
{
"source": "/platforms/ascend/ascend_npu_deepseek_example.html",
"destination": "/docs/hardware-platforms/ascend-npus/DeepSeek-Examples"
},
{
"source": "/platforms/ascend/ascend_npu_environment_variables.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/ascend/ascend_npu_glm5_examples.html",
"destination": "/docs/hardware-platforms/ascend-npus/GLM-5"
},
{
"source": "/platforms/ascend/ascend_npu_quantization.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/ascend/ascend_npu_qwen3_5_examples.html",
"destination": "/docs/hardware-platforms/ascend-npus/Qwen3.5"
},
{
"source": "/platforms/ascend/ascend_npu_qwen3_examples.html",
"destination": "/docs/hardware-platforms/ascend-npus/Qwen3-Examples"
},
{
"source": "/platforms/ascend/ascend_npu_support.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/ascend/ascend_npu_support_features.html",
"destination": "/docs/hardware-platforms/ascend-npus/Support-Features-on-Ascend-NPU"
},
{
"source": "/platforms/ascend/ascend_npu_support_models.html",
"destination": "/docs/hardware-platforms/ascend-npus/Support-Models-on-Ascend-NPU"
},
{
"source": "/platforms/ascend/mindspore_backend.html",
"destination": "/docs/hardware-platforms/ascend-npus/MindSpore-Models"
},
{
"source": "/platforms/ascend_npu_ring_sp_performance.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/cpu_server.html",
"destination": "/docs/hardware-platforms/cpu-server"
},
{
"source": "/platforms/mthreads_gpu.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/nvidia_jetson.html",
"destination": "/docs/hardware-platforms/nvidia"
},
{
"source": "/platforms/plugin.html",
"destination": "/docs/hardware-platforms/overview"
},
{
"source": "/platforms/tpu.html",
"destination": "/docs/hardware-platforms/tpu"
},
{
"source": "/platforms/xpu.html",
"destination": "/docs/hardware-platforms/xpu"
},
{
"source": "/references/custom_chat_template.html",
"destination": "/docs/references/custom_chat_template"
},
{
"source": "/references/environment_variables.html",
"destination": "/docs/references/environment_variables"
},
{
"source": "/references/faq.html",
"destination": "/docs/references/faq"
},
{
"source": "/references/frontend/choices_methods.html",
"destination": "/docs/references/frontend/choices_methods"
},
{
"source": "/references/frontend/frontend_index.html",
"destination": "/docs/references/frontend/frontend_index"
},
{
"source": "/references/frontend/frontend_tutorial.html",
"destination": "/docs/references/frontend/frontend_tutorial"
},
{
"source": "/references/learn_more.html",
"destination": "/"
},
{
"source": "/references/multi_node_deployment/deploy_on_k8s.html",
"destination": "/docs/references/multi_node_deployment/deploy_on_k8s"
},
{
"source": "/references/multi_node_deployment/lws_pd/lws_pd_deploy.html",
"destination": "/docs/references/multi_node_deployment/lws_pd/lws_pd_deploy"
},
{
"source": "/references/multi_node_deployment/multi_node.html",
"destination": "/docs/references/multi_node_deployment/multi_node"
},
{
"source": "/references/multi_node_deployment/multi_node_index.html",
"destination": "/docs/references/multi_node_deployment/multi_node_index"
},
{
"source": "/references/multi_node_deployment/rbg_pd/deepseekv32_pd.html",
"destination": "/docs/references/multi_node_deployment/rbg_pd/deepseekv32_pd"
},
{
"source": "/references/post_training_integration.html",
"destination": "/docs/references/post_training_integration"
},
{
"source": "/references/production_metrics.html",
"destination": "/docs/references/production_metrics"
},
{
"source": "/references/production_request_trace.html",
"destination": "/docs/references/production_request_trace"
},
{
"source": "/references/release_lookup.html",
"destination": "/docs/references/overview"
},
{
"source": "/references/torch_compile_cache.html",
"destination": "/docs/references/torch_compile_cache"
},
{
"source": "/supported_models/extending/index.html",
"destination": "/docs/supported-models"
},
{
"source": "/supported_models/extending/mindspore_models.html",
"destination": "/docs/supported-models/mindspore-models"
},
{
"source": "/supported_models/extending/modelscope.html",
"destination": "/docs/supported-models/modelscope"
},
{
"source": "/supported_models/extending/support_new_models.html",
"destination": "/docs/supported-models/new-model-support"
},
{
"source": "/supported_models/extending/transformers_fallback.html",
"destination": "/docs/supported-models/transformers-fallback"
},
{
"source": "/supported_models/index.html",
"destination": "/docs/supported-models"
},
{
"source": "/supported_models/retrieval_ranking/classify_models.html",
"destination": "/docs/supported-models/classification-models"
},
{
"source": "/supported_models/retrieval_ranking/embedding_models.html",
"destination": "/docs/supported-models/embedding-models"
},
{
"source": "/supported_models/retrieval_ranking/index.html",
"destination": "/docs/supported-models"
},
{
"source": "/supported_models/retrieval_ranking/rerank_models.html",
"destination": "/docs/supported-models/rerank-models"
},
{
"source": "/supported_models/specialized/index.html",
"destination": "/docs/supported-models"
},
{
"source": "/supported_models/specialized/reward_models.html",
"destination": "/docs/supported-models/reward-models"
},
{
"source": "/supported_models/text_generation/diffusion_language_models.html",
"destination": "/docs/supported-models/diffusion-language-models"
},
{
"source": "/supported_models/text_generation/generative_models.html",
"destination": "/docs/supported-models/large-language-models"
},
{
"source": "/supported_models/text_generation/index.html",
"destination": "/docs/supported-models"
},
{
"source": "/supported_models/text_generation/multimodal_language_models.html",
"destination": "/docs/supported-models/vision-language-models"
},
{
"source": "/supported_models.html",
"destination": "/docs/supported-models"
},
{
"source": "/diffusion.html",
"destination": "/docs/sglang-diffusion/installation"
}
],
"colors": {
"primary": "#d55816",
"light": "#d55816",
"dark": "#d55816"
},
"background": {
"decoration": "grid",
"color": {
"dark": "#1d1d1d",
"light": "#fffcfb"
}
},
"fonts": {
"heading": {
"family": "Inter",
"weight": 600
},
"body": {
"family": "Inter",
"weight": 400
}
},
"favicon": "/favicon.png",
"navigation": {
"tabs": [
{
"tab": "Get Started",
"groups": [
{
"group": "Get Started",
"icon": "play",
"pages": [
"index",
"docs/get-started/installation",
"docs/get-started/quickstart",
"docs/basic_usage/send_request"
]
}
]
},
{
"tab": "User Guide",
"groups": [
{
"group": "Basic Usage",
"icon": "book-open",
"pages": [
"docs/basic_usage/overview",
{
"group": "OpenAI-Compatible APIs",
"pages": [
"docs/basic_usage/openai_api",
"docs/basic_usage/openai_api_completions",
"docs/basic_usage/openai_api_vision",
"docs/basic_usage/openai_api_embeddings"
]
},
"docs/basic_usage/ollama_api",
"docs/basic_usage/offline_engine_api",
"docs/basic_usage/native_api",
"docs/basic_usage/sampling_params",
{
"group": "Popular Model Usage",
"pages": [
"docs/basic_usage/popular_model_usage",
"docs/basic_usage/deepseek_v3",
"docs/basic_usage/deepseek_v32",
"docs/basic_usage/glm45",
"docs/basic_usage/glmv",
"docs/basic_usage/gpt_oss",
"docs/basic_usage/kimi_k2_5",
"docs/basic_usage/minimax_m2",
"docs/basic_usage/qwen3",
"docs/basic_usage/qwen3_vl",
"docs/basic_usage/llama4"
]
}
]
},
{
"group": "Advanced Features",
"icon": "gears",
"pages": [
"docs/advanced_features/overview",
"docs/advanced_features/server_arguments",
"docs/advanced_features/object_storage",
"docs/advanced_features/hyperparameter_tuning",
"docs/advanced_features/attention_backend",
"docs/advanced_features/speculative_decoding",
"docs/advanced_features/structured_outputs",
"docs/advanced_features/structured_outputs_for_reasoning_models",
"docs/advanced_features/tool_parser",
"docs/advanced_features/separate_reasoning",
"docs/advanced_features/quantization",
"docs/advanced_features/quantized_kv_cache",
"docs/advanced_features/dp_dpa_smg_guide",
"docs/advanced_features/expert_parallelism",
"docs/advanced_features/lora",
"docs/advanced_features/pd_disaggregation",
"docs/advanced_features/epd_disaggregation",
"docs/advanced_features/pipeline_parallelism",
{
"group": "Hierarchical KV Caching (HiCache)",
"pages": [
"docs/advanced_features/hicache",
"docs/advanced_features/hicache_best_practices",
"docs/advanced_features/hicache_design",
"docs/advanced_features/hicache_storage_runtime_attach_detach"
]
},
"docs/advanced_features/vlm_query",
"docs/advanced_features/dp_for_multi_modal_encoder",
"docs/advanced_features/cuda_graph_for_multi_modal_encoder",
"docs/advanced_features/breakable_cuda_graph",
"docs/advanced_features/piecewise_cuda_graph",
"docs/advanced_features/sgl_model_gateway",
"docs/advanced_features/deterministic_inference",
"docs/advanced_features/observability",
"docs/advanced_features/checkpoint_engine",
"docs/advanced_features/sglang_for_rl"
]
},
{
"group": "Supported Models",
"icon": "cubes",
"pages": [
"docs/supported-models",
{
"group": "Text Generation",
"pages": [
"docs/supported-models/large-language-models",
"docs/supported-models/vision-language-models",
"docs/supported-models/diffusion-language-models"
]
},
{
"group": "Retrieval and Ranking",
"pages": [
"docs/supported-models/embedding-models",
"docs/supported-models/rerank-models",
"docs/supported-models/classification-models"
]
},
{
"group": "Specialized Models",
"pages": [
"docs/supported-models/reward-models"
]
},
{
"group": "Extending SGLang",
"pages": [
"docs/supported-models/new-model-support",
"docs/supported-models/transformers-fallback",
"docs/supported-models/modelscope",
"docs/supported-models/mindspore-models"
]
}
]
},
{
"group": "Developer Guide",
"icon": "code",
"pages": [
"docs/developer_guide/overview",
"docs/developer_guide/contribution_guide",
{
"group": "Development",
"pages": [
"docs/developer_guide/development_guide_using_docker",
"docs/developer_guide/JIT_kernels"
]
},
{
"group": "Benchmarking",
"pages": [
"docs/developer_guide/benchmark_and_profiling",
"docs/developer_guide/bench_serving"
]
},
"docs/developer_guide/evaluating_new_models"
]
},
{
"group": "References",
"icon": "bookmark",
"pages": [
"docs/references/overview",
"docs/references/faq",
"docs/references/environment_variables",
"docs/references/production_metrics",
"docs/references/production_request_trace",
{
"group": "Multi-Node Deployment",
"pages": [
"docs/references/multi_node_deployment/multi_node_index",
"docs/references/multi_node_deployment/multi_node",
"docs/references/multi_node_deployment/deploy_on_k8s",
"docs/references/multi_node_deployment/lws_pd/lws_pd_deploy",
"docs/references/multi_node_deployment/rbg_pd/deepseekv32_pd"
]
},
"docs/references/custom_chat_template",
{
"group": "Frontend Language",
"pages": [
"docs/references/frontend/frontend_index",
"docs/references/frontend/frontend_tutorial",
"docs/references/frontend/choices_methods"
]
},
{
"group": "Cookbook",
"pages": [
"cookbook/base/reference/server_arguments"
]
},
"docs/references/post_training_integration"
]
}
]
},
{
"tab": "Hardware",
"groups": [
{
"group": "Hardware Platforms",
"icon": "microchip",
"pages": [
"docs/hardware-platforms/overview",
"docs/hardware-platforms/nvidia-gpus",
"docs/hardware-platforms/amd-gpus",
{
"group": "Ascend NPUs",
"pages": [
"docs/hardware-platforms/ascend-npus/Best-Practice-on-Ascend-NPU",
"docs/hardware-platforms/ascend-npus/DeepSeek-Examples",
"docs/hardware-platforms/ascend-npus/GLM-5",
"docs/hardware-platforms/ascend-npus/MindSpore-Models",
"docs/hardware-platforms/ascend-npus/Qwen3-Examples",
"docs/hardware-platforms/ascend-npus/Qwen3.5",
"docs/hardware-platforms/ascend-npus/SGLang-installation-with-NPUs-support",
"docs/hardware-platforms/ascend-npus/Support-Features-on-Ascend-NPU",
"docs/hardware-platforms/ascend-npus/Support-Models-on-Ascend-NPU"
]
},
"docs/hardware-platforms/cpu-server",
{
"group": "Edge & Embedded",
"pages": [
"docs/hardware-platforms/nvidia"
]
},
"docs/hardware-platforms/tpu",
"docs/hardware-platforms/xpu"
]
}
]
},
{
"tab": "Cookbook",
"groups": [
{
"group": "Cookbook",
"icon": "book",
"pages": [
"cookbook/intro",
{
"group": "Autoregressive Models",
"pages": [
"cookbook/autoregressive/intro",
{
"group": "Qwen",
"pages": [
"cookbook/autoregressive/Qwen/Qwen3.6",
"cookbook/autoregressive/Qwen/Qwen3.5",
"cookbook/autoregressive/Qwen/Qwen3",
"cookbook/autoregressive/Qwen/Qwen3-Next",
"cookbook/autoregressive/Qwen/Qwen3-Coder",
"cookbook/autoregressive/Qwen/Qwen3-Coder-Next",
"cookbook/autoregressive/Qwen/Qwen3-VL",
"cookbook/autoregressive/Qwen/Qwen2.5-VL"
]
},
{
"group": "DeepSeek",
"pages": [
"cookbook/autoregressive/DeepSeek/DeepSeek-V3_2",
"cookbook/autoregressive/DeepSeek/DeepSeek-V3_1",
"cookbook/autoregressive/DeepSeek/DeepSeek-V3",
"cookbook/autoregressive/DeepSeek/DeepSeek-R1",
"cookbook/autoregressive/DeepSeek/DeepSeek-Math-V2",
"cookbook/autoregressive/DeepSeek/DeepSeek-OCR",
"cookbook/autoregressive/DeepSeek/DeepSeek-OCR-2"
]
},
{
"group": "Llama",
"pages": [
"cookbook/autoregressive/Llama/Llama4",
"cookbook/autoregressive/Llama/Llama3.3-70B",
"cookbook/autoregressive/Llama/Llama3.1"
]
},
{
"group": "GLM",
"pages": [
"cookbook/autoregressive/GLM/GLM-4.5",
"cookbook/autoregressive/GLM/GLM-4.6",
"cookbook/autoregressive/GLM/GLM-4.7",
"cookbook/autoregressive/GLM/GLM-4.7-Flash",
"cookbook/autoregressive/GLM/GLM-5",
"cookbook/autoregressive/GLM/GLM-5.1",
"cookbook/autoregressive/GLM/GLM-Glyph",
"cookbook/autoregressive/GLM/GLM-OCR",
"cookbook/autoregressive/GLM/GLM-4.5V",
"cookbook/autoregressive/GLM/GLM-4.6V"
]
},
{
"group": "Google",
"pages": [
"cookbook/autoregressive/Google/Gemma4"
]
},
{
"group": "OpenAI",
"pages": [
"cookbook/autoregressive/OpenAI/GPT-OSS"
]
},
{
"group": "Moonshotai",
"pages": [
"cookbook/autoregressive/Moonshotai/Kimi-K2.6",
"cookbook/autoregressive/Moonshotai/Kimi-K2.5",
"cookbook/autoregressive/Moonshotai/Kimi-K2",
"cookbook/autoregressive/Moonshotai/Kimi-Linear"
]
},
{
"group": "MiniMax",
"pages": [
"cookbook/autoregressive/MiniMax/MiniMax-M2.7",
"cookbook/autoregressive/MiniMax/MiniMax-M2",
"cookbook/autoregressive/MiniMax/MiniMax-M2.5"
]
},
{
"group": "NVIDIA",
"pages": [
"cookbook/autoregressive/NVIDIA/Nemotron3-Nano",
"cookbook/autoregressive/NVIDIA/Nemotron3-Super"
]
},
{
"group": "Ernie",
"pages": [
"cookbook/autoregressive/Ernie/Ernie4.5",
"cookbook/autoregressive/Ernie/Ernie4.5-VL"
]
},
{
"group": "StepFun",
"pages": [
"cookbook/autoregressive/StepFun/Step3.5",
"cookbook/autoregressive/StepFun/Step3-VL-10B"
]
},
{
"group": "InclusionAI",
"pages": [
"cookbook/autoregressive/InclusionAI/Ling-2.5-1T",
"cookbook/autoregressive/InclusionAI/Ring-2.5-1T",
"cookbook/autoregressive/InclusionAI/LLaDA-2.1"
]
},
{
"group": "InternLM",
"pages": [
"cookbook/autoregressive/InternLM/Intern-S1"
]
},
{
"group": "InternVL",
"pages": [
"cookbook/autoregressive/InternVL/InternVL3.5"
]
},
{
"group": "Jina AI",
"pages": [
"cookbook/autoregressive/Jina/Jina-reranker-m0"
]
},
{
"group": "Mistral",
"pages": [
"cookbook/autoregressive/Mistral/Ministral-3",
"cookbook/autoregressive/Mistral/Mistral-Small-4",
"cookbook/autoregressive/Mistral/Devstral-2"
]
},
{
"group": "Xiaomi",
"pages": [
"cookbook/autoregressive/Xiaomi/MiMo-V2-Flash"
]
},
{
"group": "FlashLabs",
"pages": [
"cookbook/autoregressive/FlashLabs/Chroma1.0"
]
}
]
},
{
"group": "Diffusion Models",
"pages": [
"cookbook/diffusion/intro",
{
"group": "FLUX",
"pages": [
"cookbook/diffusion/FLUX/FLUX"
]
},
{
"group": "Wan",
"pages": [
"cookbook/diffusion/Wan/Wan2.1",
"cookbook/diffusion/Wan/Wan2.2"
]
},
{
"group": "Qwen-Image",
"pages": [
"cookbook/diffusion/Qwen-Image/Qwen-Image",
"cookbook/diffusion/Qwen-Image/Qwen-Image-Edit"
]
},
{
"group": "Z-Image",
"pages": [
"cookbook/diffusion/Z-Image/Z-Image-Turbo"
]
},
{
"group": "MOVA",
"pages": [
"cookbook/diffusion/MOVA/MOVA"
]
}
]
},
{
"group": "SpecBundle",
"pages": [
"cookbook/specbundle/supported_models",
"cookbook/specbundle/specbundle_usage"
]
},
{
"group": "Benchmarks",
"pages": [
"cookbook/base/benchmarks/autoregressive_model_benchmark",
"cookbook/base/benchmarks/diffusion_model_benchmark"
]
}
]
}
]
},
{
"tab": "SGLang Diffusion",
"groups": [
{
"group": "SGLang Diffusion",
"icon": "sparkles",
"pages": [
"sglang-diffusion/intro",
"docs/sglang-diffusion/installation",
"docs/sglang-diffusion/supported-models",
{
"group": "Usage",
"pages": [
"docs/sglang-diffusion/api/cli",
"docs/sglang-diffusion/api/openai-api"
]
},
{
"group": "Performance Optimization",
"pages": [
"docs/sglang-diffusion/performance-optimization",
"docs/sglang-diffusion/attention-backends",
"docs/sglang-diffusion/profiling",
"docs/sglang-diffusion/ci-performance"
]
},
{
"group": "Caching Strategies",
"pages": [
"docs/sglang-diffusion/caching-acceleration",
"docs/sglang-diffusion/cache-dit",
"docs/sglang-diffusion/tea-cache"
]
},
{
"group": "References",
"pages": [
"docs/sglang-diffusion/environment-variables",
"docs/sglang-diffusion/supported-models"
]
}
]
}
]
}
],
"global": {
"anchors": []
}
},
"logo": {
"light": "/logo/logo.png",
"dark": "/logo/logo.png"
},
"contextual": {
"options": [
"copy",
"view",
"chatgpt",
"claude",
"perplexity",
"mcp",
"cursor",
"vscode"
]
},
"footer": {
"socials": {
"github": "https://github.com/sgl-project/sglang",
"x": "https://x.com/lmsysorg",
"linkedin": "https://www.linkedin.com/company/sgl-project/posts?feedView=all",
"slack": "https://slack.sglang.io/",
"discord": "https://discord.gg/4ugb2t6YY2"
}
}
}