Mirror of https://github.com/theroyallab/tabbyAPI.git (synced 2026-05-11 08:20:08 +00:00)
Dependencies: Include triton and xformers
This commit is contained in:
@@ -88,7 +88,7 @@ TabbyAPI uses Exllama as a powerful and fast backend for model inference, loadin
|
||||
|
||||
- Exl3 (Highly recommended)
|
||||
|
||||
- FP16
|
||||
- FP16/BF16
|
||||
|
||||
In addition, TabbyAPI supports parallel batching using paged attention for Nvidia Ampere GPUs and higher.
|
||||
|
||||
|
||||
@@ -68,6 +68,13 @@ cu12 = [
|
||||
"torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
|
||||
"torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",
|
||||
|
||||
# Triton
|
||||
"triton ; platform_system == 'Linux' and platform_machine == 'x86_64'",
|
||||
"triton-windows ; platform_system == 'Windows'",
|
||||
|
||||
# xformers
|
||||
"xformers",
|
||||
|
||||
# Exl2
|
||||
"exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'",
|
||||
"exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
|
||||
|
||||
Reference in New Issue
Block a user