Dependencies: Include triton and xformers

2026-05-11 08:20:08 +00:00 · 2026-05-09 23:14:30 +02:00
parent fd9591133d
commit 4a8cb08a24
2 changed files with 8 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ TabbyAPI uses Exllama as a powerful and fast backend for model inference, loadin

 - Exl3 (Highly recommended)

- FP16
+- FP16/BF16

 In addition, TabbyAPI supports parallel batching using paged attention for Nvidia Ampere GPUs and higher.

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,6 +68,13 @@ cu12 = [
    "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",

+    # Triton (upstream package on Linux x86_64; the separate triton-windows package on Windows)
+    "triton ; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "triton-windows ; platform_system == 'Windows'",
+
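+    # xformers
+    # NOTE(review): unpinned and without a platform marker, unlike the torch wheels above — confirm a compatible build resolves on every supported platform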
+    "xformers",
+
    # Exl2
    "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'",
    "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",