From 4a8cb08a24cd3cb219975753a1a463de3e51cd37 Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Sat, 9 May 2026 23:14:30 +0200
Subject: [PATCH] Dependencies: Include triton and xformers; README: mention BF16

---
 README.md      | 2 +-
 pyproject.toml | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c2e89f7..057e71c 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ TabbyAPI uses Exllama as a powerful and fast backend for model inference, loadin
 
 - Exl3 (Highly recommended)
 
-- FP16
+- FP16/BF16
 
 In addition, TabbyAPI supports parallel batching using paged attention for Nvidia Ampere GPUs and higher.
 
diff --git a/pyproject.toml b/pyproject.toml
index 81316de..cb99dab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,6 +68,13 @@ cu12 = [
     "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
     "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",
 
+    # Triton (keep in step with the pinned torch 2.9.0 wheels; torch 2.9 pairs with Triton 3.5.x)
+    "triton ; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "triton-windows>=3.5,<3.6 ; platform_system == 'Windows'",
+
+    # xformers (unpinned; NOTE(review): confirm the resolved release targets torch 2.9.0)
+    "xformers",
+
     # Exl2
     "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'",
     "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",