From 4a8cb08a24cd3cb219975753a1a463de3e51cd37 Mon Sep 17 00:00:00 2001 From: turboderp <11859846+turboderp@users.noreply.github.com> Date: Sat, 9 May 2026 23:14:30 +0200 Subject: [PATCH] Dependencies: Include triton and xformers --- README.md | 2 +- pyproject.toml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2e89f7..057e71c 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ TabbyAPI uses Exllama as a powerful and fast backend for model inference, loadin - Exl3 (Highly recommended) -- FP16 +- FP16/BF16 In addition, TabbyAPI supports parallel batching using paged attention for Nvidia Ampere GPUs and higher. diff --git a/pyproject.toml b/pyproject.toml index 81316de..cb99dab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,13 @@ cu12 = [ "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'", "torch @ https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'", + # Triton + "triton ; platform_system == 'Linux' and platform_machine == 'x86_64'", + "triton-windows ; platform_system == 'Windows'", + + # xformers + "xformers", + # Exl2 "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'", "exllamav2 @ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.9.0-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",