Files
tabbyAPI/backends/exllamav2/utils.py
DocShotgun 156b74f3f0 Revision to paged attention checks (#133)
* Model: Clean up paged attention checks

* Model: Move cache_size checks after paged attn checks
Cache size is only relevant in paged mode

* Model: Fix no_flash_attention

* Model: Remove no_flash_attention
Ability to use flash attention is auto-detected, so this flag is unneeded. Uninstall flash attention to disable it on supported hardware.
2024-06-09 17:28:11 +02:00

63 lines
1.9 KiB
Python

from packaging import version
from importlib.metadata import PackageNotFoundError, version as package_version
from loguru import logger
import torch
def check_exllama_version():
"""Verifies the exllama version"""
required_version = version.parse("0.1.5")
current_version = version.parse(package_version("exllamav2").split("+")[0])
if current_version < required_version:
raise SystemExit(
f"ERROR: TabbyAPI requires ExLlamaV2 {required_version} "
f"or greater. Your current version is {current_version}.\n"
"Please upgrade your environment by running a start script "
"(start.bat or start.sh)\n\n"
"Or you can manually run a requirements update "
"using the following command:\n\n"
"For CUDA 12.1:\n"
"pip install --upgrade .[cu121]\n\n"
"For CUDA 11.8:\n"
"pip install --upgrade .[cu118]\n\n"
"For ROCm:\n"
"pip install --upgrade .[amd]\n\n"
)
else:
logger.info(f"ExllamaV2 version: {current_version}")
def hardware_supports_flash_attn(gpu_device_list: list[int]):
"""
Check whether all GPUs in list support FA2
Compute capability < 8 is not supported by FA2
AMD is also unsupported until ROCm updates its FA2 fork
"""
min_compute_capability = min(
torch.cuda.get_device_capability(device=device_idx)[0]
for device_idx in gpu_device_list
)
if torch.version.hip or min_compute_capability < 8:
return False
else:
return True
def supports_paged_attn():
"""Check whether the user's flash-attn version supports paged mode"""
required_version = version.parse("2.5.7")
try:
current_version = version.parse(package_version("flash-attn").split("+")[0])
except PackageNotFoundError:
return False
if current_version < required_version:
return False
else:
return True