mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-23 15:59:14 +00:00
Requirements: Update Exllamav2 v0.0.15
Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -10,6 +10,7 @@ from exllamav2 import (
|
|||||||
ExLlamaV2Config,
|
ExLlamaV2Config,
|
||||||
ExLlamaV2Cache,
|
ExLlamaV2Cache,
|
||||||
ExLlamaV2Cache_8bit,
|
ExLlamaV2Cache_8bit,
|
||||||
|
ExLlamaV2Cache_Q4,
|
||||||
ExLlamaV2Tokenizer,
|
ExLlamaV2Tokenizer,
|
||||||
ExLlamaV2Lora,
|
ExLlamaV2Lora,
|
||||||
)
|
)
|
||||||
@@ -27,14 +28,6 @@ from common.templating import (
|
|||||||
from common.utils import coalesce, unwrap
|
from common.utils import coalesce, unwrap
|
||||||
from common.logger import init_logger
|
from common.logger import init_logger
|
||||||
|
|
||||||
# Optional imports for dependencies
|
|
||||||
try:
|
|
||||||
from exllamav2 import ExLlamaV2Cache_Q4
|
|
||||||
|
|
||||||
_exllamav2_has_int4 = True
|
|
||||||
except ImportError:
|
|
||||||
_exllamav2_has_int4 = False
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -116,16 +109,7 @@ class ExllamaV2Container:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
self.quiet = quiet
|
self.quiet = quiet
|
||||||
|
self.cache_mode = unwrap(kwargs.get("cache_mode"), "FP16")
|
||||||
cache_mode = unwrap(kwargs.get("cache_mode"), "FP16")
|
|
||||||
if cache_mode == "Q4" and not _exllamav2_has_int4:
|
|
||||||
logger.warning(
|
|
||||||
"Q4 cache is not available "
|
|
||||||
"in the currently installed ExllamaV2 version. Using FP16."
|
|
||||||
)
|
|
||||||
cache_mode = "FP16"
|
|
||||||
|
|
||||||
self.cache_mode = cache_mode
|
|
||||||
|
|
||||||
# Turn off GPU split if the user is using 1 GPU
|
# Turn off GPU split if the user is using 1 GPU
|
||||||
gpu_count = torch.cuda.device_count()
|
gpu_count = torch.cuda.device_count()
|
||||||
@@ -415,7 +399,7 @@ class ExllamaV2Container:
|
|||||||
|
|
||||||
batch_size = 2 if self.use_cfg else 1
|
batch_size = 2 if self.use_cfg else 1
|
||||||
|
|
||||||
if self.cache_mode == "Q4" and _exllamav2_has_int4:
|
if self.cache_mode == "Q4":
|
||||||
self.cache = ExLlamaV2Cache_Q4(
|
self.cache = ExLlamaV2Cache_Q4(
|
||||||
self.model, lazy=self.gpu_split_auto, batch_size=batch_size
|
self.model, lazy=self.gpu_split_auto, batch_size=batch_size
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ logger = init_logger(__name__)
|
|||||||
def check_exllama_version():
|
def check_exllama_version():
|
||||||
"""Verifies the exllama version"""
|
"""Verifies the exllama version"""
|
||||||
|
|
||||||
required_version = version.parse("0.0.14")
|
required_version = version.parse("0.0.15")
|
||||||
current_version = version.parse(package_version("exllamav2").split("+")[0])
|
current_version = version.parse(package_version("exllamav2").split("+")[0])
|
||||||
|
|
||||||
if current_version < required_version:
|
if current_version < required_version:
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
torch ~= 2.2
|
torch ~= 2.2
|
||||||
|
|
||||||
# Exllamav2
|
# Exllamav2
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp311-cp311-linux_x86_64.whl; python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+rocm5.6-cp311-cp311-linux_x86_64.whl; python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp310-cp310-linux_x86_64.whl; python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+rocm5.6-cp310-cp310-linux_x86_64.whl; python_version == "3.10"
|
||||||
|
|
||||||
# Pip dependencies
|
# Pip dependencies
|
||||||
fastapi
|
fastapi
|
||||||
|
|||||||
@@ -5,12 +5,12 @@ torch ~= 2.2
|
|||||||
# Exllamav2
|
# Exllamav2
|
||||||
|
|
||||||
# Windows
|
# Windows
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu118-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu118-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# Linux
|
# Linux
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu118-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu118-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# Pip dependencies
|
# Pip dependencies
|
||||||
fastapi
|
fastapi
|
||||||
|
|||||||
@@ -5,12 +5,12 @@ torch ~= 2.2
|
|||||||
# Exllamav2
|
# Exllamav2
|
||||||
|
|
||||||
# Windows
|
# Windows
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
|
|
||||||
# Linux
|
# Linux
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
|
|
||||||
# Pip dependencies
|
# Pip dependencies
|
||||||
fastapi
|
fastapi
|
||||||
|
|||||||
Reference in New Issue
Block a user