Mirror of https://github.com/theroyallab/tabbyAPI.git, synced 2026-03-14 15:57:27 +00:00.
Config: Add experimental torch cuda malloc backend
This option saves some VRAM, but does have the chance to error out. Add this in the experimental config section.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:

  main.py — 6 lines changed
@@ -1,4 +1,5 @@
 """The main tabbyAPI module. Contains the FastAPI server and endpoints."""
+import os
 import pathlib
 import uvicorn
 from asyncio import CancelledError
@@ -600,6 +601,11 @@ def entrypoint(args: Optional[dict] = None):
     else:
         check_exllama_version()

+    # Enable CUDA malloc backend
+    if unwrap(developer_config.get("cuda_malloc_backend"), False):
+        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
+        logger.warning("Enabled the experimental CUDA malloc backend.")
+
     network_config = get_network_config()

     # Initialize auth keys
Reference in New Issue
Block a user