Config: Add experimental torch cuda malloc backend

This option saves some VRAM, but may cause errors in some cases.
Add this option to the experimental config section.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-02-14 21:44:04 -05:00
parent 664e2c417e
commit 949248fb94
3 changed files with 16 additions and 1 deletion

View File

@@ -1,4 +1,5 @@
"""The main tabbyAPI module. Contains the FastAPI server and endpoints."""
import os
import pathlib
import uvicorn
from asyncio import CancelledError
@@ -600,6 +601,11 @@ def entrypoint(args: Optional[dict] = None):
else:
check_exllama_version()
# Enable CUDA malloc backend
if unwrap(developer_config.get("cuda_malloc_backend"), False):
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
logger.warning("Enabled the experimental CUDA malloc backend.")
network_config = get_network_config()
# Initialize auth keys