# Mirror of https://github.com/theroyallab/tabbyAPI.git
# Synced 2026-05-11 16:30:16 +00:00
# Upstream file at sync time: Docker, 50 lines, 1.4 KiB
# Use an official CUDA runtime image with Ubuntu as the parent image.
# The tag pins CUDA 12.8.1 on Ubuntu 24.04 so the CUDA runtime inside the image
# does not drift between builds.
FROM nvidia/cuda:12.8.1-runtime-ubuntu24.04

# Install system dependencies: a compiler toolchain (build-essential), curl,
# CA certificates, and Python 3.12 together with pip and venv support.
# - update + install run in one layer so a cached, stale package index is never
#   reused for a changed package list.
# - DEBIAN_FRONTEND is set for this single command only, so a package
#   configuration prompt cannot stall the build and the variable does not leak
#   into the runtime environment.
# - The apt package lists are deleted in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        python3-pip \
        python3.12 \
        python3.12-venv \
    && rm -rf /var/lib/apt/lists/*

# Create a virtual environment at /opt/venv so Python packages are installed
# there rather than into the distribution-managed system interpreter.
RUN python3 -m venv /opt/venv

# "Activate" the venv for all later build steps and for the running container
# by putting its bin directory first on PATH; python3 and pip then resolve to
# the copies under /opt/venv/bin.
ENV PATH="/opt/venv/bin:$PATH"

# Upgrade pip inside the venv. --no-cache-dir keeps pip's download cache out of
# the image layer.
RUN pip install --no-cache-dir --upgrade pip

# Set the working directory in the container
WORKDIR /app

# Copy only the dependency manifest first, so the expensive install layer below
# stays cached until pyproject.toml itself changes.
COPY pyproject.toml .

# Install cu12 group first — pins torch+cu128, exllamav2/v3+cu128, flash_attn+cu128.
# The 'extras' group (infinity-emb, sentence-transformers) is installed separately
# with --no-deps so pip cannot resolve xformers transitively and pull a cu130 wheel,
# which would cause libcudart.so.13 ImportError on driver 590.x (cu128-only hosts).
# See: https://github.com/theroyallab/tabbyAPI/issues/414
#
# The requirement specifiers are quoted: unquoted, ".[cu12]" is a shell glob
# (a bracket expression) and would be rewritten if a matching dot-file such as
# ".c" or ".1" existed in the working directory.
#
# pyproject.toml is removed in the same layer once the install is done; the
# full source tree is copied in by a later instruction.
#
# NOTE(review): pip's --no-deps skips installing *all* dependencies, and the
# packages of an extra are themselves dependencies of the project. The second
# command may therefore install only the project itself and none of
# infinity-emb / sentence-transformers. Confirm with `pip list` in the built
# image; if they are missing, install them explicitly (or under a constraints
# file that pins the cu128 torch build) instead.
RUN pip install --no-cache-dir ".[cu12]" \
    && pip install --no-cache-dir --no-deps ".[extras]" \
    && rm pyproject.toml

# Copy the build context (the current directory) into the working directory.
COPY . .

# Document port 5000 as the container's service port. EXPOSE is metadata only:
# it does not publish the port, so use `docker run -p 5000:5000` (or the
# equivalent in your orchestrator) to reach it from outside.
EXPOSE 5000

# NOTE(review): no USER instruction is set in this file, so the process runs as
# the base image's default user (presumably root — confirm). Consider a
# dedicated non-root user if mounted volumes and config paths allow it.

# Set the entry point. Exec form keeps python3 as PID 1 so it receives signals
# directly; with /opt/venv/bin first on PATH this is the venv's interpreter.
ENTRYPOINT ["python3"]

# Default arguments: run main.py with --host 0.0.0.0 (presumably the listen
# address, so the server is reachable through a published port — confirm
# against main.py). Arguments given to `docker run` replace this list.
CMD ["main.py", "--host", "0.0.0.0"]