diff --git a/config_sample.yml b/config_sample.yml
index 3b34c9b..eb788e5 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -166,10 +166,10 @@ model:
   # Do NOT enable this if the model is not a reasoning model (e.g. deepseek-r1 series)
   reasoning: false
 
-  # The start token for reasoning conetnt (default: "")
+  # The start token for reasoning content (default: "")
   reasoning_start_token: ""
 
-  # The end token for reasoning conetnt (default: "")
+  # The end token for reasoning content (default: "")
   reasoning_end_token: ""
 
   # Suppress this text whenever it appears in the beginning of a reasoning block (default: None)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 58aa61f..189e26f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -26,8 +26,13 @@ WORKDIR /app
 # Get requirements
 COPY pyproject.toml .
 
-# Install packages specified in pyproject.toml cu12, extras
-RUN pip install --no-cache-dir .[cu12,extras]
+# Install cu12 group first — pins torch+cu128, exllamav2/v3+cu128, flash_attn+cu128.
+# The 'extras' group (infinity-emb, sentence-transformers) is installed separately
+# with --no-deps so pip cannot resolve xformers transitively and pull a cu130 wheel,
+# which would cause libcudart.so.13 ImportError on driver 590.x (cu128-only hosts).
+# See: https://github.com/theroyallab/tabbyAPI/issues/414
+RUN pip install --no-cache-dir .[cu12]
+RUN pip install --no-cache-dir --no-deps .[extras]
 
 RUN rm pyproject.toml
 