diff --git a/config_sample.yml b/config_sample.yml
index 3b34c9b..eb788e5 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -166,10 +166,10 @@ model:
   # Do NOT enable this if the model is not a reasoning model (e.g. deepseek-r1 series)
   reasoning: false
 
-  # The start token for reasoning conetnt (default: "")
+  # The start token for reasoning content (default: "")
   reasoning_start_token: ""
 
-  # The end token for reasoning conetnt (default: "")
+  # The end token for reasoning content (default: "")
   reasoning_end_token: ""
 
   # Suppress this text whenever it appears in the beginning of a reasoning block (default: None)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 58aa61f..189e26f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -26,8 +26,13 @@ WORKDIR /app
 # Get requirements
 COPY pyproject.toml .
 
-# Install packages specified in pyproject.toml cu12, extras
-RUN pip install --no-cache-dir .[cu12,extras]
+# Install cu12 group first — pins torch+cu128, exllamav2/v3+cu128, flash_attn+cu128.
+# The 'extras' group (infinity-emb, sentence-transformers) is installed separately
+# with --no-deps so pip cannot resolve xformers transitively and pull a cu130 wheel,
+# which would cause libcudart.so.13 ImportError on driver 590.x (cu128-only hosts).
+# See: https://github.com/theroyallab/tabbyAPI/issues/414
+RUN pip install --no-cache-dir .[cu12]
+RUN pip install --no-cache-dir --no-deps .[extras]
 
 RUN rm pyproject.toml
 