Model: Remove dev wheel setting checks

Removes the tensor parallelism (TP) and DRY sampler availability checks, since both features are now in the stable ExLlamaV2 release. Signed-off-by: kingbri <bdashore3@proton.me>
2026-05-11 16:30:16 +00:00 · 2024-09-14 22:13:56 -04:00
parent 2d221832fb
commit 2a41910931
1 changed file with 5 additions and 27 deletions
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -17,6 +17,7 @@ from exllamav2 import (
    ExLlamaV2Cache_Q4,
    ExLlamaV2Cache_Q6,
    ExLlamaV2Cache_Q8,
+    ExLlamaV2Cache_TP,
    ExLlamaV2Tokenizer,
    ExLlamaV2Lora,
 )
@@ -55,14 +56,6 @@ from common.templating import (
 from common.transformers_utils import GenerationConfig, HuggingFaceConfig
 from common.utils import coalesce, unwrap

-# Dynamic imports
-try:
-    from exllamav2 import ExLlamaV2Cache_TP
-
-    has_tp = True
-except ImportError:
-    has_tp = False
-

 class ExllamaV2Container:
    """The model container class for ExLlamaV2 models."""
@@ -197,17 +190,10 @@ class ExllamaV2Container:
        else:
            # Set tensor parallel
            if use_tp:
-                if has_tp:
-                    self.use_tp = True
+                self.use_tp = True

-                    # TP has its own autosplit loader
-                    self.gpu_split_auto = False
-                else:
-                    # TODO: Remove conditional with exl2 v0.1.9 release
-                    logger.warning(
-                        "Tensor parallelism is not supported in the "
-                        "current ExllamaV2 version."
-                    )
+                # TP has its own autosplit loader
+                self.gpu_split_auto = False

            # Enable manual GPU split if provided
            if gpu_split:
@@ -703,7 +689,7 @@ class ExllamaV2Container:
    ):
        """Utility function to create a model cache."""

-        if has_tp and use_tp:
+        if use_tp:
            return ExLlamaV2Cache_TP(
                model,
                base=cache_class,
@@ -967,14 +953,6 @@ class ExllamaV2Container:
        Meant for dev wheels!
        """

-        if unwrap(kwargs.get("dry_allowed_length"), 0) > 0 and not hasattr(
-            ExLlamaV2Sampler.Settings, "dry_multiplier"
-        ):
-            logger.warning(
-                "DRY sampling is not supported by the currently "
-                "installed ExLlamaV2 version."
-            )
-
        return kwargs

    async def generate_gen(