From 8cbb59d6e1e6775347ca1a7e18442b4a310b862b Mon Sep 17 00:00:00 2001
From: kingbri <bdashore3@proton.me>
Date: Mon, 18 Mar 2024 22:20:45 -0400
Subject: [PATCH] Model: Clean up some comments

Signed-off-by: kingbri <bdashore3@proton.me>
---
 backends/exllamav2/model.py | 50 +++----------------------------------
 1 file changed, 3 insertions(+), 47 deletions(-)

diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 4b99202..79bdbe2 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -141,7 +141,7 @@ class ExllamaV2Container:
         self.config.model_dir = str(model_directory.resolve())
 
         # Make the max seq len 4096 before preparing the config
-        # This is a better default than 2038
+        # This is a better default than 2048
         self.config.max_seq_len = 4096
 
         # Hardcode max output length to 16
@@ -185,13 +185,6 @@ class ExllamaV2Container:
             True if self.use_cfg else unwrap(kwargs.get("no_flash_attention"), False)
         )
 
-        # low_mem is currently broken in exllamav2. Don't use it until it's
-        # fixed.
-        """
-        if "low_mem" in kwargs and kwargs["low_mem"]:
-            self.config.set_low_mem()
-        """
-
         # Try to set prompt template
         self.prompt_template = self.find_prompt_template(
             kwargs.get("prompt_template"), model_directory
@@ -639,46 +632,9 @@ class ExllamaV2Container:
 
     def generate_gen_sync(self, prompt: str, **kwargs):
         """
-        Create generator function for prompt completion
+        Create generator function for prompt completion.
 
-        Args:
-            prompt (str): Input prompt
-            **kwargs:
-                'token_healing' (bool): Use token healing (default: False)
-                'temperature' (float): Sampling temperature (default: 1.0)
-                'temperature_last' (bool): Apply temperature after all other
-                    samplers (default: False)
-                'top_k' (int): Sampling top-K (default: 0)
-                'top_p' (float): Sampling top-P (default: 1.0)
-                'min_p' (float): Sampling min-P (default: 0.0)
-                'tfs' (float): Tail-free sampling (default: 0.0)
-                'typical' (float): Sampling typical (default: 0.0)
-                'mirostat' (bool): Use Mirostat (default: False)
-                'mirostat_tau' (float) Mirostat tau parameter (default: 1.5)
-                'mirostat_eta' (float) Mirostat eta parameter (default: 0.1)
-                'frequency_penalty' (float): Token frequency penalty (default: 0.0)
-                'presence_penalty' (float): Token presence penalty (default: 0.0)
-                'repetition_penalty' (float): Token repetition penalty
-                    (default: 1.15)
-                'penalty_range' (int): Penalty range
-                    (default: whole context)
-                'repetition_decay' (int): Repetition penalty range
-                    (default: same as range)
-                'stop' (List[Union[str, int]]): List of stop strings/tokens to
-                    end response (default: [EOS])
-                'max_tokens' (int): Max no. tokens in response (default: 150)
-                'add_bos_token' (bool): Adds the BOS token to the start of the
-                    prompt (default: True)
-                'ban_eos_token' (bool): Bans the EOS token from generation
-                    (default: False)
-                'logit_bias' (Dict[int, float]): Biases specific tokens to
-                    either show up more or less (default: None)
-                'stream_interval' (float): Interval in seconds between each
-                    output chunk (default: immediate)
-                'generate_window' (int): Space to reserve at the end of the
-                    model's context when generating. Rolls context window by
-                    the same amount if context length is exceeded to allow
-                    generating pastthe models max_seq_len.
+        For kwargs, check common/sampling.py.
         """
 
         token_healing = unwrap(kwargs.get("token_healing"), False)