diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 1d80062..fdb85a7 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -1081,14 +1081,21 @@ class ExllamaV2Container: ) # DRY options - dry_allowed_length = unwrap(kwargs.get("dry_allowed_length"), 0) + dry_multiplier = unwrap(kwargs.get("dry_multiplier"), 0.0) - # 0 = disabled - if dry_allowed_length: - gen_settings.dry_allowed_length = dry_allowed_length + # <= 0 = disabled + if dry_multiplier > 0: + gen_settings.dry_allowed_length = unwrap( + kwargs.get("dry_allowed_length"), 0 + ) gen_settings.dry_base = unwrap(kwargs.get("dry_base"), 2.0) gen_settings.dry_multiplier = unwrap(kwargs.get("dry_multiplier"), 2.0) - gen_settings.dry_max_ngram = unwrap(kwargs.get("dry_max_ngram"), 20) + + # Exl2 has dry_range as 0 for unlimited unlike -1 for penalty_range + # Use max_seq_len as the fallback to stay consistent + gen_settings.dry_range = unwrap( + kwargs.get("dry_range"), self.config.max_seq_len + ) # Tokenize sequence breakers dry_sequence_breakers_json = kwargs.get("dry_sequence_breakers") diff --git a/common/sampling.py b/common/sampling.py index a3bccb3..de5b7dc 100644 --- a/common/sampling.py +++ b/common/sampling.py @@ -153,10 +153,10 @@ class BaseSamplerRequest(BaseModel): default_factory=lambda: get_default_sampler_value("dry_multiplier", 2.0) ) - # TODO: Remove these aliases - dry_max_ngram: Optional[int] = Field( - default_factory=lambda: get_default_sampler_value("dry_max_ngram", 20), - alias=AliasChoices("dry_max_ngram", "dry_penalty_last_n"), + dry_range: Optional[int] = Field( + default_factory=lambda: get_default_sampler_value("dry_range", 0), + alias=AliasChoices("dry_range", "dry_penalty_last_n"), + description=("Aliases: dry_penalty_last_n"), ) dry_sequence_breakers: Optional[str] = Field( @@ -371,7 +371,6 @@ class BaseSamplerRequest(BaseModel): "penalty_range": self.penalty_range, "dry_allowed_length": self.dry_allowed_length, "dry_base": 
self.dry_base, - "dry_max_ngram": self.dry_max_ngram, "dry_multiplier": self.dry_multiplier, "dry_sequence_breakers": self.dry_sequence_breakers, "repetition_decay": self.repetition_decay,