diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index e9876e4..bd7c7ca 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -863,6 +863,7 @@ class ExllamaV2Container: # Some options are too large, so log the args instead log_generation_params( max_tokens=max_tokens, + stream=kwargs.get("stream"), **gen_settings_log_dict, token_healing=token_healing, auto_scale_penalty_range=auto_scale_penalty_range, diff --git a/endpoints/OAI/types/common.py b/endpoints/OAI/types/common.py index af92df8..2241ad0 100644 --- a/endpoints/OAI/types/common.py +++ b/endpoints/OAI/types/common.py @@ -43,6 +43,9 @@ class CommonCompletionRequest(BaseSamplerRequest): ) def to_gen_params(self): - extra_gen_params = {"logprobs": self.logprobs} + extra_gen_params = { + "stream": self.stream, + "logprobs": self.logprobs, + } return super().to_gen_params(**extra_gen_params)