From 69e41e994cfe7a2030cef615ac23b5585e10ae26 Mon Sep 17 00:00:00 2001 From: kingbri Date: Thu, 21 Mar 2024 00:47:24 -0400 Subject: [PATCH] Model: Fix generation with non-streaming and logprobs The trailing finish_reason chunk was appending an empty offset to the joined generation. Fix this by popping the finish reason first and then joining the remaining chunks as normal. Signed-off-by: kingbri --- backends/exllamav2/model.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index c008b8d..e9876e4 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -590,9 +590,18 @@ class ExllamaV2Container: } if generations: + # Get finish_reason first and then shift where -1 points to + if "finish_reason" in generations[-1]: + finish_reason_gen = generations.pop() + joined_generation["finish_reason"] = finish_reason_gen.get( + "finish_reason" + ) + else: + joined_generation["finish_reason"] = "stop" + for generation in generations: joined_generation["text"] += unwrap(generation.get("text"), "") - joined_generation["offset"].append(unwrap(generation.get("offset"), [])) + joined_generation["offset"].append(unwrap(generation.get("offset"), -1)) joined_generation["token_probs"].update( unwrap(generation.get("token_probs"), {}) ) @@ -608,9 +617,6 @@ class ExllamaV2Container: joined_generation["generation_tokens"] = unwrap( generations[-1].get("generated_tokens"), 0 ) - joined_generation["finish_reason"] = unwrap( - generations[-1].get("finish_reason"), "stop" - ) return joined_generation