Model: Fix generation with non-streaming and logprobs

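The trailing generation chunk, which only carries finish_reason, was
being joined like a normal chunk and contributed an empty offset to the
result. Fix this by grabbing the finish reason first and then handling
the static generation as normal.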

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-03-21 00:47:24 -04:00
parent 345bcc30c7
commit 69e41e994c

View File

@@ -590,9 +590,18 @@ class ExllamaV2Container:
}
if generations:
# Get finish_reason first and then shift where -1 points to
if "finish_reason" in generations[-1]:
finish_reason_gen = generations.pop()
joined_generation["finish_reason"] = finish_reason_gen.get(
"finish_reason"
)
else:
joined_generation["finish_reason"] = "stop"
for generation in generations:
joined_generation["text"] += unwrap(generation.get("text"), "")
joined_generation["offset"].append(unwrap(generation.get("offset"), []))
joined_generation["offset"].append(unwrap(generation.get("offset"), -1))
joined_generation["token_probs"].update(
unwrap(generation.get("token_probs"), {})
)
@@ -608,9 +617,6 @@ class ExllamaV2Container:
joined_generation["generation_tokens"] = unwrap(
generations[-1].get("generated_tokens"), 0
)
joined_generation["finish_reason"] = unwrap(
generations[-1].get("finish_reason"), "stop"
)
return joined_generation