From 191600a150df7eb1c1c9ecea290327f3f102680f Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 22 Jul 2024 18:34:00 -0400 Subject: [PATCH] Revert "Model: Skip empty token chunks" This reverts commit 21516bd7b5ca90b190c785c0c767e6045136e4ab. The reverted commit ends up skipping EOS, and implementing the empty-chunk skip the proper way seems more costly than necessary. Signed-off-by: kingbri --- backends/exllamav2/model.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index f42dd00..200be6b 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -1185,15 +1185,13 @@ class ExllamaV2Container: result_id = result.get("identifier") if stage == "streaming" and result_id == job_id: - chunk_tokens = result.get("token_ids") - if chunk_tokens is None: - continue - else: - generated_tokens += chunk_tokens.size(dim=0) - chunk = unwrap(result.get("text"), "") full_response += chunk + chunk_tokens = result.get("token_ids") + if chunk_tokens is not None: + generated_tokens += chunk_tokens.size(dim=0) + generation = { "text": chunk, "prompt_tokens": context_len,