From 21516bd7b5ca90b190c785c0c767e6045136e4ab Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 22 Jul 2024 12:23:49 -0400 Subject: [PATCH] Model: Skip empty token chunks This helps make the generation loop more efficient by skipping past chunks that aren't providing any tokens anyways. The offset isn't affected. Signed-off-by: kingbri --- backends/exllamav2/model.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 200be6b..f42dd00 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -1185,13 +1185,15 @@ class ExllamaV2Container: result_id = result.get("identifier") if stage == "streaming" and result_id == job_id: + chunk_tokens = result.get("token_ids") + if chunk_tokens is None: + continue + else: + generated_tokens += chunk_tokens.size(dim=0) + chunk = unwrap(result.get("text"), "") full_response += chunk - chunk_tokens = result.get("token_ids") - if chunk_tokens is not None: - generated_tokens += chunk_tokens.size(dim=0) - generation = { "text": chunk, "prompt_tokens": context_len,