Model: Skip empty token chunks

This makes the generation loop more efficient by skipping chunks that
don't provide any tokens anyway. The offset isn't affected.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-07-22 12:23:49 -04:00
parent 0eedc8ca14
commit 21516bd7b5

View File

@@ -1185,13 +1185,15 @@ class ExllamaV2Container:
result_id = result.get("identifier")
if stage == "streaming" and result_id == job_id:
chunk_tokens = result.get("token_ids")
if chunk_tokens is None:
continue
else:
generated_tokens += chunk_tokens.size(dim=0)
chunk = unwrap(result.get("text"), "")
full_response += chunk
chunk_tokens = result.get("token_ids")
if chunk_tokens is not None:
generated_tokens += chunk_tokens.size(dim=0)
generation = {
"text": chunk,
"prompt_tokens": context_len,