mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-14 15:57:27 +00:00
Revert "Model: Skip empty token chunks"
This reverts commit 21516bd7b5.
The reverted change also skips the EOS chunk, and implementing the
skip the proper way seems more costly than necessary.
Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -1185,15 +1185,13 @@ class ExllamaV2Container:
|
||||
result_id = result.get("identifier")
|
||||
|
||||
if stage == "streaming" and result_id == job_id:
|
||||
chunk_tokens = result.get("token_ids")
|
||||
if chunk_tokens is None:
|
||||
continue
|
||||
else:
|
||||
generated_tokens += chunk_tokens.size(dim=0)
|
||||
|
||||
chunk = unwrap(result.get("text"), "")
|
||||
full_response += chunk
|
||||
|
||||
chunk_tokens = result.get("token_ids")
|
||||
if chunk_tokens is not None:
|
||||
generated_tokens += chunk_tokens.size(dim=0)
|
||||
|
||||
generation = {
|
||||
"text": chunk,
|
||||
"prompt_tokens": context_len,
|
||||
|
||||
Reference in New Issue
Block a user