mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-26 17:28:54 +00:00
Model: Log metrics before yielding a stop
Yielding the finish reason before logging causes the function to terminate early. Instead, log before yielding and breaking out of the generation loop.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -975,19 +975,19 @@ class ExllamaV2Container:
|
|||||||
last_chunk_time = now
|
last_chunk_time = now
|
||||||
|
|
||||||
if eos or generated_tokens == max_tokens:
|
if eos or generated_tokens == max_tokens:
|
||||||
|
# Print response
|
||||||
|
log_response(full_response)
|
||||||
|
|
||||||
|
# Print metrics
|
||||||
|
elapsed_time = last_chunk_time - start_time
|
||||||
|
context_len = None if ids is None else context_len
|
||||||
|
|
||||||
|
log_metrics(
|
||||||
|
generated_tokens, elapsed_time, context_len, self.config.max_seq_len
|
||||||
|
)
|
||||||
|
|
||||||
finish_reason = "length" if generated_tokens == max_tokens else "stop"
|
finish_reason = "length" if generated_tokens == max_tokens else "stop"
|
||||||
generation = {"finish_reason": finish_reason}
|
generation = {"finish_reason": finish_reason}
|
||||||
yield generation
|
yield generation
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
# Print response
|
|
||||||
log_response(full_response)
|
|
||||||
|
|
||||||
# Print metrics
|
|
||||||
elapsed_time = last_chunk_time - start_time
|
|
||||||
context_len = None if ids is None else context_len
|
|
||||||
|
|
||||||
log_metrics(
|
|
||||||
generated_tokens, elapsed_time, context_len, self.config.max_seq_len
|
|
||||||
)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user