Model: Log metrics before yielding a stop

Yielding the finish reason before logging causes the function to
terminate early, which skips the response and metrics logging. Instead,
log before yielding and breaking out of the generation loop.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-03-20 01:17:04 -04:00
parent 09a4c79847
commit b74603db59

View File

@@ -975,19 +975,19 @@ class ExllamaV2Container:
last_chunk_time = now
if eos or generated_tokens == max_tokens:
# Print response
log_response(full_response)
# Print metrics
elapsed_time = last_chunk_time - start_time
context_len = None if ids is None else context_len
log_metrics(
generated_tokens, elapsed_time, context_len, self.config.max_seq_len
)
finish_reason = "length" if generated_tokens == max_tokens else "stop"
generation = {"finish_reason": finish_reason}
yield generation
break
# Print response
log_response(full_response)
# Print metrics
elapsed_time = last_chunk_time - start_time
context_len = None if ids is None else context_len
log_metrics(
generated_tokens, elapsed_time, context_len, self.config.max_seq_len
)