mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-30 11:11:35 +00:00
API: Persist request IDs and append full_text to finish chunk
Adding these to each generation chunk helps remove redundancy and unnecessary request ID operations. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -730,7 +730,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||
# Clean up and remove the job from active IDs
|
||||
del self.active_job_ids[request_id]
|
||||
|
||||
def handle_finish_chunk(self, result: dict, generation: dict):
|
||||
def handle_finish_chunk(self, result: dict, request_id: str, full_text: str):
|
||||
eos_reason = result.get("eos_reason")
|
||||
|
||||
stop_str = None
|
||||
@@ -764,6 +764,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||
total_time = round(queue_time + prompt_time + gen_time, 2)
|
||||
|
||||
finish_chunk = {
|
||||
"request_id": request_id,
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"prompt_time": round(prompt_time, 2),
|
||||
"prompt_tokens_per_sec": prompt_ts,
|
||||
@@ -775,6 +776,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||
"cached_tokens": cached_tokens,
|
||||
"finish_reason": finish_reason,
|
||||
"stop_str": stop_str,
|
||||
"full_text": full_text,
|
||||
}
|
||||
|
||||
return finish_chunk
|
||||
@@ -940,6 +942,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||
# gen_settings.token_repetition_range = generated_tokens
|
||||
|
||||
generation = {
|
||||
"request_id": request_id,
|
||||
"text": chunk,
|
||||
"prompt_tokens": context_len,
|
||||
"generated_tokens": generated_tokens,
|
||||
@@ -948,7 +951,9 @@ class ExllamaV3Container(BaseModelContainer):
|
||||
yield generation
|
||||
|
||||
if result.get("eos"):
|
||||
finish_chunk = self.handle_finish_chunk(result, generation)
|
||||
finish_chunk = self.handle_finish_chunk(
|
||||
result, request_id, full_response
|
||||
)
|
||||
|
||||
# Save the final result for metrics logging
|
||||
metrics_result = finish_chunk
|
||||
|
||||
Reference in New Issue
Block a user