mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-25 16:59:09 +00:00
Model: Attach request ID to logs
If multiple logs come in at once, track which log corresponds to which request.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -1210,7 +1210,7 @@ class ExllamaV2Container:
|
|||||||
|
|
||||||
# Second yield if eos is true
|
# Second yield if eos is true
|
||||||
if result.get("eos"):
|
if result.get("eos"):
|
||||||
log_response(full_response)
|
log_response(request_id, full_response)
|
||||||
|
|
||||||
eos_reason = result.get("eos_reason")
|
eos_reason = result.get("eos_reason")
|
||||||
finish_reason = (
|
finish_reason = (
|
||||||
@@ -1271,6 +1271,7 @@ class ExllamaV2Container:
|
|||||||
# Log the metrics if present
|
# Log the metrics if present
|
||||||
if metrics_result:
|
if metrics_result:
|
||||||
log_metrics(
|
log_metrics(
|
||||||
|
request_id,
|
||||||
metrics_result.get("time_enqueued"),
|
metrics_result.get("time_enqueued"),
|
||||||
metrics_result.get("prompt_tokens"),
|
metrics_result.get("prompt_tokens"),
|
||||||
metrics_result.get("cached_tokens"),
|
metrics_result.get("cached_tokens"),
|
||||||
|
|||||||
@@ -64,14 +64,18 @@ def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]):
|
|||||||
logger.info(f"Negative Prompt: {formatted_negative_prompt}\n")
|
logger.info(f"Negative Prompt: {formatted_negative_prompt}\n")
|
||||||
|
|
||||||
|
|
||||||
def log_response(response: str):
|
def log_response(request_id: str, response: str):
|
||||||
"""Logs the response to console."""
|
"""Logs the response to console."""
|
||||||
if PREFERENCES.prompt:
|
if PREFERENCES.prompt:
|
||||||
formatted_response = "\n" + response
|
formatted_response = "\n" + response
|
||||||
logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
|
logger.info(
|
||||||
|
f"Response (ID: {request_id}): "
|
||||||
|
f"{formatted_response if response else 'Empty'}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def log_metrics(
|
def log_metrics(
|
||||||
|
request_id: str,
|
||||||
queue_time: float,
|
queue_time: float,
|
||||||
prompt_tokens: int,
|
prompt_tokens: int,
|
||||||
cached_tokens: int,
|
cached_tokens: int,
|
||||||
@@ -82,7 +86,7 @@ def log_metrics(
|
|||||||
max_seq_len: int,
|
max_seq_len: int,
|
||||||
):
|
):
|
||||||
initial_response = (
|
initial_response = (
|
||||||
f"Metrics: {generated_tokens} tokens generated in "
|
f"Metrics (ID: {request_id}): {generated_tokens} tokens generated in "
|
||||||
f"{round(queue_time + prompt_time + generate_time, 2)} seconds"
|
f"{round(queue_time + prompt_time + generate_time, 2)} seconds"
|
||||||
)
|
)
|
||||||
itemization = []
|
itemization = []
|
||||||
|
|||||||
Reference in New Issue
Block a user