mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Logging: Move metrics to gen logging
This didn't have a place in the generation function. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -20,7 +20,7 @@ from loguru import logger
|
|||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
from backends.exllamav2.grammar import ExLlamaV2Grammar
|
from backends.exllamav2.grammar import ExLlamaV2Grammar
|
||||||
from common.gen_logging import log_generation_params, log_prompt, log_response
|
from common.gen_logging import log_generation_params, log_metrics, log_prompt, log_response
|
||||||
from common.templating import (
|
from common.templating import (
|
||||||
PromptTemplate,
|
PromptTemplate,
|
||||||
find_template_from_model,
|
find_template_from_model,
|
||||||
@@ -969,35 +969,10 @@ class ExllamaV2Container:
|
|||||||
# Print response
|
# Print response
|
||||||
log_response(full_response)
|
log_response(full_response)
|
||||||
|
|
||||||
|
# Print metrics
|
||||||
elapsed_time = last_chunk_time - start_time
|
elapsed_time = last_chunk_time - start_time
|
||||||
|
context_len = None if ids is None else context_len
|
||||||
|
|
||||||
initial_response = (
|
log_metrics(
|
||||||
f"Metrics: {generated_tokens} tokens generated in "
|
generated_tokens, elapsed_time, context_len, self.config.max_seq_len
|
||||||
f"{round(elapsed_time, 2)} seconds"
|
|
||||||
)
|
|
||||||
itemization = []
|
|
||||||
extra_parts = []
|
|
||||||
|
|
||||||
# Add tokens per second
|
|
||||||
tokens_per_second = (
|
|
||||||
"Indeterminate"
|
|
||||||
if elapsed_time == 0
|
|
||||||
else round(generated_tokens / elapsed_time, 2)
|
|
||||||
)
|
|
||||||
itemization.append(f"{tokens_per_second} T/s")
|
|
||||||
|
|
||||||
# Add context (original token count)
|
|
||||||
if ids is not None:
|
|
||||||
itemization.append(f"context {context_len} tokens")
|
|
||||||
|
|
||||||
if context_len > self.config.max_seq_len:
|
|
||||||
extra_parts.append("<-- Not accurate (truncated)")
|
|
||||||
|
|
||||||
# Print output
|
|
||||||
logger.info(
|
|
||||||
initial_response
|
|
||||||
+ " ("
|
|
||||||
+ ", ".join(itemization)
|
|
||||||
+ ") "
|
|
||||||
+ " ".join(extra_parts)
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -67,3 +67,41 @@ def log_response(response: str):
|
|||||||
if PREFERENCES.prompt:
|
if PREFERENCES.prompt:
|
||||||
formatted_response = "\n" + response
|
formatted_response = "\n" + response
|
||||||
logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
|
logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def log_metrics(
    generated_tokens: int,
    elapsed_time: float,
    context_len: Optional[int],
    max_seq_len: int,
) -> None:
    """Log a one-line summary of a finished generation.

    The emitted line has the shape::

        Metrics: <n> tokens generated in <t> seconds (<rate> T/s[, context <c> tokens]) [<notes>]

    Args:
        generated_tokens: Number of tokens produced by the generation.
        elapsed_time: Elapsed generation time in seconds. When it is exactly
            zero the rate is reported as "Indeterminate" instead of dividing
            by zero.
        context_len: Token count of the original prompt, or ``None`` when no
            prompt ids were available, in which case the context item is
            left out entirely.
        max_seq_len: Maximum sequence length to compare ``context_len``
            against; a longer context is flagged as truncated.
    """

    initial_response = (
        f"Metrics: {generated_tokens} tokens generated in "
        f"{round(elapsed_time, 2)} seconds"
    )
    itemization = []
    extra_parts = []

    # Add tokens per second
    tokens_per_second = (
        "Indeterminate"
        if elapsed_time == 0
        else round(generated_tokens / elapsed_time, 2)
    )
    itemization.append(f"{tokens_per_second} T/s")

    # Add context (original token count).
    # Compare against None explicitly: a plain truthiness test would also
    # hide a legitimate context length of 0, whereas only "no ids" (None)
    # is meant to suppress this item.
    if context_len is not None:
        itemization.append(f"context {context_len} tokens")

        # The reported count is the untruncated prompt length, so it no
        # longer reflects what was processed once it exceeds the limit.
        if context_len > max_seq_len:
            extra_parts.append("<-- Not accurate (truncated)")

    # Print output
    logger.info(
        initial_response
        + " ("
        + ", ".join(itemization)
        + ") "
        + " ".join(extra_parts)
    )
|
||||||
|
|||||||
Reference in New Issue
Block a user