mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
API: Add timings to usage stats
It's useful for the client to know the generation speed in tokens per second (T/s) and the total generation time for each request. Works with both completions and chat completions. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""Common types for OAI."""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
from common.sampling import BaseSamplerRequest, get_default_sampler_value
|
||||
|
||||
@@ -10,8 +10,13 @@ class UsageStats(BaseModel):
|
||||
"""Represents usage stats."""
|
||||
|
||||
prompt_tokens: int
|
||||
prompt_time: Optional[float] = None
|
||||
prompt_tokens_per_sec: Optional[Union[float, str]] = None
|
||||
completion_tokens: int
|
||||
completion_time: Optional[float] = None
|
||||
completion_tokens_per_sec: Optional[Union[float, str]] = None
|
||||
total_tokens: int
|
||||
total_time: Optional[float] = None
|
||||
|
||||
|
||||
class CompletionResponseFormat(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user