mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-20 14:28:54 +00:00
Model: Add logprobs support
Returns token offsets, selected tokens, probabilities of tokens post-sampling, and the normalized probability of selecting a token pre-sampling (for efficiency purposes). This applies only to text completions; chat completions will be supported in a later commit. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -9,22 +9,40 @@ from OAI.types.chat_completion import (
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionStreamChoice,
|
||||
)
|
||||
from OAI.types.completion import CompletionResponse, CompletionRespChoice
|
||||
from OAI.types.completion import (
|
||||
CompletionResponse,
|
||||
CompletionRespChoice,
|
||||
CompletionLogProbs,
|
||||
)
|
||||
from OAI.types.common import UsageStats
|
||||
|
||||
|
||||
def create_completion_response(
|
||||
text: str,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
model_name: Optional[str],
|
||||
):
|
||||
def create_completion_response(**kwargs):
|
||||
"""Create a completion response from the provided text."""
|
||||
choice = CompletionRespChoice(finish_reason="Generated", text=text)
|
||||
|
||||
token_probs = unwrap(kwargs.get("token_probs"), {})
|
||||
logprobs = unwrap(kwargs.get("logprobs"), [])
|
||||
offset = unwrap(kwargs.get("offset"), [])
|
||||
|
||||
logprob_response = CompletionLogProbs(
|
||||
text_offset=offset if isinstance(offset, list) else [offset],
|
||||
token_logprobs=token_probs.values(),
|
||||
tokens=token_probs.keys(),
|
||||
top_logprobs=logprobs if isinstance(logprobs, list) else [logprobs],
|
||||
)
|
||||
|
||||
choice = CompletionRespChoice(
|
||||
finish_reason="Generated",
|
||||
text=unwrap(kwargs.get("text"), ""),
|
||||
logprobs=logprob_response,
|
||||
)
|
||||
|
||||
prompt_tokens = unwrap(kwargs.get("prompt_tokens"), 0)
|
||||
completion_tokens = unwrap(kwargs.get("completion_tokens"), 0)
|
||||
|
||||
response = CompletionResponse(
|
||||
choices=[choice],
|
||||
model=unwrap(model_name, ""),
|
||||
model=unwrap(kwargs.get("model_name"), ""),
|
||||
usage=UsageStats(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
@@ -37,12 +55,12 @@ def create_completion_response(
|
||||
|
||||
def create_chat_completion_response(
|
||||
text: str,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
prompt_tokens: Optional[int],
|
||||
completion_tokens: Optional[int],
|
||||
model_name: Optional[str],
|
||||
):
|
||||
"""Create a chat completion response from the provided text."""
|
||||
message = ChatCompletionMessage(role="assistant", content=text)
|
||||
message = ChatCompletionMessage(role="assistant", content=unwrap(text, ""))
|
||||
|
||||
choice = ChatCompletionRespChoice(finish_reason="Generated", message=message)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user