Model: Add logprobs support

Returns the token offsets, the selected tokens, the post-sampling probabilities of those tokens, and the normalized pre-sampling probability of selecting each token (the pre-sampling values are used for efficiency purposes). This is currently supported only for text completions; chat completion support will be added in a later commit. Signed-off-by: kingbri <bdashore3@proton.me>
2026-04-20 14:28:54 +00:00 · 2024-02-07 21:41:15 -05:00
parent 2642ef7156
commit 0af6a38af3
6 changed files with 145 additions and 52 deletions
--- a/OAI/utils/completion.py
+++ b/OAI/utils/completion.py
@@ -9,22 +9,40 @@ from OAI.types.chat_completion import (
    ChatCompletionResponse,
    ChatCompletionStreamChoice,
 )
-from OAI.types.completion import CompletionResponse, CompletionRespChoice
+from OAI.types.completion import (
+    CompletionResponse,
+    CompletionRespChoice,
+    CompletionLogProbs,
+)
 from OAI.types.common import UsageStats


-def create_completion_response(
-    text: str,
-    prompt_tokens: int,
-    completion_tokens: int,
-    model_name: Optional[str],
-):
+def create_completion_response(**kwargs):
    """Create a completion response from the provided text."""
-    choice = CompletionRespChoice(finish_reason="Generated", text=text)
+
+    token_probs = unwrap(kwargs.get("token_probs"), {})
+    logprobs = unwrap(kwargs.get("logprobs"), [])
+    offset = unwrap(kwargs.get("offset"), [])
+
+    logprob_response = CompletionLogProbs(
+        text_offset=offset if isinstance(offset, list) else [offset],
+        token_logprobs=token_probs.values(),
+        tokens=token_probs.keys(),
+        top_logprobs=logprobs if isinstance(logprobs, list) else [logprobs],
+    )
+
+    choice = CompletionRespChoice(
+        finish_reason="Generated",
+        text=unwrap(kwargs.get("text"), ""),
+        logprobs=logprob_response,
+    )
+
+    prompt_tokens = unwrap(kwargs.get("prompt_tokens"), 0)
+    completion_tokens = unwrap(kwargs.get("completion_tokens"), 0)

    response = CompletionResponse(
        choices=[choice],
-        model=unwrap(model_name, ""),
+        model=unwrap(kwargs.get("model_name"), ""),
        usage=UsageStats(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
@@ -37,12 +55,12 @@ def create_completion_response(

 def create_chat_completion_response(
    text: str,
-    prompt_tokens: int,
-    completion_tokens: int,
+    prompt_tokens: Optional[int],
+    completion_tokens: Optional[int],
    model_name: Optional[str],
 ):
    """Create a chat completion response from the provided text."""
-    message = ChatCompletionMessage(role="assistant", content=text)
+    message = ChatCompletionMessage(role="assistant", content=unwrap(text, ""))

    choice = ChatCompletionRespChoice(finish_reason="Generated", message=message)