fix v1 completions streaming mode (#768)

This commit is contained in:
firecoperana
2025-09-09 08:38:12 -05:00
committed by GitHub
parent c519d4177b
commit d323871ba9

View File

@@ -275,34 +275,29 @@ struct server_task_result {
     json to_json_oaicompat_partial() {
         std::time_t t = std::time(0);
         json logprobs = json(nullptr); // OAI default to null
-        if (!stream && probs_output.size() > 0) {
+        if (probs_output.size() > 0) {
             logprobs = json{
                 {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)},
             };
         }
-        json finish_reason = "length";
-        if (stop) {
-            //if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
-            finish_reason = "stop";
-        }
         json res = json{
             {"choices", json::array({
                 json{
-                    {"text", stream ? "" : content}, // in stream mode, content is already in last partial chunk
+                    {"text", content},
                     {"index", index},
                     {"logprobs", logprobs},
-                    {"finish_reason", finish_reason},
+                    {"finish_reason", nullptr},
                 }
             })},
             {"created", t},
             {"model", oaicompat_model},
             {"object", "text_completion"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
                 {"prompt_tokens", n_prompt_tokens},
                 {"total_tokens", n_decoded + n_prompt_tokens}
             }},
             {"id", oaicompat_cmpl_id}
         };
         // extra fields for debugging purposes