diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a77b3b7a..18d2af8a 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2646,18 +2646,9 @@ struct server_context {
         // populate res.probs_output
         if (slot.sparams.n_probs > 0) {
-            if (!slot.params.stream && slot.stopped_word) {
-                const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
-
-                size_t safe_offset = std::min(slot.generated_token_probs.size(), stop_word_toks.size());
-                res.probs_output = std::vector<completion_token_output>(
-                    slot.generated_token_probs.begin(),
-                    slot.generated_token_probs.end() - safe_offset);
-            } else {
-                res.probs_output = std::vector<completion_token_output>(
-                    slot.generated_token_probs.begin(),
-                    slot.generated_token_probs.end());
-            }
+            res.probs_output = std::vector<completion_token_output>(
+                slot.generated_token_probs.begin(),
+                slot.generated_token_probs.end());
             res.data["completion_probabilities"] = probs_vector_to_json(ctx, res.probs_output);
         }
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 24dcb044..3c4b6dd1 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -466,6 +466,12 @@ static json oaicompat_chat_params_parse(const json& body) {
         throw std::runtime_error("Only no echo is supported");
     }
 
+    // Handle "logprobs" field
+    int n_probs = json_value(body, "logprobs", 0);
+    if (n_probs > 0) {
+        llama_params["n_probs"] = n_probs;
+    }
+
     // Params supported by OAI but unsupported by llama.cpp
     static const std::vector<std::string> unsupported_params{ "best_of", "suffix" };
     for (const auto& param : unsupported_params) {
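
Note on the utils.hpp hunk: the new json_value(body, "logprobs", 0) call reads the
OAI-style "logprobs" field with a typed default, so n_probs is only forwarded to
llama_params when the client actually requested probabilities. As a point of
reference only, a minimal sketch of such a typed-default accessor over
nlohmann::json (llama.cpp's utils.hpp ships its own variant, whose exact
signature and error handling may differ) could look like:

    #include <nlohmann/json.hpp>
    #include <string>

    using json = nlohmann::json;

    // Return body[key] when the field is present and non-null; otherwise
    // fall back to the supplied default. This is a sketch, not the exact
    // helper from utils.hpp.
    template <typename T>
    static T json_value(const json & body, const std::string & key, const T & default_value) {
        if (body.contains(key) && !body.at(key).is_null()) {
            return body.value(key, default_value);
        }
        return default_value;
    }

With that behavior, a request body of {"logprobs": 5} yields n_probs == 5 and
sets llama_params["n_probs"], while a request that omits the field leaves
n_probs at 0 so the guard above skips it entirely.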