mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-13 09:26:00 +00:00
server: fix usage stats (#1647)
Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -120,6 +120,16 @@ json server_task_result_cmpl_partial::to_json_oaicompat_partial() {
|
||||
return res;
|
||||
}
|
||||
|
||||
// Builds the shared "usage" JSON object for this result.
//
// Keys are emitted in this order:
//   completion_tokens     - n_decoded
//   prompt_tokens         - n_prompt_tokens
//   total_tokens          - n_decoded + n_prompt_tokens
//   prompt_tokens_details - nested object { cached_tokens: n_prompt_tokens_cache }
json server_task_result_cmpl_final::usage_json_oaicompat() {
    // Nested breakdown of the prompt side: how many prompt tokens came from cache.
    json prompt_details = json { {"cached_tokens", n_prompt_tokens_cache} };

    json usage = json {
        {"completion_tokens", n_decoded},
        {"prompt_tokens", n_prompt_tokens},
        {"total_tokens", n_decoded + n_prompt_tokens},
        {"prompt_tokens_details", prompt_details},
    };
    return usage;
}
|
||||
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_final() {
|
||||
std::time_t t = std::time(0);
|
||||
json logprobs = json(nullptr); // OAI default to null
|
||||
@@ -144,11 +154,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_final() {
|
||||
{"created", t},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "text_completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens}
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
{"id", oaicompat_cmpl_id}
|
||||
};
|
||||
|
||||
@@ -379,11 +385,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_final() {
|
||||
{"created", t},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "chat.completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens}
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
{"id", oaicompat_cmpl_id}
|
||||
};
|
||||
|
||||
@@ -445,11 +447,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "chat.completion.chunk"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
});
|
||||
}
|
||||
if (timings.prompt_n >= 0) {
|
||||
@@ -523,10 +521,11 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_final() {
|
||||
{"object", "response"},
|
||||
{"output", output},
|
||||
{"status", "completed"},
|
||||
{"usage", json{
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}},
|
||||
};
|
||||
|
||||
@@ -633,11 +632,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||
{"status", "completed"},
|
||||
{"model", oaicompat_model},
|
||||
{"output", output},
|
||||
{"usage", json{
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
}},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}}
|
||||
}},
|
||||
}},
|
||||
});
|
||||
@@ -703,7 +703,8 @@ json server_task_result_cmpl_final::to_json_anthropic_final() {
|
||||
{"stop_reason", stop_reason},
|
||||
{"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)},
|
||||
{"usage", {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"cache_read_input_tokens", n_prompt_tokens_cache},
|
||||
{"input_tokens", n_prompt_tokens - n_prompt_tokens_cache},
|
||||
{"output_tokens", n_decoded}
|
||||
}}
|
||||
};
|
||||
@@ -923,7 +924,8 @@ json server_task_result_cmpl_partial::to_json_anthropic_partial() {
|
||||
{"stop_reason", nullptr},
|
||||
{"stop_sequence", nullptr},
|
||||
{"usage", {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"cache_read_input_tokens", n_prompt_tokens_cache},
|
||||
{"input_tokens", n_prompt_tokens - n_prompt_tokens_cache},
|
||||
{"output_tokens", 0}
|
||||
}}
|
||||
}}
|
||||
|
||||
Reference in New Issue
Block a user