mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-24 07:04:11 +00:00
Send [DONE] for OAI compatibility (#470)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -1266,6 +1266,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||||||
params.port = std::stoi(argv[i]);
|
params.port = std::stoi(argv[i]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (arg == "--send-done") {
|
||||||
|
params.send_done = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (arg == "--path") {
|
if (arg == "--path") {
|
||||||
CHECK_ARG
|
CHECK_ARG
|
||||||
params.public_path = argv[i];
|
params.public_path = argv[i];
|
||||||
|
|||||||
@@ -223,6 +223,7 @@ struct gpt_params {
|
|||||||
int32_t timeout_read = 600; // http read timeout in seconds
|
int32_t timeout_read = 600; // http read timeout in seconds
|
||||||
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
||||||
int32_t n_threads_http = -1; // number of threads to process HTTP requests
|
int32_t n_threads_http = -1; // number of threads to process HTTP requests
|
||||||
|
bool send_done = false; // send done message as required for OAI compatibility
|
||||||
|
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
std::string public_path = "";
|
std::string public_path = "";
|
||||||
|
|||||||
@@ -3394,7 +3394,8 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||||
} else {
|
} else {
|
||||||
const auto chunked_content_provider = [id_task, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
|
const auto chunked_content_provider = [id_task, &ctx_server, completion_id, send_done = params.send_done](size_t, httplib::DataSink & sink) {
|
||||||
|
bool successful_completion = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
server_task_result result = ctx_server.queue_results.recv(id_task);
|
server_task_result result = ctx_server.queue_results.recv(id_task);
|
||||||
if (!result.error) {
|
if (!result.error) {
|
||||||
@@ -3414,6 +3415,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (result.stop) {
|
if (result.stop) {
|
||||||
|
successful_completion = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -3429,9 +3431,18 @@ int main(int argc, char ** argv) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
bool ok = true;
|
||||||
|
if (send_done && successful_completion) {
|
||||||
|
static const std::string done_message = "data: [DONE]\n\n";
|
||||||
|
LOG_VERBOSE("data stream", {{"to_send", done_message}});
|
||||||
|
if (!sink.write(done_message.c_str(), done_message.size())) {
|
||||||
|
// If writing [DONE] fails, the stream is likely already problematic.
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
sink.done();
|
sink.done();
|
||||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||||
return true;
|
return ok;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_complete = [id_task, &ctx_server](bool) {
|
auto on_complete = [id_task, &ctx_server](bool) {
|
||||||
|
|||||||
Reference in New Issue
Block a user