Send [DONE] for OAI compatibility (#470)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-02-24 07:04:11 +00:00 · 2025-06-17 10:32:53 +03:00
parent 0f8f8b32e2
commit 8b3002bba2
3 changed files with 18 additions and 2 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1266,6 +1266,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
        params.port = std::stoi(argv[i]);
        return true;
    }
    if (arg == "--send-done") {
        params.send_done = true;
        return true;
    }
    if (arg == "--path") {
        CHECK_ARG
        params.public_path = argv[i];
--- a/common/common.h
+++ b/common/common.h
@@ -223,6 +223,7 @@ struct gpt_params {
    int32_t timeout_read   = 600;          // http read timeout in seconds
    int32_t timeout_write  = timeout_read; // http write timeout in seconds
    int32_t n_threads_http = -1;           // number of threads to process HTTP requests
    bool    send_done      = false;        // send done message as required for OAI compatibility
    std::string hostname      = "127.0.0.1";
    std::string public_path   = "";
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3394,7 +3394,8 @@ int main(int argc, char ** argv) {
            }
            ctx_server.queue_results.remove_waiting_task_id(id_task);
        } else {
-            const auto chunked_content_provider = [id_task, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
+            const auto chunked_content_provider = [id_task, &ctx_server, completion_id, send_done = params.send_done](size_t, httplib::DataSink & sink) {
                bool successful_completion = false;
                while (true) {
                    server_task_result result = ctx_server.queue_results.recv(id_task);
                    if (!result.error) {
@@ -3414,6 +3415,7 @@ int main(int argc, char ** argv) {
                            }
                        }
                        if (result.stop) {
                            successful_completion = true;
                            break;
                        }
                    } else {
@@ -3429,9 +3431,18 @@ int main(int argc, char ** argv) {
                        break;
                    }
                }
                bool ok = true;
                if (send_done && successful_completion) {
                    static const std::string done_message = "data: [DONE]\n\n";
                    LOG_VERBOSE("data stream", {{"to_send", done_message}});
                    if (!sink.write(done_message.c_str(), done_message.size())) {
                        // A failed write of the [DONE] terminator means the stream is already broken (likely a closed connection); report failure via the return value.
                        ok = false;
                    }
                }
                sink.done();
                ctx_server.queue_results.remove_waiting_task_id(id_task);
-                return true;
+                return ok;
            };
            auto on_complete = [id_task, &ctx_server](bool) {