server: stop processing the prompt when client disconnects (#1134)

Implement a generator-based API for task results (a sketch follows below)

Update httplib.h to 0.27.0

Fix embedding error

Stop prompt processing when the client disconnects (sketched below)
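
A minimal sketch of the disconnect check, assuming cpp-httplib's Request::is_connection_closed callback; the loop and function names below are illustrative, not the actual server code:

#include <cpp-httplib/httplib.h>
#include <functional>

// Hypothetical per-request loop: decode the prompt batch by batch and
// poll the connection in between, so a vanished client stops the work
// instead of wasting compute on a response nobody will read.
static void process_prompt(const httplib::Request & req,
                           const std::function<bool()> & decode_next_batch) {
    while (decode_next_batch()) {
        if (req.is_connection_closed()) {
            return; // client went away: abandon the remaining prompt
        }
    }
}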
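For the generator-based result API, a rough sketch; the type and method names are assumptions, not the commit's actual definitions:

#include <functional>
#include <memory>
#include <string>

// Hypothetical pull-style generator: the HTTP handler asks for one
// result at a time, which gives it a natural point to stop early.
struct task_result_generator {
    virtual ~task_result_generator() = default;
    // nullptr signals that the task has produced its last result.
    virtual std::unique_ptr<std::string> next() = 0;
};

static void stream_results(task_result_generator & gen,
                           const std::function<bool()> & connection_alive) {
    while (auto chunk = gen.next()) {
        if (!connection_alive()) {
            break; // disconnected: stop pulling further results
        }
        // ... write *chunk to the HTTP response ...
    }
}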

Co-authored-by: firecoperana <firecoperana>
Author: firecoperana
Date: 2026-01-12 23:56:59 -06:00
Committed by: GitHub
Parent: d3e3ad40f9
Commit: 1a461525d5
24 changed files with 7654 additions and 4549 deletions


@@ -27,15 +27,15 @@
 #include <random>
 #include <set>
-// increase max payload length to allow use of larger context size
-#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
-// increase backlog size to avoid connection resets for >> 1 slots
-#define CPPHTTPLIB_LISTEN_BACKLOG 512
-// increase max URI length to handle longer prompts in query string
-#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 32768
-// disable Nagle's algorithm
-#define CPPHTTPLIB_TCP_NODELAY true
-#include "httplib.h"
+//// increase max payload length to allow use of larger context size
+//#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
+//// increase backlog size to avoid connection resets for >> 1 slots
+//#define CPPHTTPLIB_LISTEN_BACKLOG 512
+//// increase max URI length to handle longer prompts in query string
+//#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 32768
+//// disable Nagle's algorithm
+//#define CPPHTTPLIB_TCP_NODELAY true
+#include <cpp-httplib/httplib.h>
 #define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
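
These tuning macros are compile-time options: cpp-httplib reads them while the header is preprocessed, so they only take effect when defined before the first include. With the switch to the packaged <cpp-httplib/httplib.h> the old block is kept commented out; presumably the values now come from the build system instead. A minimal sketch of the ordering requirement, reusing two of the old values:

// The defines must precede the include; adding them afterwards is a
// no-op because the header has already been compiled with its defaults.
#define CPPHTTPLIB_LISTEN_BACKLOG 512 // avoid connection resets for >> 1 slots
#define CPPHTTPLIB_TCP_NODELAY true   // disable Nagle's algorithm
#include <cpp-httplib/httplib.h>
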
@@ -459,3 +459,6 @@ void print_files_info(const std::vector<raw_buffer>& files);
+bool prompt_cache_equal(llama_context* ctx, const server_tokens& cache_tokens,
+                        const server_tokens& prompt_tokens, size_t start, const common_prefix& prefix);
+std::string safe_json_to_str(const json& data);
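
safe_json_to_str presumably serializes a response without throwing when model output contains invalid UTF-8. A sketch of one way to implement it with nlohmann::json's replace error handler (an assumption, not necessarily the commit's actual body):

#include <string>
#include <nlohmann/json.hpp>
using json = nlohmann::ordered_json;

// Sketch: dump with the 'replace' handler so malformed byte sequences
// become U+FFFD instead of raising type_error.316 mid-response.
static std::string safe_json_to_str_sketch(const json & data) {
    return data.dump(-1, ' ', false, json::error_handler_t::replace);
}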