mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 19:01:47 +00:00
server: stop processing the prompt when client disconnects (#1134)
implement generator-based API for task results; Update httplib.h to 0.27.0; Fix embedding error; Stop prompt processing when disconnected. Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -27,15 +27,15 @@
|
||||
#include <random>
|
||||
#include <set>
|
||||
|
||||
// increase max payload length to allow use of larger context size
|
||||
#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
|
||||
// increase backlog size to avoid connection resets for >> 1 slots
|
||||
#define CPPHTTPLIB_LISTEN_BACKLOG 512
|
||||
// increase max URI length to handle longer prompts in query string
|
||||
#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 32768
|
||||
// disable Nagle's algorithm
|
||||
#define CPPHTTPLIB_TCP_NODELAY true
|
||||
#include "httplib.h"
|
||||
//// increase max payload length to allow use of larger context size
|
||||
//#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
|
||||
//// increase backlog size to avoid connection resets for >> 1 slots
|
||||
//#define CPPHTTPLIB_LISTEN_BACKLOG 512
|
||||
//// increase max URI length to handle longer prompts in query string
|
||||
//#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 32768
|
||||
//// disable Nagle's algorithm
|
||||
//#define CPPHTTPLIB_TCP_NODELAY true
|
||||
#include <cpp-httplib/httplib.h>
|
||||
|
||||
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
|
||||
|
||||
@@ -459,3 +459,6 @@ void print_files_info(const std::vector<raw_buffer>& files);
|
||||
|
||||
bool prompt_cache_equal(llama_context* ctx, const server_tokens& cache_tokens,
|
||||
const server_tokens& prompt_tokens, size_t start, const common_prefix& prefix);
|
||||
|
||||
std::string safe_json_to_str(const json& data);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user