Mirror of https://github.com/ikawrakow/ik_llama.cpp.git
Synced 2026-01-26 17:20:01 +00:00
Add vision support in llama-server (#901)
* server: add support for vision model
  webui: add support for vision model
* server: remove hack for extra parallel slot #10187
* llama: fix KV shift for qwen2vl #13870
* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
@@ -57,8 +57,6 @@ add_library(${TARGET} STATIC
    chat-parser.cpp
    chat-parser.h
    common.cpp
    chat.h
    chat.cpp
    sampling.h
    sampling.cpp
    console.h

@@ -270,6 +270,14 @@ static std::string parse_device_list(const std::string& value) {
    return value;
}

std::pair<long, std::vector<char>> common_remote_get_content(const std::string& url, const common_remote_params&) {
    if (!url.empty()) {
        throw std::runtime_error("error: built without CURL, cannot download file from the internet");
    }
    return {};
}
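
For reference, a CURL-enabled build could implement the same signature along these lines. This is an illustrative sketch only, not the code from this commit (the CURL path lives in a part of the diff not shown here); it uses standard libcurl calls, and the max_size limit via CURLOPT_MAXFILESIZE is best-effort, since libcurl can only enforce it when the server reports a Content-Length.

    #include <curl/curl.h>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical sketch of a CURL-backed common_remote_get_content();
    // assumes the common_remote_params struct declared later in this diff.
    static size_t write_cb(char* ptr, size_t size, size_t nmemb, void* ud) {
        auto* buf = static_cast<std::vector<char>*>(ud);
        buf->insert(buf->end(), ptr, ptr + size * nmemb);
        return size * nmemb;
    }

    std::pair<long, std::vector<char>> common_remote_get_content(const std::string& url, const common_remote_params& params) {
        CURL* curl = curl_easy_init();
        if (!curl) throw std::runtime_error("curl_easy_init() failed");
        std::vector<char> body;
        curl_slist* headers = nullptr;
        for (const auto& h : params.headers) headers = curl_slist_append(headers, h.c_str());
        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, params.timeout);            // 0 == no timeout
        if (params.max_size > 0) {
            curl_easy_setopt(curl, CURLOPT_MAXFILESIZE, params.max_size);   // best-effort cap
        }
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &body);
        CURLcode rc = curl_easy_perform(curl);
        long http_code = 0;
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
        curl_slist_free_all(headers);
        curl_easy_cleanup(curl);
        if (rc != CURLE_OK) throw std::runtime_error(curl_easy_strerror(rc));
        return { http_code, std::move(body) };
    }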

//
// CLI argument parsing
//
@@ -1727,6 +1735,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
        params.n_junk = std::stoi(argv[i]);
        return true;
    }
    if (arg == "--no-context-shift") {
        // bare flag: takes no value, so there is no CHECK_ARG here
        // (CHECK_ARG would consume the next command-line argument)
        params.ctx_shift = false;
        return true;
    }
    if (arg == "--pos") {
        CHECK_ARG
        params.i_pos = std::stoi(argv[i]);
@@ -2060,7 +2073,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
    options.push_back({ "multi-modality" });
    options.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA. see examples/llava/README.md" });
    options.push_back({ "*", " --image FILE", "path to an image file. use with multimodal models. Specify multiple times for batching" });

    options.push_back({ "*", " --no-context-shift", "disable context-shift." });
    options.push_back({ "backend" });
    options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });
@@ -3311,6 +3324,29 @@ std::vector<llama_token> llama_tokenize(
    return result;
}

std::vector<llama_token> llama_tokenize(
    const struct llama_vocab* vocab,
    const std::string& text,
    bool add_special,
    bool parse_special) {
    // upper limit for the number of tokens
    int n_tokens = text.length() + 2 * add_special;
    std::vector<llama_token> result(n_tokens);
    n_tokens = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
    if (n_tokens == std::numeric_limits<int32_t>::min()) {
        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
    }
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        int check = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
        GGML_ASSERT(check == -n_tokens);
    }
    else {
        result.resize(n_tokens);
    }
    return result;
}
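
This overload follows the usual two-pass contract of llama_vocab_tokenize: a guess-sized buffer first, and if the call returns a negative count -n, the buffer is resized to n tokens and the call repeated. A minimal usage sketch, assuming a loaded model (the `model` variable is an assumption, not part of this diff):

    // Minimal usage sketch; `model` is assumed to be a loaded llama_model*.
    const llama_vocab * vocab = llama_model_get_vocab(model);
    std::vector<llama_token> toks = llama_tokenize(vocab, "Hello world", /*add_special=*/true, /*parse_special=*/false);
    // toks.size() is exact on return: the initial guess
    // (text.length() + 2 * add_special) is an upper bound in practice, but
    // the negative-return path still handles the resize-and-retry case.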

std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    std::string piece;
    piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
@@ -3343,7 +3379,7 @@ std::string llama_token_to_piece(const struct llama_model* model, llama_token to
    return piece;
}

-std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+std::string llama_detokenize(const llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
    std::string text;
    text.resize(std::max(text.capacity(), tokens.size()));
    int32_t n_chars = llama_detokenize(llama_get_model(ctx), tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
@@ -3359,6 +3395,7 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
    return text;
}

bool llama_should_add_bos_token(const llama_model * model) {
    const int add_bos = llama_add_bos_token(model);
@@ -53,6 +53,8 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
    struct llama_lora_adapter * adapter;
};

using llama_tokens = std::vector<llama_token>;

// build info
extern int LLAMA_BUILD_NUMBER;
extern char const * LLAMA_COMMIT;
@@ -237,7 +239,7 @@ struct gpt_params {
    bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
    bool prompt_cache_all = false; // save user input and generations to prompt cache
    bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it

    bool ctx_shift = true;
    bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
    bool multiline_input = false; // reverse the usage of `\`
    bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@@ -371,6 +373,9 @@ struct gpt_params {
    bool sweep_bench_output_jsonl = false;
};

void gpt_params_handle_hf_token(gpt_params & params);
void gpt_params_parse_from_env(gpt_params & params);
void gpt_params_handle_model_default(gpt_params & params);
@@ -381,6 +386,15 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);

std::string gpt_params_get_system_info(const gpt_params & params);

struct common_remote_params {
    std::vector<std::string> headers;
    long timeout = 0;  // CURLOPT_TIMEOUT, in seconds; 0 means no timeout
    long max_size = 0; // max size of the response; unlimited if 0; max is 2GB
};
// get remote file content, returns <http_code, raw_response_body>
std::pair<long, std::vector<char>> common_remote_get_content(const std::string& url, const common_remote_params& params);

//
// String utils
//
@@ -497,6 +511,12 @@ std::vector<llama_token> llama_tokenize(
    bool add_special,
    bool parse_special = false);

std::vector<llama_token> llama_tokenize(
    const struct llama_vocab* vocab,
    const std::string& text,
    bool add_special,
    bool parse_special = false);

// tokenizes a token into a piece, optionally renders special/control tokens
// should work similar to Python's `tokenizer.id_to_piece`
std::string llama_token_to_piece(
@@ -513,70 +533,16 @@ std::string llama_token_to_piece(
// should work similar to Python's `tokenizer.decode`
// optionally renders special/control tokens
std::string llama_detokenize(
-   llama_context * ctx,
+   const llama_context * ctx,
    const std::vector<llama_token> & tokens,
    bool special = true);

// Uses the value from the model metadata if possible, otherwise
// defaults to true when model type is SPM, otherwise false.
bool llama_should_add_bos_token(const llama_model * model);

//
// Chat template utils
//
//struct common_tool_call {
//    std::string name;
//    std::string arguments;
//    std::string id;
//};
//
//// same with llama_chat_message, but uses std::string
//struct common_chat_msg {
//    std::string role;
//    std::string content;
//    std::vector<common_tool_call> tool_calls;
//    std::string reasoning_content = "";
//};

//// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
//bool llama_chat_verify_template(const struct llama_model* , const std::string& tmpl, bool use_jinja);
//
//namespace minja {
//    class chat_template;
//}
//
//typedef minja::chat_template common_chat_template;
//
//struct common_chat_templates {
//    bool has_explicit_template; // Model had builtin template or template overridde was specified.
//    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
//    std::unique_ptr<common_chat_template> template_tool_use;
//};
//
//
//// CPP wrapper for llama_chat_apply_template
//// If the built-in template is not supported, we default to chatml
//// If the custom "tmpl" is not supported, we throw an error
//std::string llama_chat_apply_template(
//    const struct llama_model* model,
//    const common_chat_template& tmpl,
//    const std::vector<common_chat_msg>& chat,
//    bool add_ass,
//    bool use_jinja);
//
//// Format single message, while taking into account the position of that message in chat history
//std::string llama_chat_format_single(const struct llama_model* model,
//    const common_chat_template& tmpl,
//    const std::vector<common_chat_msg>& past_msg,
//    const common_chat_msg& new_msg,
//    bool add_ass,
//    bool use_jinja);
//
//// Returns an example of formatted chat
//std::string llama_chat_format_example(const struct llama_model* model,
//    const common_chat_template& tmpl, bool use_jinja);
//
//common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override);

//
@@ -3331,7 +3331,7 @@ struct image_manipulation {
    dst.buf.resize(3 * target_width * target_height);

    float Cc;
-   float C[5];
+   float C[5] = {};
    float d0, d2, d3, a0, a1, a2, a3;
    int i, j, k, jj;
    int x, y;
@@ -70,6 +70,9 @@ endif()
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})

target_include_directories(${TARGET} PRIVATE ../mtmd)
target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})

if (LLAMA_SERVER_SSL)
    find_package(OpenSSL REQUIRED)
    target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
Binary file not shown.
File diff suppressed because it is too large.
@@ -6,6 +6,9 @@
// Change JSON_ASSERT from assert() to GGML_ASSERT:
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>
#include "base64.hpp"
#include "mtmd.h"
#include "mtmd-helper.h"
#include "chat.h"
#include <string>
#include <vector>
@@ -51,6 +54,8 @@ extern bool server_log_json;
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_INFO(   MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)

using raw_buffer = std::vector<uint8_t>;

static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra);

template <typename T>
@@ -469,8 +474,9 @@ struct oaicompat_parser_options {
// used by /chat/completions endpoint
static json oaicompat_chat_params_parse(
    const struct llama_model* model,
-   const json& body, /* openai api json semantics */
-   const oaicompat_parser_options& opt)
+   json& body, /* openai api json semantics */
+   const oaicompat_parser_options& opt,
+   std::vector<raw_buffer>& out_files)
{
    json llama_params;

@@ -480,20 +486,6 @@ static json oaicompat_chat_params_parse(
    auto stream = json_value(body, "stream", false);
    auto tool_choice = json_value(body, "tool_choice", std::string("auto"));

    /* if (tools.is_array() && !tools.empty()) {
        if (stream) {
            throw std::runtime_error("Cannot use tools with stream");
        }
        if (!use_jinja) {
            throw std::runtime_error("tools param requires --jinja flag");
        }
    }
    if (!use_jinja) {
        if (body.contains("tool_choice") && !body.at("tool_choice").is_null()) {
            throw std::runtime_error("Unsupported param: tool_choice");
        }
    }*/

    if (!opt.use_jinja) {
        if (has_tools) {
            throw std::runtime_error("tools param requires --jinja flag");
@@ -531,8 +523,120 @@ static json oaicompat_chat_params_parse(
            json_schema = json_value(json_schema, "schema", json::object());
        }
    }

    // get input files
    if (!body.contains("messages")) {
        throw std::runtime_error("'messages' is required");
    }
    json& messages = body.at("messages");
    if (!messages.is_array()) {
        throw std::runtime_error("Expected 'messages' to be an array");
    }
    for (auto& msg : messages) {
        std::string role = json_value(msg, "role", std::string());
        if (role != "assistant" && !msg.contains("content")) {
            throw std::runtime_error("All non-assistant messages must contain 'content'");
        }
        if (role == "assistant") {
            if (!msg.contains("content") && !msg.contains("tool_calls")) {
                throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!");
            }
            if (!msg.contains("content")) {
                continue; // avoid errors with no content
            }
        }
        json& content = msg.at("content");
        if (content.is_string() || content.is_null()) {
            continue;
        }

        if (!content.is_array()) {
            throw std::runtime_error("Expected 'content' to be a string or an array");
        }

        for (auto& p : content) {
            std::string type = json_value(p, "type", std::string());
            if (type == "image_url") {
                if (!opt.allow_image) {
                    throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
                }

                json image_url = json_value(p, "image_url", json::object());
                std::string url = json_value(image_url, "url", std::string());
                if (string_starts_with(url, "http")) {
                    // download remote image
                    // TODO @ngxson : maybe make these params configurable
                    common_remote_params params;
                    params.headers.push_back("User-Agent: ik_llama.cpp/");
                    params.max_size = 1024 * 1024 * 10; // 10MB
                    params.timeout = 10; // seconds
                    LOG_INFO("downloading image from '%s'\n", url.c_str());
                    auto res = common_remote_get_content(url, params);
                    if (200 <= res.first && res.first < 300) {
                        LOG_INFO("downloaded %ld bytes\n", res.second.size());
                        raw_buffer data;
                        data.insert(data.end(), res.second.begin(), res.second.end());
                        out_files.push_back(data);
                    }
                    else {
                        throw std::runtime_error("Failed to download image");
                    }
                }
                else {
                    // try to decode base64 image
                    std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
                    if (parts.size() != 2) {
                        throw std::runtime_error("Invalid image_url.url value");
                    }
                    else if (!string_starts_with(parts[0], "data:image/")) {
                        throw std::runtime_error("Invalid image_url.url format: " + parts[0]);
                    }
                    else if (!string_ends_with(parts[0], "base64")) {
                        throw std::runtime_error("image_url.url must be base64 encoded");
                    }
                    else {
                        auto base64_data = parts[1];
                        auto decoded_data = base64_decode(base64_data);
                        out_files.push_back(decoded_data);
                    }
                }

                // replace this chunk with a marker
                p["type"] = "text";
                p["text"] = mtmd_default_marker();
                p.erase("image_url");
            }
            else if (type == "input_audio") {
                if (!opt.allow_audio) {
                    throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
                }

                json input_audio = json_value(p, "input_audio", json::object());
                std::string data = json_value(input_audio, "data", std::string());
                std::string format = json_value(input_audio, "format", std::string());
                // while we also support flac, we don't allow it here so we match the OAI spec
                if (format != "wav" && format != "mp3") {
                    throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
                }
                auto decoded_data = base64_decode(data); // expected to be base64 encoded
                out_files.push_back(decoded_data);

                // replace this chunk with a marker
                p["type"] = "text";
                p["text"] = mtmd_default_marker();
                p.erase("input_audio");
            }
            else if (type != "text") {
                throw std::runtime_error("unsupported content[].type");
            }
        }
    }
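
For orientation, this is the shape of OpenAI-style message content the loop above accepts. The payload below is a hypothetical example, not taken from the diff; after extraction, each media part is rewritten into a text part carrying the mtmd marker, and the decoded bytes land in out_files:

    // Hypothetical request fragment (values invented for illustration):
    json body = {
        { "messages", json::array({
            { { "role", "user" }, { "content", json::array({
                { { "type", "text" }, { "text", "What is in this picture?" } },
                { { "type", "image_url" },
                  { "image_url", { { "url", "data:image/png;base64,iVBORw0KGgoAAA..." } } } }
            }) } }
        }) }
    };
    // After the loop, the image part becomes { "type": "text", "text": <mtmd marker> }
    // and the decoded PNG bytes have been appended to out_files.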

    common_chat_templates_inputs inputs;
-   inputs.messages = common_chat_msgs_parse_oaicompat(body.at("messages"));
+   inputs.messages = common_chat_msgs_parse_oaicompat(messages);
    inputs.tools = common_chat_tools_parse_oaicompat(tools);
    inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
    inputs.json_schema = json_schema.is_null() ? "" : json_schema.dump();
@@ -608,8 +712,9 @@ static json oaicompat_chat_params_parse(
    llama_params["grammar"] = chat_params.grammar;
    llama_params["grammar_lazy"] = chat_params.grammar_lazy;
    auto grammar_triggers = json::array();
-   for (const auto& trigger : chat_params.grammar_triggers) {
-       grammar_triggers.push_back(trigger.to_json<json>());
+   for (const auto & trigger : chat_params.grammar_triggers) {
+       server_grammar_trigger ct(trigger);
+       grammar_triggers.push_back(ct.to_json());
    }
    llama_params["grammar_triggers"] = grammar_triggers;
    llama_params["preserved_tokens"] = chat_params.preserved_tokens;
@@ -649,6 +754,52 @@ static json oaicompat_chat_params_parse(
    return llama_params;
}

//
// tokenizer and input processing utils
//

static bool json_is_array_of_numbers(const json& data) {
    if (data.is_array()) {
        for (const auto& e : data) {
            if (!e.is_number_integer()) {
                return false;
            }
        }
        return true;
    }
    return false;
}

// does the array contain BOTH numbers & strings?
static bool json_is_array_of_mixed_numbers_strings(const json& data) {
    bool seen_string = false;
    bool seen_number = false;
    if (data.is_array()) {
        for (const auto& e : data) {
            seen_string |= e.is_string();
            seen_number |= e.is_number_integer();
            if (seen_number && seen_string) {
                return true;
            }
        }
    }
    return false;
}

// does the array contain any individual integers/tokens?
static bool json_is_array_and_contains_numbers(const json& data) {
    if (data.is_array()) {
        for (const auto& e : data) {
            if (e.is_number_integer()) {
                return true;
            }
        }
        return false;
    }
    return false;
}

// get value by path (key1 / key2)
static json json_get_nested_values(const std::vector<std::string>& paths, const json& js) {
    json result = json::object();
@@ -673,6 +824,50 @@ static json json_get_nested_values(const std::vector<std::string>& paths, const
}

/**
 * this handles 2 cases:
 * - only string, example: "string"
 * - mixed string and tokens, example: [12, 34, "string", 56, 78]
 */
static std::vector<llama_token> tokenize_mixed(const llama_vocab* vocab, const json& json_prompt, bool add_special, bool parse_special) {
    // If `add_bos` is true, we only add BOS, when json_prompt is a string,
    // or the first element of the json_prompt array is a string.
    std::vector<llama_token> prompt_tokens;

    if (json_prompt.is_array()) {
        bool first = true;
        for (const auto& p : json_prompt) {
            if (p.is_string()) {
                auto s = p.template get<std::string>();

                std::vector<llama_token> p;
                if (first) {
                    p = llama_tokenize(vocab, s, add_special, parse_special);
                    first = false;
                }
                else {
                    p = llama_tokenize(vocab, s, false, parse_special);
                }

                prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
            }
            else {
                if (first) {
                    first = false;
                }

                prompt_tokens.push_back(p.template get<llama_token>());
            }
        }
    }
    else {
        auto s = json_prompt.template get<std::string>();
        prompt_tokens = llama_tokenize(vocab, s, add_special, parse_special);
    }

    return prompt_tokens;
}
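
A quick sketch of the mixed case, with made-up token IDs (the `vocab` pointer is assumed to come from a loaded model):

    // Made-up token IDs, for illustration only:
    json prompt = json::array({ 12, 34, "string", 56, 78 });
    std::vector<llama_token> toks = tokenize_mixed(vocab, prompt, /*add_special=*/true, /*parse_special=*/false);
    // 12 and 34 pass through verbatim; "string" is tokenized without BOS here,
    // because add_special only applies when the first element is a string;
    // 56 and 78 again pass through verbatim.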

static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
    return json {
        {"tokens", tokens}
@@ -764,3 +959,480 @@ static token_probabilities get_token_probabilities(llama_context * ctx, int idx,

    return {sampled_token_p, cur};
}

/**
 * server_tokens is a helper to manage the input tokens and images for the server.
 * it is made this way to simplify the logic of KV cache management.
 */
struct server_tokens {
    bool has_mtmd = false;

private: // disallow accessing these members directly, risking out-of-sync

    // map a **start** position in tokens to the image chunk
    std::unordered_map<llama_pos, mtmd::input_chunk_ptr> map_pos_to_media;

    // list of tokens
    // it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
    // a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
    // important: for models using mrope, an image can contain multiple tokens but will use only one **position**
    std::vector<llama_token> tokens;

    // for ex. with input of 5 text tokens and 2 images:
    //      [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
    // pos   0   1   2   3   4    5      6      7      8      9
    // map_pos_to_media will contain: {5, img0}, {8, img1}

public:
    server_tokens() = default;
    ~server_tokens() = default;

    // Prevent copying
    server_tokens(const server_tokens&) = delete;
    server_tokens& operator=(const server_tokens&) = delete;

    // Allow moving (usually implicitly generated if members are movable)
    server_tokens(server_tokens&&) = default;
    server_tokens& operator=(server_tokens&&) = default;

    // Allow accessing elements using [] operator
    llama_token operator[](size_t index) { return tokens[index]; }
    const llama_token& operator[](size_t index) const { return tokens[index]; }

    server_tokens(mtmd::input_chunks& mtmd_chunks, bool has_mtmd) : has_mtmd(has_mtmd) {
        for (size_t i = 0; i < mtmd_chunks.size(); ++i) {
            push_back(mtmd_chunks[i]);
        }
    }

    server_tokens(std::vector<llama_token>& tokens, bool has_mtmd) : has_mtmd(has_mtmd), tokens(tokens) {}

    llama_pos pos_next() const {
        if (!has_mtmd) {
            return tokens.size();
        }

        llama_pos res = tokens.size();

        for (auto it = map_pos_to_media.begin(); it != map_pos_to_media.end(); ++it) {
            const auto& chunk = it->second;
            res += mtmd_input_chunk_get_n_pos(chunk.get()) - mtmd_input_chunk_get_n_tokens(chunk.get());
        }

        return res;
    }

    // for debugging
    std::string str() const {
        std::ostringstream oss;
        oss << "tokens: ";
        for (const auto& t : tokens) {
            if (t == LLAMA_TOKEN_NULL) {
                oss << "<embd> ";
            }
            else {
                oss << t << " ";
            }
        }
        oss << "\n";
        oss << "image pos: ";
        for (const auto& it : map_pos_to_media) {
            oss << it.first << ", ";
        }
        return oss.str();
    }

    const mtmd::input_chunk_ptr& find_chunk(llama_pos pos) const {
        auto it = map_pos_to_media.find(pos);
        if (it != map_pos_to_media.end()) {
            return it->second;
        }
        else {
            throw std::runtime_error("Chunk not found");
        }
    }

    void push_back(llama_token tok) {
        if (tok == LLAMA_TOKEN_NULL) {
            throw std::runtime_error("Invalid token");
        }
        tokens.emplace_back(tok);
    }

    // will create a copy of the chunk if it contains non-text data
    void push_back(const mtmd_input_chunk* chunk) {
        auto type = mtmd_input_chunk_get_type(chunk);
        if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE || type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
            GGML_ASSERT(has_mtmd);
            const int n_pos = mtmd_input_chunk_get_n_pos(chunk);
            fprintf(stdout, "n_pos: %d\n", n_pos);
            llama_pos start_pos = tokens.size();
            for (int i = 0; i < n_pos; ++i) {
                tokens.emplace_back(LLAMA_TOKEN_NULL);
            }
            mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
            map_pos_to_media[start_pos] = std::move(new_chunk);
        }
        else if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
            size_t n_tokens;
            auto text_tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
            for (size_t i = 0; i < n_tokens; ++i) {
                push_back(text_tokens[i]);
            }
        }
        else {
            GGML_ABORT("Invalid chunk type");
        }
    }

    // appends server tokens, updates the media map. copies media chunks.
    void push_back(server_tokens& tokens) {
        size_t start_pos = size();
        for (size_t i = 0; i < tokens.size(); i++) {
            push_back(tokens[i]);
        }
        if (tokens.has_mtmd) {
            // Assert if we are copying MTMD chunks to a server_tokens that does not have mtmd.
            // We could also just check, but this will prevent silently dropping MTMD data.
            GGML_ASSERT(has_mtmd);
            for (auto it = tokens.map_pos_to_media.begin(); it != tokens.map_pos_to_media.end(); ++it) {
                auto chunk = tokens.map_pos_to_media[it->first].get();
                mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
                map_pos_to_media[start_pos + it->first] = std::move(new_chunk);
            }
        }
    }

    // for compatibility with context shift and prompt truncation
    void insert(const std::vector<llama_token>& inp_tokens) {
        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
        tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
    }

    // for compatibility with context shift and prompt truncation
    void resize(size_t size) {
        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
        tokens.resize(size);
    }

    llama_token * data() {
        return tokens.data();
    }

    llama_tokens::iterator begin() {
        return tokens.begin();
    }

    llama_tokens::iterator end() {
        return tokens.end();
    }

    llama_tokens::const_iterator cbegin() {
        return tokens.cbegin();
    }

    llama_tokens::const_iterator cend() {
        return tokens.cend();
    }

    llama_tokens tokens_data() {
        return tokens;
    }

    // for compatibility with speculative decoding, ctx shift, slot save/load
    const std::vector<llama_token>& get_text_tokens() const {
        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
        return tokens;
    }

    // for compatibility with speculative decoding
    void set_token(llama_pos pos, llama_token id) {
        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
        tokens[pos] = id;
    }

    size_t size() const {
        return tokens.size();
    }

    bool empty() const {
        return tokens.empty();
    }

    void clear() {
        tokens.clear();
    }

    void keep_first(size_t n) {
        GGML_ASSERT(n <= tokens.size());
        if (has_mtmd) {
            if (n == tokens.size()) {
                return; // nothing to do
            }
            // we throw an error if we try to remove a token in the middle of an image
            // for ex. with input of 5 text tokens and 2 images:
            //      [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
            // n     1   2   3   4   5    6      7      8      9     10
            // allowed to resize        ^                  ^
            // disallowed to resize            ^      ^                ^
            if (n > 0) {
                llama_token last_token = tokens[n - 1];
                // make sure we never remove tokens in the middle of an image
                if (last_token == LLAMA_TOKEN_NULL) {
                    find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
                }
            }
            // remove all image chunks that are not used anymore
            for (auto it = map_pos_to_media.begin(); it != map_pos_to_media.end(); ) {
                llama_pos pos = it->first;
                if (pos >= (llama_pos)n) {
                    it = map_pos_to_media.erase(it);
                }
                else {
                    ++it;
                }
            }
        }
        tokens.resize(n);
    }

    std::string detokenize(const llama_context* ctx, bool special) const {
        llama_tokens text_tokens;
        text_tokens.reserve(tokens.size());
        for (const auto& t : tokens) {
            if (t != LLAMA_TOKEN_NULL) {
                text_tokens.push_back(t);
            }
        }
        return llama_detokenize(ctx, text_tokens, special);
    }

    size_t get_common_prefix(const server_tokens& b) const {
        size_t max_idx = std::min(tokens.size(), b.tokens.size());
        for (size_t i = 0; i < max_idx; ++i) {
            auto& ai = tokens[i];
            auto& bi = b.tokens[i];

            if (ai == LLAMA_TOKEN_NULL && bi == LLAMA_TOKEN_NULL) {
                GGML_ASSERT(has_mtmd);
                const auto& a_chunk = find_chunk(i);
                const auto& b_chunk = b.find_chunk(i);
                GGML_ASSERT(a_chunk && b_chunk);
                std::string ai_id = mtmd_input_chunk_get_id(a_chunk.get());
                std::string bi_id = mtmd_input_chunk_get_id(b_chunk.get());
                size_t a_pos = mtmd_input_chunk_get_n_pos(a_chunk.get());
                size_t b_pos = mtmd_input_chunk_get_n_pos(b_chunk.get());
                if (ai_id == bi_id && a_pos == b_pos) {
                    GGML_ASSERT(a_pos > 0 && "Invalid media chunk"); // should never happen
                    i += a_pos - 1; // will be +1 by the for loop
                    continue;
                }
                else {
                    return i;
                }
            }
            else if (ai == bi) {
                continue;
            }
            else {
                return i;
            }
        }
        return max_idx; // all tokens are equal
    }

    // make sure all text tokens are within the vocab range
    bool validate(const struct llama_context* ctx) const {
        const llama_model* model = llama_get_model(ctx);
        const llama_vocab* vocab = llama_model_get_vocab(model);
        const int32_t n_vocab = llama_vocab_n_tokens(vocab);

        for (size_t i = 0; i < tokens.size(); ++i) {
            auto& t = tokens[i];
            if (t == LLAMA_TOKEN_NULL) {
                try {
                    const auto& chunk = find_chunk(i);
                    size_t n_pos = mtmd_input_chunk_get_n_pos(chunk.get());
                    i += n_pos - 1; // will be +1 by the for loop
                }
                catch (const std::exception& e) {
                    return false;
                }
            }
            else if (t < 0 || t >= n_vocab) {
                return false;
            }
        }
        return true;
    }

    // encode and decode the image chunk
    int32_t process_chunk(
        llama_context* ctx,
        mtmd_context* mctx,
        llama_pos n_past,
        int32_t seq_id,
        llama_pos& n_pos_out) {
        auto& chunk = find_chunk(n_past);
        const char* name = mtmd_input_chunk_get_type(chunk.get()) == MTMD_INPUT_CHUNK_TYPE_IMAGE
            ? "image" : "audio";
        LOG_INFO("processing %s...\n", name);
        int32_t n_batch = llama_n_batch(ctx);
        int64_t t0 = ggml_time_ms();
        llama_pos new_n_past = n_past;
        int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
            chunk.get(),
            n_past,
            seq_id,
            n_batch,
            true, // logits last
            &new_n_past);
        LOG_INFO("processed in %" PRId64 " ms\n", ggml_time_ms() - t0);
        if (result != 0) {
            LOG_ERROR("mtmd_helper_eval failed with status %d", result);
            n_pos_out = n_past;
            return result;
        }
        n_pos_out = new_n_past;
        return 0;
    }
};
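
To make the intended KV-cache flow concrete, here is a hedged sketch built on the helpers above. The function and its surrounding slot bookkeeping are invented for illustration; only server_tokens, get_common_prefix, keep_first, and process_chunk come from this diff:

    // Hypothetical sketch of slot reuse; the wrapper function itself is
    // not part of this commit.
    static void reuse_slot_cache(server_tokens& cached, server_tokens& incoming,
                                 llama_context* ctx, mtmd_context* mctx) {
        // longest shared prefix; media chunks compare by bitmap id and extent
        size_t n_keep = cached.get_common_prefix(incoming);
        // truncate the cache to the shared prefix; this throws if the cut
        // would split an image, so a caller can back off to position 0
        cached.keep_first(n_keep);
        // the caller then evaluates the remaining suffix of `incoming`,
        // calling process_chunk(ctx, mctx, pos, seq_id, pos_out) whenever
        // the token at `pos` is LLAMA_TOKEN_NULL (start of a media chunk)
        (void)ctx; (void)mctx;
    }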

// Computes FNV-1a hash of the data
static std::string fnv_hash(const uint8_t* data, size_t len) {
    const uint64_t fnv_prime = 0x100000001b3ULL;
    uint64_t hash = 0xcbf29ce484222325ULL;

    for (size_t i = 0; i < len; ++i) {
        hash ^= data[i];
        hash *= fnv_prime;
    }
    return std::to_string(hash);
}

static server_tokens process_mtmd_prompt(mtmd_context* mctx, std::string prompt, std::vector<raw_buffer> files) {
    mtmd::bitmaps bitmaps;
    for (auto& file : files) {
        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(mctx, file.data(), file.size()));
        if (!bmp.ptr) {
            throw std::runtime_error("Failed to load image or audio file");
        }
        // calculate bitmap hash (for KV caching)
        std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
        bmp.set_id(hash.c_str());
        bitmaps.entries.push_back(std::move(bmp));
    }
    // process prompt
    std::vector<server_tokens> inputs;
    // multimodal
    mtmd_input_text inp_txt = {
        prompt.c_str(),
        /* add_special   */ true,
        /* parse_special */ true,
    };
    mtmd::input_chunks chunks(mtmd_input_chunks_init());
    auto bitmaps_c_ptr = bitmaps.c_ptr();
    int32_t tokenized = mtmd_tokenize(mctx,
        chunks.ptr.get(),
        &inp_txt,
        bitmaps_c_ptr.data(),
        bitmaps_c_ptr.size());
    if (tokenized != 0) {
        throw std::runtime_error("Failed to tokenize prompt");
    }
    auto result = server_tokens(chunks, true);
    return result;
}

/**
 * break the input "prompt" object into multiple prompts if needed, then tokenize them
 * use tokenize_input_prompts() if the input could be an array.
 * this supports these cases:
 * - "prompt": "string"
 * - "prompt": [12, 34, 56]
 * - "prompt": [12, 34, "string", 56, 78]
 * - "prompt": { "prompt_string": "string", "multimodal_data": [ "base64" ] }
 */
static server_tokens tokenize_input_subprompt(const llama_vocab* vocab, mtmd_context* mctx, const json& json_prompt, bool add_special, bool parse_special) {
    constexpr char JSON_STRING_PROMPT_KEY[] = "prompt_string";
    constexpr char JSON_MTMD_DATA_KEY[] = "multimodal_data";
    const bool has_mtmd = mctx != nullptr;
    if (json_prompt.is_string() || json_is_array_of_mixed_numbers_strings(json_prompt)) {
        // string or mixed
        std::vector<llama_token> tmp = tokenize_mixed(vocab, json_prompt, add_special, parse_special);
        return server_tokens(tmp, false);
    }
    else if (json_is_array_of_numbers(json_prompt)) {
        // array of tokens
        std::vector<llama_token> tmp = json_prompt.get<std::vector<llama_token>>();
        return server_tokens(tmp, false);
    }
    else if (json_prompt.contains(JSON_STRING_PROMPT_KEY)) {
        // JSON object with prompt key.
        if (json_prompt.contains(JSON_MTMD_DATA_KEY)) {
            if (!has_mtmd)
                throw std::runtime_error("Multimodal data provided, but model does not support multimodal requests.");

            // JSON object with prompt and multimodal key.
            std::vector<raw_buffer> files;
            for (const auto& entry : json_prompt.at(JSON_MTMD_DATA_KEY)) {
                files.push_back(base64_decode(entry));
            }
            return process_mtmd_prompt(mctx, json_prompt.at(JSON_STRING_PROMPT_KEY), files);
        }
        else {
            // Not multimodal, but contains a subobject.
            std::vector<llama_token> tmp = tokenize_mixed(vocab, json_prompt.at(JSON_STRING_PROMPT_KEY), add_special, parse_special);
            return server_tokens(tmp, false);
        }
    }
    else {
        throw std::runtime_error("\"prompt\" elements must be a string, a list of tokens, a JSON object containing a prompt string, or a list of mixed strings & tokens.");
    }
}

/**
 * break the input "prompt" object into multiple prompts if needed, then tokenize them
 * this supports these cases:
 * - "prompt": "string"
 * - "prompt": [12, 34, 56]
 * - "prompt": [12, 34, "string", 56, 78]
 * - "prompt": { "prompt_string": "string", "multimodal_data": [ "base64" ] }
 * and multiple prompts (multi-tasks):
 * - "prompt": ["string1", "string2"]
 * - "prompt": ["string1", [12, 34, 56]]
 * - "prompt": [[12, 34, 56], [78, 90, 12]]
 * - "prompt": [[12, 34, "string", 56, 78], [12, 34, 56], { "prompt_string": "string", "multimodal_data": [ "base64" ]}]
 */
static std::vector<server_tokens> tokenize_input_prompts(const llama_vocab* vocab, mtmd_context* mctx, const json& json_prompt, bool add_special, bool parse_special) {
    std::vector<server_tokens> result;
    if (json_prompt.is_array() && !json_is_array_and_contains_numbers(json_prompt)) {
        result.reserve(json_prompt.size());
        for (const auto& p : json_prompt) {
            result.push_back(tokenize_input_subprompt(vocab, mctx, p, add_special, parse_special));
        }
    }
    else {
        result.push_back(tokenize_input_subprompt(vocab, mctx, json_prompt, add_special, parse_special));
    }
    if (result.empty()) {
        throw std::runtime_error("\"prompt\" must not be empty");
    }
    return result;
}
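
As an example, a /completion-style payload that pairs a prompt string with one base64 image would look roughly like this. The values are hypothetical, and the media placeholder inside prompt_string must be whatever string mtmd_default_marker() returns:

    // Hypothetical payload accepted by tokenize_input_prompts():
    std::string marker = mtmd_default_marker(); // the marker string used by mtmd
    json prompt = {
        { "prompt_string", "Describe this image: " + marker },
        { "multimodal_data", json::array({ "iVBORw0KGgoAAA..." }) } // truncated base64, invented
    };
    std::vector<server_tokens> tasks = tokenize_input_prompts(vocab, mctx, prompt, /*add_special=*/true, /*parse_special=*/true);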

// Assuming raw_buffer has .data() and .size() members
inline void printFilesInfo(const std::vector<raw_buffer>& files) {
    for (size_t i = 0; i < files.size(); ++i) {
        const auto& file = files[i];
        std::cout << "File " << i << ": Size = " << file.size() << " bytes\n";

        // Print first 16 bytes in hex
        std::cout << "First 16 bytes: ";
        for (size_t j = 0; j < std::min<size_t>(file.size(), 16); ++j) {
            std::cout << std::hex << std::setw(2) << std::setfill('0')
                      << static_cast<int>(file.data()[j]) << " ";
        }
        std::cout << std::dec << "\n\n"; // Reset to decimal
    }
}
examples/server/webui/dist/index.html (vendored, 291 lines changed): diff suppressed because one or more lines are too long.
examples/server/webui/package-lock.json (generated, 264 lines changed):
@@ -19,9 +19,11 @@
    "dexie-export-import": "^4.0.11",
    "highlight.js": "^11.10.0",
    "katex": "^0.16.15",
    "pdfjs-dist": "^5.2.133",
    "postcss": "^8.4.49",
    "react": "^18.3.1",
    "react-dom": "^18.3.1",
    "react-dropzone": "^14.3.8",
    "react-hot-toast": "^2.5.2",
    "react-markdown": "^9.0.3",
    "react-router": "^7.1.5",
[Generated lockfile hunks, abridged: adds @napi-rs/canvas 0.1.80 and its ten optional per-platform binary packages (android-arm64; darwin arm64/x64; linux arm-gnueabihf, arm64-gnu, arm64-musl, riscv64-gnu, x64-gnu, x64-musl; win32-x64-msvc), attr-accept 2.2.5, file-selector 2.1.2, object-assign 4.1.1, pdfjs-dist 5.4.149 (node >=20.16.0 || >=22.3.0, optionally depending on @napi-rs/canvas ^0.1.77), prop-types 15.8.1, react-dropzone 14.3.8, and react-is 16.13.1; tslib 2.8.1 becomes devOptional.]
@@ -22,9 +22,11 @@
    "dexie-export-import": "^4.0.11",
    "highlight.js": "^11.10.0",
    "katex": "^0.16.15",
    "pdfjs-dist": "^5.2.133",
    "postcss": "^8.4.49",
    "react": "^18.3.1",
    "react-dom": "^18.3.1",
    "react-dropzone": "^14.3.8",
    "react-hot-toast": "^2.5.2",
    "react-markdown": "^9.0.3",
    "react-router": "^7.1.5",
@@ -16,6 +16,8 @@ export const CONFIG_DEFAULT = {
  showTokensPerSecond: false,
  showThoughtInProgress: false,
  excludeThoughtOnReq: true,
  pasteLongTextToFileLen: 2500,
  pdfAsImage: false,
  reasoning_format: 'auto',
  // make sure these default values are in sync with `common.h`
  samplers: 'dkypmxnt',
@@ -46,6 +48,8 @@ export const CONFIG_INFO: Record<string, string> = {
  reasoning_format:
    'Specify how to parse reasoning content. none: reasoning content in content block. auto: reasoning content in reasoning_content.',
  apiKey: 'Set the API Key if you are using --api-key option for the server.',
  systemMessage: 'The starting message that defines how the model should behave.',
  pasteLongTextToFileLen:
    'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
  samplers:
    'The order in which samplers are applied, in simplified form. Default is "dkypmxnt": dry->top_k->typ_p->top_p->min_p->xtc->top_sigma->temperature',
  temperature:
@@ -0,0 +1,135 @@
import {
  DocumentTextIcon,
  SpeakerWaveIcon,
  XMarkIcon,
} from '@heroicons/react/24/outline';
import { MessageExtra } from '../utils/types';
import { useState } from 'react';
import { classNames } from '../utils/misc';

export default function ChatInputExtraContextItem({
  items,
  removeItem,
  clickToShow,
}: {
  items?: MessageExtra[];
  removeItem?: (index: number) => void;
  clickToShow?: boolean;
}) {
  const [show, setShow] = useState(-1);
  const showingItem = show >= 0 ? items?.[show] : undefined;

  if (!items) return null;

  return (
    <div
      className="flex flex-row gap-4 overflow-x-auto py-2 px-1 mb-1"
      role="group"
      aria-description="Selected files"
    >
      {items.map((item, i) => (
        <div
          className="indicator"
          key={i}
          onClick={() => clickToShow && setShow(i)}
          tabIndex={0}
          aria-description={
            clickToShow ? `Click to show: ${item.name}` : undefined
          }
          role={clickToShow ? 'button' : 'menuitem'}
        >
          {removeItem && (
            <div className="indicator-item indicator-top">
              <button
                aria-label="Remove file"
                className="btn btn-neutral btn-sm w-4 h-4 p-0 rounded-full"
                onClick={() => removeItem(i)}
              >
                <XMarkIcon className="h-3 w-3" />
              </button>
            </div>
          )}

          <div
            className={classNames({
              'flex flex-row rounded-md shadow-sm items-center m-0 p-0': true,
              'cursor-pointer hover:shadow-md': !!clickToShow,
            })}
          >
            {item.type === 'imageFile' ? (
              <>
                <img
                  src={item.base64Url}
                  alt={`Preview image for ${item.name}`}
                  className="w-14 h-14 object-cover rounded-md"
                />
              </>
            ) : (
              <>
                <div
                  className="w-14 h-14 flex items-center justify-center"
                  aria-description="Document icon"
                >
                  {item.type === 'audioFile' ? (
                    <SpeakerWaveIcon className="h-8 w-8 text-gray-500" />
                  ) : (
                    <DocumentTextIcon className="h-8 w-8 text-gray-500" />
                  )}
                </div>

                <div className="text-xs pr-4">
                  <b>{item.name ?? 'Extra content'}</b>
                </div>
              </>
            )}
          </div>
        </div>
      ))}

      {showingItem && (
        <dialog
          className="modal modal-open"
          aria-description={`Preview ${showingItem.name}`}
        >
          <div className="modal-box">
            <div className="flex justify-between items-center mb-4">
              <b>{showingItem.name ?? 'Extra content'}</b>
              <button
                className="btn btn-ghost btn-sm"
                aria-label="Close preview dialog"
                onClick={() => setShow(-1)}
              >
                <XMarkIcon className="h-5 w-5" />
              </button>
            </div>
            {showingItem.type === 'imageFile' ? (
              <img
                src={showingItem.base64Url}
                alt={`Preview image for ${showingItem.name}`}
              />
            ) : showingItem.type === 'audioFile' ? (
              <audio
                controls
                className="w-full"
                aria-description={`Audio file ${showingItem.name}`}
              >
                <source
                  src={`data:${showingItem.mimeType};base64,${showingItem.base64Data}`}
                  type={showingItem.mimeType}
                />
                Your browser does not support the audio element.
              </audio>
            ) : (
              <div className="overflow-x-auto">
                <pre className="whitespace-pre-wrap break-words text-sm">
                  {showingItem.content}
                </pre>
              </div>
            )}
          </div>
          <div className="modal-backdrop" onClick={() => setShow(-1)}></div>
        </dialog>
      )}
    </div>
  );
}
@@ -3,7 +3,8 @@ import { useAppContext } from '../utils/app.context';
import { Message, PendingMessage } from '../utils/types';
import { classNames } from '../utils/misc';
import MarkdownDisplay, { CopyButton } from './MarkdownDisplay';
-import { ChevronLeftIcon, ChevronRightIcon } from '@heroicons/react/24/outline';
+import { ChevronLeftIcon, ChevronRightIcon, ArrowPathIcon, PencilSquareIcon } from '@heroicons/react/24/outline';
+import ChatInputExtraContextItem from './ChatInputExtraContextItem';

interface SplitMessage {
  content: PendingMessage['content'];
@@ -82,7 +83,11 @@ export default function ChatMessage({
  if (!viewingChat) return null;

  return (
-   <div className="group" id={id}>
+   <div className="group"
+     id={id}
+     role="group"
+     aria-description={`Message from ${msg.role}`}
+   >
      <div
        className={classNames({
          chat: true,
@@ -90,9 +95,13 @@ export default function ChatMessage({
          'chat-end': msg.role === 'user',
        })}
      >
        {msg.extra && msg.extra.length > 0 && (
          <ChatInputExtraContextItem items={msg.extra} clickToShow />
        )}

        <div
          className={classNames({
-           'chat-bubble markdown': true,
+           'chat-bubble chat-bubble-primary': true,
            'chat-bubble-base-300': msg.role !== 'user',
          })}
        >
@@ -168,35 +177,6 @@ export default function ChatMessage({
            </div>
          </details>
        )}

        {msg.extra && msg.extra.length > 0 && (
          <details
            className={classNames({
              'collapse collapse-arrow mb-4 bg-base-200': true,
              'bg-opacity-10': msg.role !== 'assistant',
            })}
          >
            <summary className="collapse-title">
              Extra content
            </summary>
            <div className="collapse-content">
              {msg.extra.map(
                (extra, i) =>
                  extra.type === 'textFile' ? (
                    <div key={extra.name}>
                      <b>{extra.name}</b>
                      <pre>{extra.content}</pre>
                    </div>
                  ) : extra.type === 'context' ? (
                    <div key={i}>
                      <pre>{extra.content}</pre>
                    </div>
                  ) : null // TODO: support other extra types
              )}
            </div>
          </details>
        )}

        <MarkdownDisplay
          content={content}
          isGenerating={isPending}
@@ -273,7 +253,7 @@ export default function ChatMessage({
            onClick={() => setEditingContent(msg.content)}
            disabled={msg.content === null}
          >
-           ✍️ Edit
+           <PencilSquareIcon className="h-4 w-4" /> Edit
          </button>
        )}
        {/* assistant message */}
@@ -289,7 +269,7 @@ export default function ChatMessage({
            }}
            disabled={msg.content === null}
          >
-           🔄 Regenerate
+           <ArrowPathIcon className="h-4 w-4" /> Regenerate
          </button>
        )}
        {!isPending && (
@@ -298,7 +278,7 @@ export default function ChatMessage({
            onClick={() => setEditingContent(msg.content)}
            disabled={msg.content === null}
          >
-           ✍️ Edit
+           <PencilSquareIcon className="h-4 w-4" /> Edit
          </button>
        )}
      </>
@@ -1,4 +1,4 @@
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { ClipboardEvent, useEffect, useMemo, useState } from 'react';
|
||||
import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context';
|
||||
import ChatMessage from './ChatMessage';
|
||||
import { CanvasType, Message, PendingMessage } from '../utils/types';
|
||||
@@ -7,7 +7,17 @@ import CanvasPyInterpreter from './CanvasPyInterpreter';
|
||||
import StorageUtils from '../utils/storage';
|
||||
import { useVSCodeContext } from '../utils/llama-vscode';
|
||||
import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts';
|
||||
|
||||
import {
|
||||
ArrowUpIcon,
|
||||
StopIcon,
|
||||
PaperClipIcon,
|
||||
} from '@heroicons/react/24/solid';
|
||||
import {
|
||||
ChatExtraContextApi,
|
||||
useChatExtraContext,
|
||||
} from './useChatExtraContext.tsx';
|
||||
import Dropzone from 'react-dropzone';
|
||||
import ChatInputExtraContextItem from './ChatInputExtraContextItem.tsx';
|
||||
/**
|
||||
* A message display is a message node with additional information for rendering.
|
||||
* For example, siblings of the message node are stored as their last node (aka leaf node).
|
||||
@@ -104,9 +114,10 @@ export default function ChatScreen() {
|
||||
|
||||
const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content());
|
||||
|
||||
const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
|
||||
const extraContext = useChatExtraContext();
|
||||
useVSCodeContext(textarea, extraContext);
|
||||
//const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
|
||||
// TODO: improve this when we have "upload file" feature
|
||||
const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
|
||||
|
||||
// keep track of leaf node for rendering
|
||||
const [currNodeId, setCurrNodeId] = useState<number>(-1);
|
||||
@@ -147,7 +158,7 @@ export default function ChatScreen() {
|
||||
currConvId,
|
||||
lastMsgNodeId,
|
||||
lastInpMsg,
|
||||
currExtra,
|
||||
extraContext.items,
|
||||
onChunk
|
||||
))
|
||||
) {
|
||||
@@ -155,7 +166,7 @@ export default function ChatScreen() {
|
||||
textarea.setValue(lastInpMsg);
|
||||
}
|
||||
// OK
|
||||
clearExtraContext();
|
||||
extraContext.clearItems();
|
||||
};
|
||||
|
||||
const handleEditMessage = async (msg: Message, content: string) => {
|
||||
@@ -282,42 +293,14 @@ export default function ChatScreen() {
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* chat input */}
|
||||
<div className="flex flex-row items-end pt-8 pb-6 sticky bottom-0 bg-base-100">
|
||||
<textarea
|
||||
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
|
||||
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
|
||||
className="textarea textarea-bordered w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
|
||||
placeholder="Type a message (Shift+Enter to add a new line)"
|
||||
ref={textarea.ref}
|
||||
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
|
||||
onKeyDown={(e) => {
|
||||
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
sendNewMessage();
|
||||
}
|
||||
}}
|
||||
id="msg-input"
|
||||
dir="auto"
|
||||
// Set a base height of 2 rows for mobile views
|
||||
// On lg+ screens, the hook will calculate and set the initial height anyway
|
||||
rows={2}
|
||||
></textarea>
|
||||
|
||||
{isGenerating(currConvId ?? '') ? (
|
||||
<button
|
||||
className="btn btn-neutral ml-2"
|
||||
onClick={() => stopGenerating(currConvId ?? '')}
|
||||
>
|
||||
Stop
|
||||
</button>
|
||||
) : (
|
||||
<button className="btn btn-primary ml-2" onClick={sendNewMessage}>
|
||||
Send
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{/* chat input */}
|
||||
<ChatInput
|
||||
textarea={textarea}
|
||||
extraContext={extraContext}
|
||||
onSend={sendNewMessage}
|
||||
onStop={() => stopGenerating(currConvId ?? '')}
|
||||
isGenerating={isGenerating(currConvId ?? '')}
|
||||
/>
|
||||
</div>
|
||||
<div className="w-full sticky top-[7em] h-[calc(100vh-9em)]">
|
||||
{canvasData?.type === CanvasType.PY_INTERPRETER && (
|
||||
@@ -327,3 +310,183 @@ export default function ChatScreen() {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// function ServerInfo() {
|
||||
// const { serverProps } = useAppContext();
|
||||
// const modalities = [];
|
||||
// if (serverProps?.modalities?.audio) {
|
||||
// modalities.push('audio');
|
||||
// }
|
||||
// if (serverProps?.modalities?.vision) {
|
||||
// modalities.push('vision');
|
||||
// }
|
||||
// return (
|
||||
// <div
|
||||
// className="card card-sm shadow-sm border-1 border-base-content/20 text-base-content/70 mb-6"
|
||||
// tabIndex={0}
|
||||
// aria-description="Server information"
|
||||
// >
|
||||
// <div className="card-body">
|
||||
// <b>Server Info</b>
|
||||
// <p>
|
||||
// <b>Model</b>: {serverProps?.model_path?.split(/(\\|\/)/).pop()}
|
||||
// <br />
|
||||
// {modalities.length > 0 ? (
|
||||
// <>
|
||||
// <b>Supported modalities:</b> {modalities.join(', ')}
|
||||
// </>
|
||||
// ) : (
|
||||
// ''
|
||||
// )}
|
||||
// </p>
|
||||
// </div>
|
||||
// </div>
|
||||
// );
|
||||
// }
|
||||
|
||||
function ChatInput({
|
||||
textarea,
|
||||
extraContext,
|
||||
onSend,
|
||||
onStop,
|
||||
isGenerating,
|
||||
}: {
|
||||
textarea: ChatTextareaApi;
|
||||
extraContext: ChatExtraContextApi;
|
||||
onSend: () => void;
|
||||
onStop: () => void;
|
||||
isGenerating: boolean;
|
||||
}) {
|
||||
const { config } = useAppContext();
|
||||
const [isDrag, setIsDrag] = useState(false);
|
||||
|
||||
return (
|
||||
<div
|
||||
role="group"
|
||||
aria-label="Chat input"
|
||||
className={classNames({
|
||||
'flex items-end pt-8 pb-6 sticky bottom-0 bg-base-100': true,
|
||||
'opacity-50': isDrag, // simply visual feedback to inform user that the file will be accepted
|
||||
})}
|
||||
>
|
||||
<Dropzone
|
||||
noClick
|
||||
onDrop={(files: File[]) => {
|
||||
setIsDrag(false);
|
||||
extraContext.onFileAdded(files);
|
||||
}}
|
||||
onDragEnter={() => setIsDrag(true)}
|
||||
onDragLeave={() => setIsDrag(false)}
|
||||
multiple={true}
|
||||
>
|
||||
{({ getRootProps, getInputProps }) => (
|
||||
<div
|
||||
className="flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full"
|
||||
// when a file is pasted to the input, we handle it here
|
||||
// if a text is pasted, and if it is long text, we will convert it to a file
|
||||
onPasteCapture={(e: ClipboardEvent<HTMLInputElement>) => {
|
||||
const text = e.clipboardData.getData('text/plain');
|
||||
if (
|
||||
text.length > 0 &&
|
||||
config.pasteLongTextToFileLen > 0 &&
|
||||
text.length > config.pasteLongTextToFileLen
|
||||
) {
|
||||
// if the text is too long, we will convert it to a file
|
||||
extraContext.addItems([
|
||||
{
|
||||
type: 'context',
|
||||
name: 'Pasted Content',
|
||||
content: text,
|
||||
},
|
||||
]);
|
||||
e.preventDefault();
|
||||
return;
|
||||
}
|
||||
|
||||
// if a file is pasted, we will handle it here
|
||||
const files = Array.from(e.clipboardData.items)
|
||||
.filter((item) => item.kind === 'file')
|
||||
.map((item) => item.getAsFile())
|
||||
.filter((file) => file !== null);
|
||||
|
||||
if (files.length > 0) {
|
||||
e.preventDefault();
|
||||
extraContext.onFileAdded(files);
|
||||
}
|
||||
}}
|
||||
{...getRootProps()}
|
||||
>
|
||||
{!isGenerating && (
|
||||
<ChatInputExtraContextItem
|
||||
items={extraContext.items}
|
||||
removeItem={extraContext.removeItem}
|
||||
/>
|
||||
)}
|
||||
|
||||
<div className="flex flex-row w-full">
|
||||
<textarea
|
||||
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
|
||||
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
|
||||
className="text-md outline-none border-none w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
|
||||
placeholder="Type a message..."
|
||||
ref={textarea.ref}
|
||||
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
|
||||
onKeyDown={(e) => {
|
||||
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
onSend();
|
||||
}
|
||||
}}
|
||||
id="msg-input"
|
||||
dir="auto"
|
||||
// Set a base height of 2 rows for mobile views
|
||||
// On lg+ screens, the hook will calculate and set the initial height anyway
|
||||
rows={2}
|
||||
></textarea>
|
||||
|
||||
{/* buttons area */}
|
||||
<div className="flex flex-row gap-2 ml-2">
|
||||
<label
|
||||
htmlFor="file-upload"
|
||||
className={classNames({
|
||||
'btn w-8 h-8 p-0 rounded-full': true,
|
||||
'btn-disabled': isGenerating,
|
||||
})}
|
||||
aria-label="Upload file"
|
||||
tabIndex={0}
|
||||
role="button"
|
||||
>
|
||||
<PaperClipIcon className="h-5 w-5" />
|
||||
</label>
|
||||
<input
|
||||
id="file-upload"
|
||||
type="file"
|
||||
disabled={isGenerating}
|
||||
{...getInputProps()}
|
||||
hidden
|
||||
/>
|
||||
{isGenerating ? (
|
||||
<button
|
||||
className="btn btn-neutral w-8 h-8 p-0 rounded-full"
|
||||
onClick={onStop}
|
||||
>
|
||||
<StopIcon className="h-5 w-5" />
|
||||
</button>
|
||||
) : (
|
||||
<button
|
||||
className="btn btn-primary w-8 h-8 p-0 rounded-full"
|
||||
onClick={onSend}
|
||||
aria-label="Send message"
|
||||
>
|
||||
<ArrowUpIcon className="h-5 w-5" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</Dropzone>
|
||||
</div>
|
||||
);
|
||||
}
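Editor's note: the paste handler above routes long pasted text into a 'context' attachment instead of the textarea. A minimal sketch of that decision, assuming the same pasteLongTextToFileLen setting (0 disables the conversion); this helper name is hypothetical:

// Mirrors the condition in onPasteCapture above.
function shouldConvertPasteToFile(text: string, pasteLongTextToFileLen: number): boolean {
  return (
    text.length > 0 &&
    pasteLongTextToFileLen > 0 &&
    text.length > pasteLongTextToFileLen
  );
}

console.log(shouldConvertPasteToFile('x'.repeat(5000), 2048)); // true  -> becomes a 'context' item
console.log(shouldConvertPasteToFile('short paste', 2048));    // false -> inserted into the textarea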
@@ -12,6 +12,7 @@ import {
  ArrowDownTrayIcon,
  PencilIcon,
  TrashIcon,
  MoonIcon,
} from '@heroicons/react/24/outline';

export default function Header() {
@@ -204,16 +205,7 @@ export default function Header() {
      <div className="tooltip tooltip-bottom" data-tip="Themes">
        <div className="dropdown dropdown-end dropdown-bottom">
          <div tabIndex={0} role="button" className="btn m-1">
            <svg
              xmlns="http://www.w3.org/2000/svg"
              width="16"
              height="16"
              fill="currentColor"
              className="bi bi-palette2"
              viewBox="0 0 16 16"
            >
              <path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z" />
            </svg>
            <MoonIcon className="w-5 h-5" />
          </div>
          <ul
            tabIndex={0}

@@ -11,6 +11,7 @@ import { ElementContent, Root } from 'hast';
import { visit } from 'unist-util-visit';
import { useAppContext } from '../utils/app.context';
import { CanvasType } from '../utils/types';
import { DocumentDuplicateIcon, PlayIcon } from '@heroicons/react/24/outline';

export default function MarkdownDisplay({
  content,
@@ -109,7 +110,8 @@ export const CopyButton = ({
      }}
      onMouseLeave={() => setCopied(false)}
    >
      {copied ? 'Copied!' : '📋 Copy'}
      <DocumentDuplicateIcon className="h-4 w-4" />
      {copied ? 'Copied!' : 'Copy'}
    </button>
  );
};
@@ -133,7 +135,8 @@ export const RunPyCodeButton = ({
          })
        }
      >
        ▶️ Run
        <PlayIcon className="h-4 w-4" />
        {"Run"}
      </button>
    </>
  );

@@ -275,6 +275,16 @@ const SETTING_SECTIONS = (
            key,
          }) as SettingFieldInput
      ),
      {
        type: SettingInputType.SHORT_INPUT,
        label: 'Paste length to file',
        key: 'pasteLongTextToFileLen',
      },
      {
        type: SettingInputType.CHECKBOX,
        label: 'Parse PDF as image instead of text',
        key: 'pdfAsImage',
      },
    ],
  },
  {

examples/server/webui/src/components/useChatExtraContext.tsx (new file, 371 lines)
@@ -0,0 +1,371 @@
import { useState } from 'react';
import { MessageExtra } from '../utils/types';
import toast from 'react-hot-toast';
import { useAppContext } from '../utils/app.context';
import * as pdfjs from 'pdfjs-dist';
import pdfjsWorkerSrc from 'pdfjs-dist/build/pdf.worker.min.mjs?url';
import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';

pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorkerSrc;

// This file handles uploading extra context items (a.k.a. files)
// It allows processing these kinds of files:
// - image files (converted to base64)
// - audio files (converted to base64)
// - text files (including code files)
// - pdf (converted to text)

// Interface describing the API returned by the hook
export interface ChatExtraContextApi {
  items?: MessageExtra[]; // undefined if empty, similar to Message['extra']
  addItems: (items: MessageExtra[]) => void;
  removeItem: (idx: number) => void;
  clearItems: () => void;
  onFileAdded: (files: File[]) => void; // used by the "upload" button
}

export function useChatExtraContext(): ChatExtraContextApi {
  const { serverProps, config } = useAppContext();
  const [items, setItems] = useState<MessageExtra[]>([]);

  const addItems = (newItems: MessageExtra[]) => {
    setItems((prev) => [...prev, ...newItems]);
  };

  const removeItem = (idx: number) => {
    setItems((prev) => prev.filter((_, i) => i !== idx));
  };

  const clearItems = () => {
    setItems([]);
  };

  const isSupportVision = serverProps?.modalities?.vision;

  const onFileAdded = async (files: File[]) => {
    try {
      for (const file of files) {
        const mimeType = file.type;

        // this limit is only to prevent accidental uploads of huge files
        // it could potentially crash the browser because we read the file as base64
        if (file.size > 500 * 1024 * 1024) {
          toast.error('File is too large. Maximum size is 500MB.');
          break;
        }

        if (mimeType.startsWith('image/')) {
          if (!isSupportVision) {
            toast.error('Multimodal is not supported by this server or model.');
            break;
          }

          let base64Url = await getFileAsBase64(file);
          if (mimeType === 'image/svg+xml') {
            // Convert SVG to PNG
            base64Url = await svgBase64UrlToPngDataURL(base64Url);
          }
          addItems([
            {
              type: 'imageFile',
              name: file.name,
              base64Url,
            },
          ]);
        } else if (mimeType.startsWith('video/')) {
          toast.error('Video files are not supported yet.');
          break;
        } else if (mimeType.startsWith('audio/')) {
          if (!/mpeg|wav/.test(mimeType)) {
            toast.error('Only mp3 and wav audio files are supported.');
            break;
          }

          // plain base64, not a data URL
          const base64Data = await getFileAsBase64(file, false);
          addItems([
            {
              type: 'audioFile',
              name: file.name,
              mimeType,
              base64Data,
            },
          ]);
        } else if (mimeType.startsWith('application/pdf')) {
          if (config.pdfAsImage && !isSupportVision) {
            toast(
              'Multimodal is not supported, PDF will be converted to text instead of image.'
            );
            break;
          }

          if (config.pdfAsImage && isSupportVision) {
            // Convert PDF to images
            const base64Urls = await convertPDFToImage(file);
            addItems(
              base64Urls.map((base64Url) => ({
                type: 'imageFile',
                name: file.name,
                base64Url,
              }))
            );
          } else {
            // Convert PDF to text
            const content = await convertPDFToText(file);
            addItems([
              {
                type: 'textFile',
                name: file.name,
                content,
              },
            ]);
            if (isSupportVision) {
              toast.success(
                'PDF file converted to text. You can also convert it to image, see in Settings.'
              );
            }
          }
          break;
        } else {
          // Because there can be many text file types (like code files), we do not check the mime type
          // and instead just check that the file is not binary.
          const reader = new FileReader();
          reader.onload = (event) => {
            if (event.target?.result) {
              const content = event.target.result as string;
              if (!isLikelyNotBinary(content)) {
                toast.error('File is binary. Please upload a text file.');
                return;
              }
              addItems([
                {
                  type: 'textFile',
                  name: file.name,
                  content,
                },
              ]);
            }
          };
          reader.readAsText(file);
        }
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error processing file: ${message}`;
      toast.error(errorMessage);
    }
  };

  return {
    items: items.length > 0 ? items : undefined,
    addItems,
    removeItem,
    clearItems,
    onFileAdded,
  };
}

async function getFileAsBase64(file: File, outputUrl = true): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => {
      if (event.target?.result) {
        let result = event.target.result as string;
        if (!outputUrl) {
          // remove the base64 data URL prefix
          result = result.substring(result.indexOf(',') + 1);
        }
        resolve(result);
      } else {
        reject(new Error('Failed to read file.'));
      }
    };
    reader.readAsDataURL(file);
  });
}
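Editor's note: getFileAsBase64 returns either a full data URL or just the raw base64 payload. An illustrative sketch of the prefix-stripping step it performs (the helper name is hypothetical):

// A FileReader data URL looks like "data:<mime>;base64,<payload>".
// Passing outputUrl = false keeps only <payload>, as done above.
function stripDataUrlPrefix(dataUrl: string): string {
  return dataUrl.substring(dataUrl.indexOf(',') + 1);
}

console.log(stripDataUrlPrefix('data:audio/wav;base64,UklGRg==')); // "UklGRg=="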
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => {
      if (event.target?.result) {
        resolve(event.target.result as ArrayBuffer);
      } else {
        reject(new Error('Failed to read file.'));
      }
    };
    reader.readAsArrayBuffer(file);
  });
}

async function convertPDFToText(file: File): Promise<string> {
  const buffer = await getFileAsBuffer(file);
  const pdf = await pdfjs.getDocument(buffer).promise;
  const numPages = pdf.numPages;
  const textContentPromises: Promise<TextContent>[] = [];
  for (let i = 1; i <= numPages; i++) {
    textContentPromises.push(
      pdf.getPage(i).then((page) => page.getTextContent())
    );
  }
  const textContents = await Promise.all(textContentPromises);
  const textItems = textContents.flatMap((textContent: TextContent) =>
    textContent.items.map((item) => (item as TextItem).str ?? '')
  );
  return textItems.join('\n');
}
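Editor's note: a minimal call site for the converter above. The pdfFile binding is hypothetical; in the hook it comes from the upload or drop handlers:

// Sketch only: extract text from a user-picked PDF and inspect the size.
declare const pdfFile: File; // e.g. from <input type="file"> or react-dropzone

convertPDFToText(pdfFile).then((text) => {
  console.log(`extracted ${text.length} characters from ${pdfFile.name}`);
});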
// returns a list of base64 images
async function convertPDFToImage(file: File): Promise<string[]> {
  const buffer = await getFileAsBuffer(file);
  const doc = await pdfjs.getDocument(buffer).promise;
  const pages: Promise<string>[] = [];

  for (let i = 1; i <= doc.numPages; i++) {
    const page = await doc.getPage(i);
    const viewport = page.getViewport({ scale: 1.5 });
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    canvas.width = viewport.width;
    canvas.height = viewport.height;
    if (!ctx) {
      throw new Error('Failed to get 2D context from canvas');
    }
    const task = page.render({ canvasContext: ctx, viewport: viewport });
    pages.push(
      task.promise.then(() => {
        return canvas.toDataURL();
      })
    );
  }

  return await Promise.all(pages);
}
// WARN: vibe code below
// This code is a heuristic to determine if a string is likely not binary.
// It is necessary because input files can arrive with many mime types that we cannot enumerate.
// For example, a python file can be text/plain, application/x-python, etc.
function isLikelyNotBinary(str: string): boolean {
  const options = {
    prefixLength: 1024 * 10, // Check the first 10KB of the string
    suspiciousCharThresholdRatio: 0.15, // Allow up to 15% suspicious chars
    maxAbsoluteNullBytes: 2,
  };

  if (!str) {
    return true; // Empty string is considered "not binary" or trivially text.
  }

  const sampleLength = Math.min(str.length, options.prefixLength);
  if (sampleLength === 0) {
    return true; // Effectively an empty string after considering prefixLength.
  }

  let suspiciousCharCount = 0;
  let nullByteCount = 0;

  for (let i = 0; i < sampleLength; i++) {
    const charCode = str.charCodeAt(i);

    // 1. Check for Unicode Replacement Character (U+FFFD)
    // This is a strong indicator if the string was created from decoding bytes as UTF-8.
    if (charCode === 0xfffd) {
      suspiciousCharCount++;
      continue;
    }

    // 2. Check for Null Bytes (U+0000)
    if (charCode === 0x0000) {
      nullByteCount++;
      // We also count nulls towards the general suspicious character count,
      // as they are less common in typical text files.
      suspiciousCharCount++;
      continue;
    }

    // 3. Check for C0 Control Characters (U+0001 to U+001F)
    // Exclude common text control characters: TAB (9), LF (10), CR (13).
    // We can also be a bit lenient with BEL (7) and BS (8) which sometimes appear in logs.
    if (charCode < 32) {
      if (
        charCode !== 9 && // TAB
        charCode !== 10 && // LF
        charCode !== 13 && // CR
        charCode !== 7 && // BEL (Bell) - sometimes in logs
        charCode !== 8 // BS (Backspace) - less common, but possible
      ) {
        suspiciousCharCount++;
      }
    }
    // Characters from 32 (space) up to 126 (~) are printable ASCII.
    // Character 127 (DEL) is a control character.
    // Characters >= 128 are extended ASCII / multi-byte Unicode.
    // If they resulted in U+FFFD, we caught it. Otherwise, they are valid
    // (though perhaps unusual) Unicode characters from JS's perspective.
    // The main concern is if those higher characters came from misinterpreting
    // a single-byte encoding as UTF-8, which again, U+FFFD would usually flag.
  }

  // Check absolute null byte count
  if (nullByteCount > options.maxAbsoluteNullBytes) {
    return false; // Too many null bytes is a strong binary indicator
  }

  // Check ratio of suspicious characters
  const ratio = suspiciousCharCount / sampleLength;
  return ratio <= options.suspiciousCharThresholdRatio;
}
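Editor's note: two illustrative probes of the heuristic above (the byte sequences are made-up examples, not from the commit):

// Plain source text: no nulls, no control chars beyond LF -> treated as text.
console.log(isLikelyNotBinary('def main():\n    return 123\n')); // true

// Three NUL bytes exceed maxAbsoluteNullBytes (2) -> treated as binary.
console.log(isLikelyNotBinary('\u0000\u0000\u0000PNG')); // false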
// WARN: vibe code below
// Converts a Base64URL encoded SVG string to a PNG Data URL using the browser Canvas API.
function svgBase64UrlToPngDataURL(base64UrlSvg: string): Promise<string> {
  const backgroundColor = 'white'; // Default background color for the PNG

  return new Promise((resolve, reject) => {
    try {
      const img = new Image();

      img.onload = () => {
        const canvas = document.createElement('canvas');
        const ctx = canvas.getContext('2d');

        if (!ctx) {
          reject(new Error('Failed to get 2D canvas context.'));
          return;
        }

        // Use the SVG's natural dimensions, with fallbacks.
        // Fallbacks (e.g., 300x300) are for SVGs without explicit width/height
        // or when naturalWidth/Height might be 0 before full processing.
        const targetWidth = img.naturalWidth || 300;
        const targetHeight = img.naturalHeight || 300;

        canvas.width = targetWidth;
        canvas.height = targetHeight;

        if (backgroundColor) {
          ctx.fillStyle = backgroundColor;
          ctx.fillRect(0, 0, canvas.width, canvas.height);
        }

        ctx.drawImage(img, 0, 0, targetWidth, targetHeight);
        resolve(canvas.toDataURL('image/png'));
      };

      img.onerror = () => {
        reject(
          new Error('Failed to load SVG image. Ensure the SVG data is valid.')
        );
      };

      // Load SVG string into an Image element
      img.src = base64UrlSvg;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error converting SVG to PNG: ${message}`;
      toast.error(errorMessage);
      reject(new Error(errorMessage));
    }
  });
}
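Editor's note: an illustrative round trip through the converter above (runs in a browser context; the tiny SVG is a made-up example):

// Encode a minimal SVG as a base64 data URL, then rasterize it to PNG.
const svgMarkup =
  '<svg xmlns="http://www.w3.org/2000/svg" width="10" height="10"></svg>';
const svgDataUrl = 'data:image/svg+xml;base64,' + btoa(svgMarkup);

svgBase64UrlToPngDataURL(svgDataUrl).then((pngDataUrl) => {
  console.log(pngDataUrl.startsWith('data:image/png')); // true
});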
@@ -37,6 +37,7 @@ export interface ChatTextareaApi {
  setValue: (value: string) => void;
  focus: () => void;
  ref: React.RefObject<HTMLTextAreaElement>;
  refOnSubmit: React.MutableRefObject<(() => void) | null>; // Submit handler
  onInput: (event: React.FormEvent<HTMLTextAreaElement>) => void; // Input handler
}

@@ -46,7 +47,7 @@ export interface ChatTextareaApi {
export function useChatTextarea(initValue: string): ChatTextareaApi {
  const [savedInitValue, setSavedInitValue] = useState<string>(initValue);
  const textareaRef = useRef<HTMLTextAreaElement>(null);

  const onSubmitRef = useRef<(() => void) | null>(null);
  // Effect to set initial value and height on mount or when initValue changes
  useEffect(() => {
    const textarea = textareaRef.current;
@@ -91,6 +92,7 @@ export function useChatTextarea(initValue: string): ChatTextareaApi {
      }
    },
    ref: textareaRef,
    refOnSubmit: onSubmitRef,
    onInput: handleInput,
  };
}

@@ -3,6 +3,7 @@ import {
  APIMessage,
  CanvasData,
  Conversation,
  LlamaCppServerProps,
  Message,
  PendingMessage,
  ViewingChat,
@@ -12,6 +13,7 @@ import {
  filterThoughtFromMsgs,
  normalizeMsgsForAPI,
  getSSEStreamAsync,
  getServerProps
} from './misc';
import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
import { matchPath, useLocation, useNavigate } from 'react-router';
@@ -54,6 +56,10 @@ interface AppContextValue {
  saveConfig: (config: typeof CONFIG_DEFAULT) => void;
  showSettings: boolean;
  setShowSettings: (show: boolean) => void;

  // props
  serverProps: LlamaCppServerProps | null;
}

// this callback is used for scrolling to the bottom of the chat and switching to the last node
@@ -82,6 +88,9 @@ export const AppContextProvider = ({
  const params = matchPath('/chat/:convId', pathname);
  const convId = params?.params?.convId;

  const [serverProps, setServerProps] = useState<LlamaCppServerProps | null>(
    null
  );
  const [viewingChat, setViewingChat] = useState<ViewingChat | null>(null);
  const [pendingMessages, setPendingMessages] = useState<
    Record<Conversation['id'], PendingMessage>
@@ -93,6 +102,20 @@ export const AppContextProvider = ({
  const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
  const [showSettings, setShowSettings] = useState(false);

  // get server props
  useEffect(() => {
    getServerProps(BASE_URL, config.apiKey)
      .then((props) => {
        console.debug('Server props:', props);
        setServerProps(props);
      })
      .catch((err) => {
        console.error(err);
        toast.error('Failed to fetch server props');
      });
    // eslint-disable-next-line
  }, []);

  // handle change when the convId from URL is changed
  useEffect(() => {
    // also reset the canvas data
@@ -469,6 +492,7 @@ export const AppContextProvider = ({
        saveConfig,
        showSettings,
        setShowSettings,
        serverProps,
      }}
    >
      {children}

@@ -1,6 +1,6 @@
import { useEffect, useMemo, useState } from 'react';
import { MessageExtraContext } from './types';
import { useEffect } from 'react';
import { ChatTextareaApi } from '../components/useChatTextarea.ts';
import { ChatExtraContextApi } from '../components/useChatExtraContext.tsx';

// Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
// Ref: https://github.com/ggml-org/llama.cpp/pull/11940
@@ -15,11 +15,10 @@ interface SetTextEvData {
 * window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
 */

export const useVSCodeContext = (textarea: ChatTextareaApi) => {
  const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
    null
  );

export const useVSCodeContext = (
  textarea: ChatTextareaApi,
  extraContext: ChatExtraContextApi
) => {
  // Accept setText message from a parent window and set inputMsg and extraContext
  useEffect(() => {
    const handleMessage = (event: MessageEvent) => {
@@ -27,18 +26,25 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
        const data: SetTextEvData = event.data;
        textarea.setValue(data?.text);
        if (data?.context && data.context.length > 0) {
          setExtraContext({
            type: 'context',
            content: data.context,
          });
          extraContext.clearItems();
          extraContext.addItems([
            {
              type: 'context',
              name: 'Extra context',
              content: data.context,
            },
          ]);
        }
        textarea.focus();
        setTimeout(() => {
          textarea.refOnSubmit.current?.();
        }, 10); // wait for setExtraContext to finish
      }
    };

    window.addEventListener('message', handleMessage);
    return () => window.removeEventListener('message', handleMessage);
  }, [textarea]);
  }, [textarea, extraContext]);

  // Add a keydown listener that sends the "escapePressed" message to the parent window
  useEffect(() => {
@@ -52,9 +58,5 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
    return () => window.removeEventListener('keydown', handleKeyDown);
  }, []);

  return {
    extraContext,
    // call once the user message is sent, to clear the extra context
    clearExtraContext: () => setExtraContext(null),
  };
  return {};
};

@@ -1,6 +1,6 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
import { APIMessage, Message, LlamaCppServerProps, APIMessageContentPart } from './types';

// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
@@ -57,21 +57,55 @@ export const copyStr = (textToCopy: string) => {
 */
export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
  return messages.map((msg) => {
    let newContent = '';
    if (msg.role !== 'user' || !msg.extra) {
      return {
        role: msg.role,
        content: msg.content,
      } as APIMessage;
    }

    // extra content first, then the user text message at the end
    // this allows re-using the same cache prefix for long contexts
    const contentArr: APIMessageContentPart[] = [];

    for (const extra of msg.extra ?? []) {
      if (extra.type === 'context') {
        if (extra.content != '') {
          newContent += `${extra.content}\n\n`;
        }
        contentArr.push({
          type: 'text',
          text: extra.content,
        });
      } else if (extra.type === 'textFile') {
        contentArr.push({
          type: 'text',
          text: `File: ${extra.name}\nContent:\n\n${extra.content}`,
        });
      } else if (extra.type === 'imageFile') {
        contentArr.push({
          type: 'image_url',
          image_url: { url: extra.base64Url },
        });
      } else if (extra.type === 'audioFile') {
        contentArr.push({
          type: 'input_audio',
          input_audio: {
            data: extra.base64Data,
            format: /wav/.test(extra.mimeType) ? 'wav' : 'mp3',
          },
        });
      } else {
        throw new Error('Unknown extra type');
      }
    }

    newContent += msg.content;
    // add the user message at the end
    contentArr.push({
      type: 'text',
      text: msg.content,
    });

    return {
      role: msg.role,
      content: newContent,
      content: contentArr,
    };
  }) as APIMessage[];
}
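Editor's note: to make the shape concrete, an illustrative result of the normalization above for a user message with one image attachment (field values are made up):

// One user turn: attachment parts come first, the typed text last.
const normalized = {
  role: 'user',
  content: [
    {
      type: 'image_url',
      image_url: { url: 'data:image/png;base64,iVBORw0KGgo=' },
    },
    { type: 'text', text: 'What is in this picture?' },
  ],
};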
@@ -137,3 +171,25 @@ export const cleanCurrentUrl = (removeQueryParams: string[]) => {
  });
  window.history.replaceState({}, '', url.toString());
};

export const getServerProps = async (
  baseUrl: string,
  apiKey?: string
): Promise<LlamaCppServerProps> => {
  try {
    const response = await fetch(`${baseUrl}/props`, {
      headers: {
        'Content-Type': 'application/json',
        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
      },
    });
    if (!response.ok) {
      throw new Error('Failed to fetch server props');
    }
    const data = await response.json();
    return data as LlamaCppServerProps;
  } catch (error) {
    console.error('Error fetching server props:', error);
    throw error;
  }
};

@@ -48,7 +48,11 @@ export interface Message {
  children: Message['id'][];
}

type MessageExtra = MessageExtraTextFile | MessageExtraContext; // TODO: will add more in the future
export type MessageExtra =
  | MessageExtraTextFile
  | MessageExtraImageFile
  | MessageExtraAudioFile
  | MessageExtraContext;

export interface MessageExtraTextFile {
  type: 'textFile';
@@ -56,12 +60,43 @@ export interface MessageExtraTextFile {
  content: string;
}

export interface MessageExtraImageFile {
  type: 'imageFile';
  name: string;
  base64Url: string;
}

export interface MessageExtraAudioFile {
  type: 'audioFile';
  name: string;
  base64Data: string;
  mimeType: string;
}

export interface MessageExtraContext {
  type: 'context';
  name: string;
  content: string;
}

export type APIMessage = Pick<Message, 'role' | 'content'>;
export type APIMessageContentPart =
  | {
      type: 'text';
      text: string;
    }
  | {
      type: 'image_url';
      image_url: { url: string };
    }
  | {
      type: 'input_audio';
      input_audio: { data: string; format: 'wav' | 'mp3' };
    };

export type APIMessage = {
  role: Message['role'];
  content: string | APIMessageContentPart[];
};

export interface Conversation {
  id: string; // format: `conv-{timestamp}`
@@ -96,4 +131,15 @@ export interface SettingsPreset {
  name: string;
  createdAt: number; // timestamp from Date.now()
  config: Record<string, string | number | boolean>; // partial CONFIG_DEFAULT
}

// a non-complete list of props, only contains the ones we need
export interface LlamaCppServerProps {
  model_path: string;
  n_ctx: number;
  modalities?: {
    vision: boolean;
    audio: boolean;
  };
  // TODO: support params
}
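Editor's note: an illustrative value matching the interface above, as the webui might receive it from GET /props (the model path and context size are hypothetical):

// Example props for a vision-capable, non-audio model.
const exampleProps: LlamaCppServerProps = {
  model_path: '/models/qwen2-vl-7b-instruct-q4_k_m.gguf',
  n_ctx: 8192,
  modalities: { vision: true, audio: false },
};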
@@ -7,7 +7,7 @@ import zlib from 'node:zlib';

/* eslint-disable */

const MAX_BUNDLE_SIZE = 1.5 * 1024 * 1024; // only increase when absolutely necessary
const MAX_BUNDLE_SIZE = 2 * 1024 * 1024; // only increase when absolutely necessary

const GUIDE_FOR_FRONTEND = `
<!--

@@ -99,6 +99,18 @@ ggml_cgraph * llm_build_context::build_k_shift() {

    GGML_ASSERT(kv_self.size == n_ctx);

    const auto & rope_type_shift = hparams.rope_type == LLAMA_ROPE_TYPE_MROPE
        // @ngxson : this is a workaround
        // for M-RoPE, we want to rotate the whole vector when doing KV shift
        // a normal RoPE should work, we just need to use the correct ordering
        // ref: https://github.com/ggml-org/llama.cpp/pull/13870
        ? LLAMA_ROPE_TYPE_NEOX
        : hparams.rope_type;

    const float yarn_attn_factor_shift = model.arch == LLM_ARCH_DEEPSEEK2
        ? 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale))
        : cparams.yarn_attn_factor;

    lctx.inp_K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx);
    cb(lctx.inp_K_shift, "K_shift", -1);
    ggml_set_input(lctx.inp_K_shift);
@@ -127,15 +139,15 @@ ggml_cgraph * llm_build_context::build_k_shift() {
            }
        }
        tmp = ggml_rope_ext_inplace(ctx0, tmp,
            lctx.inp_K_shift, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
            ext_factor, attn_factor, beta_fast, beta_slow);
            lctx.inp_K_shift, rope_factors, n_rot, rope_type_shift, n_ctx_orig, freq_base, freq_scale,
            ext_factor, yarn_attn_factor_shift, beta_fast, beta_slow);
        cb(tmp, "K_shifted_f32", il);
        tmp = ggml_cpy(ctx0, tmp, k);
    } else {
        // we rotate only the first n_rot dimensions
        tmp = ggml_rope_ext_inplace(ctx0, k,
            lctx.inp_K_shift, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
            ext_factor, attn_factor, beta_fast, beta_slow);
            lctx.inp_K_shift, rope_factors, n_rot, rope_type_shift, n_ctx_orig, freq_base, freq_scale,
            ext_factor, yarn_attn_factor_shift, beta_fast, beta_slow);
    }
    cb(tmp, "K_shifted", il);
    ggml_build_forward_expand(gf, tmp);