Webui improvement (#481)

* update webui

* add token/s in webui

* add webui files

* fix webui first message disappearing in some browsers

* add missing html files

---------

Co-authored-by: firecoperana <firecoperana>
firecoperana authored on 2025-06-08 11:38:47 +00:00, committed by GitHub
parent 933b061441, commit 6cbd73c60e
60 changed files with 12299 additions and 1535 deletions


@@ -15,21 +15,8 @@ set(TARGET_SRCS
httplib.h
)
set(PUBLIC_ASSETS
colorthemes.css
style.css
theme-beeninorder.css
theme-ketivah.css
theme-mangotango.css
theme-playground.css
theme-polarnight.css
theme-snowstorm.css
index.html
index-new.html
index.js
completion.js
system-prompts.js
prompt-formats.js
json-schema-to-grammar.mjs
index.html.gz
loading.html
)
foreach(asset ${PUBLIC_ASSETS})
@@ -41,6 +28,8 @@ foreach(asset ${PUBLIC_ASSETS})
OUTPUT "${output}"
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
)
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
endforeach()
add_executable(${TARGET} ${TARGET_SRCS})
@@ -49,6 +38,7 @@ target_compile_definitions(${TARGET} PRIVATE
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
if (LLAMA_SERVER_SSL)
@@ -61,4 +51,4 @@ if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif()
target_compile_features(${TARGET} PRIVATE cxx_std_11)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long


@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="5">
</head>
<body>
<div id="loading">
The model is loading. Please wait.<br/>
The user interface will appear soon.
</div>
</body>
</html>


@@ -29,7 +29,7 @@ export async function* llama(prompt, params = {}, config = {}) {
const completionParams = { ...paramDefaults, ...params, prompt };
const response = await fetch(`${api_url}/completion`, {
const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
method: 'POST',
body: JSON.stringify(completionParams),
headers: {
@@ -78,7 +78,12 @@ export async function* llama(prompt, params = {}, config = {}) {
for (const line of lines) {
const match = regex.exec(line);
if (match) {
result[match[1]] = match[2]
result[match[1]] = match[2];
if (result.data === '[DONE]') {
cont = false;
break;
}
// since we know this is llama.cpp, let's just decode the json in data
if (result.data) {
result.data = JSON.parse(result.data);
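
For context, the hunk above makes the web client treat the OpenAI-style `data: [DONE]` line as an end-of-stream sentinel instead of trying to JSON-parse it. A minimal client sketch in the same spirit; the endpoint path and request payload here are assumptions for illustration, not part of the diff:

```ts
// Minimal SSE consumer sketch: stops cleanly on the "data: [DONE]" sentinel.
async function* streamCompletion(
  apiUrl: string,
  body: unknown
): AsyncGenerator<unknown> {
  const response = await fetch(`${apiUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let buffered = '';
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffered += decoder.decode(value, { stream: true });
    const lines = buffered.split('\n');
    buffered = lines.pop() ?? ''; // keep any partial line for the next read
    for (const line of lines) {
      if (!line.startsWith('data:')) continue;
      const payload = line.slice(5).trim();
      if (payload === '[DONE]') return; // end-of-stream sentinel
      if (payload) yield JSON.parse(payload);
    }
  }
}
```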


Binary image changed (size 4.0 KiB before and after).


@@ -39,11 +39,15 @@
temperature: 0.8, // adapt all following parameters to optimized min-p requirements. For non-English, set to 0.6 or lower
repeat_last_n: 0, // 0 = disable penalty, -1 = context size
repeat_penalty: 1.0, // 1.0 = disabled
penalize_nl: false, // true only useful for infinite completion
dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well
dry_base: 1.75, // 0.0 = disabled
dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty, 2 works well
dry_penalty_last_n: -1, // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
top_k: 0, // <= 0 to use vocab size
top_p: 1.0, // 1.0 = disabled
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
tfs_z: 1.0, // 1.0 = disabled
xtc_probability: 0.0, // 0 = disabled;
xtc_threshold: 0.1, // > 0.5 disables XTC;
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
@@ -831,11 +835,16 @@ return html`
<fieldset class="params">
${IntField({ label: "Top-K", title: "Limits the selection of the next token to the K most probable tokens. 1 means no randomness = greedy sampling. If set to 0, it means the entire vocabulary size is considered.", max: 100, min: 0, step: 1, name: "top_k", value: params.value.top_k })}
${IntField({ label: "Penalize Last N", title: "The last n tokens that are taken into account to penalise repetitions. A value of 0 means that this function is deactivated and -1 means that the entire size of the context is taken into account.", max: 2048, min: 0, step: 16, name: "repeat_last_n", value: params.value.repeat_last_n })}
${FloatField({ label: "Top-P", title: "Limits the selection of the next token to a subset of tokens whose combined probability reaches a threshold value P = top-P. If set to 1, it means the entire vocabulary size is considered.", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
${FloatField({ label: "Presence Penalty", title: "A penalty that is applied if certain tokens appear repeatedly in the generated text. A higher value leads to fewer repetitions.", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Frequency Penalty", title: "A penalty that is applied based on the frequency with which certain tokens occur in the training data set. A higher value results in rare tokens being favoured.", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
${FloatField({ label: "Top-P", title: "Limits the selection of the next token to a subset of tokens whose combined probability reaches a threshold value P = top-P. If set to 1, it means the entire vocabulary size is considered.", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
${FloatField({ label: "Typical-P", title: "Activates local typical sampling, a method used to limit the prediction of tokens that are atypical in the current context. The parameter p controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "XTC probability", title: "Sets the chance for token removal (checked once on sampler start)", max: 1.0, min: 0.0, name: "xtc_probability", step: 0.01, value: params.value.xtc_probability })}
${FloatField({ label: "XTC threshold", title: "Sets a minimum probability threshold for tokens to be removed", max: 0.5, min: 0.0, name: "xtc_threshold", step: 0.01, value: params.value.xtc_threshold })}
${FloatField({ label: "DRY Penalty Multiplier", title: "Set the DRY repetition penalty multiplier. Default is 0.0, which disables DRY.", max: 5.0, min: 0.0, name: "dry_multiplier", step: 0.01, value: params.value.dry_multiplier })}
${FloatField({ label: "DRY Base", title: "Set the DRY repetition penalty base value. Default is 1.75", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
${IntField({ label: "DRY Allowed Length", title: "Tokens that extend repetition beyond this receive exponentially increasing penalty. Default is 2", max: 10, min: 1, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
${IntField({ label: "DRY Penalty Last N", title: "How many tokens to scan for repetitions. Default is -1, where 0 is disabled and -1 is context size", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
</fieldset>
@@ -1132,12 +1141,15 @@ document.addEventListener('DOMContentLoaded', (event) => {
const snapSettings = {
temperature: { snapValue: 1.0, snapRangeMultiplier: 6 },
min_p: { snapValue: 0.05, snapRangeMultiplier: 2 },
xtc_probability: { snapValue: 0.0, snapRangeMultiplier: 4 },
xtc_threshold: { snapValue: 0.5, snapRangeMultiplier: 4 },
top_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
tfs_z: { snapValue: 1.0, snapRangeMultiplier: 4 },
typical_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
repeat_penalty: { snapValue: 1.0, snapRangeMultiplier: 4 },
presence_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },
frequency_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },
dry_multiplier: { snapValue: 0.0, snapRangeMultiplier: 4 },
dry_base: { snapValue: 1.75, snapRangeMultiplier: 4 },
};
// add an event listener for each slider
Object.keys(snapSettings).forEach(sliderName => {
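
The map above only declares per-slider snap points; the listener that applies them sits outside this hunk. A plausible sketch of the snapping rule, under the assumption that values within `step * snapRangeMultiplier` of `snapValue` get pulled onto it:

```ts
// Hypothetical helper: pull a slider value onto snapValue when it lands
// within step * snapRangeMultiplier of it (assumed semantics).
function applySnap(
  value: number,
  step: number,
  snap: { snapValue: number; snapRangeMultiplier: number }
): number {
  const snapRange = step * snap.snapRangeMultiplier;
  return Math.abs(value - snap.snapValue) <= snapRange ? snap.snapValue : value;
}

// e.g. with step 0.01: applySnap(0.98, 0.01, { snapValue: 1.0, snapRangeMultiplier: 4 }) === 1.0
```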

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -1,7 +1,10 @@
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}';
const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
if (maxItems == 0) {
return '';
}
if (minItems === 0 && maxItems === 1) {
return `${itemRule}?`;
}
@@ -529,7 +532,7 @@ export class SchemaConverter {
return joinSeq();
};
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
return this._addRule(name, "\"\\\"\" (" + toRule(transform()) + ") \"\\\"\" space")
}
_notStrings(strings) {
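
Two behavioral fixes sit in this hunk: `_buildRepetition` now returns an empty rule when `maxItems == 0`, and string-wrapped rules are parenthesized so alternatives bind correctly inside the quotes. A standalone restatement of the new edge case (simplified; only the branches shown above, with the remaining cases sketched):

```ts
import { strict as assert } from 'node:assert';

// Simplified restatement of the repetition builder's branches from the hunk.
function buildRepetition(itemRule: string, minItems: number, maxItems: number): string {
  if (maxItems === 0) return ''; // new early-return: zero repetitions allowed
  if (minItems === 0 && maxItems === 1) return `${itemRule}?`;
  return `${itemRule}{${minItems},${maxItems}}`; // remaining cases, sketched
}

assert.equal(buildRepetition('item', 0, 0), '');
assert.equal(buildRepetition('item', 0, 1), 'item?');
```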


@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="5">
</head>
<body>
<div id="loading">
The model is loading. Please wait.<br/>
The user interface will appear soon.
</div>
</body>
</html>


@@ -407,6 +407,9 @@ class SimpleChat {
if (curLine.startsWith("data:")) {
curLine = curLine.substring(5);
}
if (curLine.trim() === "[DONE]") {
break;
}
let curJson = JSON.parse(curLine);
console.debug("DBUG:SC:PART:Json:", curJson);
this.append_response(this.response_extract_stream(curJson, apiEP));


@@ -1,3 +1,4 @@
#pragma warning(disable : 4996)
#include "utils.hpp"
#include "common.h"
@@ -15,23 +16,8 @@
// Change JSON_ASSERT from assert() to GGML_ASSERT:
#define JSON_ASSERT GGML_ASSERT
#include "json.hpp"
// auto generated files (update with ./deps.sh)
#include "colorthemes.css.hpp"
#include "style.css.hpp"
#include "theme-beeninorder.css.hpp"
#include "theme-ketivah.css.hpp"
#include "theme-mangotango.css.hpp"
#include "theme-playground.css.hpp"
#include "theme-polarnight.css.hpp"
#include "theme-snowstorm.css.hpp"
#include "index.html.hpp"
#include "index-new.html.hpp"
#include "index.js.hpp"
#include "completion.js.hpp"
#include "system-prompts.js.hpp"
#include "prompt-formats.js.hpp"
#include "json-schema-to-grammar.mjs.hpp"
#include "index.html.gz.hpp"
#include "loading.html.hpp"
#include <atomic>
#include <chrono>
@@ -42,12 +28,14 @@
#include <thread>
#include <signal.h>
#include <memory>
#include <src/llama-impl.h>
using json = nlohmann::ordered_json;
bool server_verbose = false;
bool server_log_json = true;
enum stop_type {
STOP_TYPE_FULL,
STOP_TYPE_PARTIAL,
@@ -81,6 +69,44 @@ enum server_task_type {
SERVER_TASK_TYPE_SET_LORA,
};
struct result_timings {
int32_t prompt_n = -1;
double prompt_ms;
double prompt_per_token_ms;
double prompt_per_second;
int32_t predicted_n = -1;
double predicted_ms;
double predicted_per_token_ms;
double predicted_per_second;
// Optional speculative metrics - only included when > 0
int32_t draft_n = 0;
int32_t draft_n_accepted = 0;
json to_json() const {
json base = {
{"prompt_n", prompt_n},
{"prompt_ms", prompt_ms},
{"prompt_per_token_ms", prompt_per_token_ms},
{"prompt_per_second", prompt_per_second},
{"predicted_n", predicted_n},
{"predicted_ms", predicted_ms},
{"predicted_per_token_ms", predicted_per_token_ms},
{"predicted_per_second", predicted_per_second},
};
if (draft_n > 0) {
base["draft_n"] = draft_n;
base["draft_n_accepted"] = draft_n_accepted;
}
return base;
}
};
struct server_task {
int id = -1; // to be filled by server_queue
int id_multi = -1;
@@ -101,8 +127,13 @@ struct server_task_result {
bool stop;
bool error;
result_timings timings;
};
std::unordered_map<int, server_task_result > server_task_result_dict = {};
struct server_task_multi {
int id = -1;
@@ -120,6 +151,7 @@ struct slot_params {
std::vector<std::string> antiprompt;
bool timings_per_token = false;
json input_prefix;
json input_suffix;
};
@@ -262,6 +294,27 @@ struct server_slot {
};
}
result_timings get_timings() const {
result_timings timings;
timings.prompt_n = n_prompt_tokens_processed;
timings.prompt_ms = t_prompt_processing;
timings.prompt_per_token_ms = t_prompt_processing / n_prompt_tokens_processed;
timings.prompt_per_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed;
timings.predicted_n = n_decoded;
timings.predicted_ms = (ggml_time_us() - t_start_generation) / 1e3;
timings.predicted_per_token_ms = t_token_generation / n_decoded;
timings.predicted_per_second = 1e3 / t_token_generation * n_decoded;
//// Add speculative metrics
//if (n_draft_total > 0) {
// timings.draft_n = n_draft_total;
// timings.draft_n_accepted = n_draft_accepted;
//}
return timings;
}
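
This is the plumbing behind the new token/s display: `get_timings()` fills a `result_timings` that `to_json()` serializes into the response. A sketch of the object shape the webui consumes, with field names taken from `to_json()` above:

```ts
// Shape of the "timings" object attached to responses (per to_json() above).
interface Timings {
  prompt_n: number; // prompt tokens processed
  prompt_ms: number; // prompt processing time
  prompt_per_token_ms: number;
  prompt_per_second: number; // 1e3 / prompt_ms * prompt_n
  predicted_n: number; // tokens generated
  predicted_ms: number; // generation time
  predicted_per_token_ms: number;
  predicted_per_second: number; // the tokens/s figure shown in the webui
  draft_n?: number; // speculative decoding only (currently commented out above)
  draft_n_accepted?: number;
}

// e.g. 25 tokens in 573.016 ms => (25 / 573.016) * 1000 ≈ 43.6 t/s
```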
size_t find_stopping_strings(const std::string & text, const size_t last_token_size, const stop_type type) {
size_t stop_pos = std::string::npos;
@@ -903,7 +956,7 @@ struct server_context {
slot.oaicompat = false;
slot.oaicompat_model = "";
}
slot.params.timings_per_token = json_value(data, "timings_per_token", false);
slot.params.stream = json_value(data, "stream", false);
slot.params.cache_prompt = json_value(data, "cache_prompt", true);
slot.params.n_predict = json_value(data, "n_predict", json_value(data, "max_tokens", default_params.n_predict));
@@ -1423,8 +1476,13 @@ struct server_context {
res.data["oaicompat_token_ctr"] = slot.n_decoded;
res.data["model"] = slot.oaicompat_model;
}
queue_results.send(res);
// populate timings if this is final response or timings_per_token is enabled
if (slot.params.timings_per_token) {
//res.data["timings"] = slot.get_formated_timings();
res.timings = slot.get_timings();
}
server_task_result_dict[slot.id_task] = res;
queue_results.send(std::move(res));
}
void send_final_response(const server_slot & slot) {
@@ -2465,6 +2523,188 @@ struct server_context {
}
};
static json format_final_response_oaicompat(const json& request, json result, const std::string& completion_id, bool streaming = false) {
bool stopped_word = result.count("stopped_word") != 0;
bool stopped_eos = json_value(result, "stopped_eos", false);
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
std::string content = json_value(result, "content", std::string(""));
std::string finish_reason = "length";
if (stopped_word || stopped_eos) {
finish_reason = "stop";
}
json choices =
streaming ? json::array({ json{{"finish_reason", finish_reason},
{"index", 0},
{"delta", json::object()}} })
: json::array({ json{{"finish_reason", finish_reason},
{"index", 0},
{"message", json{{"content", content},
{"role", "assistant"}}}} });
std::time_t t = std::time(0);
json res = json{
{"choices", choices},
{"created", t},
{"model",
json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", streaming ? "chat.completion.chunk" : "chat.completion"},
{"usage", json {
{"completion_tokens", num_tokens_predicted},
{"prompt_tokens", num_prompt_tokens},
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
}},
{"id", completion_id}
};
if (server_verbose) {
res["__verbose"] = result;
}
if (result.contains("completion_probabilities")) {
res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array());
}
return res;
}
// return value is vector as there is one case where we might need to generate two responses
static std::vector<json> format_partial_response_oaicompat(server_task_result task_result, const std::string& completion_id) {
json result = task_result.data;
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
return std::vector<json>({ result });
}
bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
bool stopped_word = json_value(result, "stopped_word", false);
bool stopped_eos = json_value(result, "stopped_eos", false);
bool stopped_limit = json_value(result, "stopped_limit", false);
std::string content = json_value(result, "content", std::string(""));
std::string finish_reason;
if (stopped_word || stopped_eos) {
finish_reason = "stop";
}
if (stopped_limit) {
finish_reason = "length";
}
std::time_t t = std::time(0);
json choices;
if (!finish_reason.empty()) {
choices = json::array({ json{{"finish_reason", finish_reason},
{"index", 0},
{"delta", json::object()}} });
}
else {
if (first) {
if (content.empty()) {
choices = json::array({ json{{"finish_reason", nullptr},
{"index", 0},
{"delta", json{{"role", "assistant"}}}} });
}
else {
// We have to send this as two updates to conform to openai behavior
json initial_ret = json{ {"choices", json::array({json{
{"finish_reason", nullptr},
{"index", 0},
{"delta", json{
{"role", "assistant"}
}}}})},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"} };
json second_ret = json{
{"choices", json::array({json{{"finish_reason", nullptr},
{"index", 0},
{"delta", json{
{"content", content}}}
}})},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"} };
return std::vector<json>({ initial_ret, second_ret });
}
}
else {
// Some idiosyncrasy in task processing logic makes several trailing calls
// with empty content, we ignore these at the callee site.
if (content.empty()) {
return std::vector<json>({ json::object() });
}
choices = json::array({ json{
{"finish_reason", nullptr},
{"index", 0},
{"delta",
json{
{"content", content},
}},
} });
}
}
json ret = json{
{"choices", choices},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"}
};
if (server_task_result_dict.count(task_result.id) > 0)
{
ret.push_back({ "timings", server_task_result_dict[task_result.id].timings.to_json() });
}
//
if (!finish_reason.empty()) {
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
ret.push_back({ "usage", json {
{"completion_tokens", num_tokens_predicted},
{"prompt_tokens", num_prompt_tokens},
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
} });
}
return std::vector<json>({ ret });
}
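
One subtlety above: to match OpenAI streaming behavior, the first non-empty delta is sent as two chunks, one carrying the role and one the content. A sketch of the pair, with illustrative values:

```ts
// The two chunks emitted for the first non-empty delta (illustrative values);
// both also carry created, id, model, and object: 'chat.completion.chunk'.
const firstDeltaPair = [
  { choices: [{ finish_reason: null, index: 0, delta: { role: 'assistant' } }] },
  { choices: [{ finish_reason: null, index: 0, delta: { content: 'Hello' } }] },
];
```

Later chunks carry content-only deltas; the final chunk carries a `finish_reason` and a `usage` block, and any chunk gains a `timings` object when one was recorded for the task in `server_task_result_dict`.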
static json format_embeddings_response_oaicompat(const json& request, const json& embeddings) {
json data = json::array();
int i = 0;
for (auto& elem : embeddings) {
data.push_back(json{
{"embedding", json_value(elem, "embedding", json::array())},
{"index", i++},
{"object", "embedding"}
});
}
json res = json{
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", "list"},
{"usage", json {
{"prompt_tokens", 0},
{"total_tokens", 0}
}},
{"data", data}
};
return res;
}
static void log_server_request(const httplib::Request & req, const httplib::Response & res) {
// skip GH copilot requests when using default port
if (req.path == "/v1/health" || req.path == "/v1/completions") {
@@ -3121,6 +3361,7 @@ int main(int argc, char ** argv) {
res.set_content(models.dump(), "application/json; charset=utf-8");
};
const auto handle_chat_completions = [&ctx_server, &params, &res_error](const httplib::Request & req, httplib::Response & res) {
if (ctx_server.params.embedding) {
res_error(res, format_error_response("This server does not support chat completions. Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED));
@@ -3152,7 +3393,7 @@ int main(int argc, char ** argv) {
while (true) {
server_task_result result = ctx_server.queue_results.recv(id_task);
if (!result.error) {
std::vector<json> result_array = format_partial_response_oaicompat(result.data, completion_id);
std::vector<json> result_array = format_partial_response_oaicompat(result, completion_id);
for (auto it = result_array.begin(); it != result_array.end(); ++it) {
if (!it->empty()) {
@@ -3449,25 +3690,33 @@ int main(int argc, char ** argv) {
svr->set_base_dir(params.public_path);
}
// using embedded static files
svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
svr->Get("/json-schema-to-grammar.mjs", handle_static_file(json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
// add new-ui files
svr->Get("/colorthemes.css", handle_static_file(colorthemes_css, colorthemes_css_len, "text/css; charset=utf-8"));
svr->Get("/style.css", handle_static_file(style_css, style_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-beeninorder.css", handle_static_file(theme_beeninorder_css, theme_beeninorder_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-ketivah.css", handle_static_file(theme_ketivah_css, theme_ketivah_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-mangotango.css", handle_static_file(theme_mangotango_css, theme_mangotango_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-playground.css", handle_static_file(theme_playground_css, theme_playground_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-polarnight.css", handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8"));
svr->Get("/theme-snowstorm.css", handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8"));
svr->Get("/index-new.html", handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8"));
svr->Get("/system-prompts.js", handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8"));
svr->Get("/prompt-formats.js", handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8"));
{
// register static assets routes
if (!params.public_path.empty()) {
// Set the base directory for serving static files
bool is_found = svr->set_mount_point("/", params.public_path);
if (!is_found) {
GGML_ABORT("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
return 1;
}
}
else {
// using embedded static index.html
svr->Get("/", [](const httplib::Request& req, httplib::Response& res) {
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
res.set_content("Error: gzip is not supported by this browser", "text/plain");
}
else {
res.set_header("Content-Encoding", "gzip");
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
}
return false;
});
}
}
// register API routes
svr->Get ("/health", handle_health);
svr->Get ("/metrics", handle_metrics);


@@ -222,11 +222,9 @@
temperature: 0.7,
repeat_last_n: 256, // 0 = disable penalty, -1 = context size
repeat_penalty: 1.18, // 1.0 = disabled
penalize_nl: false,
top_k: 40, // <= 0 to use vocab size
top_p: 0.95, // 1.0 = disabled
min_p: 0.05, // 0 = disabled
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
@@ -780,7 +778,6 @@
${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
${BoolField({ label: "Penalize repetition of newlines", name: "penalize_nl", value: params.value.penalize_nl })}
${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
@@ -788,7 +785,6 @@
<details>
<summary>More options</summary>
<fieldset class="two">
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}


@@ -225,11 +225,9 @@
temperature: 0.7,
repeat_last_n: 256, // 0 = disable penalty, -1 = context size
repeat_penalty: 1.18, // 1.0 = disabled
penalize_nl: false,
top_k: 40, // <= 0 to use vocab size
top_p: 0.95, // 1.0 = disabled
min_p: 0.05, // 0 = disabled
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
@@ -783,7 +781,6 @@
${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
${BoolField({ label: "Penalize repetition of newlines", name: "penalize_nl", value: params.value.penalize_nl })}
${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
@@ -791,7 +788,6 @@
<details>
<summary>More options</summary>
<fieldset class="two">
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}


@@ -411,177 +411,7 @@ static json oaicompat_completion_params_parse(
return llama_params;
}
static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
bool stopped_word = result.count("stopped_word") != 0;
bool stopped_eos = json_value(result, "stopped_eos", false);
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
std::string content = json_value(result, "content", std::string(""));
std::string finish_reason = "length";
if (stopped_word || stopped_eos) {
finish_reason = "stop";
}
json choices =
streaming ? json::array({json{{"finish_reason", finish_reason},
{"index", 0},
{"delta", json::object()}}})
: json::array({json{{"finish_reason", finish_reason},
{"index", 0},
{"message", json{{"content", content},
{"role", "assistant"}}}}});
std::time_t t = std::time(0);
json res = json {
{"choices", choices},
{"created", t},
{"model",
json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", streaming ? "chat.completion.chunk" : "chat.completion"},
{"usage", json {
{"completion_tokens", num_tokens_predicted},
{"prompt_tokens", num_prompt_tokens},
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
}},
{"id", completion_id}
};
if (server_verbose) {
res["__verbose"] = result;
}
if (result.contains("completion_probabilities")) {
res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array());
}
return res;
}
// return value is vector as there is one case where we might need to generate two responses
static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
return std::vector<json>({result});
}
bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
bool stopped_word = json_value(result, "stopped_word", false);
bool stopped_eos = json_value(result, "stopped_eos", false);
bool stopped_limit = json_value(result, "stopped_limit", false);
std::string content = json_value(result, "content", std::string(""));
std::string finish_reason;
if (stopped_word || stopped_eos) {
finish_reason = "stop";
}
if (stopped_limit) {
finish_reason = "length";
}
std::time_t t = std::time(0);
json choices;
if (!finish_reason.empty()) {
choices = json::array({json{{"finish_reason", finish_reason},
{"index", 0},
{"delta", json::object()}}});
} else {
if (first) {
if (content.empty()) {
choices = json::array({json{{"finish_reason", nullptr},
{"index", 0},
{"delta", json{{"role", "assistant"}}}}});
} else {
// We have to send this as two updates to conform to openai behavior
json initial_ret = json{{"choices", json::array({json{
{"finish_reason", nullptr},
{"index", 0},
{"delta", json{
{"role", "assistant"}
}}}})},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"}};
json second_ret = json{
{"choices", json::array({json{{"finish_reason", nullptr},
{"index", 0},
{"delta", json{
{"content", content}}}
}})},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"}};
return std::vector<json>({initial_ret, second_ret});
}
} else {
// Some idiosyncrasy in task processing logic makes several trailing calls
// with empty content, we ignore these at the callee site.
if (content.empty()) {
return std::vector<json>({json::object()});
}
choices = json::array({json{
{"finish_reason", nullptr},
{"index", 0},
{"delta",
json{
{"content", content},
}},
}});
}
}
json ret = json {
{"choices", choices},
{"created", t},
{"id", completion_id},
{"model", modelname},
{"object", "chat.completion.chunk"}
};
if (!finish_reason.empty()) {
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
ret.push_back({"usage", json {
{"completion_tokens", num_tokens_predicted},
{"prompt_tokens", num_prompt_tokens},
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
}});
}
return std::vector<json>({ret});
}
static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
json data = json::array();
int i = 0;
for (auto & elem : embeddings) {
data.push_back(json{
{"embedding", json_value(elem, "embedding", json::array())},
{"index", i++},
{"object", "embedding"}
});
}
json res = json {
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", "list"},
{"usage", json {
{"prompt_tokens", 0},
{"total_tokens", 0}
}},
{"data", data}
};
return res;
}
static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
return json {


@@ -0,0 +1,10 @@
**/.vscode
**/.github
**/.git
**/.svn
**/.hg
**/node_modules
**/dist
**/build
*.config.js


@@ -0,0 +1,33 @@
{
"demo": true,
"id": "conv-1734086746930",
"lastModified": 1734087548943,
"messages": [
{
"id": 1734086764521,
"role": "user",
"content": "this is a demo conversation, used in dev mode"
},
{
"id": 1734087548327,
"role": "assistant",
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
"timings": {
"prompt_n": 1,
"prompt_ms": 28.923,
"predicted_n": 25,
"predicted_ms": 573.016
}
},
{
"id": 1734087548328,
"role": "user",
"content": "this is a demo conversation, used in dev mode"
},
{
"id": 1734087548329,
"role": "assistant",
"content": "Code block:\n```js\nconsole.log('hello world')\n```\n```sh\nls -la /dev\n```"
}
]
}

examples/server/webui/dist/index.html (vendored new file, 411 lines)

File diff suppressed because one or more lines are too long


@@ -0,0 +1,26 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
export default tseslint.config(
{ ignores: ['dist'] },
{
extends: [js.configs.recommended, ...tseslint.configs.recommended],
files: ['**/*.{ts,tsx}'],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
},
plugins: {
'react-hooks': reactHooks,
'react-refresh': reactRefresh,
},
rules: {
...reactHooks.configs.recommended.rules,
'react-refresh/only-export-components': 'off',
'@typescript-eslint/no-unused-vars': 'off',
},
},
)


@@ -0,0 +1,16 @@
<!doctype html>
<html>
<head>
<meta charset="UTF-8" />
<meta
name="viewport"
content="width=device-width, initial-scale=1, maximum-scale=1"
/>
<meta name="color-scheme" content="light dark" />
<title>🦙 llama.cpp - chat</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

examples/server/webui/package-lock.json (generated new file, 6255 lines)

File diff suppressed because it is too large


@@ -0,0 +1,62 @@
{
"name": "webui",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"format": "eslint . && prettier --write .",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@heroicons/react": "^2.2.0",
"@sec-ant/readable-stream": "^0.6.0",
"@tailwindcss/postcss": "^4.1.1",
"@tailwindcss/vite": "^4.1.1",
"@vscode/markdown-it-katex": "^1.1.1",
"autoprefixer": "^10.4.20",
"daisyui": "^5.0.12",
"dexie": "^4.0.11",
"highlight.js": "^11.10.0",
"katex": "^0.16.15",
"postcss": "^8.4.49",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-markdown": "^9.0.3",
"react-router": "^7.1.5",
"rehype-highlight": "^7.0.2",
"rehype-katex": "^7.0.1",
"remark-breaks": "^4.0.0",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0",
"tailwindcss": "^4.1.1",
"textlinestream": "^1.1.1",
"vite-plugin-singlefile": "^2.0.3"
},
"devDependencies": {
"@eslint/js": "^9.17.0",
"@types/markdown-it": "^14.1.2",
"@types/node": "^22.13.1",
"@types/react": "^18.3.18",
"@types/react-dom": "^18.3.5",
"@vitejs/plugin-react": "^4.3.4",
"eslint": "^9.17.0",
"eslint-plugin-react-hooks": "^5.0.0",
"eslint-plugin-react-refresh": "^0.4.16",
"globals": "^15.14.0",
"prettier": "^3.4.2",
"sass-embedded": "^1.83.4",
"typescript": "~5.6.2",
"typescript-eslint": "^8.18.2",
"vite": "^6.0.5"
},
"prettier": {
"trailingComma": "es5",
"tabWidth": 2,
"semi": true,
"singleQuote": true,
"bracketSameLine": false
}
}


@@ -0,0 +1,5 @@
export default {
plugins: {
"@tailwindcss/postcss": {},
},
}


@@ -0,0 +1,33 @@
{
"demo": true,
"id": "conv-1734086746930",
"lastModified": 1734087548943,
"messages": [
{
"id": 1734086764521,
"role": "user",
"content": "this is a demo conversation, used in dev mode"
},
{
"id": 1734087548327,
"role": "assistant",
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
"timings": {
"prompt_n": 1,
"prompt_ms": 28.923,
"predicted_n": 25,
"predicted_ms": 573.016
}
},
{
"id": 1734087548328,
"role": "user",
"content": "this is a demo conversation, used in dev mode"
},
{
"id": 1734087548329,
"role": "assistant",
"content": "Code block:\n```js\nconsole.log('hello world')\n```\n```sh\nls -la /dev\n```"
}
]
}


@@ -0,0 +1,47 @@
import { HashRouter, Outlet, Route, Routes } from 'react-router';
import Header from './components/Header';
import Sidebar from './components/Sidebar';
import { AppContextProvider, useAppContext } from './utils/app.context';
import ChatScreen from './components/ChatScreen';
import SettingDialog from './components/SettingDialog';
function App() {
return (
<HashRouter>
<div className="flex flex-row drawer lg:drawer-open">
<AppContextProvider>
<Routes>
<Route element={<AppLayout />}>
<Route path="/chat/:convId" element={<ChatScreen />} />
<Route path="*" element={<ChatScreen />} />
</Route>
</Routes>
</AppContextProvider>
</div>
</HashRouter>
);
}
function AppLayout() {
const { showSettings, setShowSettings } = useAppContext();
return (
<>
<Sidebar />
<div
className="drawer-content grow flex flex-col h-screen w-screen mx-auto px-4 overflow-auto bg-base-100"
id="main-scroll"
>
<Header />
<Outlet />
</div>
{
<SettingDialog
show={showSettings}
onClose={() => setShowSettings(false)}
/>
}
</>
);
}
export default App;


@@ -0,0 +1,92 @@
import daisyuiThemes from 'daisyui/theme/object';
import { isNumeric } from './utils/misc';
export const isDev = import.meta.env.MODE === 'development';
// constants
export const BASE_URL = new URL('.', document.baseURI).href
.toString()
.replace(/\/$/, '');
export const CONFIG_DEFAULT = {
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
// Do not use nested objects, keep it single level. Prefix the key if you need to group them.
apiKey: '',
systemMessage: 'You are a helpful assistant.',
showTokensPerSecond: false,
showThoughtInProgress: false,
excludeThoughtOnReq: true,
// make sure these default values are in sync with `common.h`
samplers: 'dkypmxt',
temperature: 0.8,
dynatemp_range: 0.0,
dynatemp_exponent: 1.0,
top_k: 40,
top_p: 0.95,
min_p: 0.05,
xtc_probability: 0.0,
xtc_threshold: 0.1,
typical_p: 1.0,
repeat_last_n: 64,
repeat_penalty: 1.0,
presence_penalty: 0.0,
frequency_penalty: 0.0,
dry_multiplier: 0.0,
dry_base: 1.75,
dry_allowed_length: 2,
dry_penalty_last_n: -1,
max_tokens: -1,
custom: '', // custom json-stringified object
// experimental features
pyIntepreterEnabled: false,
};
export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers:
'The order in which samplers are applied, in a simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
temperature:
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
dynatemp_range:
'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
dynatemp_exponent:
'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
top_k: 'Keeps only k top tokens.',
top_p:
'Limits tokens to those that together have a cumulative probability of at least p',
min_p:
'Limits tokens based on the minimum probability for a token to be considered, relative to the probability of the most likely token.',
xtc_probability:
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
xtc_threshold:
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
typical_p:
'Sorts and limits tokens based on the difference between log-probability and entropy.',
repeat_last_n: 'Last n tokens to consider for penalizing repetition',
repeat_penalty:
'Controls the repetition of token sequences in the generated text',
presence_penalty:
'Limits tokens based on whether they appear in the output or not.',
frequency_penalty:
'Limits tokens based on how often they appear in the output.',
dry_multiplier:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling multiplier.',
dry_base:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling base value.',
dry_allowed_length:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the allowed length for DRY sampling.',
dry_penalty_last_n:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.',
max_tokens: 'The maximum number of tokens per output.',
custom: '', // custom json-stringified object
};
// config keys having numeric value (i.e. temperature, top_k, top_p, etc)
export const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT)
.filter((e) => isNumeric(e[1]))
.map((e) => e[0]);
// list of themes supported by daisyui
export const THEMES = ['light', 'dark']
// make sure light & dark are always at the beginning
.concat(
Object.keys(daisyuiThemes).filter((t) => t !== 'light' && t !== 'dark')
);
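
CONFIG_DEFAULT is deliberately flat and typed by example (the comment above warns against nested objects and null defaults), with `custom` as the escape hatch for anything else. An illustrative override; whether `custom` is merged into the request body verbatim at send time is an assumption here:

```ts
// Illustrative config override; `custom` holds extra parameters as a
// JSON-stringified flat object (merge-at-send-time behavior assumed).
const myConfig = {
  ...CONFIG_DEFAULT,
  temperature: 0.6,
  showTokensPerSecond: true, // enables the new t/s readout in chat
  custom: JSON.stringify({ n_probs: 5 }), // hypothetical extra parameter
};
```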


@@ -0,0 +1,195 @@
import { useEffect, useState } from 'react';
import { useAppContext } from '../utils/app.context';
import { OpenInNewTab, XCloseButton } from '../utils/common';
import { CanvasType } from '../utils/types';
import { PlayIcon, StopIcon } from '@heroicons/react/24/outline';
import StorageUtils from '../utils/storage';
const canInterrupt = typeof SharedArrayBuffer === 'function';
// adapted from https://pyodide.org/en/stable/usage/webworker.html
const WORKER_CODE = `
importScripts("https://cdn.jsdelivr.net/pyodide/v0.27.2/full/pyodide.js");
let stdOutAndErr = [];
let pyodideReadyPromise = loadPyodide({
stdout: (data) => stdOutAndErr.push(data),
stderr: (data) => stdOutAndErr.push(data),
});
let alreadySetBuff = false;
self.onmessage = async (event) => {
stdOutAndErr = [];
// make sure loading is done
const pyodide = await pyodideReadyPromise;
const { id, python, context, interruptBuffer } = event.data;
if (interruptBuffer && !alreadySetBuff) {
pyodide.setInterruptBuffer(interruptBuffer);
alreadySetBuff = true;
}
// Now load any packages we need, run the code, and send the result back.
await pyodide.loadPackagesFromImports(python);
// make a Python dictionary with the data from content
const dict = pyodide.globals.get("dict");
const globals = dict(Object.entries(context));
try {
self.postMessage({ id, running: true });
// Execute the python code in this context
const result = pyodide.runPython(python, { globals });
self.postMessage({ result, id, stdOutAndErr });
} catch (error) {
self.postMessage({ error: error.message, id });
}
interruptBuffer[0] = 0;
};
`;
let worker: Worker;
const interruptBuffer = canInterrupt
? new Uint8Array(new SharedArrayBuffer(1))
: null;
const startWorker = () => {
if (!worker) {
worker = new Worker(
URL.createObjectURL(new Blob([WORKER_CODE], { type: 'text/javascript' }))
);
}
};
if (StorageUtils.getConfig().pyIntepreterEnabled) {
startWorker();
}
const runCodeInWorker = (
pyCode: string,
callbackRunning: () => void
): {
donePromise: Promise<string>;
interrupt: () => void;
} => {
startWorker();
const id = Math.random() * 1e8;
const context = {};
if (interruptBuffer) {
interruptBuffer[0] = 0;
}
const donePromise = new Promise<string>((resolve) => {
worker.onmessage = (event) => {
const { error, stdOutAndErr, running } = event.data;
if (id !== event.data.id) return;
if (running) {
callbackRunning();
return;
} else if (error) {
resolve(error.toString());
} else {
resolve(stdOutAndErr.join('\n'));
}
};
worker.postMessage({ id, python: pyCode, context, interruptBuffer });
});
const interrupt = () => {
console.log('Interrupting...');
console.trace();
if (interruptBuffer) {
interruptBuffer[0] = 2;
}
};
return { donePromise, interrupt };
};
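
Interruption works by sharing a one-byte SharedArrayBuffer with the worker; writing 2 (SIGINT) into it makes pyodide raise KeyboardInterrupt, which is why `canInterrupt` gates the Stop button. A usage sketch with the names defined above (timing is illustrative):

```ts
// Usage sketch for runCodeInWorker: run a busy loop, interrupt after 5 s.
const { donePromise, interrupt } = runCodeInWorker('while True: pass', () =>
  console.log('running...')
);
const timer = setTimeout(interrupt, 5000); // writes 2 (SIGINT) into the buffer
donePromise.then((out) => {
  clearTimeout(timer);
  console.log(out); // stdout/stderr, or the KeyboardInterrupt message
});
```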
export default function CanvasPyInterpreter() {
const { canvasData, setCanvasData } = useAppContext();
const [code, setCode] = useState(canvasData?.content ?? ''); // copy to avoid direct mutation
const [running, setRunning] = useState(false);
const [output, setOutput] = useState('');
const [interruptFn, setInterruptFn] = useState<() => void>();
const [showStopBtn, setShowStopBtn] = useState(false);
const runCode = async (pycode: string) => {
interruptFn?.();
setRunning(true);
setOutput('Loading Pyodide...');
const { donePromise, interrupt } = runCodeInWorker(pycode, () => {
setOutput('Running...');
setShowStopBtn(canInterrupt);
});
setInterruptFn(() => interrupt);
const out = await donePromise;
setOutput(out);
setRunning(false);
setShowStopBtn(false);
};
// run code on mount
useEffect(() => {
setCode(canvasData?.content ?? '');
runCode(canvasData?.content ?? '');
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [canvasData?.content]);
if (canvasData?.type !== CanvasType.PY_INTERPRETER) {
return null;
}
return (
<div className="card bg-base-200 w-full h-full shadow-xl">
<div className="card-body">
<div className="flex justify-between items-center mb-4">
<span className="text-lg font-bold">Python Interpreter</span>
<XCloseButton
className="bg-base-100"
onClick={() => setCanvasData(null)}
/>
</div>
<div className="grid grid-rows-3 gap-4 h-full">
<textarea
className="textarea textarea-bordered w-full h-full font-mono"
value={code}
onChange={(e) => setCode(e.target.value)}
></textarea>
<div className="font-mono flex flex-col row-span-2">
<div className="flex items-center mb-2">
<button
className="btn btn-sm bg-base-100"
onClick={() => runCode(code)}
disabled={running}
>
<PlayIcon className="h-6 w-6" /> Run
</button>
{showStopBtn && (
<button
className="btn btn-sm bg-base-100 ml-2"
onClick={() => interruptFn?.()}
>
<StopIcon className="h-6 w-6" /> Stop
</button>
)}
<span className="grow text-right text-xs">
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/issues/11762">
Report a bug
</OpenInNewTab>
</span>
</div>
<textarea
className="textarea textarea-bordered h-full dark-color"
value={output}
readOnly
></textarea>
</div>
</div>
</div>
</div>
);
}


@@ -0,0 +1,296 @@
import { useMemo, useState } from 'react';
import { useAppContext } from '../utils/app.context';
import { Message, PendingMessage } from '../utils/types';
import { classNames } from '../utils/misc';
import MarkdownDisplay, { CopyButton } from './MarkdownDisplay';
import { ChevronLeftIcon, ChevronRightIcon } from '@heroicons/react/24/outline';
interface SplitMessage {
content: PendingMessage['content'];
thought?: string;
isThinking?: boolean;
}
export default function ChatMessage({
msg,
siblingLeafNodeIds,
siblingCurrIdx,
id,
onRegenerateMessage,
onEditMessage,
onChangeSibling,
isPending,
}: {
msg: Message | PendingMessage;
siblingLeafNodeIds: Message['id'][];
siblingCurrIdx: number;
id?: string;
onRegenerateMessage(msg: Message): void;
onEditMessage(msg: Message, content: string): void;
onChangeSibling(sibling: Message['id']): void;
isPending?: boolean;
}) {
const { viewingChat, config } = useAppContext();
const [editingContent, setEditingContent] = useState<string | null>(null);
const timings = useMemo(
() =>
msg.timings
? {
...msg.timings,
prompt_per_second:
(msg.timings.prompt_n / msg.timings.prompt_ms) * 1000,
predicted_per_second:
(msg.timings.predicted_n / msg.timings.predicted_ms) * 1000,
}
: null,
[msg.timings]
);
const nextSibling = siblingLeafNodeIds[siblingCurrIdx + 1];
const prevSibling = siblingLeafNodeIds[siblingCurrIdx - 1];
// for reasoning model, we split the message into content and thought
// TODO: implement this as remark/rehype plugin in the future
const { content, thought, isThinking }: SplitMessage = useMemo(() => {
if (msg.content === null || msg.role !== 'assistant') {
return { content: msg.content };
}
let actualContent = '';
let thought = '';
let isThinking = false;
let thinkSplit = msg.content.split('<think>', 2);
actualContent += thinkSplit[0];
while (thinkSplit[1] !== undefined) {
// <think> tag found
thinkSplit = thinkSplit[1].split('</think>', 2);
thought += thinkSplit[0];
isThinking = true;
if (thinkSplit[1] !== undefined) {
// </think> closing tag found
isThinking = false;
thinkSplit = thinkSplit[1].split('<think>', 2);
actualContent += thinkSplit[0];
}
}
return { content: actualContent, thought, isThinking };
}, [msg]);
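
The loop above peels alternating `<think>`/`</think>` segments out of the assistant message, keeping `isThinking` true while a tag is still open. A standalone restatement with worked examples (mirrors the hunk exactly; illustrative only):

```ts
// Standalone restatement of the <think> splitting above (illustrative).
function splitThink(s: string): {
  content: string;
  thought: string;
  isThinking: boolean;
} {
  let content = '';
  let thought = '';
  let isThinking = false;
  let parts = s.split('<think>', 2);
  content += parts[0];
  while (parts[1] !== undefined) {
    parts = parts[1].split('</think>', 2); // <think> tag found
    thought += parts[0];
    isThinking = true;
    if (parts[1] !== undefined) {
      isThinking = false; // </think> closing tag found
      parts = parts[1].split('<think>', 2);
      content += parts[0];
    }
  }
  return { content, thought, isThinking };
}

// splitThink('<think>plan</think>Hi')  => { content: 'Hi', thought: 'plan', isThinking: false }
// splitThink('<think>still reasoning') => { content: '', thought: 'still reasoning', isThinking: true }
```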
if (!viewingChat) return null;
return (
<div className="group" id={id}>
<div
className={classNames({
chat: true,
'chat-start': msg.role !== 'user',
'chat-end': msg.role === 'user',
})}
>
<div
className={classNames({
'chat-bubble markdown': true,
'chat-bubble-base-300': msg.role !== 'user',
})}
>
{/* textarea for editing message */}
{editingContent !== null && (
<>
<textarea
dir="auto"
className="textarea textarea-bordered bg-base-100 text-base-content max-w-2xl w-[calc(90vw-8em)] h-24"
value={editingContent}
onChange={(e) => setEditingContent(e.target.value)}
></textarea>
<br />
<button
className="btn btn-ghost mt-2 mr-2"
onClick={() => setEditingContent(null)}
>
Cancel
</button>
<button
className="btn mt-2"
onClick={() => {
if (msg.content !== null) {
setEditingContent(null);
onEditMessage(msg as Message, editingContent);
}
}}
>
Submit
</button>
</>
)}
{/* not editing content, render message */}
{editingContent === null && (
<>
{content === null ? (
<>
{/* show loading dots for pending message */}
<span className="loading loading-dots loading-md"></span>
</>
) : (
<>
{/* render message as markdown */}
<div dir="auto">
{thought && (
<details
className="collapse bg-base-200 collapse-arrow mb-4"
open={isThinking && config.showThoughtInProgress}
>
<summary className="collapse-title">
{isPending && isThinking ? (
<span>
<span
className="loading loading-spinner loading-md mr-2"
style={{ verticalAlign: 'middle' }}
></span>
<b>Thinking</b>
</span>
) : (
<b>Thought Process</b>
)}
</summary>
<div className="collapse-content">
<MarkdownDisplay
content={thought}
isGenerating={isPending}
/>
</div>
</details>
)}
{msg.extra && msg.extra.length > 0 && (
<details
className={classNames({
'collapse collapse-arrow mb-4 bg-base-200': true,
'bg-opacity-10': msg.role !== 'assistant',
})}
>
<summary className="collapse-title">
Extra content
</summary>
<div className="collapse-content">
{msg.extra.map(
(extra, i) =>
extra.type === 'textFile' ? (
<div key={extra.name}>
<b>{extra.name}</b>
<pre>{extra.content}</pre>
</div>
) : extra.type === 'context' ? (
<div key={i}>
<pre>{extra.content}</pre>
</div>
) : null // TODO: support other extra types
)}
</div>
</details>
)}
<MarkdownDisplay
content={content}
isGenerating={isPending}
/>
</div>
</>
)}
{/* render timings if enabled */}
{timings && config.showTokensPerSecond && (
<div className="dropdown dropdown-hover dropdown-top mt-2">
<div
tabIndex={0}
role="button"
className="cursor-pointer font-semibold text-sm opacity-60"
>
Speed: {timings.predicted_per_second.toFixed(1)} t/s
</div>
<div className="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
<b>Prompt</b>
<br />- Tokens: {timings.prompt_n}
<br />- Time: {timings.prompt_ms} ms
<br />- Speed: {timings.prompt_per_second.toFixed(1)} t/s
<br />
<b>Generation</b>
<br />- Tokens: {timings.predicted_n}
<br />- Time: {timings.predicted_ms} ms
<br />- Speed: {timings.predicted_per_second.toFixed(1)} t/s
<br />
</div>
</div>
)}
</>
)}
</div>
</div>
{/* actions for each message */}
{msg.content !== null && (
<div
className={classNames({
'flex items-center gap-2 mx-4 mt-2 mb-2': true,
'flex-row-reverse': msg.role === 'user',
})}
>
{siblingLeafNodeIds && siblingLeafNodeIds.length > 1 && (
<div className="flex gap-1 items-center opacity-60 text-sm">
<button
className={classNames({
'btn btn-sm btn-ghost p-1': true,
'opacity-20': !prevSibling,
})}
onClick={() => prevSibling && onChangeSibling(prevSibling)}
>
<ChevronLeftIcon className="h-4 w-4" />
</button>
<span>
{siblingCurrIdx + 1} / {siblingLeafNodeIds.length}
</span>
<button
className={classNames({
'btn btn-sm btn-ghost p-1': true,
'opacity-20': !nextSibling,
})}
onClick={() => nextSibling && onChangeSibling(nextSibling)}
>
<ChevronRightIcon className="h-4 w-4" />
</button>
</div>
)}
{/* user message */}
{msg.role === 'user' && (
<button
className="badge btn-mini show-on-hover"
onClick={() => setEditingContent(msg.content)}
disabled={msg.content === null}
>
Edit
</button>
)}
{/* assistant message */}
{msg.role === 'assistant' && (
<>
{!isPending && (
<button
className="badge btn-mini show-on-hover mr-2"
onClick={() => {
if (msg.content !== null) {
onRegenerateMessage(msg as Message);
}
}}
disabled={msg.content === null}
>
🔄 Regenerate
</button>
)}
</>
)}
<CopyButton
className="badge btn-mini show-on-hover mr-2"
content={msg.content}
/>
</div>
)}
</div>
);
}


@@ -0,0 +1,296 @@
import { useEffect, useMemo, useState } from 'react';
import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context';
import ChatMessage from './ChatMessage';
import { CanvasType, Message, PendingMessage } from '../utils/types';
import { classNames, cleanCurrentUrl, throttle } from '../utils/misc';
import CanvasPyInterpreter from './CanvasPyInterpreter';
import StorageUtils from '../utils/storage';
import { useVSCodeContext } from '../utils/llama-vscode';
import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts';
/**
* A message display is a message node with additional information for rendering.
* For example, siblings of a message node are stored via their last descendant (leaf) node.
*/
export interface MessageDisplay {
msg: Message | PendingMessage;
siblingLeafNodeIds: Message['id'][];
siblingCurrIdx: number;
isPending?: boolean;
}
/**
* If the current URL contains "?m=...", prefill the message input with the value.
* If the current URL contains "?q=...", prefill and SEND the message.
*/
const prefilledMsg = {
content() {
const url = new URL(window.location.href);
return url.searchParams.get('m') ?? url.searchParams.get('q') ?? '';
},
shouldSend() {
const url = new URL(window.location.href);
return url.searchParams.has('q');
},
clear() {
cleanCurrentUrl(['m', 'q']);
},
};
function getListMessageDisplay(
msgs: Readonly<Message[]>,
leafNodeId: Message['id']
): MessageDisplay[] {
const currNodes = StorageUtils.filterByLeafNodeId(msgs, leafNodeId, true);
const res: MessageDisplay[] = [];
const nodeMap = new Map<Message['id'], Message>();
for (const msg of msgs) {
nodeMap.set(msg.id, msg);
}
// find leaf node from a message node
const findLeafNode = (msgId: Message['id']): Message['id'] => {
let currNode: Message | undefined = nodeMap.get(msgId);
while (currNode) {
if (currNode.children.length === 0) break;
currNode = nodeMap.get(currNode.children.at(-1) ?? -1);
}
return currNode?.id ?? -1;
};
// traverse the current nodes
for (const msg of currNodes) {
const parentNode = nodeMap.get(msg.parent ?? -1);
if (!parentNode) continue;
const siblings = parentNode.children;
if (msg.type !== 'root') {
res.push({
msg,
siblingLeafNodeIds: siblings.map(findLeafNode),
siblingCurrIdx: siblings.indexOf(msg.id),
});
}
}
return res;
}
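
To make the sibling bookkeeping above concrete: conversations are stored as a tree, and each sibling branch is addressed by its leaf. A hypothetical tree and the display it produces:

```ts
// Hypothetical conversation tree in the shape the code above walks:
// node 1 was answered twice (a regeneration), so node 1 has two children.
const demoMsgs = [
  { id: 0, type: 'root', parent: null, children: [1] },
  { id: 1, role: 'user', parent: 0, children: [2, 3] },
  { id: 2, role: 'assistant', parent: 1, children: [] }, // branch A, leaf 2
  { id: 3, role: 'assistant', parent: 1, children: [4] }, // branch B
  { id: 4, role: 'user', parent: 3, children: [] }, // leaf 4
];
// getListMessageDisplay(demoMsgs, 4) would render nodes [1, 3, 4];
// node 3 gets siblingLeafNodeIds [2, 4] (via findLeafNode) and siblingCurrIdx 1,
// which drives the left/right sibling arrows in ChatMessage.
```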
const scrollToBottom = throttle(
(requiresNearBottom: boolean, delay: number = 80) => {
const mainScrollElem = document.getElementById('main-scroll');
if (!mainScrollElem) return;
const spaceToBottom =
mainScrollElem.scrollHeight -
mainScrollElem.scrollTop -
mainScrollElem.clientHeight;
if (!requiresNearBottom || spaceToBottom < 50) {
setTimeout(
() => mainScrollElem.scrollTo({ top: mainScrollElem.scrollHeight }),
delay
);
}
},
80
);
export default function ChatScreen() {
const {
viewingChat,
sendMessage,
isGenerating,
stopGenerating,
pendingMessages,
canvasData,
replaceMessageAndGenerate,
} = useAppContext();
const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content());
const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
// TODO: improve this when we have "upload file" feature
const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
// keep track of leaf node for rendering
const [currNodeId, setCurrNodeId] = useState<number>(-1);
const messages: MessageDisplay[] = useMemo(() => {
if (!viewingChat) return [];
else return getListMessageDisplay(viewingChat.messages, currNodeId);
}, [currNodeId, viewingChat]);
const currConvId = viewingChat?.conv.id ?? null;
const pendingMsg: PendingMessage | undefined =
pendingMessages[currConvId ?? ''];
useEffect(() => {
// reset to latest node when conversation changes
setCurrNodeId(-1);
// scroll to bottom when conversation changes
scrollToBottom(false, 1);
}, [currConvId]);
const onChunk: CallbackGeneratedChunk = (currLeafNodeId?: Message['id']) => {
if (currLeafNodeId) {
setCurrNodeId(currLeafNodeId);
}
scrollToBottom(true);
};
const sendNewMessage = async () => {
const lastInpMsg = textarea.value();
if (lastInpMsg.trim().length === 0 || isGenerating(currConvId ?? ''))
return;
textarea.setValue('');
scrollToBottom(false);
setCurrNodeId(-1);
// get the last message node
const lastMsgNodeId = messages.at(-1)?.msg.id ?? null;
if (
!(await sendMessage(
currConvId,
lastMsgNodeId,
lastInpMsg,
currExtra,
onChunk
))
) {
// restore the input message if failed
textarea.setValue(lastInpMsg);
}
    // the extra context from llama-vscode is one-shot; clear it after the send attempt
clearExtraContext();
};
const handleEditMessage = async (msg: Message, content: string) => {
if (!viewingChat) return;
setCurrNodeId(msg.id);
scrollToBottom(false);
await replaceMessageAndGenerate(
viewingChat.conv.id,
msg.parent,
content,
msg.extra,
onChunk
);
setCurrNodeId(-1);
scrollToBottom(false);
};
const handleRegenerateMessage = async (msg: Message) => {
if (!viewingChat) return;
setCurrNodeId(msg.parent);
scrollToBottom(false);
await replaceMessageAndGenerate(
viewingChat.conv.id,
msg.parent,
null,
msg.extra,
onChunk
);
setCurrNodeId(-1);
scrollToBottom(false);
};
const hasCanvas = !!canvasData;
useEffect(() => {
if (prefilledMsg.shouldSend()) {
// send the prefilled message if needed
sendNewMessage();
} else {
// otherwise, focus on the input
textarea.focus();
}
prefilledMsg.clear();
// no need to keep track of sendNewMessage
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [textarea.ref]);
// due to some timing issues of StorageUtils.appendMsg(), we need to make sure the pendingMsg is not duplicated upon rendering (i.e. appears once in the saved conversation and once in the pendingMsg)
const pendingMsgDisplay: MessageDisplay[] =
pendingMsg && messages.at(-1)?.msg.id !== pendingMsg.id
? [
{
msg: pendingMsg,
siblingLeafNodeIds: [],
siblingCurrIdx: 0,
isPending: true,
},
]
: [];
return (
<div
className={classNames({
'grid lg:gap-8 grow transition-[300ms]': true,
'grid-cols-[1fr_0fr] lg:grid-cols-[1fr_1fr]': hasCanvas, // adapted for mobile
'grid-cols-[1fr_0fr]': !hasCanvas,
})}
>
<div
className={classNames({
'flex flex-col w-full max-w-[900px] mx-auto': true,
'hidden lg:flex': hasCanvas, // adapted for mobile
flex: !hasCanvas,
})}
>
{/* chat messages */}
<div id="messages-list" className="grow">
<div className="mt-auto flex justify-center">
{/* placeholder to shift the message to the bottom */}
{viewingChat ? '' : 'Send a message to start'}
</div>
{[...messages, ...pendingMsgDisplay].map((msg) => (
<ChatMessage
key={msg.msg.id}
msg={msg.msg}
siblingLeafNodeIds={msg.siblingLeafNodeIds}
siblingCurrIdx={msg.siblingCurrIdx}
onRegenerateMessage={handleRegenerateMessage}
onEditMessage={handleEditMessage}
onChangeSibling={setCurrNodeId}
/>
))}
</div>
{/* chat input */}
<div className="flex flex-row items-end pt-8 pb-6 sticky bottom-0 bg-base-100">
<textarea
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
className="textarea textarea-bordered w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
placeholder="Type a message (Shift+Enter to add a new line)"
ref={textarea.ref}
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
onKeyDown={(e) => {
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
sendNewMessage();
}
}}
id="msg-input"
dir="auto"
// Set a base height of 2 rows for mobile views
// On lg+ screens, the hook will calculate and set the initial height anyway
rows={2}
></textarea>
{isGenerating(currConvId ?? '') ? (
<button
className="btn btn-neutral ml-2"
onClick={() => stopGenerating(currConvId ?? '')}
>
Stop
</button>
) : (
<button className="btn btn-primary ml-2" onClick={sendNewMessage}>
Send
</button>
)}
</div>
</div>
<div className="w-full sticky top-[7em] h-[calc(100vh-9em)]">
{canvasData?.type === CanvasType.PY_INTERPRETER && (
<CanvasPyInterpreter />
)}
</div>
</div>
);
}

View File

@@ -0,0 +1,178 @@
import { useEffect, useState } from 'react';
import StorageUtils from '../utils/storage';
import { useAppContext } from '../utils/app.context';
import { classNames } from '../utils/misc';
import daisyuiThemes from 'daisyui/theme/object';
import { THEMES } from '../Config';
import { useNavigate } from 'react-router';
export default function Header() {
const navigate = useNavigate();
const [selectedTheme, setSelectedTheme] = useState(StorageUtils.getTheme());
const { setShowSettings } = useAppContext();
const setTheme = (theme: string) => {
StorageUtils.setTheme(theme);
setSelectedTheme(theme);
};
useEffect(() => {
document.body.setAttribute('data-theme', selectedTheme);
document.body.setAttribute(
'data-color-scheme',
daisyuiThemes[selectedTheme]?.['color-scheme'] ?? 'auto'
);
}, [selectedTheme]);
const { isGenerating, viewingChat } = useAppContext();
const isCurrConvGenerating = isGenerating(viewingChat?.conv.id ?? '');
const removeConversation = () => {
if (isCurrConvGenerating || !viewingChat) return;
const convId = viewingChat?.conv.id;
    if (window.confirm('Are you sure you want to delete this conversation?')) {
StorageUtils.remove(convId);
navigate('/');
}
};
const downloadConversation = () => {
if (isCurrConvGenerating || !viewingChat) return;
const convId = viewingChat?.conv.id;
const conversationJson = JSON.stringify(viewingChat, null, 2);
const blob = new Blob([conversationJson], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `conversation_${convId}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
};
return (
<div className="flex flex-row items-center pt-6 pb-6 sticky top-0 z-10 bg-base-100">
{/* open sidebar button */}
<label htmlFor="toggle-drawer" className="btn btn-ghost lg:hidden">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-list"
viewBox="0 0 16 16"
>
<path
fillRule="evenodd"
d="M2.5 12a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5"
/>
</svg>
</label>
<div className="grow text-2xl font-bold ml-2">llama.cpp</div>
{/* action buttons (top right) */}
<div className="flex items-center">
{viewingChat && (
<div className="dropdown dropdown-end">
{/* "..." button */}
<button
tabIndex={0}
role="button"
className="btn m-1"
disabled={isCurrConvGenerating}
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-three-dots-vertical"
viewBox="0 0 16 16"
>
<path d="M9.5 13a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0" />
</svg>
</button>
{/* dropdown menu */}
<ul
tabIndex={0}
className="dropdown-content menu bg-base-100 rounded-box z-[1] w-52 p-2 shadow"
>
<li onClick={downloadConversation}>
<a>Download</a>
</li>
<li className="text-error" onClick={removeConversation}>
<a>Delete</a>
</li>
</ul>
</div>
)}
<div className="tooltip tooltip-bottom" data-tip="Settings">
<button className="btn" onClick={() => setShowSettings(true)}>
{/* settings button */}
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-gear"
viewBox="0 0 16 16"
>
<path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492M5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0" />
<path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52zm-2.633.283c.246-.835 1.428-.835 1.674 0l.094.319a1.873 1.873 0 0 0 2.693 1.115l.291-.16c.764-.415 1.6.42 1.184 1.185l-.159.292a1.873 1.873 0 0 0 1.116 2.692l.318.094c.835.246.835 1.428 0 1.674l-.319.094a1.873 1.873 0 0 0-1.115 2.693l.16.291c.415.764-.42 1.6-1.185 1.184l-.291-.159a1.873 1.873 0 0 0-2.693 1.116l-.094.318c-.246.835-1.428.835-1.674 0l-.094-.319a1.873 1.873 0 0 0-2.692-1.115l-.292.16c-.764.415-1.6-.42-1.184-1.185l.159-.291A1.873 1.873 0 0 0 1.945 8.93l-.319-.094c-.835-.246-.835-1.428 0-1.674l.319-.094A1.873 1.873 0 0 0 3.06 4.377l-.16-.292c-.415-.764.42-1.6 1.185-1.184l.292.159a1.873 1.873 0 0 0 2.692-1.115z" />
</svg>
</button>
</div>
{/* theme controller is copied from https://daisyui.com/components/theme-controller/ */}
<div className="tooltip tooltip-bottom" data-tip="Themes">
<div className="dropdown dropdown-end dropdown-bottom">
<div tabIndex={0} role="button" className="btn m-1">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-palette2"
viewBox="0 0 16 16"
>
<path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z" />
</svg>
</div>
<ul
tabIndex={0}
className="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto"
>
<li>
<button
className={classNames({
'btn btn-sm btn-block btn-ghost justify-start': true,
'btn-active': selectedTheme === 'auto',
})}
onClick={() => setTheme('auto')}
>
auto
</button>
</li>
{THEMES.map((theme) => (
<li key={theme}>
<input
type="radio"
name="theme-dropdown"
className="theme-controller btn btn-sm btn-block btn-ghost justify-start"
aria-label={theme}
value={theme}
checked={selectedTheme === theme}
onChange={(e) => e.target.checked && setTheme(theme)}
/>
</li>
))}
</ul>
</div>
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,310 @@
import React, { useMemo, useState } from 'react';
import Markdown, { ExtraProps } from 'react-markdown';
import remarkGfm from 'remark-gfm';
import rehypeHightlight from 'rehype-highlight';
import rehypeKatex from 'rehype-katex';
import remarkMath from 'remark-math';
import remarkBreaks from 'remark-breaks';
import 'katex/dist/katex.min.css';
import { classNames, copyStr } from '../utils/misc';
import { ElementContent, Root } from 'hast';
import { visit } from 'unist-util-visit';
import { useAppContext } from '../utils/app.context';
import { CanvasType } from '../utils/types';
export default function MarkdownDisplay({
content,
isGenerating,
}: {
content: string;
isGenerating?: boolean;
}) {
const preprocessedContent = useMemo(
() => preprocessLaTeX(content),
[content]
);
return (
<Markdown
remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
rehypePlugins={[rehypeHightlight, rehypeKatex, rehypeCustomCopyButton]}
components={{
button: (props) => (
<CodeBlockButtons
{...props}
isGenerating={isGenerating}
origContent={preprocessedContent}
/>
),
// note: do not use "pre", "p" or other basic html elements here, it will cause the node to re-render when the message is being generated (this should be a bug with react-markdown, not sure how to fix it)
}}
>
{preprocessedContent}
</Markdown>
);
}
const CodeBlockButtons: React.ElementType<
React.ClassAttributes<HTMLButtonElement> &
React.HTMLAttributes<HTMLButtonElement> &
ExtraProps & { origContent: string; isGenerating?: boolean }
> = ({ node, origContent, isGenerating }) => {
const { config } = useAppContext();
const startOffset = node?.position?.start.offset ?? 0;
const endOffset = node?.position?.end.offset ?? 0;
const copiedContent = useMemo(
() =>
origContent
.substring(startOffset, endOffset)
.replace(/^```[^\n]+\n/g, '')
.replace(/```$/g, ''),
[origContent, startOffset, endOffset]
);
const codeLanguage = useMemo(
() =>
origContent
.substring(startOffset, startOffset + 10)
.match(/^```([^\n]+)\n/)?.[1] ?? '',
[origContent, startOffset]
);
const canRunCode =
!isGenerating &&
config.pyIntepreterEnabled &&
codeLanguage.startsWith('py');
return (
<div
className={classNames({
'text-right sticky top-[7em] mb-2 mr-2 h-0': true,
'display-none': !node?.position,
})}
>
<CopyButton className="badge btn-mini" content={copiedContent} />
{canRunCode && (
<RunPyCodeButton
className="badge btn-mini ml-2"
content={copiedContent}
/>
)}
</div>
);
};
export const CopyButton = ({
content,
className,
}: {
content: string;
className?: string;
}) => {
const [copied, setCopied] = useState(false);
return (
<button
className={className}
onClick={() => {
copyStr(content);
setCopied(true);
}}
onMouseLeave={() => setCopied(false)}
>
{copied ? 'Copied!' : '📋 Copy'}
</button>
);
};
export const RunPyCodeButton = ({
content,
className,
}: {
content: string;
className?: string;
}) => {
const { setCanvasData } = useAppContext();
return (
<>
<button
className={className}
onClick={() =>
setCanvasData({
type: CanvasType.PY_INTERPRETER,
content,
})
}
>
Run
</button>
</>
);
};
/**
* This injects the "button" element before each "pre" element.
* The actual button will be replaced with a react component in the MarkdownDisplay.
* We don't replace "pre" node directly because it will cause the node to re-render, which causes this bug: https://github.com/ggerganov/llama.cpp/issues/9608
*/
function rehypeCustomCopyButton() {
return function (tree: Root) {
visit(tree, 'element', function (node) {
if (node.tagName === 'pre' && !node.properties.visited) {
const preNode = { ...node };
// replace current node
preNode.properties.visited = 'true';
node.tagName = 'div';
node.properties = {};
// add node for button
const btnNode: ElementContent = {
type: 'element',
tagName: 'button',
properties: {},
children: [],
position: node.position,
};
node.children = [btnNode, preNode];
}
});
};
}
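// Illustrative HAST rewrite performed above (shapes simplified):
//   <pre><code class="language-py">...</code></pre>
// becomes
//   <div><button/><pre visited="true"><code class="language-py">...</code></pre></div>
// The placeholder <button> is then rendered as <CodeBlockButtons/> through the
// components={{ button }} mapping, leaving the original <pre> node untouched.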
/**
* The part below is copied and adapted from:
* https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
* (MIT License)
*/
// Regex to check if the processed content contains any potential LaTeX patterns
const containsLatexRegex =
/\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/;
// Regex for inline and block LaTeX expressions
const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g');
const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs');
// Function to restore code blocks
const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
return content.replace(
/<<CODE_BLOCK_(\d+)>>/g,
(_, index) => codeBlocks[index]
);
};
// Regex to identify code blocks and inline code
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
export const processLaTeX = (_content: string) => {
let content = _content;
// Temporarily replace code blocks and inline code with placeholders
const codeBlocks: string[] = [];
let index = 0;
content = content.replace(codeBlockRegex, (match) => {
codeBlocks[index] = match;
return `<<CODE_BLOCK_${index++}>>`;
});
// Escape dollar signs followed by a digit or space and digit
let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');
// If no LaTeX patterns are found, restore code blocks and return the processed content
if (!containsLatexRegex.test(processedContent)) {
return restoreCodeBlocks(processedContent, codeBlocks);
}
// Convert LaTeX expressions to a markdown compatible format
processedContent = processedContent
.replace(inlineLatex, (_: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
.replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX
// Restore code blocks
return restoreCodeBlocks(processedContent, codeBlocks);
};
/**
* Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
*
* @param content The input string containing LaTeX expressions.
* @returns The processed string with replaced delimiters and escaped characters.
*/
export function preprocessLaTeX(content: string): string {
// Step 1: Protect code blocks
const codeBlocks: string[] = [];
content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
codeBlocks.push(code);
return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
});
// Step 2: Protect existing LaTeX expressions
const latexExpressions: string[] = [];
// Protect block math ($$...$$), \[...\], and \(...\) as before.
content = content.replace(
/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
(match) => {
latexExpressions.push(match);
return `<<LATEX_${latexExpressions.length - 1}>>`;
}
);
// Protect inline math ($...$) only if it does NOT match a currency pattern.
// We assume a currency pattern is one where the inner content is purely numeric (with optional decimals).
content = content.replace(/\$([^$]+)\$/g, (match, inner) => {
if (/^\s*\d+(?:\.\d+)?\s*$/.test(inner)) {
// This looks like a currency value (e.g. "$123" or "$12.34"),
// so don't protect it.
return match;
} else {
// Otherwise, treat it as a LaTeX expression.
latexExpressions.push(match);
return `<<LATEX_${latexExpressions.length - 1}>>`;
}
});
// Step 3: Escape dollar signs that are likely currency indicators.
// (Now that inline math is protected, this will only escape dollars not already protected)
content = content.replace(/\$(?=\d)/g, '\\$');
// Step 4: Restore LaTeX expressions
content = content.replace(
/<<LATEX_(\d+)>>/g,
(_, index) => latexExpressions[parseInt(index)]
);
// Step 5: Restore code blocks
content = content.replace(
/<<CODE_BLOCK_(\d+)>>/g,
(_, index) => codeBlocks[parseInt(index)]
);
// Step 6: Apply additional escaping functions
content = escapeBrackets(content);
content = escapeMhchem(content);
return content;
}
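// Worked example (illustrative, raw text shown):
//   input:  Price: $5, math: \(x^2\), code: `$a$`
//   output: Price: \$5, math: $x^2$, code: `$a$`
// The inline code is protected, the lone "$5" is escaped so KaTeX does not
// treat it as math, and escapeBrackets() below rewrites \(x^2\) into $x^2$.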
export function escapeBrackets(text: string): string {
const pattern =
/(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
return text.replace(
pattern,
(
match: string,
codeBlock: string | undefined,
squareBracket: string | undefined,
roundBracket: string | undefined
): string => {
if (codeBlock != null) {
return codeBlock;
} else if (squareBracket != null) {
return `$$${squareBracket}$$`;
} else if (roundBracket != null) {
return `$${roundBracket}$`;
}
return match;
}
);
}
export function escapeMhchem(text: string) {
return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
}
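// e.g. (raw text) "$\ce{H2O}$" becomes "$\\ce{H2O}$", so the backslash survives
// the markdown pass before KaTeX's mhchem extension parses it (this escaping is
// adapted from LibreChat, as noted above).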

View File

@@ -0,0 +1,536 @@
import { useState } from 'react';
import { useAppContext } from '../utils/app.context';
import { CONFIG_DEFAULT, CONFIG_INFO } from '../Config';
import { isDev } from '../Config';
import StorageUtils from '../utils/storage';
import { classNames, isBoolean, isNumeric, isString } from '../utils/misc';
import {
BeakerIcon,
ChatBubbleOvalLeftEllipsisIcon,
Cog6ToothIcon,
FunnelIcon,
HandRaisedIcon,
SquaresPlusIcon,
} from '@heroicons/react/24/outline';
import { OpenInNewTab } from '../utils/common';
type SettKey = keyof typeof CONFIG_DEFAULT;
const BASIC_KEYS: SettKey[] = [
'temperature',
'top_k',
'top_p',
'min_p',
'max_tokens',
];
const SAMPLER_KEYS: SettKey[] = [
'dynatemp_range',
'dynatemp_exponent',
'typical_p',
'xtc_probability',
'xtc_threshold',
];
const PENALTY_KEYS: SettKey[] = [
'repeat_last_n',
'repeat_penalty',
'presence_penalty',
'frequency_penalty',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_penalty_last_n',
];
enum SettingInputType {
SHORT_INPUT,
LONG_INPUT,
CHECKBOX,
CUSTOM,
}
interface SettingFieldInput {
type: Exclude<SettingInputType, SettingInputType.CUSTOM>;
label: string | React.ReactElement;
help?: string | React.ReactElement;
key: SettKey;
}
interface SettingFieldCustom {
type: SettingInputType.CUSTOM;
key: SettKey;
component:
| string
| React.FC<{
value: string | boolean | number;
onChange: (value: string) => void;
}>;
}
interface SettingSection {
title: React.ReactElement;
fields: (SettingFieldInput | SettingFieldCustom)[];
}
const ICON_CLASSNAME = 'w-4 h-4 mr-1 inline';
const SETTING_SECTIONS: SettingSection[] = [
{
title: (
<>
<Cog6ToothIcon className={ICON_CLASSNAME} />
General
</>
),
fields: [
{
type: SettingInputType.SHORT_INPUT,
label: 'API Key',
key: 'apiKey',
},
{
type: SettingInputType.LONG_INPUT,
label: 'System Message (will be disabled if left empty)',
key: 'systemMessage',
},
...BASIC_KEYS.map(
(key) =>
({
type: SettingInputType.SHORT_INPUT,
label: key,
key,
}) as SettingFieldInput
),
],
},
{
title: (
<>
<FunnelIcon className={ICON_CLASSNAME} />
Samplers
</>
),
fields: [
{
type: SettingInputType.SHORT_INPUT,
label: 'Samplers queue',
key: 'samplers',
},
...SAMPLER_KEYS.map(
(key) =>
({
type: SettingInputType.SHORT_INPUT,
label: key,
key,
}) as SettingFieldInput
),
],
},
{
title: (
<>
<HandRaisedIcon className={ICON_CLASSNAME} />
Penalties
</>
),
fields: PENALTY_KEYS.map((key) => ({
type: SettingInputType.SHORT_INPUT,
label: key,
key,
})),
},
{
title: (
<>
<ChatBubbleOvalLeftEllipsisIcon className={ICON_CLASSNAME} />
Reasoning
</>
),
fields: [
{
type: SettingInputType.CHECKBOX,
label: 'Expand thought process by default when generating messages',
key: 'showThoughtInProgress',
},
{
type: SettingInputType.CHECKBOX,
label:
'Exclude thought process when sending requests to API (Recommended for DeepSeek-R1)',
key: 'excludeThoughtOnReq',
},
],
},
{
title: (
<>
<SquaresPlusIcon className={ICON_CLASSNAME} />
Advanced
</>
),
fields: [
{
type: SettingInputType.CUSTOM,
key: 'custom', // dummy key, won't be used
component: () => {
const debugImportDemoConv = async () => {
const res = await fetch('/demo-conversation.json');
const demoConv = await res.json();
StorageUtils.remove(demoConv.id);
for (const msg of demoConv.messages) {
StorageUtils.appendMsg(demoConv.id, msg);
}
};
return (
<button className="btn" onClick={debugImportDemoConv}>
(debug) Import demo conversation
</button>
);
},
},
{
type: SettingInputType.CHECKBOX,
label: 'Show tokens per second',
key: 'showTokensPerSecond',
},
{
type: SettingInputType.LONG_INPUT,
label: (
<>
Custom JSON config (For more info, refer to{' '}
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md">
server documentation
</OpenInNewTab>
)
</>
),
key: 'custom',
},
],
},
{
title: (
<>
<BeakerIcon className={ICON_CLASSNAME} />
Experimental
</>
),
fields: [
{
type: SettingInputType.CUSTOM,
key: 'custom', // dummy key, won't be used
component: () => (
<>
<p className="mb-8">
Experimental features are not guaranteed to work correctly.
<br />
<br />
If you encounter any problems, create a{' '}
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/issues/new?template=019-bug-misc.yml">
Bug (misc.)
</OpenInNewTab>{' '}
            report on GitHub. Please also specify <b>webui/experimental</b> in
            the report title and include screenshots.
<br />
<br />
            Some features may require packages downloaded from a CDN, so they
            need an internet connection.
</p>
</>
),
},
{
type: SettingInputType.CHECKBOX,
label: (
<>
<b>Enable Python interpreter</b>
<br />
<small className="text-xs">
This feature uses{' '}
<OpenInNewTab href="https://pyodide.org">pyodide</OpenInNewTab>,
downloaded from CDN. To use this feature, ask the LLM to generate
Python code inside a Markdown code block. You will see a "Run"
button on the code block, near the "Copy" button.
</small>
</>
),
key: 'pyIntepreterEnabled',
},
],
},
];
export default function SettingDialog({
show,
onClose,
}: {
show: boolean;
onClose: () => void;
}) {
const { config, saveConfig } = useAppContext();
const [sectionIdx, setSectionIdx] = useState(0);
// clone the config object to prevent direct mutation
const [localConfig, setLocalConfig] = useState<typeof CONFIG_DEFAULT>(
JSON.parse(JSON.stringify(config))
);
const resetConfig = () => {
if (window.confirm('Are you sure you want to reset all settings?')) {
setLocalConfig(CONFIG_DEFAULT);
}
};
const handleSave = () => {
// copy the local config to prevent direct mutation
const newConfig: typeof CONFIG_DEFAULT = JSON.parse(
JSON.stringify(localConfig)
);
// validate the config
for (const key in newConfig) {
const value = newConfig[key as SettKey];
const mustBeBoolean = isBoolean(CONFIG_DEFAULT[key as SettKey]);
const mustBeString = isString(CONFIG_DEFAULT[key as SettKey]);
const mustBeNumeric = isNumeric(CONFIG_DEFAULT[key as SettKey]);
if (mustBeString) {
if (!isString(value)) {
alert(`Value for ${key} must be string`);
return;
}
} else if (mustBeNumeric) {
const trimmedValue = value.toString().trim();
const numVal = Number(trimmedValue);
if (isNaN(numVal) || !isNumeric(numVal) || trimmedValue.length === 0) {
alert(`Value for ${key} must be numeric`);
return;
}
// force conversion to number
// @ts-expect-error this is safe
newConfig[key] = numVal;
} else if (mustBeBoolean) {
if (!isBoolean(value)) {
alert(`Value for ${key} must be boolean`);
return;
}
} else {
console.error(`Unknown default type for key ${key}`);
}
}
if (isDev) console.log('Saving config', newConfig);
saveConfig(newConfig);
onClose();
};
const onChange = (key: SettKey) => (value: string | boolean) => {
// note: we do not perform validation here, because we may get incomplete value as user is still typing it
setLocalConfig({ ...localConfig, [key]: value });
};
return (
<dialog className={classNames({ modal: true, 'modal-open': show })}>
<div className="modal-box w-11/12 max-w-3xl">
<h3 className="text-lg font-bold mb-6">Settings</h3>
<div className="flex flex-col md:flex-row h-[calc(90vh-12rem)]">
{/* Left panel, showing sections - Desktop version */}
<div className="hidden md:flex flex-col items-stretch pr-4 mr-4 border-r-2 border-base-200">
{SETTING_SECTIONS.map((section, idx) => (
<div
key={idx}
className={classNames({
'btn btn-ghost justify-start font-normal w-44 mb-1': true,
'btn-active': sectionIdx === idx,
})}
onClick={() => setSectionIdx(idx)}
dir="auto"
>
{section.title}
</div>
))}
</div>
{/* Left panel, showing sections - Mobile version */}
<div className="md:hidden flex flex-row gap-2 mb-4">
<details className="dropdown">
<summary className="btn bt-sm w-full m-1">
{SETTING_SECTIONS[sectionIdx].title}
</summary>
<ul className="menu dropdown-content bg-base-100 rounded-box z-[1] w-52 p-2 shadow">
{SETTING_SECTIONS.map((section, idx) => (
<div
key={idx}
className={classNames({
'btn btn-ghost justify-start font-normal': true,
'btn-active': sectionIdx === idx,
})}
onClick={() => setSectionIdx(idx)}
dir="auto"
>
{section.title}
</div>
))}
</ul>
</details>
</div>
{/* Right panel, showing setting fields */}
<div className="grow overflow-y-auto px-4">
{SETTING_SECTIONS[sectionIdx].fields.map((field, idx) => {
const key = `${sectionIdx}-${idx}`;
if (field.type === SettingInputType.SHORT_INPUT) {
return (
<SettingsModalShortInput
key={key}
configKey={field.key}
value={localConfig[field.key]}
onChange={onChange(field.key)}
label={field.label as string}
/>
);
} else if (field.type === SettingInputType.LONG_INPUT) {
return (
<SettingsModalLongInput
key={key}
configKey={field.key}
value={localConfig[field.key].toString()}
onChange={onChange(field.key)}
label={field.label as string}
/>
);
} else if (field.type === SettingInputType.CHECKBOX) {
return (
<SettingsModalCheckbox
key={key}
configKey={field.key}
value={!!localConfig[field.key]}
onChange={onChange(field.key)}
label={field.label as string}
/>
);
} else if (field.type === SettingInputType.CUSTOM) {
return (
<div key={key} className="mb-2">
{typeof field.component === 'string'
? field.component
: field.component({
value: localConfig[field.key],
onChange: onChange(field.key),
})}
</div>
);
}
})}
<p className="opacity-40 mb-6 text-sm mt-8">
              Settings are saved in the browser's localStorage
</p>
</div>
</div>
<div className="modal-action">
<button className="btn" onClick={resetConfig}>
Reset to default
</button>
<button className="btn" onClick={onClose}>
Close
</button>
<button className="btn btn-primary" onClick={handleSave}>
Save
</button>
</div>
</div>
</dialog>
);
}
function SettingsModalLongInput({
configKey,
value,
onChange,
label,
}: {
configKey: SettKey;
value: string;
onChange: (value: string) => void;
label?: string;
}) {
return (
<label className="form-control mb-2">
<div className="label inline">{label || configKey}</div>
<textarea
className="textarea textarea-bordered h-24"
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
</label>
);
}
function SettingsModalShortInput({
configKey,
value,
onChange,
label,
}: {
configKey: SettKey;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
value: any;
onChange: (value: string) => void;
label?: string;
}) {
const helpMsg = CONFIG_INFO[configKey];
return (
<>
{/* on mobile, we simply show the help message here */}
{helpMsg && (
<div className="block md:hidden mb-1">
<b>{label || configKey}</b>
<br />
<p className="text-xs">{helpMsg}</p>
</div>
)}
<label className="input input-bordered join-item grow flex items-center gap-2 mb-2">
<div className="dropdown dropdown-hover">
<div tabIndex={0} role="button" className="font-bold hidden md:block">
{label || configKey}
</div>
{helpMsg && (
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
{helpMsg}
</div>
)}
</div>
<input
type="text"
className="grow"
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
</label>
</>
);
}
function SettingsModalCheckbox({
configKey,
value,
onChange,
label,
}: {
configKey: SettKey;
value: boolean;
onChange: (value: boolean) => void;
label: string;
}) {
return (
<div className="flex flex-row items-center mb-2">
<input
type="checkbox"
className="toggle"
checked={value}
onChange={(e) => onChange(e.target.checked)}
/>
<span className="ml-4">{label || configKey}</span>
</div>
);
}

View File

@@ -0,0 +1,96 @@
import { useEffect, useState } from 'react';
import { classNames } from '../utils/misc';
import { Conversation } from '../utils/types';
import StorageUtils from '../utils/storage';
import { useNavigate, useParams } from 'react-router';
export default function Sidebar() {
const params = useParams();
const navigate = useNavigate();
const [conversations, setConversations] = useState<Conversation[]>([]);
const [currConv, setCurrConv] = useState<Conversation | null>(null);
useEffect(() => {
StorageUtils.getOneConversation(params.convId ?? '').then(setCurrConv);
}, [params.convId]);
useEffect(() => {
const handleConversationChange = async () => {
setConversations(await StorageUtils.getAllConversations());
};
StorageUtils.onConversationChanged(handleConversationChange);
handleConversationChange();
return () => {
StorageUtils.offConversationChanged(handleConversationChange);
};
}, []);
return (
<>
<input
id="toggle-drawer"
type="checkbox"
className="drawer-toggle"
defaultChecked
/>
<div className="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
<label
htmlFor="toggle-drawer"
aria-label="close sidebar"
className="drawer-overlay"
></label>
<div className="flex flex-col bg-base-200 min-h-full max-w-64 py-4 px-4">
<div className="flex flex-row items-center justify-between mb-4 mt-4">
<h2 className="font-bold ml-4">Conversations</h2>
{/* close sidebar button */}
<label htmlFor="toggle-drawer" className="btn btn-ghost lg:hidden">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-arrow-bar-left"
viewBox="0 0 16 16"
>
<path
fillRule="evenodd"
d="M12.5 15a.5.5 0 0 1-.5-.5v-13a.5.5 0 0 1 1 0v13a.5.5 0 0 1-.5.5M10 8a.5.5 0 0 1-.5.5H3.707l2.147 2.146a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 1 1 .708.708L3.707 7.5H9.5a.5.5 0 0 1 .5.5"
/>
</svg>
</label>
</div>
{/* list of conversations */}
<div
className={classNames({
'btn btn-ghost justify-start': true,
'btn-active': !currConv,
})}
onClick={() => navigate('/')}
>
+ New conversation
</div>
{conversations.map((conv) => (
<div
key={conv.id}
className={classNames({
'btn btn-ghost justify-start font-normal': true,
'btn-active': conv.id === currConv?.id,
})}
onClick={() => navigate(`/chat/${conv.id}`)}
dir="auto"
>
<span className="truncate">{conv.name}</span>
</div>
))}
<div className="text-center text-xs opacity-40 mt-auto mx-4">
            Conversations are saved to the browser's IndexedDB
</div>
</div>
</div>
</>
);
}

View File

@@ -0,0 +1,96 @@
import { useEffect, useRef, useState, useCallback } from 'react';
// Media Query for detecting "large" screens (matching Tailwind's lg: breakpoint)
const LARGE_SCREEN_MQ = '(min-width: 1024px)';
// Calculates and sets the textarea height based on its scrollHeight
const adjustTextareaHeight = (textarea: HTMLTextAreaElement | null) => {
if (!textarea) return;
// Only perform auto-sizing on large screens
if (!window.matchMedia(LARGE_SCREEN_MQ).matches) {
// On small screens, reset inline height and max-height styles.
// This allows CSS (e.g., `rows` attribute or classes) to control the height,
// and enables manual resizing if `resize-vertical` is set.
textarea.style.height = ''; // Use 'auto' or '' to reset
textarea.style.maxHeight = '';
return; // Do not adjust height programmatically on small screens
}
const computedStyle = window.getComputedStyle(textarea);
// Get the max-height specified by CSS (e.g., from `lg:max-h-48`)
const currentMaxHeight = computedStyle.maxHeight;
// Temporarily remove max-height to allow scrollHeight to be calculated correctly
textarea.style.maxHeight = 'none';
// Reset height to 'auto' to measure the actual scrollHeight needed
textarea.style.height = 'auto';
// Set the height to the calculated scrollHeight
textarea.style.height = `${textarea.scrollHeight}px`;
// Re-apply the original max-height from CSS to enforce the limit
textarea.style.maxHeight = currentMaxHeight;
};
// Interface describing the API returned by the hook
export interface ChatTextareaApi {
value: () => string;
setValue: (value: string) => void;
focus: () => void;
ref: React.RefObject<HTMLTextAreaElement>;
onInput: (event: React.FormEvent<HTMLTextAreaElement>) => void; // Input handler
}
// This is a workaround to prevent the textarea from re-rendering when the inner content changes
// See https://github.com/ggml-org/llama.cpp/pull/12299
// combined now with auto-sizing logic.
export function useChatTextarea(initValue: string): ChatTextareaApi {
const [savedInitValue, setSavedInitValue] = useState<string>(initValue);
const textareaRef = useRef<HTMLTextAreaElement>(null);
// Effect to set initial value and height on mount or when initValue changes
useEffect(() => {
const textarea = textareaRef.current;
if (textarea) {
if (typeof savedInitValue === 'string' && savedInitValue.length > 0) {
textarea.value = savedInitValue;
// Call adjustTextareaHeight - it will check screen size internally
setTimeout(() => adjustTextareaHeight(textarea), 0);
setSavedInitValue(''); // Reset after applying
} else {
// Adjust height even if there's no initial value (for initial render)
setTimeout(() => adjustTextareaHeight(textarea), 0);
}
}
}, [textareaRef, savedInitValue]); // Depend on ref and savedInitValue
const handleInput = useCallback(
(event: React.FormEvent<HTMLTextAreaElement>) => {
// Call adjustTextareaHeight on every input - it will decide whether to act
adjustTextareaHeight(event.currentTarget);
},
[]
);
return {
// Method to get the current value directly from the textarea
value: () => {
return textareaRef.current?.value ?? '';
},
// Method to programmatically set the value and trigger height adjustment
setValue: (value: string) => {
const textarea = textareaRef.current;
if (textarea) {
textarea.value = value;
// Call adjustTextareaHeight - it will check screen size internally
setTimeout(() => adjustTextareaHeight(textarea), 0);
}
},
focus: () => {
if (textareaRef.current) {
textareaRef.current.focus();
}
},
ref: textareaRef,
onInput: handleInput,
};
}
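// Minimal usage sketch (names are illustrative):
//   const api = useChatTextarea('');
//   <textarea ref={api.ref} onInput={api.onInput} rows={2} />
//   api.setValue('draft'); // also re-runs the lg-only autosize
//   api.value();           // -> 'draft', read without triggering a re-render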

View File

@@ -0,0 +1,78 @@
@use 'sass:meta';
@use 'tailwindcss';
@plugin 'daisyui' {
themes: all;
}
html {
scrollbar-gutter: auto;
}
.markdown {
h1,
h2,
h3,
h4,
h5,
h6,
ul,
ol,
li {
all: revert;
}
pre {
@apply whitespace-pre-wrap rounded-lg p-2;
border: 1px solid currentColor;
}
p {
@apply mb-2;
}
/* TODO: fix markdown table */
}
.show-on-hover {
@apply md:opacity-0 md:group-hover:opacity-100;
}
.btn-mini {
@apply cursor-pointer hover:shadow-md;
}
.chat-screen {
max-width: 900px;
}
.chat-bubble-base-300 {
--tw-bg-opacity: 1;
--tw-text-opacity: 1;
@apply bg-base-300 text-base-content;
}
/* Highlight.js */
[data-color-scheme='light'] {
@include meta.load-css('highlight.js/styles/stackoverflow-light');
.dark-color {
@apply bg-base-content text-base-100;
}
}
[data-color-scheme='dark'] {
@include meta.load-css('highlight.js/styles/stackoverflow-dark');
}
[data-color-scheme='auto'] {
@media (prefers-color-scheme: light) {
@include meta.load-css('highlight.js/styles/stackoverflow-light');
.dark-color {
@apply bg-base-content text-base-100;
}
}
@media (prefers-color-scheme: dark) {
@include meta.load-css('highlight.js/styles/stackoverflow-dark');
}
}
.hljs {
background: transparent !important;
padding: 0.5em !important;
}
.katex-display {
margin: 0 0 !important;
}

View File

@@ -0,0 +1,10 @@
import { StrictMode } from 'react';
import { createRoot } from 'react-dom/client';
import './index.scss';
import App from './App.tsx';
createRoot(document.getElementById('root')!).render(
<StrictMode>
<App />
</StrictMode>
);

View File

@@ -0,0 +1,393 @@
import React, { createContext, useContext, useEffect, useState } from 'react';
import {
APIMessage,
CanvasData,
Conversation,
Message,
PendingMessage,
ViewingChat,
} from './types';
import StorageUtils from './storage';
import {
filterThoughtFromMsgs,
normalizeMsgsForAPI,
getSSEStreamAsync,
} from './misc';
import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
import { matchPath, useLocation, useNavigate } from 'react-router';
// Monotonically increasing offset added to Date.now() so that ids minted in the
// same millisecond (e.g. a user message and its pending reply) remain unique.
class Timer {
  static timercount = 1;
}
interface AppContextValue {
// conversations and messages
viewingChat: ViewingChat | null;
pendingMessages: Record<Conversation['id'], PendingMessage>;
isGenerating: (convId: string) => boolean;
sendMessage: (
convId: string | null,
leafNodeId: Message['id'] | null,
content: string,
extra: Message['extra'],
onChunk: CallbackGeneratedChunk
) => Promise<boolean>;
stopGenerating: (convId: string) => void;
replaceMessageAndGenerate: (
convId: string,
parentNodeId: Message['id'], // the parent node of the message to be replaced
content: string | null,
extra: Message['extra'],
onChunk: CallbackGeneratedChunk
) => Promise<void>;
// canvas
canvasData: CanvasData | null;
setCanvasData: (data: CanvasData | null) => void;
// config
config: typeof CONFIG_DEFAULT;
saveConfig: (config: typeof CONFIG_DEFAULT) => void;
showSettings: boolean;
setShowSettings: (show: boolean) => void;
}
// this callback is used for scrolling to the bottom of the chat and switching to the last node
export type CallbackGeneratedChunk = (currLeafNodeId?: Message['id']) => void;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const AppContext = createContext<AppContextValue>({} as any);
const getViewingChat = async (convId: string): Promise<ViewingChat | null> => {
const conv = await StorageUtils.getOneConversation(convId);
if (!conv) return null;
return {
conv: conv,
// all messages from all branches, not filtered by last node
messages: await StorageUtils.getMessages(convId),
};
};
export const AppContextProvider = ({
children,
}: {
children: React.ReactElement;
}) => {
const { pathname } = useLocation();
const navigate = useNavigate();
const params = matchPath('/chat/:convId', pathname);
const convId = params?.params?.convId;
const [viewingChat, setViewingChat] = useState<ViewingChat | null>(null);
const [pendingMessages, setPendingMessages] = useState<
Record<Conversation['id'], PendingMessage>
>({});
const [aborts, setAborts] = useState<
Record<Conversation['id'], AbortController>
>({});
const [config, setConfig] = useState(StorageUtils.getConfig());
const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
const [showSettings, setShowSettings] = useState(false);
// handle change when the convId from URL is changed
useEffect(() => {
// also reset the canvas data
setCanvasData(null);
const handleConversationChange = async (changedConvId: string) => {
if (changedConvId !== convId) return;
setViewingChat(await getViewingChat(changedConvId));
};
StorageUtils.onConversationChanged(handleConversationChange);
getViewingChat(convId ?? '').then(setViewingChat);
return () => {
StorageUtils.offConversationChanged(handleConversationChange);
};
}, [convId]);
const setPending = (convId: string, pendingMsg: PendingMessage | null) => {
// if pendingMsg is null, remove the key from the object
if (!pendingMsg) {
setPendingMessages((prev) => {
const newState = { ...prev };
delete newState[convId];
return newState;
});
} else {
setPendingMessages((prev) => ({ ...prev, [convId]: pendingMsg }));
}
};
const setAbort = (convId: string, controller: AbortController | null) => {
if (!controller) {
setAborts((prev) => {
const newState = { ...prev };
delete newState[convId];
return newState;
});
} else {
setAborts((prev) => ({ ...prev, [convId]: controller }));
}
};
////////////////////////////////////////////////////////////////////////
// public functions
const isGenerating = (convId: string) => !!pendingMessages[convId];
const generateMessage = async (
convId: string,
leafNodeId: Message['id'],
onChunk: CallbackGeneratedChunk
) => {
if (isGenerating(convId)) return;
const config = StorageUtils.getConfig();
const currConversation = await StorageUtils.getOneConversation(convId);
if (!currConversation) {
throw new Error('Current conversation is not found');
}
const currMessages = StorageUtils.filterByLeafNodeId(
await StorageUtils.getMessages(convId),
leafNodeId,
false
);
const abortController = new AbortController();
setAbort(convId, abortController);
if (!currMessages) {
throw new Error('Current messages are not found');
}
const pendingId = Date.now() + Timer.timercount + 1;
    Timer.timercount += 2;
let pendingMsg: PendingMessage = {
id: pendingId,
convId,
type: 'text',
timestamp: pendingId,
role: 'assistant',
content: null,
parent: leafNodeId,
children: [],
};
setPending(convId, pendingMsg);
try {
// prepare messages for API
let messages: APIMessage[] = [
...(config.systemMessage.length === 0
? []
: [{ role: 'system', content: config.systemMessage } as APIMessage]),
...normalizeMsgsForAPI(currMessages),
];
if (config.excludeThoughtOnReq) {
messages = filterThoughtFromMsgs(messages);
}
if (isDev) console.log({ messages });
// prepare params
const params = {
messages,
stream: true,
cache_prompt: true,
samplers: config.samplers,
temperature: config.temperature,
dynatemp_range: config.dynatemp_range,
dynatemp_exponent: config.dynatemp_exponent,
top_k: config.top_k,
top_p: config.top_p,
min_p: config.min_p,
typical_p: config.typical_p,
xtc_probability: config.xtc_probability,
xtc_threshold: config.xtc_threshold,
repeat_last_n: config.repeat_last_n,
repeat_penalty: config.repeat_penalty,
presence_penalty: config.presence_penalty,
frequency_penalty: config.frequency_penalty,
dry_multiplier: config.dry_multiplier,
dry_base: config.dry_base,
dry_allowed_length: config.dry_allowed_length,
dry_penalty_last_n: config.dry_penalty_last_n,
max_tokens: config.max_tokens,
timings_per_token: !!config.showTokensPerSecond,
...(config.custom.length ? JSON.parse(config.custom) : {}),
};
// send request
const fetchResponse = await fetch(`${BASE_URL}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(config.apiKey
? { Authorization: `Bearer ${config.apiKey}` }
: {}),
},
body: JSON.stringify(params),
signal: abortController.signal,
});
if (fetchResponse.status !== 200) {
const body = await fetchResponse.json();
throw new Error(body?.error?.message || 'Unknown error');
}
const chunks = getSSEStreamAsync(fetchResponse);
for await (const chunk of chunks) {
// const stop = chunk.stop;
if (chunk.error) {
throw new Error(chunk.error?.message || 'Unknown error');
}
const addedContent = chunk.choices[0].delta.content;
const lastContent = pendingMsg.content || '';
if (addedContent) {
pendingMsg = {
...pendingMsg,
content: lastContent + addedContent,
};
}
const timings = chunk.timings;
if (timings && config.showTokensPerSecond) {
// only extract what's really needed, to save some space
pendingMsg.timings = {
prompt_n: timings.prompt_n,
prompt_ms: timings.prompt_ms,
predicted_n: timings.predicted_n,
predicted_ms: timings.predicted_ms,
};
}
setPending(convId, pendingMsg);
onChunk(); // don't need to switch node for pending message
}
} catch (err) {
setPending(convId, null);
if ((err as Error).name === 'AbortError') {
// user stopped the generation via stopGeneration() function
// we can safely ignore this error
} else {
console.error(err);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
alert((err as any)?.message ?? 'Unknown error');
//throw err; // rethrow
}
}
finally {
if (pendingMsg.content !== null) {
await StorageUtils.appendMsg(pendingMsg as Message, leafNodeId);
}
}
setPending(convId, null);
onChunk(pendingId); // trigger scroll to bottom and switch to the last node
};
const sendMessage = async (
convId: string | null,
leafNodeId: Message['id'] | null,
content: string,
extra: Message['extra'],
onChunk: CallbackGeneratedChunk
): Promise<boolean> => {
if (isGenerating(convId ?? '') || content.trim().length === 0) return false;
if (convId === null || convId.length === 0 || leafNodeId === null) {
const conv = await StorageUtils.createConversation(
content.substring(0, 256)
);
convId = conv.id;
leafNodeId = conv.currNode;
// if user is creating a new conversation, redirect to the new conversation
navigate(`/chat/${convId}`);
}
    const now = Date.now() + Timer.timercount;
    Timer.timercount += 2;
const currMsgId = now;
StorageUtils.appendMsg(
{
id: currMsgId,
timestamp: now,
type: 'text',
convId,
role: 'user',
content,
extra,
parent: leafNodeId,
children: [],
},
leafNodeId
);
onChunk(currMsgId);
try {
await generateMessage(convId, currMsgId, onChunk);
return true;
} catch (_) {
// TODO: rollback
}
return false;
};
const stopGenerating = (convId: string) => {
setPending(convId, null);
aborts[convId]?.abort();
};
// if content is undefined, we remove last assistant message
const replaceMessageAndGenerate = async (
convId: string,
parentNodeId: Message['id'], // the parent node of the message to be replaced
content: string | null,
extra: Message['extra'],
onChunk: CallbackGeneratedChunk
) => {
if (isGenerating(convId)) return;
if (content !== null) {
const now = Date.now();
const currMsgId = now;
StorageUtils.appendMsg(
{
id: currMsgId,
timestamp: now,
type: 'text',
convId,
role: 'user',
content,
extra,
parent: parentNodeId,
children: [],
},
parentNodeId
);
parentNodeId = currMsgId;
}
onChunk(parentNodeId);
await generateMessage(convId, parentNodeId, onChunk);
};
const saveConfig = (config: typeof CONFIG_DEFAULT) => {
StorageUtils.setConfig(config);
setConfig(config);
};
return (
<AppContext.Provider
value={{
isGenerating,
viewingChat,
pendingMessages,
sendMessage,
stopGenerating,
replaceMessageAndGenerate,
canvasData,
setCanvasData,
config,
saveConfig,
showSettings,
setShowSettings,
}}
>
{children}
</AppContext.Provider>
);
};
export const useAppContext = () => useContext(AppContext);

View File

@@ -0,0 +1,38 @@
export const XCloseButton: React.ElementType<
React.ClassAttributes<HTMLButtonElement> &
React.HTMLAttributes<HTMLButtonElement>
> = ({ className, ...props }) => (
<button className={`btn btn-square btn-sm ${className ?? ''}`} {...props}>
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-6 w-6"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth="2"
d="M6 18L18 6M6 6l12 12"
/>
</svg>
</button>
);
export const OpenInNewTab = ({
href,
children,
}: {
href: string;
children: string;
}) => (
<a
className="underline"
href={href}
target="_blank"
rel="noopener noreferrer"
>
{children}
</a>
);

View File

@@ -0,0 +1,60 @@
import { useEffect, useState } from 'react';
import { MessageExtraContext } from './types';
import { ChatTextareaApi } from '../components/useChatTextarea.ts';
// Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
// Ref: https://github.com/ggml-org/llama.cpp/pull/11940
interface SetTextEvData {
text: string;
context: string;
}
/**
* To test it:
* window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
*/
export const useVSCodeContext = (textarea: ChatTextareaApi) => {
const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
null
);
// Accept setText message from a parent window and set inputMsg and extraContext
useEffect(() => {
const handleMessage = (event: MessageEvent) => {
if (event.data?.command === 'setText') {
const data: SetTextEvData = event.data;
textarea.setValue(data?.text);
if (data?.context && data.context.length > 0) {
setExtraContext({
type: 'context',
content: data.context,
});
}
textarea.focus();
}
};
window.addEventListener('message', handleMessage);
return () => window.removeEventListener('message', handleMessage);
}, [textarea]);
// Add a keydown listener that sends the "escapePressed" message to the parent window
useEffect(() => {
const handleKeyDown = (event: KeyboardEvent) => {
if (event.key === 'Escape') {
window.parent.postMessage({ command: 'escapePressed' }, '*');
}
};
window.addEventListener('keydown', handleKeyDown);
return () => window.removeEventListener('keydown', handleKeyDown);
}, []);
return {
extraContext,
// call once the user message is sent, to clear the extra context
clearExtraContext: () => setExtraContext(null),
};
};

View File

@@ -0,0 +1,130 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isString = (x: any) => !!x?.toLowerCase;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isBoolean = (x: any) => x === true || x === false;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isNumeric = (n: any) => !isString(n) && !isNaN(n) && !isBoolean(n);
export const escapeAttr = (str: string) =>
str.replace(/>/g, '&gt;').replace(/"/g, '&quot;');
// wrapper for SSE
export async function* getSSEStreamAsync(fetchResponse: Response) {
if (!fetchResponse.body) throw new Error('Response body is empty');
const lines: ReadableStream<string> = fetchResponse.body
.pipeThrough(new TextDecoderStream())
.pipeThrough(new TextLineStream());
// @ts-expect-error asyncIterator complains about type, but it should work
for await (const line of asyncIterator(lines)) {
//if (isDev) console.log({ line });
if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
const data = JSON.parse(line.slice(5));
yield data;
} else if (line.startsWith('error:')) {
const data = JSON.parse(line.slice(6));
throw new Error(data.message || 'Unknown error');
}
}
}
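// Shape of the stream this consumes (illustrative llama-server output):
//   data: {"choices":[{"delta":{"content":"Hel"}}]}
//   data: {"choices":[{"delta":{"content":"lo"}}]}
//   data: [DONE]                      <- skipped by the endsWith('[DONE]') guard
//   error: {"message":"context full"} <- surfaced as a thrown Error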
// copy text to clipboard
export const copyStr = (textToCopy: string) => {
// Navigator clipboard api needs a secure context (https)
if (navigator.clipboard && window.isSecureContext) {
navigator.clipboard.writeText(textToCopy);
} else {
// Use the 'out of viewport hidden text area' trick
const textArea = document.createElement('textarea');
textArea.value = textToCopy;
// Move textarea out of the viewport so it's not visible
textArea.style.position = 'absolute';
textArea.style.left = '-999999px';
document.body.prepend(textArea);
textArea.select();
document.execCommand('copy');
}
};
/**
* filter out redundant fields upon sending to API
* also format extra into text
*/
export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
return messages.map((msg) => {
let newContent = '';
for (const extra of msg.extra ?? []) {
if (extra.type === 'context') {
        if (extra.content !== '') {
newContent += `${extra.content}\n\n`;
}
}
}
newContent += msg.content;
return {
role: msg.role,
content: newContent,
};
}) as APIMessage[];
}
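// e.g. a user message { content: 'Explain this', extra: [{ type: 'context',
// content: 'def f(): ...' }] } is flattened into
// { role: 'user', content: 'def f(): ...\n\nExplain this' }.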
/**
 * recommended for DeepSeek-R1: filter out content between <think> and </think> tags
*/
export function filterThoughtFromMsgs(messages: APIMessage[]) {
return messages.map((msg) => {
return {
role: msg.role,
content:
msg.role === 'assistant'
? msg.content.split('</think>').at(-1)!.trim()
: msg.content,
} as APIMessage;
});
}
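// e.g. an assistant message '<think>step 1 ...</think>Final answer'
// is sent back to the API as just 'Final answer'.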
export function classNames(classes: Record<string, boolean>): string {
return Object.entries(classes)
.filter(([_, value]) => value)
.map(([key, _]) => key)
.join(' ');
}
export const delay = (ms: number) =>
new Promise((resolve) => setTimeout(resolve, ms));
export const throttle = <T extends unknown[]>(
callback: (...args: T) => void,
delay: number
) => {
let isWaiting = false;
return (...args: T) => {
if (isWaiting) {
return;
}
callback(...args);
isWaiting = true;
setTimeout(() => {
isWaiting = false;
}, delay);
};
};
export const cleanCurrentUrl = (removeQueryParams: string[]) => {
const url = new URL(window.location.href);
removeQueryParams.forEach((param) => {
url.searchParams.delete(param);
});
window.history.replaceState({}, '', url.toString());
};

View File

@@ -0,0 +1,284 @@
// Conversations were historically stored in localStorage with the format:
//   { [convId]: { id: string, lastModified: number, messages: [...] } }
// They now live in IndexedDB (via Dexie); see migrationLStoIDB() below.
import { CONFIG_DEFAULT } from '../Config';
import { Conversation, Message, TimingReport } from './types';
import Dexie, { Table } from 'dexie';
const event = new EventTarget();
type CallbackConversationChanged = (convId: string) => void;
let onConversationChangedHandlers: [
CallbackConversationChanged,
EventListener,
][] = [];
const dispatchConversationChange = (convId: string) => {
event.dispatchEvent(
new CustomEvent('conversationChange', { detail: { convId } })
);
};
const db = new Dexie('LlamacppWebui') as Dexie & {
conversations: Table<Conversation>;
messages: Table<Message>;
};
// https://dexie.org/docs/Version/Version.stores()
db.version(1).stores({
  // Unlike SQL, you don't need to specify all properties, only the ones you wish to index.
conversations: '&id, lastModified',
messages: '&id, convId, [convId+id], timestamp',
});
// convId is a string prefixed with 'conv-'
const StorageUtils = {
/**
* manage conversations
*/
async getAllConversations(): Promise<Conversation[]> {
await migrationLStoIDB().catch(console.error); // noop if already migrated
return (await db.conversations.toArray()).sort(
(a, b) => b.lastModified - a.lastModified
);
},
/**
* can return null if convId does not exist
*/
async getOneConversation(convId: string): Promise<Conversation | null> {
return (await db.conversations.where('id').equals(convId).first()) ?? null;
},
/**
* get all message nodes in a conversation
*/
async getMessages(convId: string): Promise<Message[]> {
return await db.messages.where({ convId }).toArray();
},
/**
* use in conjunction with getMessages to filter messages by leafNodeId
* includeRoot: whether to include the root node in the result
* if node with leafNodeId does not exist, return the path with the latest timestamp
*/
filterByLeafNodeId(
msgs: Readonly<Message[]>,
leafNodeId: Message['id'],
includeRoot: boolean
): Readonly<Message[]> {
const res: Message[] = [];
const nodeMap = new Map<Message['id'], Message>();
for (const msg of msgs) {
nodeMap.set(msg.id, msg);
}
let startNode: Message | undefined = nodeMap.get(leafNodeId);
if (!startNode) {
// if not found, we return the path with the latest timestamp
let latestTime = -1;
for (const msg of msgs) {
if (msg.timestamp > latestTime) {
startNode = msg;
latestTime = msg.timestamp;
}
}
}
// traverse the path from leafNodeId to root
    // startNode is defined whenever msgs is non-empty
let currNode: Message | undefined = startNode;
while (currNode) {
if (currNode.type !== 'root' || (currNode.type === 'root' && includeRoot))
res.push(currNode);
currNode = nodeMap.get(currNode.parent ?? -1);
}
res.sort((a, b) => a.timestamp - b.timestamp);
return res;
},
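  // Illustrative call (hypothetical ids): given root(0) -> user(1) -> { asst(2), asst(3) },
  // filterByLeafNodeId(msgs, 3, false) returns [user(1), asst(3)] oldest-first;
  // an unknown leaf id falls back to the branch holding the newest message.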
/**
* create a new conversation with a default root node
*/
async createConversation(name: string): Promise<Conversation> {
const now = Date.now();
const msgId = now;
const conv: Conversation = {
id: `conv-${now}`,
lastModified: now,
currNode: msgId,
name,
};
await db.conversations.add(conv);
// create a root node
await db.messages.add({
id: msgId,
convId: conv.id,
type: 'root',
timestamp: now,
role: 'system',
content: '',
parent: -1,
children: [],
});
return conv;
},
/**
* if convId does not exist, throw an error
*/
async appendMsg(
    msg: Omit<Message, 'parent' | 'children'>, // Omit, not Exclude: Exclude is a no-op on a non-union type
parentNodeId: Message['id']
): Promise<void> {
if (msg.content === null) return;
const { convId } = msg;
await db.transaction('rw', db.conversations, db.messages, async () => {
const conv = await StorageUtils.getOneConversation(convId);
const parentMsg = await db.messages
.where({ convId, id: parentNodeId })
.first();
// update the currNode of conversation
if (!conv) {
throw new Error(`Conversation ${convId} does not exist`);
}
if (!parentMsg) {
throw new Error(
`Parent message ID ${parentNodeId} does not exist in conversation ${convId}`
);
}
await db.conversations.update(convId, {
lastModified: Date.now(),
currNode: msg.id,
});
// update parent
await db.messages.update(parentNodeId, {
children: [...parentMsg.children, msg.id],
});
// create message
await db.messages.add({
...msg,
parent: parentNodeId,
children: [],
});
});
dispatchConversationChange(convId);
},
/**
* remove conversation by id
*/
async remove(convId: string): Promise<void> {
await db.transaction('rw', db.conversations, db.messages, async () => {
await db.conversations.delete(convId);
await db.messages.where({ convId }).delete();
});
dispatchConversationChange(convId);
},
// event listeners
onConversationChanged(callback: CallbackConversationChanged) {
const fn = (e: Event) => callback((e as CustomEvent).detail.convId);
onConversationChangedHandlers.push([callback, fn]);
event.addEventListener('conversationChange', fn);
},
  offConversationChanged(callback: CallbackConversationChanged) {
    const entry = onConversationChangedHandlers.find(([cb, _]) => cb === callback);
    if (entry) {
      event.removeEventListener('conversationChange', entry[1]);
    }
    // drop only the matched handler so other subscribers stay registered
    onConversationChangedHandlers = onConversationChangedHandlers.filter(
      ([cb, _]) => cb !== callback
    );
  },
// manage config
getConfig(): typeof CONFIG_DEFAULT {
const savedVal = JSON.parse(localStorage.getItem('config') || '{}');
// to prevent breaking changes in the future, we always provide default value for missing keys
return {
...CONFIG_DEFAULT,
...savedVal,
};
},
setConfig(config: typeof CONFIG_DEFAULT) {
localStorage.setItem('config', JSON.stringify(config));
},
getTheme(): string {
return localStorage.getItem('theme') || 'auto';
},
setTheme(theme: string) {
if (theme === 'auto') {
localStorage.removeItem('theme');
} else {
localStorage.setItem('theme', theme);
}
},
};
export default StorageUtils;
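// Illustrative usage sketch (hypothetical call site, not part of this module;
// the ids shown are simplified and reuse Date.now() for brevity):
//
//   const conv = await StorageUtils.createConversation('New chat');
//   const leafId = Date.now();
//   await StorageUtils.appendMsg(
//     { id: leafId, convId: conv.id, type: 'text', timestamp: leafId,
//       role: 'user', content: 'Hello' },
//     conv.currNode
//   );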
// Migration from localStorage to IndexedDB
// these are old types, LS prefix stands for LocalStorage
interface LSConversation {
id: string; // format: `conv-{timestamp}`
lastModified: number; // timestamp from Date.now()
messages: LSMessage[];
}
interface LSMessage {
id: number;
role: 'user' | 'assistant' | 'system';
content: string;
timings?: TimingReport;
}
async function migrationLStoIDB() {
if (localStorage.getItem('migratedToIDB')) return;
const res: LSConversation[] = [];
for (const key in localStorage) {
if (key.startsWith('conv-')) {
res.push(JSON.parse(localStorage.getItem(key) ?? '{}'));
}
}
if (res.length === 0) return;
await db.transaction('rw', db.conversations, db.messages, async () => {
let migratedCount = 0;
for (const conv of res) {
const { id: convId, lastModified, messages } = conv;
const firstMsg = messages[0];
const lastMsg = messages.at(-1);
if (messages.length < 2 || !firstMsg || !lastMsg) {
console.log(
`Skipping conversation ${convId} with ${messages.length} messages`
);
continue;
}
const name = firstMsg.content ?? '(no messages)';
await db.conversations.add({
id: convId,
lastModified,
currNode: lastMsg.id,
name,
});
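      // synthesize a root node for the migrated branch; ids are Date.now()
      // timestamps, so id - 2 stays safely below the first real message id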
const rootId = messages[0].id - 2;
await db.messages.add({
id: rootId,
convId: convId,
type: 'root',
timestamp: rootId,
role: 'system',
content: '',
parent: -1,
children: [firstMsg.id],
});
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
await db.messages.add({
...msg,
type: 'text',
convId: convId,
timestamp: msg.id,
parent: i === 0 ? rootId : messages[i - 1].id,
children: i === messages.length - 1 ? [] : [messages[i + 1].id],
});
}
migratedCount++;
console.log(
`Migrated conversation ${convId} with ${messages.length} messages`
);
}
console.log(
`Migrated ${migratedCount} conversations from localStorage to IndexedDB`
);
localStorage.setItem('migratedToIDB', '1');
});
}

View File

@@ -0,0 +1,91 @@
export interface TimingReport {
prompt_n: number;
prompt_ms: number;
predicted_n: number;
predicted_ms: number;
}
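// Derived metric (illustrative): generation speed in tokens/s is
// predicted_n / (predicted_ms / 1000); prompt processing speed is
// prompt_n / (prompt_ms / 1000).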
/**
 * What is conversation "branching"? It is a feature that lets the user edit an old message in the history while preserving the rest of the conversation.
 * Inspired by ChatGPT / Claude / Hugging Chat: when you edit a message, a new branch of the conversation is created and the old message remains visible.
*
 * We use the same node-based structure as other chat UIs, where each message has a parent and children. A "root" message is the first message in a conversation and is not displayed in the UI.
*
* root
* ├── message 1
* │ └── message 2
* │ └── message 3
* └── message 4
* └── message 5
*
 * In the above example, assuming that the user wants to edit message 2, a new branch will be created:
*
* ├── message 2
* │ └── message 3
* └── message 6
*
 * Messages 2 and 6 are siblings; message 6 is the new branch.
*
 * We only need to know the last node (aka the leaf) to get the current branch. In the above example, message 5 is the leaf of the branch containing messages 4 and 5.
*
* For the implementation:
* - StorageUtils.getMessages() returns list of all nodes
* - StorageUtils.filterByLeafNodeId() filters the list of nodes from a given leaf node
*/
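// Illustrative sketch (hypothetical call site) of reconstructing the current
// branch with the StorageUtils API from utils/storage.ts:
//
//   const conv = await StorageUtils.getOneConversation(convId);
//   const nodes = await StorageUtils.getMessages(convId);
//   const branch = StorageUtils.filterByLeafNodeId(nodes, conv!.currNode, false);
//
// `branch` is ordered oldest-to-newest and excludes the root node.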
// Note: the terms "message" and "node" are used interchangeably in this context
export interface Message {
id: number;
convId: string;
type: 'text' | 'root';
timestamp: number; // timestamp from Date.now()
role: 'user' | 'assistant' | 'system';
content: string;
timings?: TimingReport;
extra?: MessageExtra[];
// node based system for branching
parent: Message['id'];
children: Message['id'][];
}
type MessageExtra = MessageExtraTextFile | MessageExtraContext; // TODO: will add more in the future
export interface MessageExtraTextFile {
type: 'textFile';
name: string;
content: string;
}
export interface MessageExtraContext {
type: 'context';
content: string;
}
export type APIMessage = Pick<Message, 'role' | 'content'>;
export interface Conversation {
id: string; // format: `conv-{timestamp}`
lastModified: number; // timestamp from Date.now()
currNode: Message['id']; // the current message node being viewed
name: string;
}
export interface ViewingChat {
conv: Readonly<Conversation>;
messages: Readonly<Message[]>;
}
export type PendingMessage = Omit<Message, 'content'> & {
content: string | null;
};
export enum CanvasType {
PY_INTERPRETER,
}
export interface CanvasPyInterpreter {
type: CanvasType.PY_INTERPRETER;
content: string;
}
export type CanvasData = CanvasPyInterpreter;

View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -0,0 +1,16 @@
/** @type {import('tailwindcss').Config} */
export default {
content: [
"./index.html",
"./src/**/*.{js,ts,jsx,tsx}",
],
theme: {
extend: {},
},
plugins: [
require('daisyui'),
],
daisyui: {
themes: ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'],
}
}

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"target": "ES2021",
"useDefineForClassFields": true,
"lib": ["ES2021", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src"]
}

View File

@@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}

View File

@@ -0,0 +1,24 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"target": "ES2022",
"lib": ["ES2023"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["vite.config.ts"]
}

View File

@@ -0,0 +1,80 @@
import { defineConfig, PluginOption, ResolvedConfig } from 'vite';
import react from '@vitejs/plugin-react';
import { viteSingleFile } from 'vite-plugin-singlefile';
import path from 'node:path';
import fs from 'node:fs';
import zlib from 'node:zlib';
/* eslint-disable */
const MAX_BUNDLE_SIZE = 1.5 * 1024 * 1024; // only increase when absolutely necessary
const GUIDE_FOR_FRONTEND = `
<!--
This is a single file build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
-->
`.trim();
const FRONTEND_PLUGINS = [react()];
const BUILD_PLUGINS = [
...FRONTEND_PLUGINS,
viteSingleFile(),
(function llamaCppPlugin() {
    let config: ResolvedConfig;
return {
name: 'llamacpp:build',
apply: 'build',
      async configResolved(_config: ResolvedConfig) {
config = _config;
},
writeBundle() {
const outputIndexHtml = path.join(config.build.outDir, 'index.html');
const content =
GUIDE_FOR_FRONTEND + '\n' + fs.readFileSync(outputIndexHtml, 'utf-8');
const compressed = zlib.gzipSync(Buffer.from(content, 'utf-8'), {
level: 9,
});
      // the gzip header embeds machine-specific metadata (RFC 1952), which
      // would make the build non-reproducible, so zero it out:
      // MTIME (modification time, bytes 4-7)
      compressed[0x4] = 0;
      compressed[0x5] = 0;
      compressed[0x6] = 0;
      compressed[0x7] = 0;
      // OS (byte 9)
      compressed[0x9] = 0;
if (compressed.byteLength > MAX_BUNDLE_SIZE) {
throw new Error(
`Bundle size is too large (${Math.ceil(compressed.byteLength / 1024)} KB).\n` +
          `Please reduce the size of the frontend or increase MAX_BUNDLE_SIZE in vite.config.ts.\n`
);
}
const targetOutputFile = path.join(
config.build.outDir,
'../../public/index.html.gz'
);
fs.writeFileSync(targetOutputFile, compressed);
},
} satisfies PluginOption;
})(),
];
export default defineConfig({
// @ts-ignore
plugins: process.env.ANALYZE ? FRONTEND_PLUGINS : BUILD_PLUGINS,
server: {
proxy: {
'/v1': 'http://localhost:8080',
},
headers: {
'Cross-Origin-Embedder-Policy': 'require-corp',
'Cross-Origin-Opener-Policy': 'same-origin',
},
},
});
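// Assumed dev workflow (check the project's package.json for the actual
// scripts): run the Vite dev server here while a llama-server instance
// listens on http://localhost:8080, so that /v1 API requests are proxied
// to the backend during development.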