mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-27 08:34:09 +00:00
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP

  AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower, as is so often the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include "llama.h"
|
||||
#include "common/common.h"
|
||||
#include "common.h"
|
||||
|
||||
// Write C++ code here.
|
||||
//
|
||||
@@ -409,7 +409,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
|
||||
|
||||
const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
|
||||
if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
|
||||
return env->NewStringUTF("");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto new_token_chars = llama_token_to_piece(context, new_token_id);
|
||||
|
||||
Reference in New Issue
Block a user