mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-20 13:14:09 +00:00
* spec : add self speculative decoding and ngram-mod and refactor common : use common_ prefix for common library function llama : use LLAMA_TOKEN_NULL spec : add self speculative decoding (no draft model required) + refactor spec : add ngram-mod spec : various improvements to ngram-map + docs spec : fix the check-rate logic of ngram-simple common : add common_speculative_is_compat() spec : simplify time measurement using common_time_meas refactor common_sampler_init refactor common_token_to_piece refactor and fix cur_p bug clean up * spec : remove check rate * spec: show warnings instead of abort --------- Co-authored-by: firecoperana <firecoperana> Co-authored-by: Sascha Rogmann <59577610+srogmann@users.noreply.github.com>
61 lines
1.1 KiB
C++
61 lines
1.1 KiB
C++
#include "ngram-mod.h"
|
|
|
|
//
|
|
// common_ngram_mod
|
|
//
|
|
|
|
// Creates a table with `size` slots, addressed by hashing n-grams of `n` tokens.
//
//   n    - number of leading tokens that idx() hashes to select a slot
//   size - requested number of slots; a request of 0 is raised to 1, because
//          idx() reduces the hash modulo entries.size() and a modulo by zero
//          is undefined behavior
//
// After construction every slot holds EMPTY and the used counter is 0
// (both established by reset()).
common_ngram_mod::common_ngram_mod(uint16_t n, size_t size) : n(n), used(0) {
    // never leave the table empty: every lookup divides by the slot count
    entries.resize(size > 0 ? size : 1);

    reset();
}
|
|
|
|
size_t common_ngram_mod::idx(const entry_t * tokens) const {
|
|
size_t res = 0;
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
res = res*6364136223846793005ULL + tokens[i];
|
|
}
|
|
|
|
res = res % entries.size();
|
|
|
|
return res;
|
|
}
|
|
|
|
void common_ngram_mod::add(const entry_t * tokens) {
|
|
const size_t i = idx(tokens);
|
|
|
|
if (entries[i] == EMPTY) {
|
|
used++;
|
|
}
|
|
|
|
entries[i] = tokens[n];
|
|
}
|
|
|
|
// Looks up the slot selected by the first n tokens and returns its content.
//
// The result is whatever the slot currently holds: EMPTY if nothing has been
// stored there since the last reset(), otherwise the value most recently
// written by add() to that slot. The n-gram itself is not stored, so the value
// may come from a different n-gram that maps to the same index.
//
//   tokens - pointer to at least n readable entries
common_ngram_mod::entry_t common_ngram_mod::get(const entry_t * tokens) const {
    return entries[idx(tokens)];
}
|
|
|
|
// Clears the table: every slot is set to EMPTY and the used counter goes back
// to 0. The number of slots is not changed.
void common_ngram_mod::reset() {
    used = 0;

    for (auto & slot : entries) {
        slot = EMPTY;
    }
}
|
|
|
|
size_t common_ngram_mod::get_n() const {
|
|
return n;
|
|
}
|
|
|
|
size_t common_ngram_mod::get_used() const {
|
|
return used;
|
|
}
|
|
|
|
size_t common_ngram_mod::size() const {
|
|
return entries.size();
|
|
}
|
|
|
|
size_t common_ngram_mod::size_bytes() const {
|
|
return entries.size() * sizeof(entries[0]);
|
|
}
|