mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 14:44:09 +00:00
* spec : add self speculative decoding and ngram-mod and refactor common : use common_ prefix for common library function llama : use LLAMA_TOKEN_NULL spec : add self speculative decoding (no draft model required) + refactor spec : add ngram-mod spec : various improvements ton ngram-map + docs spec : fix the check-rate logic of ngram-simple common : add common_speculative_is_compat() spec : simplify time measurement using common_time_meas refactor common_sampler_init refactor common_token_to_piece refactor and fix cur_p bug clean up * spec : remove check rate * spec: show warnings instead of abort --------- Co-authored-by: firecoperana <firecoperana> Co-authored-by: Sascha Rogmann <59577610+srogmann@users.noreply.github.com>
38 lines
727 B
C++
#pragma once

#include <cstdint>
#include <vector>
#include <cstddef>

//
// common_ngram_mod
//
// ref: https://github.com/ggml-org/llama.cpp/pull/19164
//
// basic n-gram hasher
//
// Fixed-capacity lookup table that maps an n-gram of tokens to a table slot
// via a hash (declaration only; definitions live in the corresponding .cpp).
// Used by the draft-model-free ("self") speculative decoding path.
struct common_ngram_mod {
    using entry_t = int32_t;

    // sentinel value for an unoccupied slot / failed lookup
    static constexpr entry_t EMPTY = -1;

    // n: tokens per n-gram; size: number of table slots
    common_ngram_mod(uint16_t n, size_t size);

    // compute the table slot index for the n tokens at `tokens`
    size_t idx(const entry_t * tokens) const;

    // record the n-gram starting at `tokens`
    // NOTE(review): presumably stores the token following the n-gram — confirm in the .cpp
    void add(const entry_t * tokens);

    entry_t get(const entry_t * tokens) const; // return -1 if not found

    // clear all entries back to EMPTY
    void reset();

    size_t get_n()    const; // n-gram size
    size_t get_used() const; // number of occupied entries

    size_t size()       const; // total slot count
    size_t size_bytes() const; // memory footprint of the table

private:
    size_t n;    // ngram size to hash
    size_t used; // occupied-slot counter

    std::vector<entry_t> entries;
};