Mirror of https://github.com/ikawrakow/ik_llama.cpp.git
add dry sampler (#513)
* add dry sampler
* use vocab instead of model in dry_init function
* fix compile error for build test

---------

Co-authored-by: firecoperana <firecoperana>
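For context, DRY ("Don't Repeat Yourself", originally proposed by p-e-w) penalizes any candidate token that would extend a sequence already seen in the recent context, with a penalty that grows exponentially in the repeat length. The sketch below is a minimal brute-force illustration of that core computation, assuming the usual multiplier/base/allowed-length parameters; it omits sequence breakers and the penalty range, and is not this PR's (incremental) implementation:

    // Illustrative only -- the default values are assumptions, not the PR's.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    using llama_token = int32_t; // stand-in for the library typedef

    struct dry_params {
        float   multiplier     = 0.8f;
        float   base           = 1.75f;
        int32_t allowed_length = 2;
    };

    // Longest context suffix that also appears immediately before an
    // earlier occurrence of `candidate`, i.e. the repetition that
    // emitting `candidate` would extend.
    static int32_t dry_match_length(const std::vector<llama_token> & ctx, llama_token candidate) {
        const int32_t n = (int32_t) ctx.size();
        int32_t best = 0;
        for (int32_t i = 0; i < n; ++i) {
            if (ctx[i] != candidate) {
                continue; // only earlier occurrences of the candidate matter
            }
            int32_t k = 0;
            while (k < i && ctx[i - 1 - k] == ctx[n - 1 - k]) {
                ++k; // walk both occurrences backwards in lock-step
            }
            best = std::max(best, k);
        }
        return best;
    }

    // Repeats shorter than allowed_length are free; longer ones cost
    // multiplier * base^(len - allowed_length), subtracted from the logit.
    static float dry_penalize(float logit, const std::vector<llama_token> & ctx,
                              llama_token candidate, const dry_params & p) {
        const int32_t len = dry_match_length(ctx, candidate);
        if (len < p.allowed_length) {
            return logit;
        }
        return logit - p.multiplier * std::pow(p.base, (float)(len - p.allowed_length));
    }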
@@ -75,6 +75,9 @@ struct naive_trie {
     llama_token value;
 };
 
+uint32_t llama_vocab::n_tokens() const {
+    return (uint32_t)id_to_token.size();
+}
 //
 // impl
 //
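The new accessor simply wraps `id_to_token.size()`. A hedged sketch of why the port wants it on the vocab (per the commit message, `dry_init` now takes the vocab rather than the model): per-token sampler state can be sized directly from it. `dry_state` and its field are illustrative names, not the PR's:

    #include <vector>

    // Hypothetical init helper; only vocab.n_tokens() is real here.
    struct dry_state {
        std::vector<float> penalty; // one slot per vocab entry
    };

    static dry_state dry_state_init(const llama_vocab & vocab) {
        dry_state st;
        st.penalty.assign(vocab.n_tokens(), 0.0f); // sized by the new accessor
        return st;
    }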
@@ -1741,3 +1744,19 @@ int32_t llama_detokenize_impl(
 
     return total <= text_len_max ? total : -total;
 }
+
+std::string llama_detokenize(const struct llama_vocab& vocab, const std::vector<llama_token>& tokens, bool special) {
+    std::string text;
+    text.resize(std::max(text.capacity(), tokens.size()));
+    int32_t n_chars = llama_detokenize_impl(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+    if (n_chars < 0) {
+        text.resize(-n_chars);
+        n_chars = llama_detokenize_impl(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+        GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
+    }
+
+    text.resize(n_chars);
+
+    // NOTE: the original tokenizer decodes bytes after collecting the pieces.
+    return text;
+}
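Note the wrapper's contract with `llama_detokenize_impl`: the impl signals an undersized buffer by returning the negated required length (the `return total <= text_len_max ? total : -total;` line above), so the wrapper first sizes the string to its existing capacity and retries exactly once after growing. A hedged usage sketch; the `llama_detokenize` and `n_tokens` calls are real per the hunks above, everything else is illustrative:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical call site -- `vocab` must come from a loaded model.
    void dump_tokens(const llama_vocab & vocab, const std::vector<llama_token> & tokens) {
        // special = true also renders control tokens (BOS/EOS, ...) as text
        const std::string text = llama_detokenize(vocab, tokens, /*special =*/ true);
        std::printf("vocab size: %u, decoded: %s\n", vocab.n_tokens(), text.c_str());
    }

The capacity-first sizing makes the common case a single impl call with no reallocation; the retry path only triggers when the decoded text outgrows the initial guess.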