From 02aa65009bbc2f0dfe257cf437cbd5df9d1c189a Mon Sep 17 00:00:00 2001 From: firecoperana Date: Sat, 17 Jan 2026 10:04:42 -0600 Subject: [PATCH] fix test build error --- tests/test-tokenizer-1-bpe.cpp | 6 +++--- tests/test-tokenizer-1-spm.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index 9498387e..7f55784e 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -78,7 +78,7 @@ int main(int argc, char **argv) { const int n_vocab = llama_n_vocab(model); for (int i = 0; i < n_vocab; ++i) { - std::string str = llama_detokenize(ctx, std::vector(1, i)); + std::string str = common_token_to_piece(ctx, std::vector(1, i)); try { auto cps = unicode_cpts_from_utf8(str); std::vector tokens = llama_tokenize(ctx, str, false, true); @@ -94,7 +94,7 @@ int main(int argc, char **argv) { fprintf(stderr, "]\n"); return 2; } - std::string check = llama_detokenize(ctx, tokens); + std::string check = common_token_to_piece(ctx, tokens); if (check != str) { fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n", __func__, i, str.c_str(), str.length(), check.c_str(), check.length()); @@ -124,7 +124,7 @@ int main(int argc, char **argv) { std::string str = unicode_cpt_to_utf8(cp); std::vector tokens = llama_tokenize(ctx, str, false); - std::string check = llama_detokenize(ctx, tokens); + std::string check = common_token_to_piece(ctx, tokens); if (cp != 9601 && str != check) { fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", cp, check.c_str(), check.length(), str.c_str(), str.length()); diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp index 7ca9e2ca..ac47b547 100644 --- a/tests/test-tokenizer-1-spm.cpp +++ b/tests/test-tokenizer-1-spm.cpp @@ -66,9 +66,9 @@ int main(int argc, char ** argv) { const int n_vocab = llama_n_vocab(model); for (int i = 0; i < n_vocab; ++i) { - std::string str = llama_detokenize(ctx, std::vector(1, i), true); + std::string str = common_token_to_piece(ctx, std::vector(1, i), true); std::vector tokens = llama_tokenize(ctx, str, false, true); - std::string check = llama_detokenize(ctx, tokens); + std::string check = common_token_to_piece(ctx, tokens); if (check != str) { fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n", __func__, i, str.c_str(), str.length(), check.c_str(), check.length()); @@ -94,7 +94,7 @@ int main(int argc, char ** argv) { std::string str = unicode_cpt_to_utf8(cp); std::vector tokens = llama_tokenize(ctx, str, false, true); - std::string check = llama_detokenize(ctx, tokens); + std::string check = common_token_to_piece(ctx, tokens); if (cp != 9601 && str != check) { fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", cp, check.c_str(), check.length(), str.c_str(), str.length());