Add mtmd: mtmd.cpp compiles

2026-02-23 22:54:10 +00:00 · 2025-09-25 09:37:35 +03:00
parent 7829a6024a
commit 31a9ddb658
3 changed files with 29 additions and 6 deletions
--- a/examples/mtmd/mtmd.cpp
+++ b/examples/mtmd/mtmd.cpp
@@ -334,10 +334,10 @@ private:
    std::string token_to_piece(const llama_vocab * vocab, llama_token token, bool special) {
        std::string piece;
        piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
-        const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+        const int n_chars = llama_vocab_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
        if (n_chars < 0) {
            piece.resize(-n_chars);
-            int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+            int check = llama_vocab_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
            GGML_ASSERT(check == -n_chars);
        } else {
            piece.resize(n_chars);
@@ -720,10 +720,10 @@ struct mtmd_tokenizer {
        // upper limit for the number of tokens
        int n_tokens = text.length() + 2 * add_special;
        std::vector<llama_token> result(n_tokens);
-        n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+        n_tokens = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
        if (n_tokens < 0) {
            result.resize(-n_tokens);
-            int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+            int check = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
            GGML_ASSERT(check == -n_tokens);
        } else {
            result.resize(n_tokens);
--- a/include/llama.h
+++ b/include/llama.h
@@ -581,6 +581,14 @@ extern "C" {
    LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
    LLAMA_API int32_t llama_n_layer    (const struct llama_model * model);

+    // Compat: new-style llama_model_* / llama_vocab_* names, introduced together with examples/mtmd
+    static inline int32_t llama_model_n_embd(const struct llama_model * model) { return llama_n_embd(model); } // forwards to llama_n_embd(); inline so TUs that never call it do not warn about an unused static
+    LLAMA_API bool        llama_vocab_get_add_bos(const struct llama_vocab * vocab);
+    LLAMA_API bool        llama_vocab_get_add_eos(const struct llama_vocab * vocab);
+    LLAMA_API int32_t     llama_vocab_n_tokens(const struct llama_vocab * vocab);
+    LLAMA_API llama_token llama_vocab_bos(const struct llama_vocab * vocab);
+    LLAMA_API llama_token llama_vocab_eos(const struct llama_vocab * vocab);
+
    // Get the model's RoPE frequency scaling factor
    LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);

@@ -1061,6 +1069,14 @@ extern "C" {
                         int32_t   n_tokens_max,
                            bool   add_special,
                            bool   parse_special);
+    LLAMA_API int32_t llama_vocab_tokenize( // tokenize text against an explicit vocab; the definition in src/llama-vocab.cpp forwards to vocab->tokenize()
+        const struct llama_vocab * vocab,
+                      const char * text,
+                         int32_t   text_len,
+                     llama_token * tokens, // caller-provided output buffer
+                         int32_t   n_tokens_max, // capacity of tokens (mtmd.cpp passes result.size())
+                            bool   add_special,
+                            bool   parse_special); // mtmd.cpp treats a negative return as -(required token count), resizes and retries

    // Token Id -> Piece.
    // Uses the vocabulary in the provided context.
@@ -1074,6 +1090,13 @@ extern "C" {
                               int32_t   length,
                               int32_t   lstrip,
                                  bool   special);
+    LLAMA_API int32_t llama_vocab_token_to_piece( // token id -> text piece, using an explicit vocab
+              const struct llama_vocab * vocab,
+                           llama_token   token,
+                                  char * buf, // caller-provided output buffer
+                               int32_t   length, // capacity of buf (mtmd.cpp passes piece.size())
+                               int32_t   lstrip,
+                                  bool   special); // mtmd.cpp treats a negative return as -(required buffer size), resizes and retries

    /// @details Convert the provided tokens into text (inverse of llama_tokenize()).
    /// @param text The char pointer must be large enough to hold the resulting text.
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3770,7 +3770,7 @@ llama_token llama_token_fim_sep(const struct llama_vocab * vocab) {
 // tokenization
 //

-int32_t llama_tokenize(
+int32_t llama_vocab_tokenize(
    const struct llama_vocab * vocab,
                  const char * text,
                     int32_t   text_len,
@@ -3781,7 +3781,7 @@ int32_t llama_tokenize(
    return vocab->tokenize(text, text_len, tokens, n_tokens_max, add_special, parse_special);
 }

-int32_t llama_token_to_piece(
+int32_t llama_vocab_token_to_piece(
    const struct llama_vocab * vocab,
                 llama_token   token,
                        char * buf,