From 31a9ddb6586a9dcaa75d46eec576aa61e1567078 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Date: Thu, 25 Sep 2025 09:37:35 +0300
Subject: [PATCH] Add mtmd: mtmd.cpp compiles

---
 examples/mtmd/mtmd.cpp |  8 ++++----
 include/llama.h        | 23 +++++++++++++++++++++++
 src/llama-vocab.cpp    |  4 ++--
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/examples/mtmd/mtmd.cpp b/examples/mtmd/mtmd.cpp
index cd022c5e..94322e14 100644
--- a/examples/mtmd/mtmd.cpp
+++ b/examples/mtmd/mtmd.cpp
@@ -334,10 +334,10 @@ private:
     std::string token_to_piece(const llama_vocab * vocab, llama_token token, bool special) {
         std::string piece;
         piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
-        const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+        const int n_chars = llama_vocab_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
         if (n_chars < 0) {
             piece.resize(-n_chars);
-            int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+            int check = llama_vocab_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
             GGML_ASSERT(check == -n_chars);
         } else {
             piece.resize(n_chars);
@@ -720,10 +720,10 @@ struct mtmd_tokenizer {
         // upper limit for the number of tokens
         int n_tokens = text.length() + 2 * add_special;
         std::vector<llama_token> result(n_tokens);
-        n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+        n_tokens = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
         if (n_tokens < 0) {
             result.resize(-n_tokens);
-            int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+            int check = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
             GGML_ASSERT(check == -n_tokens);
         } else {
             result.resize(n_tokens);
diff --git a/include/llama.h b/include/llama.h
index de9f0656..0784ef7b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -581,6 +581,14 @@ extern "C" {
     LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
     LLAMA_API int32_t llama_n_layer    (const struct llama_model * model);
 
+    // Compat: additional API names introduced together with examples/mtmd so that mtmd.cpp compiles.
+    static inline int32_t llama_model_n_embd(const struct llama_model * model) { return llama_n_embd(model); } // forwards to llama_n_embd; inline so that including TUs which never call it do not get an unused-function warning
+    LLAMA_API bool        llama_vocab_get_add_bos(const struct llama_vocab * vocab);
+    LLAMA_API bool        llama_vocab_get_add_eos(const struct llama_vocab * vocab);
+    LLAMA_API int32_t     llama_vocab_n_tokens(const struct llama_vocab * vocab);
+    LLAMA_API llama_token llama_vocab_bos(const struct llama_vocab * vocab);
+    LLAMA_API llama_token llama_vocab_eos(const struct llama_vocab * vocab);
+
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
@@ -1061,6 +1069,14 @@ extern "C" {
                          int32_t   n_tokens_max,
                             bool   add_special,
                             bool   parse_special);
+    LLAMA_API int32_t llama_vocab_tokenize(
+        const struct llama_vocab * vocab,
+                      const char * text,
+                         int32_t   text_len,
+                     llama_token * tokens,
+                         int32_t   n_tokens_max,
+                            bool   add_special,
+                            bool   parse_special); // takes the vocab directly; defined in src/llama-vocab.cpp, where it forwards to vocab->tokenize(...)
 
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
@@ -1074,6 +1090,13 @@ extern "C" {
                                int32_t   length,
                                int32_t   lstrip,
                                   bool   special);
+    LLAMA_API int32_t llama_vocab_token_to_piece(
+              const struct llama_vocab * vocab,
+                           llama_token   token,
+                                  char * buf,
+                               int32_t   length,
+                               int32_t   lstrip,
+                                  bool   special); // takes the vocab directly; defined in src/llama-vocab.cpp
 
     /// @details Convert the provided tokens into text (inverse of llama_tokenize()).
     /// @param text The char pointer must be large enough to hold the resulting text.
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 115ffa7d..271d4816 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3770,7 +3770,7 @@ llama_token llama_token_fim_sep(const struct llama_vocab * vocab) {
 // tokenization
 //
 
-int32_t llama_tokenize(
+int32_t llama_vocab_tokenize(
     const struct llama_vocab * vocab,
                   const char * text,
                      int32_t   text_len,
@@ -3781,7 +3781,7 @@ int32_t llama_tokenize(
     return vocab->tokenize(text, text_len, tokens, n_tokens_max, add_special, parse_special);
 }
 
-int32_t llama_token_to_piece(
+int32_t llama_vocab_token_to_piece(
     const struct llama_vocab * vocab,
                  llama_token   token,
                         char * buf,