diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 9ec8e518..695dc722 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -18739,7 +18739,7 @@ static void ggml_compute_forward_flash_attn_ext_f16(
                 dst->ne[2], dst->ne[1], dst->nb[1],
                 k->type, v->type, Dk, Dv, neq1, nek1,
                 q->nb[1], k->nb[1], v->nb[1], mask->nb[1],
-                q->data, k->data, v->data, mask->data, sinks->data,
+                q->data, k->data, v->data, mask->data, sinks ? sinks->data : NULL,
                 scale, softcap, (float *)dst->data,
                 params->wdata, (barrier_t)ggml_barrier, (void *)params->shared, ith, nth)) return;
diff --git a/src/llama-impl.h b/src/llama-impl.h
index 08005b21..befdc559 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -38,6 +38,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 #define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
 #define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
 
 //
 // helpers
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index dbae465d..d6c42de8 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2333,7 +2333,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         } else {
             // token is control, but not marked as EOG -> print a debug log
             if (id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && special_eog_ids.count(t.second) == 0) {
-                LLAMA_LOG_WARN("%s: control token: %6d '%s' is not marked as EOG\n",
+                LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
                         __func__, t.second, t.first.c_str());
             }
         }
@@ -2568,7 +2568,7 @@ llama_token_attr llama_vocab::impl::token_get_attr(llama_token id) const {
 }
 
 void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) {
-    LLAMA_LOG_INFO("%s: initializing tokenizer for type %d\n", __func__, type);
+    LLAMA_LOG_DEBUG("%s: initializing tokenizer for type %d\n", __func__, type);
 
     switch (type) {
         case LLAMA_VOCAB_TYPE_SPM:
diff --git a/src/llama.cpp b/src/llama.cpp
index ee3f03df..56b0d30d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -346,6 +346,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MASK_ID,                 "tokenizer.ggml.mask_token_id"            },
     { LLM_KV_TOKENIZER_ADD_BOS,                 "tokenizer.ggml.add_bos_token"            },
     { LLM_KV_TOKENIZER_ADD_EOS,                 "tokenizer.ggml.add_eos_token"            },
+    { LLM_KV_TOKENIZER_ADD_SEP,                 "tokenizer.ggml.add_sep_token"            },
     { LLM_KV_TOKENIZER_ADD_PREFIX,              "tokenizer.ggml.add_space_prefix"         },
     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,         "tokenizer.ggml.remove_extra_whitespaces" },
     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,    "tokenizer.ggml.precompiled_charsmap"     },
@@ -7326,7 +7327,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
         throw std::runtime_error("error loading model hyperparameters: " + std::string(e.what()));
     }
     try {
-        model.vocab.load(ml, LLM_KV(model.arch));
+        LLM_KV kv(model.arch);
+        model.vocab.load(ml, kv);
     } catch(const std::exception & e) {
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }