Add support for SmolLM3 (#934)

* Convert from HF

* Model loading and compute graph

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
commit 263be6670b (parent 86e2bec04e)
Author: Kawrakow
Date: 2025-11-10 15:40:12 +02:00
Committed by: GitHub
10 changed files with 199 additions and 10 deletions
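The hunk below registers a converter class for SmolLM3 checkpoints. For context, here is a minimal sketch (simplified, assumed names; the real registry lives in the conversion script) of the pattern @Model.register relies on: the decorator maps the "architectures" string from a checkpoint's config.json to the converter class, so a model listing "SmolLM3ForCausalLM" is dispatched to SmolLM3Model with no extra plumbing.

# Minimal sketch of the @Model.register dispatch pattern; names are
# illustrative, not the fork's exact implementation.
class Model:
    _model_classes: dict[str, type["Model"]] = {}

    @classmethod
    def register(cls, *names: str):
        # decorator: record the class under each HF architecture name
        def func(modelcls: type["Model"]) -> type["Model"]:
            for name in names:
                cls._model_classes[name] = modelcls
            return modelcls
        return func

    @classmethod
    def from_model_architecture(cls, arch: str) -> type["Model"]:
        # look up the converter class for a config.json "architectures" entry
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported") from None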


@@ -4182,6 +4182,21 @@ class MiniMaxM2Model(Model):
         return super().modify_tensors(data_torch, name, bid)
 
 
+@Model.register("SmolLM3ForCausalLM")
+class SmolLM3Model(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.SMOLLM3
+
+    def set_vocab(self):
+        super().set_vocab()
+        # remove unsupported array slicing in chat template
+        # ref: https://huggingface.co/ggml-org/SmolLM3-3B-GGUF/discussions/1
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        if tokenizer.chat_template is not None:
+            chat_template = tokenizer.chat_template.replace("[:]", "")
+            self.gguf_writer.add_chat_template(chat_template)
+
+
 @Model.register("Dots1ForCausalLM")
 class Dots1Model(Qwen2MoeModel):
     model_arch = gguf.MODEL_ARCH.DOTS1
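For context on the set_vocab override above: SmolLM3's chat template uses a Python-style full slice ("[:]"), which the Jinja subset used at inference time does not implement, so the converter strips it before writing the template into the GGUF metadata. A small self-contained sketch of the transform (the template fragment here is hypothetical, not SmolLM3's actual template):

# Hypothetical fragment in the style of a chat template; "[:]" is a
# copy-slice that the inference-side Jinja engine rejects.
template = "{%- for message in messages[:] -%}{{ message.content }}{%- endfor -%}"

# Same transform as SmolLM3Model.set_vocab: drop the slice, keep the iteration.
fixed = template.replace("[:]", "")
assert fixed == "{%- for message in messages -%}{{ message.content }}{%- endfor -%}"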