Add support for SmolLM3 (#934)

* Convert from HF

* Model loading and compute graph

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
commit 263be6670b (parent 86e2bec04e)
Author: Kawrakow
Date: 2025-11-10 15:40:12 +02:00
Committed by: GitHub
10 changed files with 199 additions and 10 deletions
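The hunk below registers a converter class for SmolLM3 checkpoints. For context, here is a minimal sketch (simplified, assumed names; the real registry lives in the conversion script) of the pattern @Model.register relies on: the decorator maps the "architectures" string from a checkpoint's config.json to the converter class, so a model listing "SmolLM3ForCausalLM" is dispatched to SmolLM3Model with no extra plumbing.

# Minimal sketch of the @Model.register dispatch pattern; names are
# illustrative, not the fork's exact implementation.
class Model:
    _model_classes: dict[str, type["Model"]] = {}

    @classmethod
    def register(cls, *names: str):
        # decorator: record the class under each HF architecture name
        def func(modelcls: type["Model"]) -> type["Model"]:
            for name in names:
                cls._model_classes[name] = modelcls
            return modelcls
        return func

    @classmethod
    def from_model_architecture(cls, arch: str) -> type["Model"]:
        # look up the converter class for a config.json "architectures" entry
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported") from None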


@@ -4182,6 +4182,21 @@ class MiniMaxM2Model(Model):
         return super().modify_tensors(data_torch, name, bid)
 
 
+@Model.register("SmolLM3ForCausalLM")
+class SmolLM3Model(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.SMOLLM3
+
+    def set_vocab(self):
+        super().set_vocab()
+        # remove unsupported array slicing in chat template
+        # ref: https://huggingface.co/ggml-org/SmolLM3-3B-GGUF/discussions/1
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        if tokenizer.chat_template is not None:
+            chat_template = tokenizer.chat_template.replace("[:]", "")
+            self.gguf_writer.add_chat_template(chat_template)
+
+
 @Model.register("Dots1ForCausalLM")
 class Dots1Model(Qwen2MoeModel):
     model_arch = gguf.MODEL_ARCH.DOTS1
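For context on the set_vocab override above: SmolLM3's chat template uses a Python-style full slice ("[:]"), which the Jinja subset used at inference time does not implement, so the converter strips it before writing the template into the GGUF metadata. A small self-contained sketch of the transform (the template fragment here is hypothetical, not SmolLM3's actual template):

# Hypothetical fragment in the style of a chat template; "[:]" is a
# copy-slice that the inference-side Jinja engine rejects.
template = "{%- for message in messages[:] -%}{{ message.content }}{%- endfor -%}"

# Same transform as SmolLM3Model.set_vocab: drop the slice, keep the iteration.
fixed = template.replace("[:]", "")
assert fixed == "{%- for message in messages -%}{{ message.content }}{%- endfor -%}"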