Add support for SmolLM3 (#934)

* Convert from HF

* Model loading and compute graph

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Kawrakow
2025-11-10 15:40:12 +02:00
committed by GitHub
parent a313b71bf8
commit e4145c013f
10 changed files with 199 additions and 10 deletions


@@ -130,6 +130,8 @@ struct create_tensors_helper : public create_tensors_helper_interface {
bool create_minimaxm2_tensors(const LLM_TN & tn);
bool create_smollm3_tensors(const LLM_TN & tn);
llama_model_loader & ml;
llama_model & model;
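For readers unfamiliar with this helper: each supported architecture gets its own create_*_tensors method, and the bool return value reports whether the weights can stay in the mmap-backed buffer (judging from the `use_mmap_buffer &= !merge_qkv(...)` line further down, merging tensors opts a layer out of it). Below is a minimal standalone sketch of that dispatch pattern; the names (Arch, TensorBuilder) are invented for illustration and are not the actual create_tensors_helper interface.

// Simplified illustration of the per-architecture tensor-creation dispatch.
// Arch and TensorBuilder are invented for this sketch only.
#include <cstdio>
#include <stdexcept>

enum class Arch { MiniMaxM2, SmolLM3 };

struct TensorBuilder {
    // Each method returns whether the weights can stay mmap-backed.
    bool create_minimaxm2_tensors() { return true; }
    bool create_smollm3_tensors()   { return true; }

    bool create_tensors(Arch arch) {
        switch (arch) {
            case Arch::MiniMaxM2: return create_minimaxm2_tensors();
            case Arch::SmolLM3:   return create_smollm3_tensors();
        }
        throw std::runtime_error("unknown architecture");
    }
};

int main() {
    TensorBuilder b;
    std::printf("mmap ok: %d\n", b.create_tensors(Arch::SmolLM3) ? 1 : 0);
}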
@@ -2466,6 +2468,28 @@ bool create_tensors_helper::create_minimaxm2_tensors(const LLM_TN & tn) {
return use_mmap_buffer;
}
bool create_tensors_helper::create_smollm3_tensors(const LLM_TN & tn) {
LOADING_PRELUDE
create_embd_output(tn, n_embd, n_vocab);
for (int i = 0; i < n_layer; ++i) {
ggml_context* ctx_layer = ctx_for_layer(i);
ggml_context* ctx_split = ctx_for_layer_split(i);
auto & layer = model.layers[i];
layer.attn_norm = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd }, 0);
use_mmap_buffer &= !merge_qkv(tn, i, 0);
layer.wo = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, 0);
layer.ffn_norm = create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), { n_embd }, 0);
create_std_ffn(i, tn, layer, n_ff, n_embd, ctx_split);
}
return use_mmap_buffer;
}
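To make the per-layer shapes concrete, here is a small self-contained sketch of the tensors this loop creates, evaluated with hyperparameters resembling SmolLM3-3B (n_embd = 2048, 16 query heads, 4 KV heads, head size 128, n_ff = 11008). These numbers are assumptions for illustration, not values read from the diff, and the fused-QKV row count shown is the conventional GQA layout, n_embd_head_k * (n_head + 2 * n_head_kv), which is presumably what merge_qkv builds.

// Illustrative shape bookkeeping for one SmolLM3 layer; hyperparameters are
// assumed (roughly SmolLM3-3B) and the fused-QKV layout is the conventional
// GQA one, not necessarily byte-for-byte what merge_qkv() constructs.
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd        = 2048;
    const int64_t n_head        = 16;    // query heads
    const int64_t n_head_kv     = 4;     // grouped KV heads
    const int64_t n_embd_head_k = 128;   // per-head dimension
    const int64_t n_ff          = 11008; // FFN inner size

    const int64_t qkv_rows = n_embd_head_k * (n_head + 2 * n_head_kv); // fused Q, K, V
    std::printf("attn_norm : [%lld]\n",       (long long) n_embd);
    std::printf("wqkv      : [%lld, %lld]\n", (long long) n_embd, (long long) qkv_rows);
    std::printf("wo        : [%lld, %lld]\n", (long long) (n_embd_head_k * n_head), (long long) n_embd);
    std::printf("ffn_norm  : [%lld]\n",       (long long) n_embd);
    std::printf("ffn gate/up/down use n_ff = %lld\n", (long long) n_ff);
}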
bool create_tensors_helper::merge_qkv(const LLM_TN & tn, int i, int bias) {
auto& hparams = model.hparams;
const int64_t n_head = hparams.n_head();
@@ -2699,6 +2723,8 @@ bool create_tensors_helper::create_tensors() {
use_mmap_buffer = create_bailingmoe2_tensors(tn); break;
case LLM_ARCH_MINIMAX_M2:
use_mmap_buffer = create_minimaxm2_tensors(tn); break;
case LLM_ARCH_SMOLLM3:
use_mmap_buffer = create_smollm3_tensors(tn); break;
default:
throw std::runtime_error("unknown architecture");
}
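The commit title also mentions the compute graph, which is not part of the hunks shown above. SmolLM3's main quirk relative to a standard Llama-style decoder is NoPE: rotary position embeddings are reportedly dropped on a subset of layers (every 4th layer in the released 3B model), so the graph builder needs a per-layer "use RoPE?" decision. The following standalone sketch shows only that selection logic; the interval value and the exact skip rule are assumptions for illustration, not code from this commit.

// Per-layer RoPE on/off selection for a NoPE-style model.
// The interval (4) and the "skip when (il + 1) % interval == 0" rule are
// assumptions for illustration; the commit's actual graph code is not shown here.
#include <cstdio>

static bool layer_uses_rope(int il, int nope_interval) {
    if (nope_interval <= 0) return true;     // NoPE disabled, every layer uses RoPE
    return ((il + 1) % nope_interval) != 0;  // e.g. layers 3, 7, 11, ... skip RoPE
}

int main() {
    const int n_layer       = 36;  // assumed SmolLM3-3B depth
    const int nope_interval = 4;   // assumed: every 4th layer has no RoPE
    for (int il = 0; il < n_layer; ++il) {
        std::printf("layer %2d: %s\n", il, layer_uses_rope(il, nope_interval) ? "RoPE" : "NoPE");
    }
}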