From 6c53a97122ee66a30d6510c1f83e4017dade2fae Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 29 Oct 2025 11:15:20 +0200 Subject: [PATCH] Don't ignore the return value of create_tensors(). Otherwise, when q, k, v get merged and we are running on the CPU, we get a crash because the backend tries to use mmap, which no longer works. --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index b740b7c1..3fba6574 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1684,7 +1684,7 @@ static bool llm_load_tensors( throw std::runtime_error("model has expert layers but no expert layers are used"); } - cth->create_tensors(); + use_mmap_buffer = cth->create_tensors(); ml.done_getting_tensors();