From 6c53a97122ee66a30d6510c1f83e4017dade2fae Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 29 Oct 2025 11:15:20 +0200 Subject: [PATCH] Don't ignore the return value of create_tensors(). Otherwise, when q, k, v get merged and we are running on the CPU, we get a crash because the backend tries to use mmap, which no longer works. --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index b740b7c1..3fba6574 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1684,7 +1684,7 @@ static bool llm_load_tensors( throw std::runtime_error("model has expert layers but no expert layers are used"); } - cth->create_tensors(); + use_mmap_buffer = cth->create_tensors(); ml.done_getting_tensors();