mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-22 22:24:11 +00:00
Honor mmap setting when using tensor overrides (#270)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -8015,7 +8015,7 @@ static bool llm_load_tensors(
|
||||
// only the mmap region containing the tensors in the model is mapped to the backend buffer
|
||||
// this is important for metal with apple silicon: if the entire model could be mapped to a metal buffer, then we could just use metal for all layers
|
||||
// this allows using partial offloading when the model size exceeds the metal buffer size, but not the RAM size
|
||||
if (ml.use_mmap && use_mmap_buffer && buft == llama_default_buffer_type_cpu(true)) {
|
||||
if (ml.use_mmap && use_mmap_buffer && (buft == llama_default_buffer_type_cpu(true) || buft == ggml_backend_cpu_buffer_type())) {
|
||||
for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
|
||||
void * addr = nullptr;
|
||||
size_t first, last;
|
||||
|
||||
Reference in New Issue
Block a user