Check for NaNs while loading the model. (#727)

* Check for NaNs while loading the model. * Also tell which experts have NaNs. * Add command line option to validate quants * Add checks for more quantization types * Add checks for more quantization types --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-02-21 05:34:08 +00:00 · 2025-08-27 19:00:17 +03:00
parent ca5b6ab9b1
commit e760b4dc41
6 changed files with 199 additions and 2 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4751,6 +4751,7 @@ static bool llm_load_tensors(
        int main_gpu,
        const float * tensor_split,
        bool use_mlock,
+        bool validate_quants,
        llama_progress_callback progress_callback,
        void * progress_callback_user_data) {
    model.t_start_us = ggml_time_us();
@@ -7261,6 +7262,19 @@ static bool llm_load_tensors(
        if (n_modified > 0) printf("============ Modified %d tensors\n", n_modified);
    }

+    if (validate_quants) {
+        int nbad = 0;
+        for (auto& it : model.tensors_by_name) {
+            if (ggml_backend_buffer_is_host(it.second->buffer)) {
+                if (!iqk_validate_tensor(it.second)) ++nbad;
+            }
+        }
+        if (nbad > 0) {
+            LLAMA_LOG_ERROR("Found %d bad tensors in model\n", nbad);
+            throw std::runtime_error("Bad tensors in model");
+        }
+    }
+
    if (!ml.use_mmap && ml.repack_tensors) {
        int n_repacked = 0;
        for (auto& it : model.tensors_by_name) {
@@ -7361,7 +7375,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
 #endif

        if (!llm_load_tensors(
-            ml, model, params.n_gpu_layers, params.mla, params.split_mode,  params.main_gpu, params.tensor_split, params.use_mlock,
+            ml, model, params.n_gpu_layers, params.mla, params.split_mode,  params.main_gpu, params.tensor_split,
+            params.use_mlock, params.validate_quants,
            params.progress_callback, params.progress_callback_user_data
        )) {
            return -2;