Check for NaNs while loading the model. (#727)

* Check for NaNs while loading the model.

* Also tell which experts have NaNs.

* Add command line option to validate quants

* Add checks for more quantization types

* Add checks for more quantizagtion types

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-08-27 19:00:17 +03:00
committed by GitHub
parent ca5b6ab9b1
commit e760b4dc41
6 changed files with 199 additions and 2 deletions

View File

@@ -4751,6 +4751,7 @@ static bool llm_load_tensors(
int main_gpu,
const float * tensor_split,
bool use_mlock,
bool validate_quants,
llama_progress_callback progress_callback,
void * progress_callback_user_data) {
model.t_start_us = ggml_time_us();
@@ -7261,6 +7262,19 @@ static bool llm_load_tensors(
if (n_modified > 0) printf("============ Modified %d tensors\n", n_modified);
}
if (validate_quants) {
int nbad = 0;
for (auto& it : model.tensors_by_name) {
if (ggml_backend_buffer_is_host(it.second->buffer)) {
if (!iqk_validate_tensor(it.second)) ++nbad;
}
}
if (nbad > 0) {
LLAMA_LOG_ERROR("Found %d bad tensors in model\n", nbad);
throw std::runtime_error("Bad tensors in model");
}
}
if (!ml.use_mmap && ml.repack_tensors) {
int n_repacked = 0;
for (auto& it : model.tensors_by_name) {
@@ -7361,7 +7375,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
#endif
if (!llm_load_tensors(
ml, model, params.n_gpu_layers, params.mla, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
ml, model, params.n_gpu_layers, params.mla, params.split_mode, params.main_gpu, params.tensor_split,
params.use_mlock, params.validate_quants,
params.progress_callback, params.progress_callback_user_data
)) {
return -2;