mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-21 05:34:08 +00:00
Check for NaNs while loading the model. (#727)
* Check for NaNs while loading the model. * Also tell which experts have NaNs. * Add command line option to validate quants * Add checks for more quantization types * Add checks for more quantizagtion types --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -4751,6 +4751,7 @@ static bool llm_load_tensors(
|
||||
int main_gpu,
|
||||
const float * tensor_split,
|
||||
bool use_mlock,
|
||||
bool validate_quants,
|
||||
llama_progress_callback progress_callback,
|
||||
void * progress_callback_user_data) {
|
||||
model.t_start_us = ggml_time_us();
|
||||
@@ -7261,6 +7262,19 @@ static bool llm_load_tensors(
|
||||
if (n_modified > 0) printf("============ Modified %d tensors\n", n_modified);
|
||||
}
|
||||
|
||||
if (validate_quants) {
|
||||
int nbad = 0;
|
||||
for (auto& it : model.tensors_by_name) {
|
||||
if (ggml_backend_buffer_is_host(it.second->buffer)) {
|
||||
if (!iqk_validate_tensor(it.second)) ++nbad;
|
||||
}
|
||||
}
|
||||
if (nbad > 0) {
|
||||
LLAMA_LOG_ERROR("Found %d bad tensors in model\n", nbad);
|
||||
throw std::runtime_error("Bad tensors in model");
|
||||
}
|
||||
}
|
||||
|
||||
if (!ml.use_mmap && ml.repack_tensors) {
|
||||
int n_repacked = 0;
|
||||
for (auto& it : model.tensors_by_name) {
|
||||
@@ -7361,7 +7375,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
|
||||
#endif
|
||||
|
||||
if (!llm_load_tensors(
|
||||
ml, model, params.n_gpu_layers, params.mla, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
|
||||
ml, model, params.n_gpu_layers, params.mla, params.split_mode, params.main_gpu, params.tensor_split,
|
||||
params.use_mlock, params.validate_quants,
|
||||
params.progress_callback, params.progress_callback_user_data
|
||||
)) {
|
||||
return -2;
|
||||
|
||||
Reference in New Issue
Block a user