mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
imatrix: be able to specify the name of the output tensor
For some models the same tensor is used for token embeddings and output. This tensor tends to be named token_embedding.weight rather than output.weight, which prevents us from collecting imatrix data for it. With this commit we can tell the imatrix tool the name of the output tensor.
This commit is contained in:
@@ -1599,6 +1599,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||||||
params.process_output = true;
|
params.process_output = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (arg == "--output-tensor-name") {
|
||||||
|
if (++i >= argc) {
|
||||||
|
invalid_param = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
params.output_tensor_name = argv[i];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (arg == "--no-ppl") {
|
if (arg == "--no-ppl") {
|
||||||
params.compute_ppl = false;
|
params.compute_ppl = false;
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -224,6 +224,7 @@ struct gpt_params {
|
|||||||
|
|
||||||
// imatrix params
|
// imatrix params
|
||||||
std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file
|
std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file
|
||||||
|
std::string output_tensor_name = "output.weight"; // name of the output tensor
|
||||||
|
|
||||||
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
|
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
|
||||||
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
|
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
|
||||||
|
|||||||
@@ -83,7 +83,8 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||||||
if (t->op != GGML_OP_MUL_MAT) return false;
|
if (t->op != GGML_OP_MUL_MAT) return false;
|
||||||
// why are small batches ignored (<16 tokens)?
|
// why are small batches ignored (<16 tokens)?
|
||||||
if (src1->ne[1] < 16 || src1->type != GGML_TYPE_F32) return false;
|
if (src1->ne[1] < 16 || src1->type != GGML_TYPE_F32) return false;
|
||||||
if (!(wname.substr(0, 4) == "blk." || (m_params.process_output && wname == "output.weight"))) return false;
|
//printf("wname = %s\n", wname.c_str());
|
||||||
|
if (!(wname.substr(0, 4) == "blk." || (m_params.process_output && wname == m_params.output_tensor_name))) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user