Disable pipeline parallelism when a tensor override is present or allocation fails (#879)

* disable pipeline parallelism when a tensor override is present

* disable pipeline parallelism if allocation fails

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-10-31 12:20:48 +00:00
committed by GitHub
parent 14760aaf46
commit c7dbe3f2c1
4 changed files with 28 additions and 7 deletions

View File

@@ -2658,7 +2658,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
auto mparams = llama_model_params_from_gpt_params(params);
llama_model * model = nullptr;
if (!params.hf_repo.empty() && !params.hf_file.empty()) {
model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
} else if (!params.model_url.empty()) {