Disable pipeline parallelism when a tensor override is present or allocation fails (#879)

* disable pipeline parallelism when tensor override present

* disable pipeline parallelism if allocation fails

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-10-31 12:20:48 +00:00
committed by GitHub
parent 14760aaf46
commit c7dbe3f2c1
4 changed files with 28 additions and 7 deletions

View File

@@ -1237,6 +1237,10 @@ std::string LLM_TN::operator()(llm_tensor tensor, const std::string & suffix, in
return ::format(LLM_TENSOR_NAMES.at(arch).at(tensor).c_str(), bid, xid) + "." + suffix;
}
// Records in `tensor_overrides` whether the model parameters carry tensor
// buffer-type overrides: the flag is true only when the override array pointer
// is set and the `pattern` field of its first entry is truthy.
void llama_model::set_tensor_overrides(const llama_model_params& params) {
    const auto * const overrides = params.tensor_buft_overrides;
    if (!overrides) {
        // No override array was supplied at all.
        tensor_overrides = false;
        return;
    }
    // Only the first entry is inspected; later entries are not examined here.
    tensor_overrides = overrides[0].pattern ? true : false;
}
std::string llama_model_ftype_name(llama_ftype ftype) {
if (ftype & LLAMA_FTYPE_GUESSED) {
return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";