Support --device and --device-draft parameters (#866)

* add --device and --device-draft parameters

* don't print debug message in release mode

* fix

* bug fix to throw exception when no device is specified

* add const

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-10-27 16:13:28 +00:00
committed by GitHub
parent eb8116b097
commit 904e994bfb
12 changed files with 283 additions and 40 deletions

View File

@@ -1249,6 +1249,7 @@ struct server_context {
LOG_INFO("loading draft model", {{"model", params.model_draft}});
gpt_params params_dft;
params_dft.devices = params.devices_draft;
params_dft.model = params.model_draft;
params_dft.n_ctx = params.n_ctx_draft == 0 ? params.n_ctx / params.n_parallel : params.n_ctx_draft;
params_dft.n_gpu_layers = params.n_gpu_layers_draft;
@@ -1273,7 +1274,7 @@ struct server_context {
cparams_dft = llama_context_params_from_gpt_params(params_dft);
cparams_dft.n_batch = n_ctx_dft;
model_draft = llama_init_dft.model;
ctx_draft = llama_init_dft.context;
}