From 4d09e04501baf1562d20fbaa383c3063314c8d45 Mon Sep 17 00:00:00 2001 From: Marcel Coetzee <34739235+Pipboyguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:35:26 +0200 Subject: [PATCH] common : add env vars for cache_type_k/v, mlock, k_cache_hadamard and enable env vars for all tools (#1402) Two changes: 1. Add four missing environment variable bindings to gpt_params_parse_from_env(): - LLAMA_ARG_CACHE_TYPE_K (string, e.g. "q8_0") - LLAMA_ARG_CACHE_TYPE_V (string, e.g. "q8_0") - LLAMA_ARG_MLOCK (bool, "1"/"true") - LLAMA_ARG_K_CACHE_HADAMARD (bool, "1"/"true") 2. Call gpt_params_parse_from_env() from gpt_params_parse() so that ALL tools (llama-cli, llama-bench, etc.) respect env vars, not just llama-server. Env vars act as defaults; CLI flags override. Follows the existing get_env() pattern and uses the same LLAMA_ARG_ prefix convention as the other env vars. Co-authored-by: Pipboyguy <> --- common/common.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/common.cpp b/common/common.cpp index f4074a61..0eec74ed 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -495,10 +495,15 @@ void gpt_params_parse_from_env(gpt_params & params) { get_env("LLAMA_ARG_CONT_BATCHING", params.cont_batching); get_env("LLAMA_ARG_HOST", params.hostname); get_env("LLAMA_ARG_PORT", params.port); + get_env("LLAMA_ARG_CACHE_TYPE_K", params.cache_type_k); + get_env("LLAMA_ARG_CACHE_TYPE_V", params.cache_type_v); + get_env("LLAMA_ARG_MLOCK", params.use_mlock); + get_env("LLAMA_ARG_K_CACHE_HADAMARD", params.k_cache_hadamard); } bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + gpt_params_parse_from_env(params); const auto params_org = params; // the example can modify the default params try {