Create parameters overview (#1269)

* raw parameters.md * fix small typos in common.cpp * Update build args in parameters.md * Update parameters.md - format as table - sections * Update README.md - quickstart - build and run * Update parameters.md other tools examples * add PR links * multiple updates to parameters.md - description - add jargon section - add suggestions from feedbacks * don't imply that only linux is supported in README.md * add alias to parameters.md * Update README.md with recent models and features * Update parameters.md with latest features * address suggestions - no-ooae - placeholder for common commands - no-kv-offload - llama-sweep-bench - placeholder for unique parameters * specify Linux distro in README.md
2026-02-27 00:24:11 +00:00 · 2026-02-20 08:20:56 +02:00
parent 0f411b02e2
commit b2cb4512c5
3 changed files with 490 additions and 4 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2236,7 +2236,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
    options.push_back({ "*",           "-amb,  --attention-max-batch",  "max batch size for attention computations (default: %d)", params.attn_max_batch});
    options.push_back({ "*",           "-no-fmoe, --no-fused-moe",      "disable fused MoE (default: %s)", params.fused_moe_up_gate ? "enabled" : "disabled" });
    options.push_back({ "*",           "-ger,  --grouped-expert-routing", "enable grouped expert routing (default: %s)", params.grouped_expert_routing ? "enabled" : "disabled" });
-    options.push_back({ "*",           "-no-fug, --no-fused-up-gate",   "disaable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
+    options.push_back({ "*",           "-no-fug, --no-fused-up-gate",   "disable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
    options.push_back({ "*",           "-no-mmad, --no-fused-mul-multiadd", "disable fused mul-multi_add (default: %s)", params.fused_mmad? "enabled" : "disabled" });
    //options.push_back({ "*",           "-rcache, --rope-cache",         "enable RoPE cache (default: %s)", params.rope_cache ? "enabled" : "disabled" });
    options.push_back({ "*",           "-gr, --graph-reuse",            "enable graph reuse (default: %s)", params.graph_reuse ? "enabled" : "disabled" });
@@ -2249,7 +2249,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
    options.push_back({ "*",         "-smf32, --split-mode-f32,",       "Use f32 for data exchange between GPUs (default: %d)", false});
    options.push_back({ "*",         "-grt, --graph-reduce-type",       "Type for data exchange between GPUs (default: %s)", "f32"});
    options.push_back({ "*",         "-smgs, --split-mode-graph-scheduling,", "Force Split Mode Graph Scheduling (default: %d)", params.split_mode_graph_scheduling});
-    options.push_back({ "*",         "-sas,  ==scheduler_async,",       "Async evaluation of compute graphs: %d)", params.scheduler_async});
+    options.push_back({ "*",         "-sas,  --scheduler_async,",       "Async evaluation of compute graphs: %d)", params.scheduler_async});
    options.push_back({ "*",         "-vq, --validate-quants",          "validate quantized data while loading the model (default: %d)", params.validate_quants});
    options.push_back({ "*",           "-p,    --prompt PROMPT",        "prompt to start generation with\n"
                                                                        "in conversation mode, this will be used as system prompt\n"