Create parameters overview (#1269)

* raw parameters.md

* fix small typos in common.cpp

* Update build args in parameters.md

* Update parameters.md

- format as table
- sections

* Update README.md

- quickstart
- build and run

* Update parameters.md

other tools examples

* add PR links

* multiple updates to parameters.md

- description
- add jargon section
- add suggestions from feedbacks

* don't imply that only linux is supported in README.md

* add alias to parameters.md

* Update README.md with recent models and features

* Update parameters.md with latest features

* address suggestions

- no-ooae
- placeholder for common commands
- no-kv-offload
- llama-sweep-bench
- placeholder for unique parameters

* specify Linux distro in README.md
This commit is contained in:
mcm007
2026-02-20 08:20:56 +02:00
committed by GitHub
parent 0f411b02e2
commit b2cb4512c5
3 changed files with 490 additions and 4 deletions

View File

@@ -2236,7 +2236,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "*", "-amb, --attention-max-batch", "max batch size for attention computations (default: %d)", params.attn_max_batch});
options.push_back({ "*", "-no-fmoe, --no-fused-moe", "disable fused MoE (default: %s)", params.fused_moe_up_gate ? "enabled" : "disabled" });
options.push_back({ "*", "-ger, --grouped-expert-routing", "enable grouped expert routing (default: %s)", params.grouped_expert_routing ? "enabled" : "disabled" });
options.push_back({ "*", "-no-fug, --no-fused-up-gate", "disaable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
options.push_back({ "*", "-no-fug, --no-fused-up-gate", "disable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
options.push_back({ "*", "-no-mmad, --no-fused-mul-multiadd", "disable fused mul-multi_add (default: %s)", params.fused_mmad? "enabled" : "disabled" });
//options.push_back({ "*", "-rcache, --rope-cache", "enable RoPE cache (default: %s)", params.rope_cache ? "enabled" : "disabled" });
options.push_back({ "*", "-gr, --graph-reuse", "enable graph reuse (default: %s)", params.graph_reuse ? "enabled" : "disabled" });
@@ -2249,7 +2249,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "*", "-smf32, --split-mode-f32,", "Use f32 for data exchange between GPUs (default: %d)", false});
options.push_back({ "*", "-grt, --graph-reduce-type", "Type for data exchange between GPUs (default: %s)", "f32"});
options.push_back({ "*", "-smgs, --split-mode-graph-scheduling,", "Force Split Mode Graph Scheduling (default: %d)", params.split_mode_graph_scheduling});
options.push_back({ "*", "-sas, ==scheduler_async,", "Async evaluation of compute graphs: %d)", params.scheduler_async});
options.push_back({ "*", "-sas, --scheduler_async,", "Async evaluation of compute graphs: %d)", params.scheduler_async});
options.push_back({ "*", "-vq, --validate-quants", "validate quantized data while loading the model (default: %d)", params.validate_quants});
options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with\n"
"in conversation mode, this will be used as system prompt\n"