mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-27 00:24:11 +00:00
Create parameters overview (#1269)
* raw parameters.md * fix small typos in common.cpp * Update build args in parameters.md * Update parameters.md - format as table - sections * Update README.md - quickstart - build and run * Update parameters.md other tools examples * add PR links * multiple updates to parameters.md - description - add jargon section - add suggestions from feedbacks * don't imply that only linux is supported in README.md * add alias to parameters.md * Update README.md with recent models and features * Update parameters.md with latest features * address suggestions - no-ooae - placeholder for common commands - no-kv-offload - llama-sweep-bench - placeholder for unique parameters * specify Linux distro in README.md
This commit is contained in:
@@ -2236,7 +2236,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||
options.push_back({ "*", "-amb, --attention-max-batch", "max batch size for attention computations (default: %d)", params.attn_max_batch});
|
||||
options.push_back({ "*", "-no-fmoe, --no-fused-moe", "disable fused MoE (default: %s)", params.fused_moe_up_gate ? "enabled" : "disabled" });
|
||||
options.push_back({ "*", "-ger, --grouped-expert-routing", "enable grouped expert routing (default: %s)", params.grouped_expert_routing ? "enabled" : "disabled" });
|
||||
options.push_back({ "*", "-no-fug, --no-fused-up-gate", "disaable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
|
||||
options.push_back({ "*", "-no-fug, --no-fused-up-gate", "disable fused up-gate (default: %s)", params.fused_up_gate ? "enabled" : "disabled" });
|
||||
options.push_back({ "*", "-no-mmad, --no-fused-mul-multiadd", "disable fused mul-multi_add (default: %s)", params.fused_mmad? "enabled" : "disabled" });
|
||||
//options.push_back({ "*", "-rcache, --rope-cache", "enable RoPE cache (default: %s)", params.rope_cache ? "enabled" : "disabled" });
|
||||
options.push_back({ "*", "-gr, --graph-reuse", "enable graph reuse (default: %s)", params.graph_reuse ? "enabled" : "disabled" });
|
||||
@@ -2249,7 +2249,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||
options.push_back({ "*", "-smf32, --split-mode-f32,", "Use f32 for data exchange between GPUs (default: %d)", false});
|
||||
options.push_back({ "*", "-grt, --graph-reduce-type", "Type for data exchange between GPUs (default: %s)", "f32"});
|
||||
options.push_back({ "*", "-smgs, --split-mode-graph-scheduling,", "Force Split Mode Graph Scheduling (default: %d)", params.split_mode_graph_scheduling});
|
||||
options.push_back({ "*", "-sas, ==scheduler_async,", "Async evaluation of compute graphs: %d)", params.scheduler_async});
|
||||
options.push_back({ "*", "-sas, --scheduler_async,", "Async evaluation of compute graphs: %d)", params.scheduler_async});
|
||||
options.push_back({ "*", "-vq, --validate-quants", "validate quantized data while loading the model (default: %d)", params.validate_quants});
|
||||
options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with\n"
|
||||
"in conversation mode, this will be used as system prompt\n"
|
||||
|
||||
Reference in New Issue
Block a user