From 15f891b277c4532d63de25f6e95dd862603cb5db Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 22 Jul 2024 16:25:26 -0400 Subject: [PATCH] Args: Update to latest config.yml Fix order of params to follow the same flow as config.yml Signed-off-by: kingbri --- common/args.py | 84 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/common/args.py b/common/args.py index 14508a7..6bb0804 100644 --- a/common/args.py +++ b/common/args.py @@ -17,13 +17,15 @@ def init_argparser(): """Creates an argument parser that any function can use""" parser = argparse.ArgumentParser( - epilog="These args are only for a subset of the config. " - + "Please edit config.yml for all options!" + epilog="NOTE: These args serve to override parts of the config. " + + "It's highly recommended to edit config.yml for all options and " + + "better descriptions!" ) add_network_args(parser) add_model_args(parser) add_logging_args(parser) add_developer_args(parser) + add_sampling_args(parser) add_config_args(parser) return parser @@ -64,6 +66,11 @@ def add_network_args(parser: argparse.ArgumentParser): type=str_to_bool, help="Disable HTTP token authenticaion with requests", ) + network_group.add_argument( + "--send-tracebacks", + type=str_to_bool, + help="Decide whether to send error tracebacks over the API", + ) def add_model_args(parser: argparse.ArgumentParser): @@ -74,6 +81,17 @@ def add_model_args(parser: argparse.ArgumentParser): "--model-dir", type=str, help="Overrides the directory to look for models" ) model_group.add_argument("--model-name", type=str, help="An initial model to load") + model_group.add_argument( + "--use-dummy-models", + type=str_to_bool, + help="Add dummy OAI model names for API queries", + ) + model_group.add_argument( + "--use-as-default", + type=str, + nargs="+", + help="Names of args to use as a default fallback for API load requests ", + ) model_group.add_argument( "--max-seq-len", type=int, help="Override the maximum model sequence length" ) @@ -82,25 +100,17 @@ def add_model_args(parser: argparse.ArgumentParser): type=str_to_bool, help="Overrides base model context length", ) - model_group.add_argument( - "--cache-size", - type=int, - help="The size of the prompt cache (in number of tokens) to allocate", - ) - model_group.add_argument( - "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb" - ) - model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK") - model_group.add_argument( - "--prompt-template", - type=str, - help="Set the prompt template for chat completions", - ) model_group.add_argument( "--gpu-split-auto", type=str_to_bool, help="Automatically allocate resources to GPUs", ) + model_group.add_argument( + "--autosplit-reserve", + type=int, + nargs="+", + help="Reserve VRAM used for autosplit loading (in MBs) ", + ) model_group.add_argument( "--gpu-split", type=float, @@ -108,15 +118,44 @@ def add_model_args(parser: argparse.ArgumentParser): help="An integer array of GBs of vram to split between GPUs. " + "Ignored if gpu_split_auto is true", ) + model_group.add_argument( + "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb" + ) + model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK") + model_group.add_argument( + "--cache-mode", + type=str, + help="Set the quantization level of the K/V cache. Options: (FP16, Q8, Q6, Q4)", + ) + model_group.add_argument( + "--cache-size", + type=int, + help="The size of the prompt cache (in number of tokens) to allocate", + ) + model_group.add_argument( + "--chunk-size", + type=int, + help="Chunk size for prompt ingestion", + ) + model_group.add_argument( + "--max-batch-size", + type=int, + help="Maximum amount of prompts to process at one time", + ) + model_group.add_argument( + "--prompt-template", + type=str, + help="Set the jinja2 prompt template for chat completions", + ) model_group.add_argument( "--num-experts-per-token", type=int, help="Number of experts to use per token in MoE models", ) model_group.add_argument( - "--use-cfg", + "--fasttensors", type=str_to_bool, - help="Enables CFG support", + help="Possibly increases model loading speeds", ) @@ -151,3 +190,12 @@ def add_developer_args(parser: argparse.ArgumentParser): type=str_to_bool, help="Disables API request streaming", ) + + +def add_sampling_args(parser: argparse.ArgumentParser): + """Adds sampling-specific arguments""" + + sampling_group = parser.add_argument_group("sampling") + sampling_group.add_argument( + "--override-preset", type=str, help="Select a sampler override preset" + )