Test inference script: add max_output_len option

This commit is contained in:
turboderp
2024-04-05 21:35:52 +02:00
parent 97e8123c71
commit 3be55a97af

View File

@@ -57,6 +57,7 @@ parser.add_argument("-nwu", "--no_warmup", action = "store_true", help = "Skip w
# Layer streaming: boolean switch (off unless the flag is given).
parser.add_argument(
    "-sl", "--stream_layers",
    action = "store_true",
    help = "Load model layer by layer (perplexity evaluation only)",
)
# Standard perplexity test: the only accepted value is "wiki2".
parser.add_argument(
    "-sp", "--standard_perplexity",
    choices = ["wiki2"],
    help = "Run standard (HF) perplexity test, stride 512 (experimental)",
)
# Rank reduction spec, kept as a raw string.
parser.add_argument(
    "-rr", "--rank_reduce",
    type = str,
    help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)",
)
# Max output chunk size, parsed as an integer; None when the option is omitted.
parser.add_argument(
    "-mol", "--max_output_len",
    type = int,
    help = "Set max output chunk size (incompatible with ppl tests)",
)
# Initialize model and tokenizer
@@ -87,7 +88,11 @@ if args.stream_layers:
# Hand the parsed args to model_init's argument check and option printout.
model_init.check_args(args)
model_init.print_options(args)
# NOTE(review): the single-line init call below and the multi-line call that follows it differ
# only in the extra max_output_len keyword. They look like the before/after sides of one diff
# hunk -- confirm that only one of them is meant to remain in the script.
model, tokenizer = model_init.init(args, allow_auto_split = True, skip_load = args.stream_layers, benchmark = True)
# Same call as above, additionally forwarding the --max_output_len argument to model_init.init.
model, tokenizer = model_init.init(args,
allow_auto_split = True,
skip_load = args.stream_layers,
benchmark = True,
max_output_len = args.max_output_len)
# cache starts out unset here.
cache = None
# Auto split