Test inference script: add max_output_len option

2026-04-20 14:29:28 +00:00 · 2024-04-05 21:35:52 +02:00
parent 97e8123c71
commit 3be55a97af
1 changed file with 6 additions and 1 deletion
--- a/test_inference.py
+++ b/test_inference.py
@@ -57,6 +57,7 @@ parser.add_argument("-nwu", "--no_warmup", action = "store_true", help = "Skip w
 parser.add_argument("-sl", "--stream_layers", action = "store_true", help = "Load model layer by layer (perplexity evaluation only)")
 parser.add_argument("-sp", "--standard_perplexity", choices = ["wiki2"], help = "Run standard (HF) perplexity test, stride 512 (experimental)")
 parser.add_argument("-rr", "--rank_reduce", type = str, help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)")
+parser.add_argument("-mol", "--max_output_len", type = int, help = "Set max output chunk size (incompatible with ppl tests)")

 # Initialize model and tokenizer

@@ -87,7 +88,11 @@ if args.stream_layers:

 model_init.check_args(args)
 model_init.print_options(args)
-model, tokenizer = model_init.init(args, allow_auto_split = True, skip_load = args.stream_layers, benchmark = True)
+model, tokenizer = model_init.init(args,
+                                   allow_auto_split = True,
+                                   skip_load = args.stream_layers,
+                                   benchmark = True,
+                                   max_output_len = args.max_output_len)
 cache = None

 # Auto split