Added --gpu_split argument and help text to test_inference.py

2026-04-20 14:29:28 +00:00 · 2024-03-18 21:36:22 +02:00
parent 4ca5ca35a6
commit 2ee57974dc
1 changed files with 1 additions and 0 deletions
--- a/test_inference.py
+++ b/test_inference.py
@@ -54,6 +54,7 @@ parser.add_argument("-nwu", "--no_warmup", action = "store_true", help = "Skip w
 parser.add_argument("-sl", "--stream_layers", action = "store_true", help = "Load model layer by layer (perplexity evaluation only)")
 parser.add_argument("-sp", "--standard_perplexity", choices = ["wiki2"], help = "Run standard (HF) perplexity test, stride 512 (experimental)")
 parser.add_argument("-rr", "--rank_reduce", type = str, help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)")
+parser.add_argument("-gs", "--gpu_split", type = str, help = "Split the model between multiple GPUs. Use `--gpu_split auto` for automatic handling or `--gpu_split x,y` to manually assign the VRAM on each device")

 # Initialize model and tokenizer