Added --gpu_split explanation in test_inference.py

This commit is contained in:
Thanasis Galianis
2024-03-18 21:36:22 +02:00
committed by GitHub
parent 4ca5ca35a6
commit 2ee57974dc

View File

@@ -54,6 +54,7 @@ parser.add_argument("-nwu", "--no_warmup", action = "store_true", help = "Skip w
# Register the remaining CLI options in one data-driven pass; order of
# registration (and therefore --help listing) matches the original calls.
_extra_options = [
    (("-sl", "--stream_layers"),
     dict(action = "store_true", help = "Load model layer by layer (perplexity evaluation only)")),
    (("-sp", "--standard_perplexity"),
     dict(choices = ["wiki2"], help = "Run standard (HF) perplexity test, stride 512 (experimental)")),
    (("-rr", "--rank_reduce"),
     dict(type = str, help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)")),
    (("-gs", "--gpu_split"),
     dict(type = str, help = "Split the model between multiple GPUs. Use `--gpu_split auto` for automatic handling or `--gpu_split x,y` to manually assign the VRAM on each device")),
]
for _flags, _kwargs in _extra_options:
    parser.add_argument(*_flags, **_kwargs)
# Initialize model and tokenizer