Merge pull request #374 from Lyrcaxis/patch-1

Fix installation step (install requirements) & Add multi-GPU explanation
2026-04-20 14:29:28 +00:00 · 2024-03-20 04:57:50 +01:00
parent fda60a2b8b 90251cac8d
commit 37c3b69958
3 changed files with 8 additions and 1 deletion
--- a/README.md
+++ b/README.md
@@ -39,15 +39,19 @@ then run:
 ```
 git clone https://github.com/turboderp/exllamav2
 cd exllamav2
+# Optionally, create and activate a new conda environment
+pip install -r requirements.txt
 pip install .

 python test_inference.py -m <path_to_model> -p "Once upon a time,"
+# Append the '--gpu_split auto' flag for multi-GPU inference
 ```

 A simple console chatbot is included. Run it with:

 ```
 python examples/chat.py -m <path_to_model> -mode llama
+# Append the '--gpu_split auto' flag for multi-GPU inference
 ```


@@ -79,6 +83,7 @@ To install the current dev version, clone the repo and run the setup script:
 ```
 git clone https://github.com/turboderp/exllamav2
 cd exllamav2
+pip install -r requirements.txt
 pip install .
 ```

--- a/examples/chat.py
+++ b/examples/chat.py
@@ -25,6 +25,7 @@ from chat_prompts import prompt_formats
 prompt_formats_list = list(prompt_formats.keys())

 # Options
+# NOTE: These options are in addition to the engine arguments that can be found in `model_init.py`.

 parser = argparse.ArgumentParser(description = "Simple Llama2 chat example for ExLlamaV2")
 parser.add_argument("-dm", "--draft_model_dir", type = str, default = None, help = "Path to draft model directory")
@@ -386,4 +387,4 @@ while True:

    if amnesia:
        user_prompts = []
-        responses_ids = []
+        responses_ids = []
--- a/test_inference.py
+++ b/test_inference.py
@@ -37,6 +37,7 @@ torch.set_printoptions(precision = 10)
 # torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
 # torch.set_float32_matmul_precision("medium")

+# NOTE: These options are in addition to the engine arguments that can be found in `model_init.py`.
 parser = argparse.ArgumentParser(description = "Test inference on ExLlamaV2 model")
 parser.add_argument("-ed", "--eval_dataset", type = str, help = "Perplexity evaluation dataset (.parquet file)")
 parser.add_argument("-er", "--eval_rows", type = int, default = 128, help = "Number of rows to apply from dataset")