Merge pull request #374 from Lyrcaxis/patch-1

Fix installation step (install requirements) & Add multi-GPU explanation
This commit is contained in:
turboderp
2024-03-20 04:57:50 +01:00
committed by GitHub
3 changed files with 8 additions and 1 deletion

View File

@@ -39,15 +39,19 @@ then run:
```
git clone https://github.com/turboderp/exllamav2
cd exllamav2
# Optionally, create and activate a new conda environment
pip install -r requirements.txt
pip install .
python test_inference.py -m <path_to_model> -p "Once upon a time,"
# Append the '--gpu_split auto' flag for multi-GPU inference
```
A simple console chatbot is included. Run it with:
```
python examples/chat.py -m <path_to_model> -mode llama
# Append the '--gpu_split auto' flag for multi-GPU inference
```
@@ -79,6 +83,7 @@ To install the current dev version, clone the repo and run the setup script:
```
git clone https://github.com/turboderp/exllamav2
cd exllamav2
pip install -r requirements.txt
pip install .
```

View File

@@ -25,6 +25,7 @@ from chat_prompts import prompt_formats
prompt_formats_list = list(prompt_formats.keys())
# Options
# (!!!) NOTE: These options are in addition to the engine arguments defined in `model_init.py` (!!!)
parser = argparse.ArgumentParser(description = "Simple Llama2 chat example for ExLlamaV2")
parser.add_argument("-dm", "--draft_model_dir", type = str, default = None, help = "Path to draft model directory")
@@ -386,4 +387,4 @@ while True:
if amnesia:
user_prompts = []
responses_ids = []
responses_ids = []

View File

@@ -37,6 +37,7 @@ torch.set_printoptions(precision = 10)
# torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
# torch.set_float32_matmul_precision("medium")
# (!!!) NOTE: These options are in addition to the engine arguments defined in `model_init.py` (!!!)
parser = argparse.ArgumentParser(description = "Test inference on ExLlamaV2 model")
parser.add_argument("-ed", "--eval_dataset", type = str, help = "Perplexity evaluation dataset (.parquet file)")
parser.add_argument("-er", "--eval_rows", type = int, default = 128, help = "Number of rows to apply from dataset")