mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-20 14:29:28 +00:00
Merge pull request #374 from Lyrcaxis/patch-1
Fix installation step (install requirements) & Add multi-GPU explanation
This commit is contained in:
@@ -39,15 +39,19 @@ then run:
|
||||
```
|
||||
git clone https://github.com/turboderp/exllamav2
|
||||
cd exllamav2
|
||||
# Optionally, create and activate a new conda environment
|
||||
pip install -r requirements.txt
|
||||
pip install .
|
||||
|
||||
python test_inference.py -m <path_to_model> -p "Once upon a time,"
|
||||
# Append the '--gpu_split auto' flag for multi-GPU inference
|
||||
```
|
||||
|
||||
A simple console chatbot is included. Run it with:
|
||||
|
||||
```
|
||||
python examples/chat.py -m <path_to_model> -mode llama
|
||||
# Append the '--gpu_split auto' flag for multi-GPU inference
|
||||
```
|
||||
|
||||
|
||||
@@ -79,6 +83,7 @@ To install the current dev version, clone the repo and run the setup script:
|
||||
```
|
||||
git clone https://github.com/turboderp/exllamav2
|
||||
cd exllamav2
|
||||
pip install -r requirements.txt
|
||||
pip install .
|
||||
```
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from chat_prompts import prompt_formats
|
||||
prompt_formats_list = list(prompt_formats.keys())
|
||||
|
||||
# Options
|
||||
# (!!!) NOTE: These options are in addition to the engine arguments defined in `model_init.py` (!!!)
|
||||
|
||||
parser = argparse.ArgumentParser(description = "Simple Llama2 chat example for ExLlamaV2")
|
||||
parser.add_argument("-dm", "--draft_model_dir", type = str, default = None, help = "Path to draft model directory")
|
||||
@@ -386,4 +387,4 @@ while True:
|
||||
|
||||
if amnesia:
|
||||
user_prompts = []
|
||||
responses_ids = []
|
||||
responses_ids = []
|
||||
|
||||
@@ -37,6 +37,7 @@ torch.set_printoptions(precision = 10)
|
||||
# torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
|
||||
# torch.set_float32_matmul_precision("medium")
|
||||
|
||||
# (!!!) NOTE: These options are in addition to the engine arguments defined in `model_init.py` (!!!)
|
||||
parser = argparse.ArgumentParser(description = "Test inference on ExLlamaV2 model")
|
||||
parser.add_argument("-ed", "--eval_dataset", type = str, help = "Perplexity evaluation dataset (.parquet file)")
|
||||
parser.add_argument("-er", "--eval_rows", type = int, default = 128, help = "Number of rows to apply from dataset")
|
||||
|
||||
Reference in New Issue
Block a user