{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "V100"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "Clone ExLlamaV2 and install dependencies"
      ],
      "metadata": {
        "id": "NTOhV8supsTA"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!git clone https://github.com/turboderp/exllamav2\n",
        "!cd exllamav2 && pip install -r requirements.txt"
      ],
      "metadata": {
        "id": "MkEIIMJdpk_d"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Optional: install Flash Attention"
      ],
      "metadata": {
        "id": "Wv96vR6HpNZF"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KjOm-GNZpMA0"
      },
      "outputs": [],
      "source": [
        "!pip install -U flash-attn"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Download a model. This may take a moment."
      ],
      "metadata": {
        "id": "MqAu9pcBqAf4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!mkdir my_model\n",
        "!huggingface-cli download turboderp/Mistral-7B-instruct-exl2 --revision 4.0bpw --local-dir my_model"
      ],
      "metadata": {
        "id": "4OotLYL3p7rD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Launch the chatbot example. On the first launch, this will compile ExLlamaV2's C++/CUDA extension, which can take several minutes on Colab."
      ],
      "metadata": {
        "id": "4DcCw_URrHja"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!cd exllamav2 && python examples/chat.py -m ../my_model -mode llama -pt -ncf -ngram"
      ],
      "metadata": {
        "id": "HbpsCnOoqKzk"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}