{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [], "gpuType": "V100" },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [ "Optional: install Flash Attention" ],
      "metadata": { "id": "Wv96vR6HpNZF" }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "KjOm-GNZpMA0" },
      "outputs": [],
      "source": [
        "# %pip (not !pip) guarantees the package is installed into the kernel's own environment\n",
        "%pip install flash-attn"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [ "Clone ExLlamaV2 and install dependencies" ],
      "metadata": { "id": "NTOhV8supsTA" }
    },
    {
      "cell_type": "code",
      "source": [
        "!git clone https://github.com/turboderp/exllamav2\n",
        "# install into the running kernel's environment; a path argument avoids relying on !cd,\n",
        "# which does not persist between shell invocations anyway\n",
        "%pip install -r exllamav2/requirements.txt"
      ],
      "metadata": { "id": "MkEIIMJdpk_d" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [ "Download a model. This may take a moment." ],
      "metadata": { "id": "MqAu9pcBqAf4" }
    },
    {
      "cell_type": "code",
      "source": [
        "# -y: never block on an interactive confirmation prompt during a notebook run\n",
        "!sudo apt-get install -y git-lfs\n",
        "!git lfs install\n",
        "# Clone the 4.0bpw branch directly instead of clone + git switch: cloning the default\n",
        "# branch first would pull its large LFS weight files only to discard them on switch.\n",
        "!git clone --branch 4.0bpw --single-branch https://huggingface.co/turboderp/Mistral-7B-instruct-exl2 my_model"
      ],
      "metadata": { "id": "4OotLYL3p7rD" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [ "Launch the chatbot example. On the first launch, this may also take a while to compile the C++/CUDA extension." ],
      "metadata": { "id": "4DcCw_URrHja" }
    },
    {
      "cell_type": "code",
      "source": [
        "!cd exllamav2 && python examples/chat.py -m ../my_model -mode llama -pt -ncf"
      ],
      "metadata": { "id": "HbpsCnOoqKzk" },
      "execution_count": null,
      "outputs": []
    }
  ]
}