diff --git a/README.md b/README.md index c054b142..45c94b57 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ Please do not open a bug report unless it is a bug in the code. You are welcome and ask for help there. However, please refrain from PMing me directly with general question or support. Ask in the discord and I will answer when I can. -### Training in RunPod cloud +## Training in RunPod Example RunPod template: **runpod/pytorch:2.2.0-py3.10-cuda12.1.1-devel-ubuntu22.04** > You need a minimum of 24GB VRAM, pick a GPU by your preference. @@ -142,26 +142,72 @@ pip install -r requirements.txt pip install --upgrade accelerate transformers diffusers huggingface_hub #Optional, run it if you run into issues ``` ### 2. Upload your dataset -- Create a new folder in the root, name it `dataset` or whatever you like -- Drag and drop your .jpg and .txt files inside the newly created dataset folder +- Create a new folder in the root, name it `dataset` or whatever you like. +- Drag and drop your .jpg, .jpeg, or .png images and .txt files inside the newly created dataset folder. ### 3. Login into Hugging Face with an Access Token -- Get a READ token from [here](https://huggingface.co/settings/tokens) -- Run ```huggingface-cli login``` and paste your token +- Get a READ token from [here](https://huggingface.co/settings/tokens) and request access to Flux.1-dev model from [here](https://huggingface.co/black-forest-labs/FLUX.1-dev). +- Run ```huggingface-cli login``` and paste your token. ### 4. Training -- Copy an example config file located at ```config/examples``` to the config folder and rename it to ```whatever_you_want.yml``` -- Edit the config following the comments in the file -- Change ```folder_path: "/path/to/images/folder"``` to your dataset path like ```folder_path: "/workspace/ai-toolkit/your-dataset"``` -- Run the file: ```python run.py config/whatever_you_want.yml``` +- Copy an example config file located at ```config/examples``` to the config folder and rename it to ```whatever_you_want.yml```. +- Edit the config following the comments in the file. +- Change ```folder_path: "/path/to/images/folder"``` to your dataset path like ```folder_path: "/workspace/ai-toolkit/your-dataset"```. +- Run the file: ```python run.py config/whatever_you_want.yml```. ### Screenshot from RunPod RunPod Training Screenshot - +## Training in Modal + +### 1. Setup +#### ai-toolkit: +``` +git clone https://github.com/ostris/ai-toolkit.git +cd ai-toolkit +git submodule update --init --recursive +python -m venv venv +source venv/bin/activate +pip install torch +pip install -r requirements.txt +pip install --upgrade accelerate transformers diffusers huggingface_hub #Optional, run it if you run into issues +``` +#### Modal: +- Run `pip install modal` to install the modal Python package. +- Run `modal setup` to authenticate (if this doesn’t work, try `python -m modal setup`). + +#### Hugging Face: +- Get a READ token from [here](https://huggingface.co/settings/tokens) and request access to Flux.1-dev model from [here](https://huggingface.co/black-forest-labs/FLUX.1-dev). +- Run `huggingface-cli login` and paste your token. + +### 2. Upload your dataset +- Drag and drop your dataset folder containing the .jpg, .jpeg, or .png images and .txt files in `ai-toolkit`. + +### 3. Configs +- Copy an example config file located at ```config/examples/modal``` to the `config` folder and rename it to ```whatever_you_want.yml```. 
+- Edit the config following the comments in the file, **be careful and follow the example `/root/ai-toolkit` paths**. + +### 4. Edit run_modal.py +- Set your entire local `ai-toolkit` path at `code_mount = modal.Mount.from_local_dir` like: + + ``` + code_mount = modal.Mount.from_local_dir("/Users/username/ai-toolkit", remote_path="/root/ai-toolkit") + ``` +- Choose a `GPU` and `Timeout` in `@app.function` _(default is A100 40GB and 2 hour timeout)_. + +### 5. Training +- Run the config file in your terminal: `modal run run_modal.py --config-file-list-str=/root/ai-toolkit/config/whatever_you_want.yml`. +- You can monitor your training in your local terminal, or on [modal.com](https://modal.com/). +- Models, samples and optimizer will be stored in `Storage > flux-lora-models`. + +### 6. Saving the model +- Check contents of the volume by running `modal volume ls flux-lora-models`. +- Download the content by running `modal volume get flux-lora-models your-model-name`. +- Example: `modal volume get flux-lora-models my_first_flux_lora_v1`. + +### Screenshot from Modal + +Modal Traning Screenshot --- diff --git a/config/examples/modal/modal_train_lora_flux_24gb.yaml b/config/examples/modal/modal_train_lora_flux_24gb.yaml new file mode 100644 index 00000000..51873de0 --- /dev/null +++ b/config/examples/modal/modal_train_lora_flux_24gb.yaml @@ -0,0 +1,96 @@ +--- +job: extension +config: + # this name will be the folder and filename name + name: "my_first_flux_lora_v1" + process: + - type: 'sd_trainer' + # root folder to save training sessions/samples/weights + training_folder: "/root/ai-toolkit/modal_output" # must match MOUNT_DIR from run_modal.py + # uncomment to see performance stats in the terminal every N steps +# performance_log_every: 1000 + device: cuda:0 + # if a trigger word is specified, it will be added to captions of training data if it does not already exist + # alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word +# trigger_word: "p3r5on" + network: + type: "lora" + linear: 16 + linear_alpha: 16 + save: + dtype: float16 # precision to save + save_every: 250 # save every this many steps + max_step_saves_to_keep: 4 # how many intermittent saves to keep + datasets: + # datasets are a folder of images. captions need to be txt files with the same name as the image + # for instance image2.jpg and image2.txt. 
Only jpg, jpeg, and png are supported currently + # images will automatically be resized and bucketed into the resolution specified + # on windows, escape back slashes with another backslash so + # "C:\\path\\to\\images\\folder" + # your dataset must be placed in /ai-toolkit and /root is for modal to find the dir: + - folder_path: "/root/ai-toolkit/your-dataset" + caption_ext: "txt" + caption_dropout_rate: 0.05 # will drop out the caption 5% of time + shuffle_tokens: false # shuffle caption order, split by commas + cache_latents_to_disk: true # leave this true unless you know what you're doing + resolution: [ 512, 768, 1024 ] # flux enjoys multiple resolutions + train: + batch_size: 1 + steps: 2000 # total number of steps to train 500 - 4000 is a good range + gradient_accumulation_steps: 1 + train_unet: true + train_text_encoder: false # probably won't work with flux + gradient_checkpointing: true # need the on unless you have a ton of vram + noise_scheduler: "flowmatch" # for training only + optimizer: "adamw8bit" + lr: 1e-4 + # uncomment this to skip the pre training sample +# skip_first_sample: true + # uncomment to completely disable sampling +# disable_sampling: true + # uncomment to use new vell curved weighting. Experimental but may produce better results +# linear_timesteps: true + + # ema will smooth out learning, but could slow it down. Recommended to leave on. + ema_config: + use_ema: true + ema_decay: 0.99 + + # will probably need this if gpu supports it for flux, other dtypes may not work correctly + dtype: bf16 + model: + # huggingface model name or path + # if you get an error, or get stuck while downloading, + # check https://github.com/ostris/ai-toolkit/issues/84, download the model locally and + # place it like "/root/ai-toolkit/FLUX.1-dev" + name_or_path: "black-forest-labs/FLUX.1-dev" + is_flux: true + quantize: true # run 8bit mixed precision +# low_vram: true # uncomment this if the GPU is connected to your monitors. It will use less vram to quantize, but is slower. + sample: + sampler: "flowmatch" # must match train.noise_scheduler + sample_every: 250 # sample every this many steps + width: 1024 + height: 1024 + prompts: + # you can add [trigger] to the prompts here and it will be replaced with the trigger word +# - "[trigger] holding a sign that says 'I LOVE PROMPTS!'"\ + - "woman with red hair, playing chess at the park, bomb going off in the background" + - "a woman holding a coffee cup, in a beanie, sitting at a cafe" + - "a horse is a DJ at a night club, fish eye lens, smoke machine, lazer lights, holding a martini" + - "a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background" + - "a bear building a log cabin in the snow covered mountains" + - "woman playing the guitar, on stage, singing a song, laser lights, punk rocker" + - "hipster man with a beard, building a chair, in a wood shop" + - "photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop" + - "a man holding a sign that says, 'this is a sign'" + - "a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle" + neg: "" # not used on flux + seed: 42 + walk_seed: true + guidance_scale: 4 + sample_steps: 20 +# you can add any additional meta info here. 
[name] is replaced with config name at top +meta: + name: "[name]" + version: '1.0' diff --git a/config/examples/modal/modal_train_lora_flux_schnell_24gb.yaml b/config/examples/modal/modal_train_lora_flux_schnell_24gb.yaml new file mode 100644 index 00000000..6d1e964f --- /dev/null +++ b/config/examples/modal/modal_train_lora_flux_schnell_24gb.yaml @@ -0,0 +1,98 @@ +--- +job: extension +config: + # this name will be the folder and filename name + name: "my_first_flux_lora_v1" + process: + - type: 'sd_trainer' + # root folder to save training sessions/samples/weights + training_folder: "/root/ai-toolkit/modal_output" # must match MOUNT_DIR from run_modal.py + # uncomment to see performance stats in the terminal every N steps +# performance_log_every: 1000 + device: cuda:0 + # if a trigger word is specified, it will be added to captions of training data if it does not already exist + # alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word +# trigger_word: "p3r5on" + network: + type: "lora" + linear: 16 + linear_alpha: 16 + save: + dtype: float16 # precision to save + save_every: 250 # save every this many steps + max_step_saves_to_keep: 4 # how many intermittent saves to keep + datasets: + # datasets are a folder of images. captions need to be txt files with the same name as the image + # for instance image2.jpg and image2.txt. Only jpg, jpeg, and png are supported currently + # images will automatically be resized and bucketed into the resolution specified + # on windows, escape back slashes with another backslash so + # "C:\\path\\to\\images\\folder" + # your dataset must be placed in /ai-toolkit and /root is for modal to find the dir: + - folder_path: "/root/ai-toolkit/your-dataset" + caption_ext: "txt" + caption_dropout_rate: 0.05 # will drop out the caption 5% of time + shuffle_tokens: false # shuffle caption order, split by commas + cache_latents_to_disk: true # leave this true unless you know what you're doing + resolution: [ 512, 768, 1024 ] # flux enjoys multiple resolutions + train: + batch_size: 1 + steps: 2000 # total number of steps to train 500 - 4000 is a good range + gradient_accumulation_steps: 1 + train_unet: true + train_text_encoder: false # probably won't work with flux + gradient_checkpointing: true # need the on unless you have a ton of vram + noise_scheduler: "flowmatch" # for training only + optimizer: "adamw8bit" + lr: 1e-4 + # uncomment this to skip the pre training sample +# skip_first_sample: true + # uncomment to completely disable sampling +# disable_sampling: true + # uncomment to use new vell curved weighting. Experimental but may produce better results +# linear_timesteps: true + + # ema will smooth out learning, but could slow it down. Recommended to leave on. 
+ ema_config: + use_ema: true + ema_decay: 0.99 + + # will probably need this if gpu supports it for flux, other dtypes may not work correctly + dtype: bf16 + model: + # huggingface model name or path + # if you get an error, or get stuck while downloading, + # check https://github.com/ostris/ai-toolkit/issues/84, download the models locally and + # place them like "/root/ai-toolkit/FLUX.1-schnell" and "/root/ai-toolkit/FLUX.1-schnell-training-adapter" + name_or_path: "black-forest-labs/FLUX.1-schnell" + assistant_lora_path: "ostris/FLUX.1-schnell-training-adapter" # Required for flux schnell training + is_flux: true + quantize: true # run 8bit mixed precision + # low_vram is painfully slow to fuse in the adapter avoid it unless absolutely necessary +# low_vram: true # uncomment this if the GPU is connected to your monitors. It will use less vram to quantize, but is slower. + sample: + sampler: "flowmatch" # must match train.noise_scheduler + sample_every: 250 # sample every this many steps + width: 1024 + height: 1024 + prompts: + # you can add [trigger] to the prompts here and it will be replaced with the trigger word +# - "[trigger] holding a sign that says 'I LOVE PROMPTS!'"\ + - "woman with red hair, playing chess at the park, bomb going off in the background" + - "a woman holding a coffee cup, in a beanie, sitting at a cafe" + - "a horse is a DJ at a night club, fish eye lens, smoke machine, lazer lights, holding a martini" + - "a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background" + - "a bear building a log cabin in the snow covered mountains" + - "woman playing the guitar, on stage, singing a song, laser lights, punk rocker" + - "hipster man with a beard, building a chair, in a wood shop" + - "photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop" + - "a man holding a sign that says, 'this is a sign'" + - "a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle" + neg: "" # not used on flux + seed: 42 + walk_seed: true + guidance_scale: 1 # schnell does not do guidance + sample_steps: 4 # 1 - 4 works well +# you can add any additional meta info here. 
[name] is replaced with config name at top +meta: + name: "[name]" + version: '1.0' diff --git a/notebooks/FLUX_1_dev_LoRA_Training.ipynb b/notebooks/FLUX_1_dev_LoRA_Training.ipynb index d3174813..8cfcd1fe 100644 --- a/notebooks/FLUX_1_dev_LoRA_Training.ipynb +++ b/notebooks/FLUX_1_dev_LoRA_Training.ipynb @@ -1,53 +1,45 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "machine_shape": "hm", - "gpuType": "A100" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", - "source": [ - "# AI Toolkit by Ostris\n", - "## FLUX.1 Training\n" - ], "metadata": { "collapsed": false, "id": "zl-S0m3pkQC5" - } + }, + "source": [ + "# AI Toolkit by Ostris\n", + "## FLUX.1-dev Training\n" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!git clone https://github.com/ostris/ai-toolkit\n", - "!mkdir -p /content/dataset" - ], + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "id": "BvAG0GKAh59G" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!git clone https://github.com/ostris/ai-toolkit\n", + "!mkdir -p /content/dataset" + ] }, { "cell_type": "markdown", - "source": [ - "Put your image dataset in the `/content/dataset` folder" - ], "metadata": { "id": "UFUW4ZMmnp1V" - } + }, + "source": [ + "Put your image dataset in the `/content/dataset` folder" + ] }, { "cell_type": "code", @@ -62,6 +54,9 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "OV0HnOI6o8V6" + }, "source": [ "## Model License\n", "Training currently only works with FLUX.1-dev. Which means anything you train will inherit the non-commercial license. It is also a gated model, so you need to accept the license on HF before using it. Otherwise, this will fail. Here are the required steps to setup a license.\n", @@ -69,13 +64,15 @@ "Sign into HF and accept the model access here [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)\n", "\n", "[Get a READ key from huggingface](https://huggingface.co/settings/tokens/new?) and place it in the next cell after running it." - ], - "metadata": { - "id": "OV0HnOI6o8V6" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3yZZdhFRoj2m" + }, + "outputs": [], "source": [ "import getpass\n", "import os\n", @@ -87,15 +84,15 @@ "os.environ['HF_TOKEN'] = hf_token\n", "\n", "print(\"HF_TOKEN environment variable has been set.\")" - ], - "metadata": { - "id": "3yZZdhFRoj2m" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9gO2EzQ1kQC8" + }, + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -105,26 +102,26 @@ "from PIL import Image\n", "import os\n", "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"" - ], - "metadata": { - "id": "9gO2EzQ1kQC8" - }, - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", + "metadata": { + "id": "N8UUFzVRigbC" + }, "source": [ "## Setup\n", "\n", "This is your config. It is documented pretty well. Normally you would do this as a yaml file, but for colab, this will work. This will run as is without modification, but feel free to edit as you want." 
- ], - "metadata": { - "id": "N8UUFzVRigbC" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_t28QURYjRQO" + }, + "outputs": [], "source": [ "from collections import OrderedDict\n", "\n", @@ -169,7 +166,7 @@ " ]),\n", " ('train', OrderedDict([\n", " ('batch_size', 1),\n", - " ('steps', 4000), # total number of steps to train 500 - 4000 is a good range\n", + " ('steps', 2000), # total number of steps to train 500 - 4000 is a good range\n", " ('gradient_accumulation_steps', 1),\n", " ('train_unet', True),\n", " ('train_text_encoder', False), # probably won't work with flux\n", @@ -177,9 +174,16 @@ " ('gradient_checkpointing', True), # need the on unless you have a ton of vram\n", " ('noise_scheduler', 'flowmatch'), # for training only\n", " ('optimizer', 'adamw8bit'),\n", - " ('lr', 4e-4),\n", + " ('lr', 1e-4),\n", + "\n", " # uncomment this to skip the pre training sample\n", - " #('skip_first_sample', True),\n", + " # ('skip_first_sample', True),\n", + "\n", + " # uncomment to completely disable sampling\n", + " # ('disable_sampling', True),\n", + "\n", + " # uncomment to use new vell curved weighting. Experimental but may produce better results\n", + " # ('linear_timesteps', True),\n", "\n", " # ema will smooth out learning, but could slow it down. Recommended to leave on.\n", " ('ema_config', OrderedDict([\n", @@ -231,45 +235,57 @@ " ('version', '1.0')\n", " ]))\n", "])\n" - ], - "metadata": { - "id": "_t28QURYjRQO" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "h6F1FlM2Wb3l" + }, "source": [ "## Run it\n", "\n", "Below does all the magic. Check your folders to the left. Items will be in output/LoRA/your_name_v1 In the samples folder, there are preiodic sampled. This doesnt work great with colab. 
They will be in /content/output" - ], - "metadata": { - "id": "h6F1FlM2Wb3l" - } + ] }, { "cell_type": "code", - "source": [ - "run_job(job_to_run)\n" - ], + "execution_count": null, "metadata": { "id": "HkajwI8gteOh" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "run_job(job_to_run)\n" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "Hblgb5uwW5SD" + }, "source": [ "## Done\n", "\n", "Check your ourput dir and get your slider\n" - ], - "metadata": { - "id": "Hblgb5uwW5SD" - } + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/FLUX_1_schnell_LoRA_Training.ipynb b/notebooks/FLUX_1_schnell_LoRA_Training.ipynb new file mode 100644 index 00000000..652d8ccc --- /dev/null +++ b/notebooks/FLUX_1_schnell_LoRA_Training.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "id": "zl-S0m3pkQC5" + }, + "source": [ + "# AI Toolkit by Ostris\n", + "## FLUX.1-schnell Training\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3cokMT-WC6rG" + }, + "outputs": [], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "BvAG0GKAh59G" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/ostris/ai-toolkit\n", + "!mkdir -p /content/dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UFUW4ZMmnp1V" + }, + "source": [ + "Put your image dataset in the `/content/dataset` folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "XGZqVER_aQJW" + }, + "outputs": [], + "source": [ + "!cd ai-toolkit && git submodule update --init --recursive && pip install -r requirements.txt\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OV0HnOI6o8V6" + }, + "source": [ + "## Model License\n", + "Training currently only works with FLUX.1-dev. Which means anything you train will inherit the non-commercial license. It is also a gated model, so you need to accept the license on HF before using it. Otherwise, this will fail. Here are the required steps to setup a license.\n", + "\n", + "Sign into HF and accept the model access here [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)\n", + "\n", + "[Get a READ key from huggingface](https://huggingface.co/settings/tokens/new?) and place it in the next cell after running it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3yZZdhFRoj2m" + }, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "# Prompt for the token\n", + "hf_token = getpass.getpass('Enter your HF access token and press enter: ')\n", + "\n", + "# Set the environment variable\n", + "os.environ['HF_TOKEN'] = hf_token\n", + "\n", + "print(\"HF_TOKEN environment variable has been set.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "9gO2EzQ1kQC8" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.append('/content/ai-toolkit')\n", + "from toolkit.job import run_job\n", + "from collections import OrderedDict\n", + "from PIL import Image\n", + "import os\n", + "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N8UUFzVRigbC" + }, + "source": [ + "## Setup\n", + "\n", + "This is your config. It is documented pretty well. Normally you would do this as a yaml file, but for colab, this will work. This will run as is without modification, but feel free to edit as you want." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "_t28QURYjRQO" + }, + "outputs": [], + "source": [ + "from collections import OrderedDict\n", + "\n", + "job_to_run = OrderedDict([\n", + " ('job', 'extension'),\n", + " ('config', OrderedDict([\n", + " # this name will be the folder and filename name\n", + " ('name', 'my_first_flux_lora_v1'),\n", + " ('process', [\n", + " OrderedDict([\n", + " ('type', 'sd_trainer'),\n", + " # root folder to save training sessions/samples/weights\n", + " ('training_folder', '/content/output'),\n", + " # uncomment to see performance stats in the terminal every N steps\n", + " #('performance_log_every', 1000),\n", + " ('device', 'cuda:0'),\n", + " # if a trigger word is specified, it will be added to captions of training data if it does not already exist\n", + " # alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word\n", + " # ('trigger_word', 'image'),\n", + " ('network', OrderedDict([\n", + " ('type', 'lora'),\n", + " ('linear', 16),\n", + " ('linear_alpha', 16)\n", + " ])),\n", + " ('save', OrderedDict([\n", + " ('dtype', 'float16'), # precision to save\n", + " ('save_every', 250), # save every this many steps\n", + " ('max_step_saves_to_keep', 4) # how many intermittent saves to keep\n", + " ])),\n", + " ('datasets', [\n", + " # datasets are a folder of images. captions need to be txt files with the same name as the image\n", + " # for instance image2.jpg and image2.txt. 
Only jpg, jpeg, and png are supported currently\n", + " # images will automatically be resized and bucketed into the resolution specified\n", + " OrderedDict([\n", + " ('folder_path', '/content/dataset'),\n", + " ('caption_ext', 'txt'),\n", + " ('caption_dropout_rate', 0.05), # will drop out the caption 5% of time\n", + " ('shuffle_tokens', False), # shuffle caption order, split by commas\n", + " ('cache_latents_to_disk', True), # leave this true unless you know what you're doing\n", + " ('resolution', [512, 768, 1024]) # flux enjoys multiple resolutions\n", + " ])\n", + " ]),\n", + " ('train', OrderedDict([\n", + " ('batch_size', 1),\n", + " ('steps', 2000), # total number of steps to train 500 - 4000 is a good range\n", + " ('gradient_accumulation_steps', 1),\n", + " ('train_unet', True),\n", + " ('train_text_encoder', False), # probably won't work with flux\n", + " ('gradient_checkpointing', True), # need the on unless you have a ton of vram\n", + " ('noise_scheduler', 'flowmatch'), # for training only\n", + " ('optimizer', 'adamw8bit'),\n", + " ('lr', 1e-4),\n", + "\n", + " # uncomment this to skip the pre training sample\n", + " # ('skip_first_sample', True),\n", + "\n", + " # uncomment to completely disable sampling\n", + " # ('disable_sampling', True),\n", + "\n", + " # uncomment to use new vell curved weighting. Experimental but may produce better results\n", + " # ('linear_timesteps', True),\n", + "\n", + " # ema will smooth out learning, but could slow it down. Recommended to leave on.\n", + " ('ema_config', OrderedDict([\n", + " ('use_ema', True),\n", + " ('ema_decay', 0.99)\n", + " ])),\n", + "\n", + " # will probably need this if gpu supports it for flux, other dtypes may not work correctly\n", + " ('dtype', 'bf16')\n", + " ])),\n", + " ('model', OrderedDict([\n", + " # huggingface model name or path\n", + " ('name_or_path', 'black-forest-labs/FLUX.1-schnell'),\n", + " ('assistant_lora_path', 'ostris/FLUX.1-schnell-training-adapter'), # Required for flux schnell training\n", + " ('is_flux', True),\n", + " ('quantize', True), # run 8bit mixed precision\n", + " # low_vram is painfully slow to fuse in the adapter avoid it unless absolutely necessary\n", + " #('low_vram', True), # uncomment this if the GPU is connected to your monitors. 
It will use less vram to quantize, but is slower.\n", + " ])),\n", + " ('sample', OrderedDict([\n", + " ('sampler', 'flowmatch'), # must match train.noise_scheduler\n", + " ('sample_every', 250), # sample every this many steps\n", + " ('width', 1024),\n", + " ('height', 1024),\n", + " ('prompts', [\n", + " # you can add [trigger] to the prompts here and it will be replaced with the trigger word\n", + " #'[trigger] holding a sign that says \\'I LOVE PROMPTS!\\'',\n", + " 'woman with red hair, playing chess at the park, bomb going off in the background',\n", + " 'a woman holding a coffee cup, in a beanie, sitting at a cafe',\n", + " 'a horse is a DJ at a night club, fish eye lens, smoke machine, lazer lights, holding a martini',\n", + " 'a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background',\n", + " 'a bear building a log cabin in the snow covered mountains',\n", + " 'woman playing the guitar, on stage, singing a song, laser lights, punk rocker',\n", + " 'hipster man with a beard, building a chair, in a wood shop',\n", + " 'photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop',\n", + " 'a man holding a sign that says, \\'this is a sign\\'',\n", + " 'a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle'\n", + " ]),\n", + " ('neg', ''), # not used on flux\n", + " ('seed', 42),\n", + " ('walk_seed', True),\n", + " ('guidance_scale', 1), # schnell does not do guidance\n", + " ('sample_steps', 4) # 1 - 4 works well\n", + " ]))\n", + " ])\n", + " ])\n", + " ])),\n", + " # you can add any additional meta info here. [name] is replaced with config name at top\n", + " ('meta', OrderedDict([\n", + " ('name', '[name]'),\n", + " ('version', '1.0')\n", + " ]))\n", + "])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6F1FlM2Wb3l" + }, + "source": [ + "## Run it\n", + "\n", + "Below does all the magic. Check your folders to the left. Items will be in output/LoRA/your_name_v1 In the samples folder, there are preiodic sampled. This doesnt work great with colab. 
They will be in /content/output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HkajwI8gteOh" + }, + "outputs": [], + "source": [ + "run_job(job_to_run)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hblgb5uwW5SD" + }, + "source": [ + "## Done\n", + "\n", + "Check your ourput dir and get your slider\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/requirements.txt b/requirements.txt index 119eeee0..1b03df7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,3 +29,4 @@ pytorch_fid optimum-quanto sentencepiece huggingface_hub +peft \ No newline at end of file diff --git a/run_modal.py b/run_modal.py new file mode 100644 index 00000000..4675c1cb --- /dev/null +++ b/run_modal.py @@ -0,0 +1,175 @@ +''' + +ostris/ai-toolkit on https://modal.com +Run training with the following command: +modal run run_modal.py --config-file-list-str=/root/ai-toolkit/config/whatever_you_want.yml + +''' + +import os +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" +import sys +import modal +from dotenv import load_dotenv +# Load the .env file if it exists +load_dotenv() + +sys.path.insert(0, "/root/ai-toolkit") +# must come before ANY torch or fastai imports +# import toolkit.cuda_malloc + +# turn off diffusers telemetry until I can figure out how to make it opt-in +os.environ['DISABLE_TELEMETRY'] = 'YES' + +# define the volume for storing model outputs, using "creating volumes lazily": https://modal.com/docs/guide/volumes +# you will find your model, samples and optimizer stored in: https://modal.com/storage/your-username/main/flux-lora-models +model_volume = modal.Volume.from_name("flux-lora-models", create_if_missing=True) + +# modal_output, due to "cannot mount volume on non-empty path" requirement +MOUNT_DIR = "/root/ai-toolkit/modal_output" # modal_output, due to "cannot mount volume on non-empty path" requirement + +# define modal app +image = ( + modal.Image.debian_slim(python_version="3.11") + # install required system and pip packages, more about this modal approach: https://modal.com/docs/examples/dreambooth_app + .apt_install("libgl1", "libglib2.0-0") + .pip_install( + "python-dotenv", + "torch", + "diffusers[torch]", + "transformers", + "ftfy", + "torchvision", + "oyaml", + "opencv-python", + "albumentations", + "safetensors", + "lycoris-lora==1.8.3", + "flatten_json", + "pyyaml", + "tensorboard", + "kornia", + "invisible-watermark", + "einops", + "accelerate", + "toml", + "pydantic", + "omegaconf", + "k-diffusion", + "open_clip_torch", + "timm", + "prodigyopt", + "controlnet_aux==0.0.7", + "bitsandbytes", + "hf_transfer", + "lpips", + "pytorch_fid", + "optimum-quanto", + "sentencepiece", + "huggingface_hub", + "peft" + ) +) + +# mount for the entire ai-toolkit directory +# example: "/Users/username/ai-toolkit" is the local directory, "/root/ai-toolkit" is the remote directory +code_mount = modal.Mount.from_local_dir("/Users/username/ai-toolkit", remote_path="/root/ai-toolkit") + +# create the Modal app with the necessary mounts and volumes +app = modal.App(name="flux-lora-training", image=image, mounts=[code_mount], volumes={MOUNT_DIR: model_volume}) + +# Check if we have DEBUG_TOOLKIT in env +if os.environ.get("DEBUG_TOOLKIT", "0") == "1": + # Set torch to trace mode + 
import torch + torch.autograd.set_detect_anomaly(True) + +import argparse +from toolkit.job import get_job + +def print_end_message(jobs_completed, jobs_failed): + failure_string = f"{jobs_failed} failure{'' if jobs_failed == 1 else 's'}" if jobs_failed > 0 else "" + completed_string = f"{jobs_completed} completed job{'' if jobs_completed == 1 else 's'}" + + print("") + print("========================================") + print("Result:") + if len(completed_string) > 0: + print(f" - {completed_string}") + if len(failure_string) > 0: + print(f" - {failure_string}") + print("========================================") + + +@app.function( + # request a GPU with at least 24GB VRAM + # more about modal GPU's: https://modal.com/docs/guide/gpu + gpu="A100", # gpu="H100" + # more about modal timeouts: https://modal.com/docs/guide/timeouts + timeout=7200 # 2 hours, increase or decrease if needed +) +def main(config_file_list_str: str, recover: bool = False, name: str = None): + # convert the config file list from a string to a list + config_file_list = config_file_list_str.split(",") + + jobs_completed = 0 + jobs_failed = 0 + + print(f"Running {len(config_file_list)} job{'' if len(config_file_list) == 1 else 's'}") + + for config_file in config_file_list: + try: + job = get_job(config_file, name) + + job.config['process'][0]['training_folder'] = MOUNT_DIR + os.makedirs(MOUNT_DIR, exist_ok=True) + print(f"Training outputs will be saved to: {MOUNT_DIR}") + + # run the job + job.run() + + # commit the volume after training + model_volume.commit() + + job.cleanup() + jobs_completed += 1 + except Exception as e: + print(f"Error running job: {e}") + jobs_failed += 1 + if not recover: + print_end_message(jobs_completed, jobs_failed) + raise e + + print_end_message(jobs_completed, jobs_failed) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + # require at least one config file + parser.add_argument( + 'config_file_list', + nargs='+', + type=str, + help='Name of config file (eg: person_v1 for config/person_v1.json/yaml), or full path if it is not in config folder, you can pass multiple config files and run them all sequentially' + ) + + # flag to continue if a job fails + parser.add_argument( + '-r', '--recover', + action='store_true', + help='Continue running additional jobs even if a job fails' + ) + + # optional name replacement for config file + parser.add_argument( + '-n', '--name', + type=str, + default=None, + help='Name to replace [name] tag in config file, useful for shared config file' + ) + args = parser.parse_args() + + # convert list of config files to a comma-separated string for Modal compatibility + config_file_list_str = ",".join(args.config_file_list) + + main.call(config_file_list_str=config_file_list_str, recover=args.recover, name=args.name) diff --git a/toolkit/scheduler.py b/toolkit/scheduler.py index 95ae36d8..f6f8f61a 100644 --- a/toolkit/scheduler.py +++ b/toolkit/scheduler.py @@ -26,7 +26,7 @@ def get_lr_scheduler( optimizer, **kwargs ) elif name == "constant": - if 'facor' not in kwargs: + if 'factor' not in kwargs: kwargs['factor'] = 1.0 return torch.optim.lr_scheduler.ConstantLR(optimizer, **kwargs)
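The final hunk corrects a typo in `toolkit/scheduler.py`: the "constant" branch checked for `'facor'` instead of `'factor'`, so the `factor = 1.0` default was applied unconditionally and silently overwrote any factor supplied in the config. Below is a minimal sketch of the corrected branch using only standard PyTorch; the `constant_scheduler` helper and the toy optimizer setup are illustrative stand-ins, not part of the toolkit.

```python
import torch

def constant_scheduler(optimizer, **kwargs):
    # Sketch of the fixed "constant" branch in toolkit/scheduler.py (not the toolkit code itself).
    # Default factor to 1.0 only when the caller did not supply one; PyTorch's ConstantLR
    # otherwise defaults to factor=1/3, which would scale the learning rate down
    # instead of keeping it constant.
    if 'factor' not in kwargs:
        kwargs['factor'] = 1.0
    return torch.optim.lr_scheduler.ConstantLR(optimizer, **kwargs)

if __name__ == "__main__":
    params = [torch.nn.Parameter(torch.zeros(1))]
    opt = torch.optim.AdamW(params, lr=1e-4)
    sched = constant_scheduler(opt, factor=0.5)  # a user-supplied factor is now respected
    print(sched.get_last_lr())  # [5e-05]; with the old 'facor' check this stayed at [1e-4]
```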