diff --git a/notebooks/FLUX_1_dev_LoRA_Training.ipynb b/notebooks/FLUX_1_dev_LoRA_Training.ipynb
new file mode 100644
index 00000000..d3174813
--- /dev/null
+++ b/notebooks/FLUX_1_dev_LoRA_Training.ipynb
@@ -0,0 +1,275 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "machine_shape": "hm",
+      "gpuType": "A100"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# AI Toolkit by Ostris\n",
+        "## FLUX.1 Training\n"
+      ],
+      "metadata": {
+        "collapsed": false,
+        "id": "zl-S0m3pkQC5"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!git clone https://github.com/ostris/ai-toolkit\n",
+        "!mkdir -p /content/dataset"
+      ],
+      "metadata": {
+        "id": "BvAG0GKAh59G"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Put your image dataset in the `/content/dataset` folder"
+      ],
+      "metadata": {
+        "id": "UFUW4ZMmnp1V"
+      }
+    },
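+    {
+      "cell_type": "markdown",
+      "source": [
+        "If your dataset is sitting on your local machine, the next cell is an optional, minimal sketch for getting it into `/content/dataset`. It assumes a single `.zip` of images (with matching `.txt` caption files) and uses Colab's file-upload widget; skip it if you are mounting Drive or copying files some other way."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Optional: upload a zipped dataset and extract it into /content/dataset.\n",
+        "# Assumes a local .zip of images with matching .txt captions; adjust as needed.\n",
+        "import os\n",
+        "import zipfile\n",
+        "\n",
+        "from google.colab import files\n",
+        "\n",
+        "uploaded = files.upload()  # pick your dataset .zip when prompted\n",
+        "for filename in uploaded:\n",
+        "    if filename.endswith('.zip'):\n",
+        "        with zipfile.ZipFile(filename) as archive:\n",
+        "            archive.extractall('/content/dataset')\n",
+        "\n",
+        "print(sorted(os.listdir('/content/dataset'))[:10])  # quick sanity check"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },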
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "XGZqVER_aQJW"
+      },
+      "outputs": [],
+      "source": [
+        "!cd ai-toolkit && git submodule update --init --recursive && pip install -r requirements.txt\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Model License\n",
+        "Training currently only works with FLUX.1-dev, which means anything you train will inherit the non-commercial license. It is also a gated model, so you need to accept the license on HF before using it; otherwise, this will fail. Here are the required steps to set up access.\n",
+        "\n",
+        "Sign into HF and accept the model access here: [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)\n",
+        "\n",
+        "[Get a READ token from huggingface](https://huggingface.co/settings/tokens/new?), then run the next cell and paste the token when prompted."
+      ],
+      "metadata": {
+        "id": "OV0HnOI6o8V6"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import getpass\n",
+        "import os\n",
+        "\n",
+        "# Prompt for the token\n",
+        "hf_token = getpass.getpass('Enter your HF access token and press enter: ')\n",
+        "\n",
+        "# Set the environment variable\n",
+        "os.environ['HF_TOKEN'] = hf_token\n",
+        "\n",
+        "print(\"HF_TOKEN environment variable has been set.\")"
+      ],
+      "metadata": {
+        "id": "3yZZdhFRoj2m"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import sys\n",
+        "sys.path.append('/content/ai-toolkit')\n",
+        "from toolkit.job import run_job\n",
+        "from collections import OrderedDict\n",
+        "from PIL import Image\n",
+        "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\""
+      ],
+      "metadata": {
+        "id": "9gO2EzQ1kQC8"
+      },
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Setup\n",
+        "\n",
+        "This is your training config, documented inline fairly well. Normally you would write it as a YAML file, but for Colab this Python dict works. It will run as is without modification, but feel free to edit it as you want."
+      ],
+      "metadata": {
+        "id": "N8UUFzVRigbC"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from collections import OrderedDict\n",
+        "\n",
+        "job_to_run = OrderedDict([\n",
+        "    ('job', 'extension'),\n",
+        "    ('config', OrderedDict([\n",
+        "        # this name will be used for the output folder and file names\n",
+        "        ('name', 'my_first_flux_lora_v1'),\n",
+        "        ('process', [\n",
+        "            OrderedDict([\n",
+        "                ('type', 'sd_trainer'),\n",
+        "                # root folder to save training sessions/samples/weights\n",
+        "                ('training_folder', '/content/output'),\n",
+        "                # uncomment to see performance stats in the terminal every N steps\n",
+        "                #('performance_log_every', 1000),\n",
+        "                ('device', 'cuda:0'),\n",
+        "                # if a trigger word is specified, it will be added to captions of training data if it does not already exist\n",
+        "                # alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word\n",
+        "                # ('trigger_word', 'image'),\n",
+        "                ('network', OrderedDict([\n",
+        "                    ('type', 'lora'),\n",
+        "                    ('linear', 16),\n",
+        "                    ('linear_alpha', 16)\n",
+        "                ])),\n",
+        "                ('save', OrderedDict([\n",
+        "                    ('dtype', 'float16'),  # precision to save\n",
+        "                    ('save_every', 250),  # save every this many steps\n",
+        "                    ('max_step_saves_to_keep', 4)  # how many intermittent saves to keep\n",
+        "                ])),\n",
+        "                ('datasets', [\n",
+        "                    # datasets are a folder of images. captions need to be txt files with the same name as the image\n",
+        "                    # for instance image2.jpg and image2.txt. Only jpg, jpeg, and png are supported currently\n",
+        "                    # images will automatically be resized and bucketed into the resolution specified\n",
+        "                    OrderedDict([\n",
+        "                        ('folder_path', '/content/dataset'),\n",
+        "                        ('caption_ext', 'txt'),\n",
+        "                        ('caption_dropout_rate', 0.05),  # will drop out the caption 5% of the time\n",
+        "                        ('shuffle_tokens', False),  # shuffle caption order, split by commas\n",
+        "                        ('cache_latents_to_disk', True),  # leave this true unless you know what you're doing\n",
+        "                        ('resolution', [512, 768, 1024])  # flux enjoys multiple resolutions\n",
+        "                    ])\n",
+        "                ]),\n",
+        "                ('train', OrderedDict([\n",
+        "                    ('batch_size', 1),\n",
+        "                    ('steps', 4000),  # total number of steps to train; 500 - 4000 is a good range\n",
+        "                    ('gradient_accumulation_steps', 1),\n",
+        "                    ('train_unet', True),\n",
+        "                    ('train_text_encoder', False),  # probably won't work with flux\n",
+        "                    ('content_or_style', 'balanced'),  # content, style, balanced\n",
+        "                    ('gradient_checkpointing', True),  # need this on unless you have a ton of vram\n",
+        "                    ('noise_scheduler', 'flowmatch'),  # for training only\n",
+        "                    ('optimizer', 'adamw8bit'),\n",
+        "                    ('lr', 4e-4),\n",
+        "                    # uncomment this to skip the pre-training sample\n",
+        "                    #('skip_first_sample', True),\n",
+        "\n",
+        "                    # ema will smooth out learning, but could slow it down. Recommended to leave on.\n",
+        "                    ('ema_config', OrderedDict([\n",
+        "                        ('use_ema', True),\n",
+        "                        ('ema_decay', 0.99)\n",
+        "                    ])),\n",
+        "\n",
+        "                    # you will probably need bf16 if the gpu supports it; other dtypes may not work correctly with flux\n",
+        "                    ('dtype', 'bf16')\n",
+        "                ])),\n",
+        "                ('model', OrderedDict([\n",
+        "                    # huggingface model name or path\n",
+        "                    ('name_or_path', 'black-forest-labs/FLUX.1-dev'),\n",
+        "                    ('is_flux', True),\n",
+        "                    ('quantize', True),  # run 8bit mixed precision\n",
+        "                    #('low_vram', True),  # uncomment this if the GPU is connected to your monitors. It will use less vram to quantize, but is slower.\n",
+        "                ])),\n",
+        "                ('sample', OrderedDict([\n",
+        "                    ('sampler', 'flowmatch'),  # must match train.noise_scheduler\n",
+        "                    ('sample_every', 250),  # sample every this many steps\n",
+        "                    ('width', 1024),\n",
+        "                    ('height', 1024),\n",
+        "                    ('prompts', [\n",
+        "                        # you can add [trigger] to the prompts here and it will be replaced with the trigger word\n",
+        "                        #'[trigger] holding a sign that says \\'I LOVE PROMPTS!\\'',\n",
+        "                        'woman with red hair, playing chess at the park, bomb going off in the background',\n",
+        "                        'a woman holding a coffee cup, in a beanie, sitting at a cafe',\n",
+        "                        'a horse is a DJ at a night club, fish eye lens, smoke machine, laser lights, holding a martini',\n",
+        "                        'a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background',\n",
+        "                        'a bear building a log cabin in the snow covered mountains',\n",
+        "                        'woman playing the guitar, on stage, singing a song, laser lights, punk rocker',\n",
+        "                        'hipster man with a beard, building a chair, in a wood shop',\n",
+        "                        'photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop',\n",
+        "                        'a man holding a sign that says, \\'this is a sign\\'',\n",
+        "                        'a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle'\n",
+        "                    ]),\n",
+        "                    ('neg', ''),  # not used on flux\n",
+        "                    ('seed', 42),\n",
+        "                    ('walk_seed', True),\n",
+        "                    ('guidance_scale', 4),\n",
+        "                    ('sample_steps', 20)\n",
+        "                ]))\n",
+        "            ])\n",
+        "        ])\n",
+        "    ])),\n",
+        "    # you can add any additional meta info here. [name] is replaced with config name at top\n",
+        "    ('meta', OrderedDict([\n",
+        "        ('name', '[name]'),\n",
+        "        ('version', '1.0')\n",
+        "    ]))\n",
+        "])\n"
+      ],
+      "metadata": {
+        "id": "_t28QURYjRQO"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Run it\n",
+        "\n",
+        "The cell below does all the magic. Check the file browser on the left: everything is saved under `/content/output/<config name>` (`my_first_flux_lora_v1` by default), and periodic samples land in the `samples` folder there. Previewing images doesn't work great in Colab, but they are all in `/content/output`."
+      ],
+      "metadata": {
+        "id": "h6F1FlM2Wb3l"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "run_job(job_to_run)\n"
+      ],
+      "metadata": {
+        "id": "HkajwI8gteOh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Done\n",
+        "\n",
+        "Check your output dir and grab your LoRA.\n"
+      ],
+      "metadata": {
+        "id": "Hblgb5uwW5SD"
+      }
+    },
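+    {
+      "cell_type": "markdown",
+      "source": [
+        "If you want a quick smoke test of the result, the optional cell below is a rough sketch of loading the LoRA with diffusers' `FluxPipeline`. The checkpoint path is an assumption based on `training_folder` and `name` in the config above; check `/content/output` for the actual `.safetensors` filename. It also needs a diffusers build with FLUX support and enough VRAM to hold FLUX.1-dev, so you may prefer to download the file and run it elsewhere."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Optional sketch: try the trained LoRA for inference with diffusers.\n",
+        "# The .safetensors path below is an assumption; check /content/output for the real filename.\n",
+        "import torch\n",
+        "from diffusers import FluxPipeline\n",
+        "\n",
+        "pipe = FluxPipeline.from_pretrained(\n",
+        "    'black-forest-labs/FLUX.1-dev', torch_dtype=torch.bfloat16\n",
+        ").to('cuda')\n",
+        "pipe.load_lora_weights('/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1.safetensors')\n",
+        "\n",
+        "image = pipe(\n",
+        "    'a woman holding a coffee cup, in a beanie, sitting at a cafe',\n",
+        "    num_inference_steps=20,\n",
+        "    guidance_scale=4.0,\n",
+        ").images[0]\n",
+        "image.save('/content/lora_test.png')"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    }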
+  ]
+}
\ No newline at end of file