From 9367089d488048f8fde982f7fca089b33d07753a Mon Sep 17 00:00:00 2001
From: Jaret Burkett
Date: Sun, 23 Jul 2023 11:24:12 -0600
Subject: [PATCH] Added example for slider training that will run as is

---
 .gitignore                               |   4 +-
 config/examples/extract.example.json     |  63 ---------
 config/examples/extract.example.yml      |  74 ++++++++++
 config/examples/train_slider.example.yml | 172 +++++++++++++++++++++++
 output/.gitkeep                          |   0
 toolkit/paths.py                         |   7 +
 6 files changed, 256 insertions(+), 64 deletions(-)
 delete mode 100644 config/examples/extract.example.json
 create mode 100644 config/examples/extract.example.yml
 create mode 100644 config/examples/train_slider.example.yml
 create mode 100644 output/.gitkeep

diff --git a/.gitignore b/.gitignore
index c894990f..f1480418 100644
--- a/.gitignore
+++ b/.gitignore
@@ -168,4 +168,6 @@ cython_debug/
 /venv.*
 /config/*
 !/config/examples
-!/config/_PUT_YOUR_CONFIGS_HERE).txt
\ No newline at end of file
+!/config/_PUT_YOUR_CONFIGS_HERE).txt
+/output/*
+!/output/.gitkeep
\ No newline at end of file
diff --git a/config/examples/extract.example.json b/config/examples/extract.example.json
deleted file mode 100644
index f0d6e32f..00000000
--- a/config/examples/extract.example.json
+++ /dev/null
@@ -1,63 +0,0 @@
-{
-  "job": "extract",
-  "config": {
-    "name": "name_of_your_model",
-    "base_model": "/path/to/base/model",
-    "extract_model": "/path/to/model/to/extract",
-    "output_folder": "/path/to/output/folder",
-    "is_v2": false,
-    "dtype": "fp16",
-    "device": "cpu",
-    "process": [
-      {
-        "filename": "[name]_64_32.safetensors",
-        "dtype": "fp16",
-        "type": "locon",
-        "mode": "fixed",
-        "linear": 64,
-        "conv": 32
-      },
-      {
-        "output_path": "/absolute/path/for/this/output.safetensors",
-        "type": "locon",
-        "mode": "ratio",
-        "linear": 0.2,
-        "conv": 0.2
-      },
-      {
-        "type": "locon",
-        "mode": "quantile",
-        "linear": 0.5,
-        "conv": 0.5
-      },
-      {
-        "type": "lora",
-        "mode": "fixed",
-        "linear": 4
-      },
-      {
-        "type": "lora",
-        "mode": "fixed",
-        "linear": 64,
-        "conv": 32
-      }
-    ]
-  },
-  "meta": {
-    "name": "[name]",
-    "description": "A short description of your model",
-    "trigger_words": [
-      "put",
-      "trigger",
-      "words",
-      "here"
-    ],
-    "version": "0.1",
-    "creator": {
-      "name": "Your Name",
-      "email": "your@email.com",
-      "website": "https://yourwebsite.com"
-    },
-    "any": "All meta data above is arbitrary, it can be whatever you want."
-  }
-}
\ No newline at end of file
diff --git a/config/examples/extract.example.yml b/config/examples/extract.example.yml
new file mode 100644
index 00000000..5eee869e
--- /dev/null
+++ b/config/examples/extract.example.yml
@@ -0,0 +1,74 @@
+---
+# this is in yaml format. You can use json if you prefer
+# I like both, but yaml is easier to read and write,
+# plus it has comments, which is nice for documentation
+job: extract # tells the runner what to do
+config:
+  # the name will be used to create a folder in the output folder
+  # it will also replace any [name] token in the rest of this config
+  name: name_of_your_model
+  # can be a hugging face model, a .ckpt, or a .safetensors
+  base_model: "/path/to/base/model.safetensors"
+  # can be a hugging face model, a .ckpt, or a .safetensors
+  extract_model: "/path/to/model/to/extract/trained.safetensors"
+  # we will create a folder here with the name above, so this will create /path/to/output/folder/name_of_your_model
+  output_folder: "/path/to/output/folder"
+  is_v2: false
+  dtype: fp16 # saved dtype
+  device: cpu # cpu, cuda:0, etc
+
+  # processes can be chained like this to run multiple in a row
+  # they must all use the same models above, but this is great for testing different
+  # sizes and types of extractions. It is much faster since we already have the models loaded
+  process:
+    # process 1
+    - type: locon # locon or lora (locon is lycoris)
+      filename: "[name]_64_32.safetensors" # will be put in output folder
+      dtype: fp16
+      mode: fixed
+      linear: 64
+      conv: 32
+
+    # process 2
+    - type: locon
+      output_path: "/absolute/path/for/this/output.safetensors" # can be absolute
+      mode: ratio
+      linear: 0.2
+      conv: 0.2
+
+    # process 3
+    - type: locon
+      filename: "[name]_ratio_02.safetensors"
+      mode: quantile
+      linear: 0.5
+      conv: 0.5
+
+    # process 4
+    - type: lora # traditional lora extraction (lierla) with linear layers only
+      filename: "[name]_4.safetensors"
+      mode: fixed # fixed, ratio, quantile supported for lora as well
+      linear: 4
+
+    # process 5
+    - type: lora
+      filename: "[name]_q05.safetensors"
+      mode: quantile
+      linear: 0.5
+
+# you can put any information you want here, and it will be saved in the model
+# the below is an example. I recommend doing trigger words at a minimum
+# in the metadata. The software will include this plus some other information
+meta:
+  name: "[name]" # [name] gets replaced with the name above
+  description: A short description of your model
+  trigger_words:
+    - put
+    - trigger
+    - words
+    - here
+  version: '0.1'
+  creator:
+    name: Your Name
+    email: your@email.com
+    website: https://yourwebsite.com
+  any: All meta data above is arbitrary, it can be whatever you want.
diff --git a/config/examples/train_slider.example.yml b/config/examples/train_slider.example.yml
new file mode 100644
index 00000000..8956eff1
--- /dev/null
+++ b/config/examples/train_slider.example.yml
@@ -0,0 +1,172 @@
+---
+# This is in yaml format. You can use json if you prefer
+# I like both, but yaml is easier to write,
+# plus it has comments, which is nice for documentation
+# This is the config I use on my sliders. It is solid and tested
+job: train
+config:
+  # the name will be used to create a folder in the output folder
+  # it will also replace any [name] token in the rest of this config
+  name: pet_slider_v1
+  # a folder will be created with the name above in the folder below
+  # it can be relative to the project root or absolute
+  training_folder: "output/LoRA"
+  device: cuda:0 # cpu, cuda:0, etc
+  # for tensorboard logging, we will make a subfolder for this job
+  log_dir: "output/.tensorboard"
+  # you can stack processes for other jobs, but it is not tested with sliders,
+  # so just use one for now
+  process:
+    - type: slider # tells the runner to run the slider process
+      # network is the LoRA network for a slider. I recommend leaving this as is
+      network:
+        # network type lierla is traditional LoRA that works everywhere, only linear layers
+        type: "lierla"
+        # rank / dim of the network. Bigger is not always better, especially for sliders. 8 is good
+        rank: 8
+        alpha: 1.0 # just leave it
+
+      # training config
+      train:
+        # this is also used in sampling. Stick with ddpm unless you know what you are doing
+        noise_scheduler: "ddpm" # "ddpm", "lms", or "euler_a"
+        # how many steps to train. More is not always better. I rarely go over 1000
+        steps: 500
+        # I have had good results with 4e-4 to 1e-4 at 500 steps
+        lr: 2e-4
+        # train the unet. I recommend leaving this true
+        train_unet: true
+        # train the text encoder. I don't recommend this unless you have a special use case
+        # for sliders we are adjusting the representation of the concept (unet),
+        # not the description of it (text encoder)
+        train_text_encoder: false
+
+        # just leave this unless you know what you are doing
+        # it also supports "dadaptation" (set lr to 1 if you use that),
+        # but it learns too fast and I don't recommend it
+        optimizer: "adamw"
+        # only constant for now
+        lr_scheduler: "constant"
+        # we randomly denoise a random number of steps from 1 to this number
+        # while training. Just leave it
+        max_denoising_steps: 40
+        # works great at 1. I do 1 even with my 4090.
+        batch_size: 1
+        # bf16 works best if your GPU supports it (modern cards do)
+        dtype: bf16 # fp32, bf16, fp16
+        # if you have it, use it. It is faster and better
+        xformers: true
+        # I don't recommend using this unless you are trying to make a darker lora. Then do 0.1 MAX
+        # although, the way we train sliders is comparative, so it probably won't work anyway
+        noise_offset: 0.0
+
+      # the model to train the LoRA network on
+      model:
+        # huggingface name, path relative to the project root, or absolute path to a .safetensors or .ckpt
+        name_or_path: "runwayml/stable-diffusion-v1-5"
+        is_v2: false # for v2 models
+        is_v_pred: false # for v-prediction models (most v2 models)
+
+      # saving config
+      save:
+        dtype: float16 # precision to save. I recommend float16
+        save_every: 100 # save every this many steps
+
+      # sampling config
+      sample:
+        # must match train.noise_scheduler. This is not used here,
+        # but may be in the future and in other processes
+        sampler: "ddpm"
+        # sample every this many steps
+        sample_every: 20
+        # image size
+        width: 512
+        height: 512
+        # prompts to use for sampling. Do as many as you want, but it slows down training
+        # pick ones that will best represent the concept you are trying to adjust
+        # a few flags are allowed after the prompt
+        # --m [number] # network multiplier (LoRA weight). -3 for the negative side, 3 for the positive
+        #   side are good tests. Will inherit sample.network_multiplier if not set
+        # --n [string] # negative prompt, will inherit sample.neg if not set
+
+        # Only 75 tokens allowed currently
+        prompts:
+          - "a golden retriever --m -5"
+          - "a golden retriever --m -3"
+          - "a golden retriever --m 3"
+          - "a golden retriever --m 5"
+          - "calico cat --m -5"
+          - "calico cat --m -3"
+          - "calico cat --m 3"
+          - "calico cat --m 5"
+        # negative prompt used on all prompts above as default if they don't have one
+        neg: "cartoon, fake, drawing, illustration, cgi, animated, anime, monochrome"
+        # seed for sampling. 42 is the answer for everything
+        seed: 42
+        # walks the seed so s1 is 42, s2 is 43, s3 is 44, etc
+        # it will start over on the next sample_every, so s1 is always the seed
+        # works well if you use the same prompt but want different results
+        walk_seed: false
+        # cfg scale (4 to 10 is good)
+        guidance_scale: 7
+        # sampler steps (20 to 30 is good)
+        sample_steps: 20
+        # default network multiplier for all prompts
+        # since we are training a slider, I recommend overriding this with --m [number]
+        # in the prompts above to get both sides of the slider
+        network_multiplier: 1.0
+
+      # logging information
+      logging:
+        log_every: 10 # log every this many steps
+        use_wandb: false # not supported yet
+        verbose: false # probably don't need this unless you are debugging
+
+      # slider training config, best for last
+      slider:
+        # resolutions to train on. [ width, height ]. This is less important for sliders,
+        # as we are not teaching the model anything it doesn't already know,
+        # but it must be a size it understands: [ 512, 512 ] for sd_v1.5 and [ 768, 768 ] for sd_v2.1
+        # you can do as many as you want here
+        resolutions:
+          - [ 512, 512 ]
+#          - [ 512, 768 ]
+#          - [ 768, 768 ]
+        # These are the concepts to train on. You can do as many as you want here,
+        # but they can conflict with or outweigh each other. Other than for experimenting,
+        # I recommend just doing one for good results
+        targets:
+          # target_class is the base concept we are adjusting the representation of
+          # for example, if we are adjusting the representation of a person, we would use "person"
+          # if we are adjusting the representation of a cat, we would use "cat". It is not
+          # necessarily a keyword but what the model understands the concept to represent.
+          # "person" will affect men, women, children, etc but will not affect cats, dogs, etc
+          # it is the model's base general understanding of the concept and everything it represents
+          - target_class: "animal"
+            # positive is the prompt for the positive side of the slider.
+            # It is the concept that will be excited and amplified in the model when we slide the slider
+            # to the positive side, and forgotten / inverted when we slide
+            # the slider to the negative side. It is generally best to include the target_class in
+            # the prompt. You want it to be the extreme of what you want to train on. For example,
+            # if you want to train on fat people, you would use "an extremely fat, morbidly obese person"
+            # as the prompt, not just "fat person"
+            positive: "cat"
+            # negative is the prompt for the negative side of the slider and works the same as positive
+            # it does not necessarily work the same as a negative prompt when generating images
+            negative: "dog"
+            # LoRA weight to train this target. I recommend 1.0. Just leave it; it won't work
+            # the way you expect if you change it
+            multiplier: 1.0
+
+# You can put any information you want here, and it will be saved in the model.
+# The below is an example, but you can put your grocery list in it if you want.
+# It is saved in the model, so be aware of that. The software will include this
+# plus some other information for you automatically
+meta:
+  # [name] gets replaced with the name above
+  name: "[name]"
+#  version: '1.0'
+#  creator:
+#    name: Your Name
+#    email: your@gmail.com
+#    website: https://your.website
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/toolkit/paths.py b/toolkit/paths.py
index 2eb92e06..0dec1b6b 100644
--- a/toolkit/paths.py
+++ b/toolkit/paths.py
@@ -4,3 +4,10 @@ TOOLKIT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 CONFIG_ROOT = os.path.join(TOOLKIT_ROOT, 'config')
 SD_SCRIPTS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories", "sd-scripts")
 REPOS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories")
+
+
+def get_path(path):
+    # we allow absolute paths, but if it is not absolute, we assume it is relative to the toolkit root
+    if not os.path.isabs(path):
+        path = os.path.join(TOOLKIT_ROOT, path)
+    return path
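
Note for reviewers: a minimal sketch of how the new get_path helper in
toolkit/paths.py behaves once the patch is applied. This is not part of the
patch itself; it assumes the toolkit package is importable from the project
root, and the file names used below are placeholders, not files the helper
requires.

    import os
    from toolkit.paths import TOOLKIT_ROOT, get_path

    # a relative path is resolved against the toolkit root
    cfg = get_path("config/examples/train_slider.example.yml")
    assert cfg == os.path.join(TOOLKIT_ROOT, "config/examples/train_slider.example.yml")

    # an absolute path is returned unchanged
    assert get_path("/tmp/some_config.yml") == "/tmp/some_config.yml"

Resolving against TOOLKIT_ROOT rather than the current working directory
presumably keeps relative paths like "output/LoRA" stable no matter which
directory the runner is invoked from.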