mirror of https://github.com/ostris/ai-toolkit.git
synced 2026-01-26 08:29:45 +00:00

Added example for slider training that will run as is
.gitignore (vendored), 4 changes

@@ -168,4 +168,6 @@ cython_debug/
 /venv.*
 /config/*
 !/config/examples
 !/config/_PUT_YOUR_CONFIGS_HERE).txt
+/output/*
+!/output/.gitkeep
Deleted file (the previous JSON example config)

@@ -1,63 +0,0 @@
-{
-    "job": "extract",
-    "config": {
-        "name": "name_of_your_model",
-        "base_model": "/path/to/base/model",
-        "extract_model": "/path/to/model/to/extract",
-        "output_folder": "/path/to/output/folder",
-        "is_v2": false,
-        "dtype": "fp16",
-        "device": "cpu",
-        "process": [
-            {
-                "filename": "[name]_64_32.safetensors",
-                "dtype": "fp16",
-                "type": "locon",
-                "mode": "fixed",
-                "linear": 64,
-                "conv": 32
-            },
-            {
-                "output_path": "/absolute/path/for/this/output.safetensors",
-                "type": "locon",
-                "mode": "ratio",
-                "linear": 0.2,
-                "conv": 0.2
-            },
-            {
-                "type": "locon",
-                "mode": "quantile",
-                "linear": 0.5,
-                "conv": 0.5
-            },
-            {
-                "type": "lora",
-                "mode": "fixed",
-                "linear": 4
-            },
-            {
-                "type": "lora",
-                "mode": "fixed",
-                "linear": 64,
-                "conv": 32
-            }
-        ]
-    },
-    "meta": {
-        "name": "[name]",
-        "description": "A short description of your model",
-        "trigger_words": [
-            "put",
-            "trigger",
-            "words",
-            "here"
-        ],
-        "version": "0.1",
-        "creator": {
-            "name": "Your Name",
-            "email": "your@email.com",
-            "website": "https://yourwebsite.com"
-        },
-        "any": "All meta data above is arbitrary, it can be whatever you want."
-    }
-}
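The deleted JSON above and the YAML replacement below describe the same extract job; the file comments say the formats are interchangeable ("you can use json if you prefer"). A minimal sketch of what a format-agnostic loader could look like; load_config is a hypothetical name, not the toolkit's actual API:

import json
import os

import yaml  # pyyaml


def load_config(path):
    # hypothetical loader: pick a parser by extension, since the .json and
    # .yml example configs carry the same structure
    ext = os.path.splitext(path)[1].lower()
    if ext not in (".json", ".yml", ".yaml"):
        raise ValueError(f"unsupported config format: {ext}")
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f) if ext == ".json" else yaml.safe_load(f)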
config/examples/extract.example.yml (new file, 74 lines)

@@ -0,0 +1,74 @@
+---
+# this is in yaml format. You can use json if you prefer
+# I like both but yaml is easier to read and write
+# plus it has comments which is nice for documentation
+job: extract # tells the runner what to do
+config:
+  # the name will be used to create a folder in the output folder
+  # it will also replace any [name] token in the rest of this config
+  name: name_of_your_model
+  # can be a hugging face model, a .ckpt, or a .safetensors
+  base_model: "/path/to/base/model.safetensors"
+  # can be a hugging face model, a .ckpt, or a .safetensors
+  extract_model: "/path/to/model/to/extract/trained.safetensors"
+  # we will create a folder here with the name above, so this will create /path/to/output/folder/name_of_your_model
+  output_folder: "/path/to/output/folder"
+  is_v2: false
+  dtype: fp16 # saved dtype
+  device: cpu # cpu, cuda:0, etc
+
+  # processes can be chained like this to run multiple in a row
+  # they must all use the same models above, but this is great for testing different
+  # sizes and types of extractions. It is much faster as we already have the models loaded
+  process:
+    # process 1
+    - type: locon # locon or lora (locon is lycoris)
+      filename: "[name]_64_32.safetensors" # will be put in output folder
+      dtype: fp16
+      mode: fixed
+      linear: 64
+      conv: 32
+
+    # process 2
+    - type: locon
+      output_path: "/absolute/path/for/this/output.safetensors" # can be absolute
+      mode: ratio
+      linear: 0.2
+      conv: 0.2
+
+    # process 3
+    - type: locon
+      filename: "[name]_ratio_02.safetensors"
+      mode: quantile
+      linear: 0.5
+      conv: 0.5
+
+    # process 4
+    - type: lora # traditional lora extraction (lierla) with linear layers only
+      filename: "[name]_4.safetensors"
+      mode: fixed # fixed, ratio, quantile supported for lora as well
+      linear: 4
+
+    # process 5
+    - type: lora
+      filename: "[name]_q05.safetensors"
+      mode: quantile
+      linear: 0.5
+
+# you can put any information you want here, and it will be saved in the model
+# the below is an example. I recommend doing trigger words at a minimum
+# in the metadata. The software will include this plus some other information
+meta:
+  name: "[name]" # [name] gets replaced with the name above
+  description: A short description of your model
+  trigger_words:
+    - put
+    - trigger
+    - words
+    - here
+  version: '0.1'
+  creator:
+    name: Your Name
+    email: your@email.com
+    website: https://yourwebsite.com
+  any: All meta data above is arbitrary, it can be whatever you want.
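To make the process chaining concrete, here is a minimal sketch of reading this config and walking its process list. Only the keys come from the file above; replace_name_token is a hypothetical helper standing in for whatever the toolkit actually does with the [name] token:

import yaml  # pyyaml


def replace_name_token(obj, name):
    # hypothetical helper: recursively substitute the [name] token
    # that the comments above say is replaced throughout the config
    if isinstance(obj, str):
        return obj.replace("[name]", name)
    if isinstance(obj, list):
        return [replace_name_token(v, name) for v in obj]
    if isinstance(obj, dict):
        return {k: replace_name_token(v, name) for k, v in obj.items()}
    return obj


with open("config/examples/extract.example.yml") as f:
    cfg = yaml.safe_load(f)

cfg = replace_name_token(cfg, cfg["config"]["name"])
assert cfg["job"] == "extract"

# all chained processes reuse the models loaded once above them,
# varying only type (locon/lora), mode, and size
for i, proc in enumerate(cfg["config"]["process"], start=1):
    print(f"process {i}: type={proc['type']}, mode={proc['mode']}")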
config/examples/train_slider.example.yml (new file, 172 lines)

@@ -0,0 +1,172 @@
+---
+# This is in yaml format. You can use json if you prefer
+# I like both but yaml is easier to write
+# Plus it has comments which is nice for documentation
+# This is the config I use on my sliders, it is solid and tested
+job: train
+config:
+  # the name will be used to create a folder in the output folder
+  # it will also replace any [name] token in the rest of this config
+  name: pet_slider_v1
+  # folder will be created with the name above in the folder below
+  # it can be relative to the project root or absolute
+  training_folder: "output/LoRA"
+  device: cuda:0 # cpu, cuda:0, etc
+  # for tensorboard logging, we will make a subfolder for this job
+  log_dir: "output/.tensorboard"
+  # you can stack processes for other jobs, it is not tested with sliders though
+  # just use one for now
+  process:
+    - type: slider # tells runner to run the slider process
+      # network is the LoRA network for a slider, I recommend leaving this be
+      network:
+        # network type lierla is traditional LoRA that works everywhere, only linear layers
+        type: "lierla"
+        # rank / dim of the network. Bigger is not always better, especially for sliders. 8 is good
+        rank: 8
+        alpha: 1.0 # just leave it
+
+      # training config
+      train:
+        # this is also used in sampling. Stick with ddpm unless you know what you are doing
+        noise_scheduler: "ddpm" # or "lms", "euler_a"
+        # how many steps to train. More is not always better. I rarely go over 1000
+        steps: 500
+        # I have had good results with 4e-4 to 1e-4 at 500 steps
+        lr: 2e-4
+        # train the unet. I recommend leaving this true
+        train_unet: true
+        # train the text encoder. I don't recommend this unless you have a special use case
+        # for sliders we are adjusting the representation of the concept (unet),
+        # not the description of it (text encoder)
+        train_text_encoder: false
+
+        # just leave this unless you know what you are doing
+        # also supports "dadaptation"; set lr to 1 if you use that,
+        # but it learns too fast and I don't recommend it
+        optimizer: "adamw"
+        # only constant for now
+        lr_scheduler: "constant"
+        # we randomly denoise a random number of steps from 1 to this number
+        # while training. Just leave it
+        max_denoising_steps: 40
+        # works great at 1. I do 1 even with my 4090.
+        batch_size: 1
+        # bf16 works best if your GPU supports it (modern)
+        dtype: bf16 # fp32, bf16, fp16
+        # if you have it, use it. It is faster and better
+        xformers: true
+        # I don't recommend using this unless you are trying to make a darker lora. Then do 0.1 MAX
+        # although, the way we train sliders is comparative, so it probably won't work anyway
+        noise_offset: 0.0
+
+      # the model to train the LoRA network on
+      model:
+        # huggingface name, path relative to the project root, or absolute path to .safetensors or .ckpt
+        name_or_path: "runwayml/stable-diffusion-v1-5"
+        is_v2: false # for v2 models
+        is_v_pred: false # for v-prediction models (most v2 models)
+
+      # saving config
+      save:
+        dtype: float16 # precision to save. I recommend float16
+        save_every: 100 # save every this many steps
+
+      # sampling config
+      sample:
+        # must match train.noise_scheduler, this is not used here
+        # but may be in the future and in other processes
+        sampler: "ddpm"
+        # sample every this many steps
+        sample_every: 20
+        # image size
+        width: 512
+        height: 512
+        # prompts to use for sampling. Do as many as you want, but it slows down training
+        # pick ones that will best represent the concept you are trying to adjust
+        # allows some flags after the prompt
+        # --m [number] # network multiplier. LoRA weight. -3 for the negative slide, 3 for the positive
+        # slide are good tests. Will inherit sample.network_multiplier if not set
+        # --n [string] # negative prompt, will inherit sample.neg if not set
+
+        # Only 75 tokens allowed currently
+        prompts:
+          - "a golden retriever --m -5"
+          - "a golden retriever --m -3"
+          - "a golden retriever --m 3"
+          - "a golden retriever --m 5"
+          - "calico cat --m -5"
+          - "calico cat --m -3"
+          - "calico cat --m 3"
+          - "calico cat --m 5"
+        # negative prompt used on all prompts above as default if they don't have one
+        neg: "cartoon, fake, drawing, illustration, cgi, animated, anime, monochrome"
+        # seed for sampling. 42 is the answer for everything
+        seed: 42
+        # walks the seed so s1 is 42, s2 is 43, s3 is 44, etc
+        # will start over on next sample_every, so s1 is always the seed
+        # works well if you use the same prompt but want different results
+        walk_seed: false
+        # cfg scale (4 to 10 is good)
+        guidance_scale: 7
+        # sampler steps (20 to 30 is good)
+        sample_steps: 20
+        # default network multiplier for all prompts
+        # since we are training a slider, I recommend overriding this with --m [number]
+        # in the prompts above to get both sides of the slider
+        network_multiplier: 1.0
+
+      # logging information
+      logging:
+        log_every: 10 # log every this many steps
+        use_wandb: false # not supported yet
+        verbose: false # probably not needed unless you are debugging
+
+      # slider training config, best for last
+      slider:
+        # resolutions to train on. [ width, height ]. This is less important for sliders,
+        # as we are not teaching the model anything it doesn't already know,
+        # but it must be a size it understands: [ 512, 512 ] for sd_v1.5 and [ 768, 768 ] for sd_v2.1
+        # you can do as many as you want here
+        resolutions:
+          - [ 512, 512 ]
+          # - [ 512, 768 ]
+          # - [ 768, 768 ]
+        # These are the concepts to train on. You can do as many as you want here,
+        # but they can conflict with and outweigh each other. Other than experimenting, I recommend
+        # just doing one for good results
+        targets:
+          # target_class is the base concept we are adjusting the representation of
+          # for example, if we are adjusting the representation of a person, we would use "person"
+          # if we are adjusting the representation of a cat, we would use "cat". It is not
+          # necessarily a keyword but what the model understands the concept to represent.
+          # "person" will affect men, women, children, etc but will not affect cats, dogs, etc
+          # it is the model's base general understanding of the concept and everything it represents
+          - target_class: "animal"
+            # positive is the prompt for the positive side of the slider.
+            # It is the concept that will be excited and amplified in the model when we slide the slider
+            # to the positive side, and forgotten / inverted when we slide
+            # the slider to the negative side. It is generally best to include the target_class in
+            # the prompt. You want it to be the extreme of what you want to train on. For example,
+            # if you want to train on fat people, you would use "an extremely fat, morbidly obese person"
+            # as the prompt, not just "fat person"
+            positive: "cat"
+            # negative is the prompt for the negative side of the slider and works the same as positive
+            # it does not necessarily work the same as a negative prompt when generating images
+            negative: "dog"
+            # LoRA weight to train this target. I recommend 1.0. Just leave it; it won't work
+            # the way you expect if you change it
+            multiplier: 1.0
+
+# You can put any information you want here, and it will be saved in the model.
+# The below is an example, but you can put your grocery list in it if you want.
+# It is saved in the model, so be aware of that. The software will include this
+# plus some other information for you automatically
+meta:
+  # [name] gets replaced with the name above
+  name: "[name]"
+  # version: '1.0'
+  # creator:
+  #   name: Your Name
+  #   email: your@gmail.com
+  #   website: https://your.website
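The --m / --n flags described in the sample section take only a few lines of string handling to read. A sketch under the assumption that flags always follow the prompt text; this parser is illustrative, not the toolkit's own:

def parse_sample_prompt(prompt, default_multiplier=1.0, default_neg=""):
    # split off an optional "--n <negative prompt>" first, since it is free text
    text, multiplier, neg = prompt, default_multiplier, default_neg
    if "--n" in text:
        text, _, neg = text.partition("--n")
        neg = neg.strip()
    # then an optional "--m <number>" network multiplier (the LoRA weight)
    if "--m" in text:
        text, _, rest = text.partition("--m")
        multiplier = float(rest.split()[0])
    return text.strip(), multiplier, neg


# the example prompts above exercise both slider directions:
print(parse_sample_prompt("a golden retriever --m -5"))     # ('a golden retriever', -5.0, '')
print(parse_sample_prompt("calico cat --m 3 --n cartoon"))  # ('calico cat', 3.0, 'cartoon')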
output/.gitkeep (new file, 0 lines)
@@ -4,3 +4,10 @@ TOOLKIT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 CONFIG_ROOT = os.path.join(TOOLKIT_ROOT, 'config')
 SD_SCRIPTS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories", "sd-scripts")
 REPOS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories")
+
+
+def get_path(path):
+    # we allow absolute paths, but if it is not absolute, we assume it is relative to the toolkit root
+    if not os.path.isabs(path):
+        path = os.path.join(TOOLKIT_ROOT, path)
+    return path
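A short usage sketch of the new get_path helper above (the relative path shown is just an example):

get_path("/tmp/model.safetensors")
# -> "/tmp/model.safetensors" (absolute paths pass through unchanged)

get_path("config/examples/train_slider.example.yml")
# -> os.path.join(TOOLKIT_ROOT, "config/examples/train_slider.example.yml")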