From 355c80df071fda96cbcb211f73d48ea7acba8448 Mon Sep 17 00:00:00 2001
From: Jaret Burkett
Date: Sun, 13 Aug 2023 13:09:51 -0600
Subject: [PATCH] Added ability to use a Civitai URL as the model name and
 built a model downloader and cache manager for it

---
 test.py                           | 211 -----------------------------
 toolkit/civitai.py                | 217 ++++++++++++++++++++++++++++++
 toolkit/paths.py                  |   6 +
 toolkit/stable_diffusion_model.py |  19 ++-
 4 files changed, 236 insertions(+), 217 deletions(-)
 delete mode 100644 test.py
 create mode 100644 toolkit/civitai.py

diff --git a/test.py b/test.py
deleted file mode 100644
index f69c116b..00000000
--- a/test.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from collections import OrderedDict
-
-job_to_run = OrderedDict({
-    # This is the config I use on my sliders, It is solid and tested
-    'job': 'train',
-    'config': {
-        # the name will be used to create a folder in the output folder
-        # it will also replace any [name] token in the rest of this config
-        'name': 'detail_slider_v1',
-        # folder will be created with name above in folder below
-        # it can be relative to the project root or absolute
-        'training_folder': "output/LoRA",
-        'device': 'cuda',  # cpu, cuda:0, etc
-        # for tensorboard logging, we will make a subfolder for this job
-        'log_dir': "output/.tensorboard",
-        # you can stack processes for other jobs, It is not tested with sliders though
-        # just use one for now
-        'process': {
-            'type': 'slider',  # tells runner to run the slider process
-            # network is the LoRA network for a slider, I recommend to leave this be
-            'network': {
-                'type': "lora",
-                # rank / dim of the network. Bigger is not always better. Especially for sliders. 8 is good
-                'linear': 8,  # "rank" or "dim"
-                'linear_alpha': 4,  # Do about half of rank "alpha"
-                # 'conv': 4,  # for convolutional layers "locon"
-                # 'conv_alpha': 4,  # Do about half of conv "alpha"
-            },
-            # training config
-            'train': {
-                # this is also used in sampling. Stick with ddpm unless you know what you are doing
-                'noise_scheduler': "ddpm",  # or "ddpm", "lms", "euler_a"
-                # how many steps to train. More is not always better. I rarely go over 1000
-                'steps': 100,
-                # I have had good results with 4e-4 to 1e-4 at 500 steps
-                'lr': 2e-4,
-                # enables gradient checkpoint, saves vram, leave it on
-                'gradient_checkpointing': True,
-                # train the unet. I recommend leaving this true
-                'train_unet': True,
-                # train the text encoder. I don't recommend this unless you have a special use case
-                # for sliders we are adjusting representation of the concept (unet),
-                # not the description of it (text encoder)
-                'train_text_encoder': False,
-
-                # just leave unless you know what you are doing
-                # also supports "dadaptation" but set lr to 1 if you use that,
-                # but it learns too fast and I don't recommend it
-                'optimizer': "adamw",
-                # only constant for now
-                'lr_scheduler': "constant",
-                # we randomly denoise random num of steps form 1 to this number
-                # while training. Just leave it
-                'max_denoising_steps': 40,
-                # works great at 1. I do 1 even with my 4090.
-                # higher may not work right with newer single batch stacking code anyway
-                'batch_size': 1,
-                # bf16 works best if your GPU supports it (modern)
-                'dtype': 'bf16',  # fp32, bf16, fp16
-                # I don't recommend using unless you are trying to make a darker lora. Then do 0.1 MAX
-                # although, the way we train sliders is comparative, so it probably won't work anyway
-                'noise_offset': 0.0,
-            },
-
-            # the model to train the LoRA network on
-            'model': {
-                # huggingface name, relative prom project path, or absolute path to .safetensors or .ckpt
-                'name_or_path': "runwayml/stable-diffusion-v1-5",
-                'is_v2': False,  # for v2 models
-                'is_v_pred': False,  # for v-prediction models (most v2 models)
-                # has some issues with the dual text encoder and the way we train sliders
-                # it works bit weights need to probably be higher to see it.
-                'is_xl': False,  # for SDXL models
-            },
-
-            # saving config
-            'save': {
-                'dtype': 'float16',  # precision to save. I recommend float16
-                'save_every': 50,  # save every this many steps
-                # this will remove step counts more than this number
-                # allows you to save more often in case of a crash without filling up your drive
-                'max_step_saves_to_keep': 2,
-            },
-
-            # sampling config
-            'sample': {
-                # must match train.noise_scheduler, this is not used here
-                # but may be in future and in other processes
-                'sampler': "ddpm",
-                # sample every this many steps
-                'sample_every': 20,
-                # image size
-                'width': 512,
-                'height': 512,
-                # prompts to use for sampling. Do as many as you want, but it slows down training
-                # pick ones that will best represent the concept you are trying to adjust
-                # allows some flags after the prompt
-                #  --m [number]  # network multiplier. LoRA weight. -3 for the negative slide, 3 for the positive
-                #      slide are good tests. will inherit sample.network_multiplier if not set
-                #  --n [string]  # negative prompt, will inherit sample.neg if not set
-                #      Only 75 tokens allowed currently
-                # I like to do a wide positive and negative spread so I can see a good range and stop
-                # early if the network is braking down
-                'prompts': [
-                    "a woman in a coffee shop, black hat, blonde hair, blue jacket --m -5",
-                    "a woman in a coffee shop, black hat, blonde hair, blue jacket --m -3",
-                    "a woman in a coffee shop, black hat, blonde hair, blue jacket --m 3",
-                    "a woman in a coffee shop, black hat, blonde hair, blue jacket --m 5",
-                    "a golden retriever sitting on a leather couch, --m -5",
-                    "a golden retriever sitting on a leather couch --m -3",
-                    "a golden retriever sitting on a leather couch --m 3",
-                    "a golden retriever sitting on a leather couch --m 5",
-                    "a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m -5",
-                    "a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m -3",
-                    "a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m 3",
-                    "a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m 5",
-                ],
-                # negative prompt used on all prompts above as default if they don't have one
-                'neg': "cartoon, fake, drawing, illustration, cgi, animated, anime, monochrome",
-                # seed for sampling. 42 is the answer for everything
-                'seed': 42,
-                # walks the seed so s1 is 42, s2 is 43, s3 is 44, etc
-                # will start over on next sample_every so s1 is always seed
-                # works well if you use same prompt but want different results
-                'walk_seed': False,
-                # cfg scale (4 to 10 is good)
-                'guidance_scale': 7,
-                # sampler steps (20 to 30 is good)
-                'sample_steps': 20,
-                # default network multiplier for all prompts
-                # since we are training a slider, I recommend overriding this with --m [number]
-                # in the prompts above to get both sides of the slider
-                'network_multiplier': 1.0,
-            },
-
-            # logging information
-            'logging': {
-                'log_every': 10,  # log every this many steps
-                'use_wandb': False,  # not supported yet
-                'verbose': False,  # probably done need unless you are debugging
-            },
-
-            # slider training config, best for last
-            'slider': {
-                # resolutions to train on. [ width, height ]. This is less important for sliders
-                # as we are not teaching the model anything it doesn't already know
-                # but must be a size it understands [ 512, 512 ] for sd_v1.5 and [ 768, 768 ] for sd_v2.1
-                # and [ 1024, 1024 ] for sd_xl
-                # you can do as many as you want here
-                'resolutions': [
-                    [512, 512],
-                    # [ 512, 768 ]
-                    # [ 768, 768 ]
-                ],
-                # slider training uses 4 combined steps for a single round. This will do it in one gradient
-                # step. It is highly optimized and shouldn't take anymore vram than doing without it,
-                # since we break down batches for gradient accumulation now. so just leave it on.
-                'batch_full_slide': True,
-                # These are the concepts to train on. You can do as many as you want here,
-                # but they can conflict outweigh each other. Other than experimenting, I recommend
-                # just doing one for good results
-                'targets': [
-                    # target_class is the base concept we are adjusting the representation of
-                    # for example, if we are adjusting the representation of a person, we would use "person"
-                    # if we are adjusting the representation of a cat, we would use "cat" It is not
-                    # a keyword necessarily but what the model understands the concept to represent.
-                    # "person" will affect men, women, children, etc but will not affect cats, dogs, etc
-                    # it is the models base general understanding of the concept and everything it represents
-                    # you can leave it blank to affect everything. In this example, we are adjusting
-                    # detail, so we will leave it blank to affect everything
-                    {
-                        'target_class': "",
-                        # positive is the prompt for the positive side of the slider.
-                        # It is the concept that will be excited and amplified in the model when we slide the slider
-                        # to the positive side and forgotten / inverted when we slide
-                        # the slider to the negative side. It is generally best to include the target_class in
-                        # the prompt. You want it to be the extreme of what you want to train on. For example,
-                        # if you want to train on fat people, you would use "an extremely fat, morbidly obese person"
-                        # as the prompt. Not just "fat person"
-                        # max 75 tokens for now
-                        'positive': "high detail, 8k, intricate, detailed, high resolution, high res, high quality",
-                        # negative is the prompt for the negative side of the slider and works the same as positive
-                        # it does not necessarily work the same as a negative prompt when generating images
-                        # these need to be polar opposites.
-                        # max 76 tokens for now
-                        'negative': "blurry, boring, fuzzy, low detail, low resolution, low res, low quality",
-                        # the loss for this target is multiplied by this number.
-                        # if you are doing more than one target it may be good to set less important ones
-                        # to a lower number like 0.1 so they don't outweigh the primary target
-                        'weight': 1.0,
-                    },
-                ],
-            },
-        },
-    },
-
-    # You can put any information you want here, and it will be saved in the model.
-    # The below is an example, but you can put your grocery list in it if you want.
-    # It is saved in the model so be aware of that. The software will include this
-    # plus some other information for you automatically
-    'meta': {
-        # [name] gets replaced with the name above
-        'name': "[name]",
-        'version': '1.0',
-        # 'creator': {
-        #     'name': 'your name',
-        #     'email': 'your@gmail.com',
-        #     'website': 'https://your.website'
-        # }
-    }
-})
\ No newline at end of file
diff --git a/toolkit/civitai.py b/toolkit/civitai.py
new file mode 100644
index 00000000..ef505ad8
--- /dev/null
+++ b/toolkit/civitai.py
@@ -0,0 +1,217 @@
+from toolkit.paths import MODELS_PATH
+import requests
+import os
+import json
+import tqdm
+
+
+class ModelCache:
+    def __init__(self):
+        self.raw_cache = {}
+        self.cache_path = os.path.join(MODELS_PATH, '.ai_toolkit_cache.json')
+        if os.path.exists(self.cache_path):
+            with open(self.cache_path, 'r') as f:
+                all_cache = json.load(f)
+            if 'models' in all_cache:
+                self.raw_cache = all_cache['models']
+            else:
+                self.raw_cache = all_cache
+
+    def get_model_path(self, model_id: int, model_version_id: int = None):
+        if str(model_id) not in self.raw_cache:
+            return None
+        if model_version_id is None:
+            # get latest cached version, or None if no versions are cached
+            model_version_id = max([int(x) for x in self.raw_cache[str(model_id)].keys()], default=None)
+            if model_version_id is None:
+                return None
+            model_path = self.raw_cache[str(model_id)][str(model_version_id)]['model_path']
+            # check if model path exists
+            if not os.path.exists(model_path):
+                # remove version from cache
+                del self.raw_cache[str(model_id)][str(model_version_id)]
+                self.save()
+                return None
+            return model_path
+        else:
+            if str(model_version_id) not in self.raw_cache[str(model_id)]:
+                return None
+            model_path = self.raw_cache[str(model_id)][str(model_version_id)]['model_path']
+            # check if model path exists
+            if not os.path.exists(model_path):
+                # remove version from cache
+                del self.raw_cache[str(model_id)][str(model_version_id)]
+                self.save()
+                return None
+            return model_path
+
+    def update_cache(self, model_id: int, model_version_id: int, model_path: str):
+        if str(model_id) not in self.raw_cache:
+            self.raw_cache[str(model_id)] = {}
+        self.raw_cache[str(model_id)][str(model_version_id)] = {
+            'model_path': model_path
+        }
+        self.save()
+
+    def save(self):
+        os.makedirs(os.path.dirname(self.cache_path), exist_ok=True)
+        all_cache = {'models': {}}
+        if os.path.exists(self.cache_path):
+            # load the existing file first so other sections of it are preserved
+            with open(self.cache_path, 'r') as f:
+                all_cache = json.load(f)
+
+        all_cache['models'] = self.raw_cache
+
+        with open(self.cache_path, 'w') as f:
+            json.dump(all_cache, f, indent=2)
+
+
+def get_model_download_info(model_id: int, model_version_id: int = None):
+    # curl https://civitai.com/api/v1/models?limit=3&types=TextualInversion \
+    #     -H "Content-Type: application/json" \
+    #     -X GET
+    print(
+        f"Getting model info for model id: {model_id}{f' and version id: {model_version_id}' if model_version_id is not None else ''}")
+    endpoint = f"https://civitai.com/api/v1/models/{model_id}"
+
+    # get the json
+    response = requests.get(endpoint)
+    response.raise_for_status()
+    model_data = response.json()
+
+    model_version = None
+
+    # go through versions and get the top one if one is not set
+    for version in model_data['modelVersions']:
+        if model_version_id is not None:
+            if str(version['id']) == str(model_version_id):
+                model_version = version
+                break
+        else:
+            # get first version
+            model_version = version
+            break
+
+    if model_version is None:
+        raise ValueError(
+            f"Could not find a model version for model id: {model_id}{f' and version id: {model_version_id}' if model_version_id is not None else ''}")
+
+    model_file = None
+    # go through files and prefer fp16 safetensors. File metadata looks like:
+    # "metadata": {
+    #     "fp": "fp16",
+    #     "size": "pruned",
+    #     "format": "SafeTensor"
+    # },
+    # todo check pickle scans and skip if not good
+    # try to get fp16 safetensor
+    for file in model_version['files']:
+        if file['metadata'].get('fp') == 'fp16' and file['metadata'].get('format') == 'SafeTensor':
+            model_file = file
+            break
+
+    if model_file is None:
+        # try to get primary
+        for file in model_version['files']:
+            if file.get('primary', False):
+                model_file = file
+                break
+
+    if model_file is None:
+        # try to get any safetensor
+        for file in model_version['files']:
+            if file['metadata'].get('format') == 'SafeTensor':
+                model_file = file
+                break
+
+    if model_file is None:
+        # try to get any fp16
+        for file in model_version['files']:
+            if file['metadata'].get('fp') == 'fp16':
+                model_file = file
+                break
+
+    if model_file is None:
+        # try to get any file at all
+        for file in model_version['files']:
+            model_file = file
+            break
+
+    if model_file is None:
+        raise ValueError(f"Could not find a model file to download for model id: {model_id}")
+
+    return model_file, model_version['id']
+
+
+def get_model_path_from_url(url: str):
+    # get query params from url if they are set
+    # e.g. https://civitai.com/models/25694?modelVersionId=127742
+    query_params = {}
+    if '?' in url:
+        query_string = url.split('?')[1]
+        query_params = dict(qc.split("=") for qc in query_string.split("&"))
+
+    # get model id from url
+    model_id = url.split('/')[-1]
+    # remove query params from model id
+    if '?' in model_id:
+        model_id = model_id.split('?')[0]
+    if model_id.isdigit():
+        model_id = int(model_id)
+    else:
+        raise ValueError(f"Invalid model id: {model_id}")
+
+    model_cache = ModelCache()
+    model_path = model_cache.get_model_path(model_id, query_params.get('modelVersionId', None))
+    if model_path is not None:
+        return model_path
+    else:
+        # download model
+        file_info, model_version_id = get_model_download_info(model_id, query_params.get('modelVersionId', None))
+
+        download_url = file_info['downloadUrl']  # the model page url does not work for direct downloads
+        size_kb = file_info['sizeKB']
+        filename = file_info['name']
+        model_path = os.path.join(MODELS_PATH, filename)
+
+        # download model
+        print(f"Did not find model locally, downloading model from: {download_url}")
+
+        # use tqdm to show download progress
+        response = requests.get(download_url, stream=True)
+        response.raise_for_status()
+        total_size_in_bytes = int(response.headers.get('content-length', 0))
+        block_size = 1024  # 1 Kibibyte
+        progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+        tmp_path = os.path.join(MODELS_PATH, f".download_tmp_{filename}")
+        os.makedirs(os.path.dirname(model_path), exist_ok=True)
+        # remove tmp file if it exists
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+        try:
+            with open(tmp_path, 'wb') as f:
+                for data in response.iter_content(block_size):
+                    progress_bar.update(len(data))
+                    f.write(data)
+            progress_bar.close()
+            # move to final path
+            os.rename(tmp_path, model_path)
+            model_cache.update_cache(model_id, model_version_id, model_path)
+
+            return model_path
+        except Exception as e:
+            # remove tmp file
+            os.remove(tmp_path)
+            raise e
+
+
+# quick manual test when run directly
+if __name__ == '__main__':
+    model_path = get_model_path_from_url("https://civitai.com/models/25694?modelVersionId=127742")
+    print(model_path)
diff --git a/toolkit/paths.py b/toolkit/paths.py
index 0dec1b6b..fe885ba2 100644
--- a/toolkit/paths.py
+++ b/toolkit/paths.py
@@ -5,6 +5,12 @@ CONFIG_ROOT = os.path.join(TOOLKIT_ROOT, 'config')
 SD_SCRIPTS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories", "sd-scripts")
 REPOS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories")
 
+# check if the ENV variable is set
+if 'MODELS_PATH' in os.environ:
+    MODELS_PATH = os.environ['MODELS_PATH']
+else:
+    MODELS_PATH = os.path.join(TOOLKIT_ROOT, "models")
+
 
 def get_path(path):
     # we allow absolute paths, but if it is not absolute, we assume it is relative to the toolkit root
diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py
index 2dbe830f..a41d3541 100644
--- a/toolkit/stable_diffusion_model.py
+++ b/toolkit/stable_diffusion_model.py
@@ -143,6 +143,13 @@ class StableDiffusion:
             prediction_type=prediction_type,
             steps_offset=1
         )
+
+        model_path = self.model_config.name_or_path
+        if 'civitai.com' in self.model_config.name_or_path:
+            # name_or_path is a Civitai model URL, resolve it through the downloader
+            from toolkit.civitai import get_model_path_from_url
+            model_path = get_model_path_from_url(self.model_config.name_or_path)
+
         if self.model_config.is_xl:
             if self.custom_pipeline is not None:
                 pipln = self.custom_pipeline
@@ -150,17 +157,17 @@
                 pipln = CustomStableDiffusionXLPipeline
 
             # see if path exists
-            if not os.path.exists(self.model_config.name_or_path):
+            if not os.path.exists(model_path):
                 # try to load with default diffusers
                 pipe = pipln.from_pretrained(
-                    self.model_config.name_or_path,
+                    model_path,
                     dtype=dtype,
                     scheduler_type='ddpm',
                     device=self.device_torch,
                 ).to(self.device_torch)
             else:
                 pipe = pipln.from_single_file(
-                    self.model_config.name_or_path,
+                    model_path,
                     dtype=dtype,
                     scheduler_type='ddpm',
                     device=self.device_torch,
@@ -180,10 +187,10 @@
                 pipln = CustomStableDiffusionPipeline
 
             # see if path exists
-            if not os.path.exists(self.model_config.name_or_path):
+            if not os.path.exists(model_path):
                 # try to load with default diffusers
                 pipe = pipln.from_pretrained(
-                    self.model_config.name_or_path,
+                    model_path,
                     dtype=dtype,
                     scheduler_type='dpm',
                     device=self.device_torch,
@@ -193,7 +200,7 @@
             ).to(self.device_torch)
         else:
             pipe = pipln.from_single_file(
-                self.model_config.name_or_path,
+                model_path,
                 dtype=dtype,
                 scheduler_type='dpm',
                 device=self.device_torch,
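
Notes: a minimal usage sketch of the code added above, assuming the toolkit package is importable. The URL is the one already used in civitai.py's own __main__ block; the temp directory is an arbitrary choice for this example. toolkit/paths.py reads MODELS_PATH from the environment at import time, so the override must happen before the import.

    import os

    # optional: redirect downloads away from the default <repo>/models folder
    os.environ.setdefault('MODELS_PATH', '/tmp/ai-toolkit-models')

    from toolkit.civitai import get_model_path_from_url

    # the first call hits the Civitai API, downloads the preferred file
    # (fp16 SafeTensor when available) into MODELS_PATH, and records it in
    # .ai_toolkit_cache.json; repeat calls return the cached local path
    model_path = get_model_path_from_url(
        "https://civitai.com/models/25694?modelVersionId=127742"
    )
    print(model_path)

The same URL can also be set directly as the model 'name_or_path' in a training config; the stable_diffusion_model.py change above detects 'civitai.com' in the value and swaps in the downloaded local path before the pipeline is loaded.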