diff --git a/.gitmodules b/.gitmodules
index 18079356..657cf28b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,9 @@
 [submodule "repositories/leco"]
 	path = repositories/leco
 	url = https://github.com/p1atdev/LECO
+[submodule "repositories/batch_annotator"]
+	path = repositories/batch_annotator
+	url = https://github.com/ostris/batch-annotator
+[submodule "repositories/ipadapter"]
+	path = repositories/ipadapter
+	url = https://github.com/tencent-ailab/IP-Adapter.git
diff --git a/extensions_built_in/advanced_generator/PureLoraGenerator.py b/extensions_built_in/advanced_generator/PureLoraGenerator.py
new file mode 100644
index 00000000..ec19da31
--- /dev/null
+++ b/extensions_built_in/advanced_generator/PureLoraGenerator.py
@@ -0,0 +1,102 @@
+import os
+from collections import OrderedDict
+
+from toolkit.config_modules import ModelConfig, GenerateImageConfig, SampleConfig, LoRMConfig
+from toolkit.lorm import ExtractMode, convert_diffusers_unet_to_lorm
+from toolkit.sd_device_states_presets import get_train_sd_device_state_preset
+from toolkit.stable_diffusion_model import StableDiffusion
+import gc
+import torch
+from jobs.process import BaseExtensionProcess
+from toolkit.train_tools import get_torch_dtype
+
+
+def flush():  # collect garbage first so the CUDA blocks it frees can be released as well
+    gc.collect()
+    torch.cuda.empty_cache()
+
+
+class PureLoraGenerator(BaseExtensionProcess):
+    """Render the configured sample prompts after converting the UNet to LoRM.
+
+    __init__ parses the config and constructs the StableDiffusion wrapper;
+    run() calls load_model() and does the conversion and generation.
+    """
+
+    def __init__(self, process_id: int, job, config: OrderedDict):
+        """Read the process config.
+
+        Required config keys: 'output_folder', 'model', 'sample'.
+        Optional keys: 'device' ('cuda'), 'dtype' ('float16'), 'lorm' (None).
+        """
+        super().__init__(process_id, job, config)
+        self.output_folder = self.get_conf('output_folder', required=True)
+        self.device = self.get_conf('device', 'cuda')
+        self.device_torch = torch.device(self.device)
+        self.model_config = ModelConfig(**self.get_conf('model', required=True))
+        self.generate_config = SampleConfig(**self.get_conf('sample', required=True))
+        self.dtype = self.get_conf('dtype', 'float16')
+        self.torch_dtype = get_torch_dtype(self.dtype)
+        lorm_config = self.get_conf('lorm', None)
+        self.lorm_config = LoRMConfig(**lorm_config) if lorm_config is not None else None
+        self.device_state_preset = get_train_sd_device_state_preset(device=self.device_torch)
+        self.progress_bar = None
+        self.sd = StableDiffusion(
+            device=self.device,
+            model_config=self.model_config,
+            dtype=self.dtype,
+        )
+
+    def run(self):
+        """Load the model, convert the UNet to LoRM, render every prompt, then free the model."""
+        super().run()
+        print("Loading model...")
+        with torch.no_grad():
+            self.sd.load_model()
+            self.sd.unet.eval()
+            self.sd.unet.to(self.device_torch)
+            if isinstance(self.sd.text_encoder, list):
+                for te in self.sd.text_encoder:
+                    te.eval()
+                    te.to(self.device_torch)
+            else:
+                self.sd.text_encoder.eval()
+                # NOTE(review): moves the whole wrapper, not only the text encoder — confirm intended
+                self.sd.to(self.device_torch)
+
+            print("Converting to LoRM UNet")
+            # replace the unet with LoRMUnet; the return value is unused and config may be None
+            convert_diffusers_unet_to_lorm(
+                self.sd.unet,
+                config=self.lorm_config,
+            )
+
+            sample_config = self.generate_config
+            # '[time]' and '[count]' stay literal here — presumably filled in downstream; verify
+            output_path = os.path.join(self.output_folder, f"[time]_[count].{sample_config.ext}")
+            gen_img_config_list = []
+            for i, prompt in enumerate(sample_config.prompts):
+                # with walk_seed prompt i uses seed + i, otherwise every prompt shares the seed
+                seed = sample_config.seed + i if sample_config.walk_seed else sample_config.seed
+                gen_img_config_list.append(GenerateImageConfig(
+                    prompt=prompt,  # it will autoparse the prompt
+                    width=sample_config.width,
+                    height=sample_config.height,
+                    negative_prompt=sample_config.neg,
+                    seed=seed,
+                    guidance_scale=sample_config.guidance_scale,
+                    guidance_rescale=sample_config.guidance_rescale,
+                    num_inference_steps=sample_config.sample_steps,
+                    network_multiplier=sample_config.network_multiplier,
+                    output_path=output_path,
+                    output_ext=sample_config.ext,
+                    adapter_conditioning_scale=sample_config.adapter_conditioning_scale,
+                ))
+
+            # send to be generated
+            self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler)
+            print("Done generating images")
+            # cleanup
+            del self.sd
+            gc.collect()
+            torch.cuda.empty_cache()
diff --git a/extensions_built_in/advanced_generator/ReferenceGenerator.py b/extensions_built_in/advanced_generator/ReferenceGenerator.py
new file mode 100644
index 00000000..14ae1ff5
--- /dev/null
+++ b/extensions_built_in/advanced_generator/ReferenceGenerator.py
@@ -0,0 +1,193 @@
+import os
+import random
+from collections import OrderedDict
+from typing import List
+
+import numpy as np
+from PIL import Image
+from diffusers import T2IAdapter
+from torch.utils.data import DataLoader
+from diffusers import StableDiffusionXLAdapterPipeline
+from tqdm import tqdm
+
+from toolkit.config_modules import ModelConfig, GenerateImageConfig, preprocess_dataset_raw_config, DatasetConfig
+from toolkit.data_transfer_object.data_loader import FileItemDTO, DataLoaderBatchDTO
+from toolkit.sampler import get_sampler
+from toolkit.stable_diffusion_model import StableDiffusion
+import gc
+import torch
+from jobs.process import BaseExtensionProcess
+from toolkit.data_loader import get_dataloader_from_datasets
+from toolkit.train_tools import get_torch_dtype
+from controlnet_aux.midas import MidasDetector
+from diffusers.utils import load_image
+
+
+def flush():  # collect garbage first so the CUDA blocks it frees can be released as well
+    gc.collect()
+    torch.cuda.empty_cache()
+
+
+class GenerateConfig:
+    """Plain holder for the generation options passed as keyword arguments."""
+
+    def __init__(self, **kwargs):
+        self.sampler = kwargs.get('sampler', 'ddpm')
+        self.neg = kwargs.get('neg', '')
+        self.seed = kwargs.get('seed', -1)
+        self.walk_seed = kwargs.get('walk_seed', False)
+        self.t2i_adapter_path = kwargs.get('t2i_adapter_path', None)
+        self.guidance_scale = kwargs.get('guidance_scale', 7)
+        self.sample_steps = kwargs.get('sample_steps', 20)
+        self.prompt_2 = kwargs.get('prompt_2', None)
+        self.neg_2 = kwargs.get('neg_2', None)
+        self.guidance_rescale = kwargs.get('guidance_rescale', 0.0)
+        self.ext = kwargs.get('ext', 'png')
+        self.adapter_conditioning_scale = kwargs.get('adapter_conditioning_scale', 1.0)
+        prompts = kwargs.get('prompts', None)  # copied so shuffling leaves the caller's list alone
+        self.prompts: List[str] = list(prompts) if isinstance(prompts, list) else prompts
+        if self.prompts and kwargs.get('shuffle', False):
+            random.shuffle(self.prompts)  # skipped when there are no prompts to shuffle
+
+
+class ReferenceGenerator(BaseExtensionProcess):
+    """Re-render every dataset image through an SDXL T2I-adapter pipeline.
+
+    Each source image is turned into a MiDaS depth map that conditions the
+    pipeline; the result and its caption are saved under the source file name.
+    """
+
+    def __init__(self, process_id: int, job, config: OrderedDict):
+        """Parse the process config and construct the StableDiffusion wrapper.
+
+        Required config keys: 'output_folder', 'model', 'generate'.
+        Optional keys: 'device' ('cuda'), 'dtype' ('float16'), 'datasets'.
+        """
+        super().__init__(process_id, job, config)
+        self.output_folder = self.get_conf('output_folder', required=True)
+        self.device = self.get_conf('device', 'cuda')
+        self.model_config = ModelConfig(**self.get_conf('model', required=True))
+        self.generate_config = GenerateConfig(**self.get_conf('generate', required=True))
+        self.is_latents_cached = True
+        raw_datasets = self.get_conf('datasets', None)
+        if raw_datasets is not None and len(raw_datasets) > 0:
+            raw_datasets = preprocess_dataset_raw_config(raw_datasets)
+        self.datasets = None
+        self.datasets_reg = None
+        self.dtype = self.get_conf('dtype', 'float16')
+        self.torch_dtype = get_torch_dtype(self.dtype)
+        self.params = []
+        if raw_datasets is not None and len(raw_datasets) > 0:
+            for raw_dataset in raw_datasets:
+                dataset = DatasetConfig(**raw_dataset)
+                # a single dataset without latent caching clears the flag for all
+                if not (dataset.cache_latents or dataset.cache_latents_to_disk):
+                    self.is_latents_cached = False
+                # regularization datasets are collected separately from the rest
+                if dataset.is_reg:
+                    if self.datasets_reg is None:
+                        self.datasets_reg = []
+                    self.datasets_reg.append(dataset)
+                else:
+                    if self.datasets is None:
+                        self.datasets = []
+                    self.datasets.append(dataset)
+
+        self.progress_bar = None
+        self.sd = StableDiffusion(
+            device=self.device,
+            model_config=self.model_config,
+            dtype=self.dtype,
+        )
+        print(f"Using device {self.device}")
+        self.data_loader: DataLoader = None
+        self.adapter: T2IAdapter = None
+
+    def run(self):
+        """Generate one image plus caption file per dataset item, then free the model."""
+        super().run()
+        print("Loading model...")
+        self.sd.load_model()
+        device = torch.device(self.device)
+        os.makedirs(self.output_folder, exist_ok=True)  # make sure the save target exists
+
+        if self.generate_config.t2i_adapter_path is not None:
+            # NOTE(review): the configured path only enables the adapter; the weights
+            # always come from the hard-coded repo below — confirm this is intended
+            self.adapter = T2IAdapter.from_pretrained(
+                "TencentARC/t2i-adapter-depth-midas-sdxl-1.0", torch_dtype=self.torch_dtype, variant="fp16"
+            ).to(device)
+
+        midas_depth = MidasDetector.from_pretrained(
+            "valhalla/t2iadapter-aux-models", filename="dpt_large_384.pt", model_type="dpt_large"
+        ).to(device)
+
+        # indexes text_encoder / tokenizer as pairs, i.e. expects a model that provides two of each
+        pipe = StableDiffusionXLAdapterPipeline(
+            vae=self.sd.vae,
+            unet=self.sd.unet,
+            text_encoder=self.sd.text_encoder[0],
+            text_encoder_2=self.sd.text_encoder[1],
+            tokenizer=self.sd.tokenizer[0],
+            tokenizer_2=self.sd.tokenizer[1],
+            scheduler=get_sampler(self.generate_config.sampler),
+            adapter=self.adapter,
+        ).to(device)
+        pipe.set_progress_bar_config(disable=True)
+
+        self.data_loader = get_dataloader_from_datasets(self.datasets, 1, self.sd)
+
+        seed = self.generate_config.seed
+        # the context manager closes the bar even when generation raises
+        with tqdm(total=len(self.data_loader), desc="Generating images") as pbar:
+            for batch in self.data_loader:
+                file_item: FileItemDTO = batch.file_items[0]
+                img_filename = os.path.basename(file_item.path)
+                img_filename_no_ext = os.path.splitext(img_filename)[0]
+                output_path = os.path.join(self.output_folder, img_filename)
+                output_caption_path = os.path.join(self.output_folder, img_filename_no_ext + '.txt')
+
+                caption = batch.get_caption_list()[0]
+
+                # image comes in -1 to 1. convert to a PIL RGB image
+                img: torch.Tensor = batch.tensor.clone()
+                img = ((img + 1) / 2).clamp(0, 1)
+                img = img[0].permute(1, 2, 0).cpu().numpy()
+                image = Image.fromarray((img * 255).astype(np.uint8))
+                width, height = image.size
+                min_res = min(width, height)
+
+                if self.generate_config.walk_seed:
+                    seed = seed + 1
+                if self.generate_config.seed == -1:
+                    # random
+                    seed = random.randint(0, 1000000)
+
+                torch.manual_seed(seed)
+                torch.cuda.manual_seed(seed)
+
+                # generate depth map at the image's shorter side (do 512 ?)
+                image = midas_depth(image, detect_resolution=min_res, image_resolution=min_res)
+
+                gen_images = pipe(
+                    prompt=caption,
+                    negative_prompt=self.generate_config.neg,
+                    image=image,
+                    num_inference_steps=self.generate_config.sample_steps,
+                    adapter_conditioning_scale=self.generate_config.adapter_conditioning_scale,
+                    guidance_scale=self.generate_config.guidance_scale,
+                ).images[0]
+                gen_images.save(output_path)
+
+                # save caption
+                with open(output_caption_path, 'w', encoding='utf-8') as f:
+                    f.write(caption)
+
+                pbar.update(1)
+                batch.cleanup()
+
+        print("Done generating images")
+        # cleanup
+        del self.sd
+        gc.collect()
+        torch.cuda.empty_cache()
diff --git a/extensions_built_in/advanced_generator/__init__.py b/extensions_built_in/advanced_generator/__init__.py
new file mode 100644
index 00000000..94a91c6b
--- /dev/null
+++ b/extensions_built_in/advanced_generator/__init__.py
@@ -0,0 +1,42 @@
+# Built-in extension providing image generation processes (reference and pure LoRA generators).
+from toolkit.extension import Extension
+
+
+# Registers the reference generator process (see ReferenceGenerator.py)
+class AdvancedReferenceGeneratorExtension(Extension):
+    """Extension entry that exposes the ReferenceGenerator process class."""
+    # uid must be unique, it is how the extension is identified
+    uid = "reference_generator"
+
+    # name is the name of the extension for printing
+    name = "Reference Generator"
+
+    # the process import lives inside get_process so it doesn't slow down the rest of the program
+    @classmethod
+    def get_process(cls):
+        """Import the process class only when requested and return it."""
+        from .ReferenceGenerator import ReferenceGenerator
+        return ReferenceGenerator
+
+
+# Registers the pure LoRA generator process (see PureLoraGenerator.py)
+class PureLoraGenerator(Extension):
+    """Extension entry that exposes the PureLoraGenerator process class."""
+    # uid must be unique, it is how the extension is identified
+    uid = "pure_lora_generator"
+
+    # name is the name of the extension for printing
+    name = "Pure LoRA Generator"
+
+    # the process import lives inside get_process so it doesn't slow down the rest of the program
+    @classmethod
+    def get_process(cls):
+        """Import the process class only when requested and return it."""
+        from .PureLoraGenerator import PureLoraGenerator  # local name; shadows this class only here
+        return PureLoraGenerator
+
+
+AI_TOOLKIT_EXTENSIONS = [
+    # extension classes this package exposes (presumably read by the toolkit's loader — verify)
+    AdvancedReferenceGeneratorExtension, PureLoraGenerator
+]
diff --git a/extensions_built_in/advanced_generator/config/train.example.yaml b/extensions_built_in/advanced_generator/config/train.example.yaml
new file mode 100644
index 00000000..793d5d55
--- /dev/null
+++ b/extensions_built_in/advanced_generator/config/train.example.yaml
@@ -0,0 +1,91 @@
+---
+job: extension
+config:
+  name: test_v1
+  process:
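+    - type: 'textual_inversion_trainer'  # NOTE: carried over from the textual inversion example; the uids this extension registers are 'reference_generator' and 'pure_lora_generator'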
+      training_folder: "out/TI"
+      device: cuda:0
+      # for tensorboard logging
+      log_dir: "out/.tensorboard"
+      embedding:
+        trigger: "your_trigger_here"
+        tokens: 12
+        init_words: "man with short brown hair"
+        save_format: "safetensors"  # 'safetensors' or 'pt'
+      save:
+        dtype: float16 # precision to save
+        save_every: 100 # save every this many steps
+        max_step_saves_to_keep: 5 # only affects step counts
+      datasets:
+        - folder_path: "/path/to/dataset"
+          caption_ext: "txt"
+          default_caption: "[trigger]"
+          buckets: true
+          resolution: 512
+      train:
+        noise_scheduler: "ddpm" # one of "ddpm", "lms", "euler_a"
+        steps: 3000
+        weight_jitter: 0.0
+        lr: 5e-5
+        train_unet: false
+        gradient_checkpointing: true
+        train_text_encoder: false
+        optimizer: "adamw"
+#        optimizer: "prodigy"
+        optimizer_params:
+          weight_decay: 1e-2
+        lr_scheduler: "constant"
+        max_denoising_steps: 1000
+        batch_size: 4
+        dtype: bf16
+        xformers: true
+        min_snr_gamma: 5.0
+#        skip_first_sample: true
+        noise_offset: 0.0 # not needed for this
+      model:
+        # objective reality v2
+        name_or_path: "https://civitai.com/models/128453?modelVersionId=142465"
+        is_v2: false  # for v2 models
+        is_xl: false  # for SDXL models
+        is_v_pred: false # for v-prediction models (most v2 models)
+      sample:
+        sampler: "ddpm" # must match train.noise_scheduler
+        sample_every: 100 # sample every this many steps
+        width: 512
+        height: 512
+        prompts:
+          - "photo of [trigger] laughing"
+          - "photo of [trigger] smiling"
+          - "[trigger] close up"
+          - "dark scene [trigger] frozen"
+          - "[trigger] nighttime"
+          - "a painting of [trigger]"
+          - "a drawing of [trigger]"
+          - "a cartoon of [trigger]"
+          - "[trigger] pixar style"
+          - "[trigger] costume"
+        neg: ""
+        seed: 42
+        walk_seed: false
+        guidance_scale: 7
+        sample_steps: 20
+        network_multiplier: 1.0
+
+      logging:
+        log_every: 10 # log every this many steps
+        use_wandb: false # not supported yet
+        verbose: false
+
+# You can put any information you want here, and it will be saved in the model.
+# The below is an example, but you can put your grocery list in it if you want.
+# It is saved in the model so be aware of that. The software will include this
+# plus some other information for you automatically
+meta:
+  # [name] gets replaced with the name above
+  name: "[name]"
+#  version: '1.0'
+#  creator:
+#    name: Your Name
+#    email: your@gmail.com
+#    website: https://your.website
diff --git a/extensions_built_in/concept_replacer/ConceptReplacer.py b/extensions_built_in/concept_replacer/ConceptReplacer.py
new file mode 100644
index 00000000..1600e8e1
--- /dev/null
+++ b/extensions_built_in/concept_replacer/ConceptReplacer.py
@@ -0,0 +1,151 @@
+import random
+from collections import OrderedDict
+from torch.utils.data import DataLoader
+from toolkit.prompt_utils import concat_prompt_embeds, split_prompt_embeds
+from toolkit.stable_diffusion_model import StableDiffusion, BlankNetwork
+from toolkit.train_tools import get_torch_dtype, apply_snr_weight
+import gc
+import torch
+from jobs.process import BaseSDTrainProcess
+
+
+def flush():  # collect garbage first so the CUDA blocks it frees can be released as well
+    gc.collect()
+    torch.cuda.empty_cache()
+
+
+class ConceptReplacementConfig:
+    """Holds one 'concept' -> 'replacement' mapping; a missing key becomes ''."""
+    def __init__(self, **kwargs):
+        self.concept, self.replacement = kwargs.get('concept', ''), kwargs.get('replacement', '')
+
+
+class ConceptReplacer(BaseSDTrainProcess):
+    """Train so that prompts containing a concept predict like its replacement.
+
+    Each step the target is the noise prediction for the replacement prompt,
+    computed under torch.no_grad(); the prediction for the concept prompt is
+    fitted to it with an MSE loss.
+    """
+
+    def __init__(self, process_id: int, job, config: OrderedDict, **kwargs):
+        """Read the 'replacements' list (dicts with 'concept' / 'replacement') from config.
+
+        Raises:
+            ValueError: if no replacements are configured.
+        """
+        super().__init__(process_id, job, config, **kwargs)
+        replacement_list = self.config.get('replacements', [])
+        self.replacement_list = [ConceptReplacementConfig(**x) for x in replacement_list]
+        if not self.replacement_list:
+            # fail here instead of with an IndexError from random.choice mid-training
+            raise ValueError("concept_replacer requires at least one entry in 'replacements'")
+
+    def before_model_load(self):
+        pass
+
+    def hook_before_train_loop(self):
+        """Put the VAE in eval mode on the training device before the loop starts."""
+        self.sd.vae.eval()
+        self.sd.vae.to(self.device_torch)
+
+        # textual inversion
+        if self.embedding is not None:
+            # set text encoder to train. Not sure if this is necessary but diffusers example did it
+            text_encoder = self.sd.text_encoder
+            # the wrapper may hold a list of text encoders instead of a single one
+            for te in (text_encoder if isinstance(text_encoder, list) else [text_encoder]):
+                te.train()
+
+    @staticmethod
+    def _wrap_prompt(terms: str, prompt: str) -> str:
+        """Return prompt with the comma-separated terms, shuffled, at both ends."""
+        shuffled = [x.strip() for x in terms.split(',')]
+        random.shuffle(shuffled)
+        joined = ', '.join(shuffled)
+        return f"{joined}, {prompt}, {joined}"
+
+    def hook_train_loop(self, batch):
+        """Run one optimisation step on batch and return an OrderedDict holding 'loss'."""
+        with torch.no_grad():
+            dtype = get_torch_dtype(self.train_config.dtype)
+            noisy_latents, noise, timesteps, conditioned_prompts, imgs = self.process_general_training_batch(batch)
+            network_weight_list = batch.get_network_weight_list()
+
+            # have a blank network so we can wrap it in a context and set multipliers without checking every time
+            network = self.network if self.network is not None else BlankNetwork()
+            # get a random replacement for each prompt
+            batch_replacement_list = [random.choice(self.replacement_list) for _ in conditioned_prompts]
+
+            # build out prompts: shuffled concept / replacement terms at beginning and end
+            concept_prompts = []
+            replacement_prompts = []
+            for prompt, replacement in zip(conditioned_prompts, batch_replacement_list):
+                concept_prompts.append(self._wrap_prompt(replacement.concept, prompt))
+                replacement_prompts.append(self._wrap_prompt(replacement.replacement, prompt))
+
+            # predict the replacement without network
+            conditional_embeds = self.sd.encode_prompt(replacement_prompts).to(self.device_torch, dtype=dtype)
+            replacement_pred = self.sd.predict_noise(
+                latents=noisy_latents.to(self.device_torch, dtype=dtype),
+                conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype),
+                timestep=timesteps,
+                guidance_scale=1.0,
+            )
+            del conditional_embeds
+            replacement_pred = replacement_pred.detach()
+
+        self.optimizer.zero_grad()
+        flush()
+
+        # text encoding needs gradients when the text encoder or an embedding is trained
+        grad_on_text_encoder = bool(self.train_config.train_text_encoder or self.embedding)
+
+        # set the weights
+        network.multiplier = network_weight_list
+
+        # activate network if it exists
+        with network:
+            with torch.set_grad_enabled(grad_on_text_encoder):
+                # embed the prompts
+                conditional_embeds = self.sd.encode_prompt(concept_prompts).to(self.device_torch, dtype=dtype)
+            if not grad_on_text_encoder:
+                # detach the embeddings
+                conditional_embeds = conditional_embeds.detach()
+                self.optimizer.zero_grad()
+                flush()
+
+            noise_pred = self.sd.predict_noise(
+                latents=noisy_latents.to(self.device_torch, dtype=dtype),
+                conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype),
+                timestep=timesteps,
+                guidance_scale=1.0,
+            )
+
+            loss = torch.nn.functional.mse_loss(noise_pred.float(), replacement_pred.float(), reduction="none")
+            loss = loss.mean([1, 2, 3])
+
+            if self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001:
+                # add min_snr_gamma
+                loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler, self.train_config.min_snr_gamma)
+
+            loss = loss.mean()
+
+            # back propagate loss to free ram
+            loss.backward()
+            flush()
+
+        # apply gradients
+        self.optimizer.step()
+        self.optimizer.zero_grad()
+        self.lr_scheduler.step()
+
+        if self.embedding is not None:
+            # Let's make sure we don't update any embedding weights besides the newly added token
+            self.embedding.restore_embeddings()
+
+        loss_dict = OrderedDict({'loss': loss.item()})
+        # reset network multiplier
+        network.multiplier = 1.0
+
+        return loss_dict
diff --git a/extensions_built_in/concept_replacer/__init__.py b/extensions_built_in/concept_replacer/__init__.py
new file mode 100644
index 00000000..69dc7311
--- /dev/null
+++ b/extensions_built_in/concept_replacer/__init__.py
@@ -0,0 +1,26 @@
+# Built-in extension for concept replacement training.
+from toolkit.extension import Extension
+
+
+# Registers the concept replacer training process (see ConceptReplacer.py)
+class ConceptReplacerExtension(Extension):
+    """Extension entry that exposes the ConceptReplacer process class."""
+    # uid must be unique, it is how the extension is identified
+    uid = "concept_replacer"
+
+    # name is the name of the extension for printing
+    name = "Concept Replacer"
+
+    # the process import lives inside get_process so it doesn't slow down the rest of the program
+    @classmethod
+    def get_process(cls):
+        """Import the process class only when requested and return it."""
+        from .ConceptReplacer import ConceptReplacer
+        return ConceptReplacer
+
+
+
+AI_TOOLKIT_EXTENSIONS = [
+    # extension classes this package exposes (presumably read by the toolkit's loader — verify)
+    ConceptReplacerExtension,
+]
diff --git a/extensions_built_in/concept_replacer/config/train.example.yaml b/extensions_built_in/concept_replacer/config/train.example.yaml
new file mode 100644
index 00000000..793d5d55
--- /dev/null
+++ b/extensions_built_in/concept_replacer/config/train.example.yaml
@@ -0,0 +1,91 @@
+---
+job: extension
+config:
+  name: test_v1
+  process:
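+    - type: 'textual_inversion_trainer'  # NOTE: carried over from the textual inversion example; the uid this extension registers is 'concept_replacer'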
+      training_folder: "out/TI"
+      device: cuda:0
+      # for tensorboard logging
+      log_dir: "out/.tensorboard"
+      embedding:
+        trigger: "your_trigger_here"
+        tokens: 12
+        init_words: "man with short brown hair"
+        save_format: "safetensors"  # 'safetensors' or 'pt'
+      save:
+        dtype: float16 # precision to save
+        save_every: 100 # save every this many steps
+        max_step_saves_to_keep: 5 # only affects step counts
+      datasets:
+        - folder_path: "/path/to/dataset"
+          caption_ext: "txt"
+          default_caption: "[trigger]"
+          buckets: true
+          resolution: 512
+      train:
+        noise_scheduler: "ddpm" # one of "ddpm", "lms", "euler_a"
+        steps: 3000
+        weight_jitter: 0.0
+        lr: 5e-5
+        train_unet: false
+        gradient_checkpointing: true
+        train_text_encoder: false
+        optimizer: "adamw"
+#        optimizer: "prodigy"
+        optimizer_params:
+          weight_decay: 1e-2
+        lr_scheduler: "constant"
+        max_denoising_steps: 1000
+        batch_size: 4
+        dtype: bf16
+        xformers: true
+        min_snr_gamma: 5.0
+#        skip_first_sample: true
+        noise_offset: 0.0 # not needed for this
+      model:
+        # objective reality v2
+        name_or_path: "https://civitai.com/models/128453?modelVersionId=142465"
+        is_v2: false  # for v2 models
+        is_xl: false  # for SDXL models
+        is_v_pred: false # for v-prediction models (most v2 models)
+      sample:
+        sampler: "ddpm" # must match train.noise_scheduler
+        sample_every: 100 # sample every this many steps
+        width: 512
+        height: 512
+        prompts:
+          - "photo of [trigger] laughing"
+          - "photo of [trigger] smiling"
+          - "[trigger] close up"
+          - "dark scene [trigger] frozen"
+          - "[trigger] nighttime"
+          - "a painting of [trigger]"
+          - "a drawing of [trigger]"
+          - "a cartoon of [trigger]"
+          - "[trigger] pixar style"
+          - "[trigger] costume"
+        neg: ""
+        seed: 42
+        walk_seed: false
+        guidance_scale: 7
+        sample_steps: 20
+        network_multiplier: 1.0
+
+      logging:
+        log_every: 10 # log every this many steps
+        use_wandb: false # not supported yet
+        verbose: false
+
+# You can put any information you want here, and it will be saved in the model.
+# The below is an example, but you can put your grocery list in it if you want.
+# It is saved in the model so be aware of that. The software will include this
+# plus some other information for you automatically
+meta:
+  # [name] gets replaced with the name above
+  name: "[name]"
+#  version: '1.0'
+#  creator:
+#    name: Your Name
+#    email: your@gmail.com
+#    website: https://your.website
diff --git a/extensions_built_in/dataset_tools/DatasetTools.py b/extensions_built_in/dataset_tools/DatasetTools.py
new file mode 100644
index 00000000..d969b77a
--- /dev/null
+++ b/extensions_built_in/dataset_tools/DatasetTools.py
@@ -0,0 +1,20 @@
+from collections import OrderedDict
+import gc
+import torch
+from jobs.process import BaseExtensionProcess
+
+
+def flush():  # collect garbage first so the CUDA blocks it frees can be released as well
+    gc.collect()
+    torch.cuda.empty_cache()
+
+
+class DatasetTools(BaseExtensionProcess):
+    """Placeholder process: run() always raises NotImplementedError."""
+
+    def __init__(self, process_id: int, job, config: OrderedDict):
+        super().__init__(process_id, job, config)  # no configuration of its own is read yet
+
+    def run(self):
+        super().run()  # the base-class run still executes before the error below
+        raise NotImplementedError("This extension is not yet implemented")
diff --git a/extensions_built_in/dataset_tools/SuperTagger.py b/extensions_built_in/dataset_tools/SuperTagger.py
new file mode 100644
index 00000000..6eb3c70e
--- /dev/null
+++ b/extensions_built_in/dataset_tools/SuperTagger.py
@@ -0,0 +1,196 @@
+import copy
+import json
+import os
+from collections import OrderedDict
+import gc
+import traceback
+import torch
+from PIL import Image, ImageOps
+from tqdm import tqdm
+
+from .tools.dataset_tools_config_modules import RAW_DIR, TRAIN_DIR, Step, ImgInfo
+from .tools.fuyu_utils import FuyuImageProcessor
+from .tools.image_tools import load_image, ImageProcessor, resize_to_max
+from .tools.llava_utils import LLaVAImageProcessor
+from .tools.caption import default_long_prompt, default_short_prompt, default_replacements
+from jobs.process import BaseExtensionProcess
+from .tools.sync_tools import get_img_paths
+
+img_ext = ['.jpg', '.jpeg', '.png', '.webp']
+
+
+def flush():
+    torch.cuda.empty_cache()
+    gc.collect()
+
+
+VERSION = 2
+
+
+class SuperTagger(BaseExtensionProcess):
+
+    def __init__(self, process_id: int, job, config: OrderedDict):
+        super().__init__(process_id, job, config)
+        parent_dir = config.get('parent_dir', None)
+        self.dataset_paths: list[str] = config.get('dataset_paths', [])
+        self.device = config.get('device', 'cuda')
+        self.steps: list[Step] = config.get('steps', [])
+        self.caption_method = config.get('caption_method', 'llava:default')
+        self.caption_prompt = config.get('caption_prompt', default_long_prompt)
+        self.caption_short_prompt = config.get('caption_short_prompt', default_short_prompt)
+        self.force_reprocess_img = config.get('force_reprocess_img', False)
+        self.caption_replacements = config.get('caption_replacements', default_replacements)
+        self.caption_short_replacements = config.get('caption_short_replacements', default_replacements)
+        self.master_dataset_dict = OrderedDict()
+        self.dataset_master_config_file = config.get('dataset_master_config_file', None)
+        if parent_dir is not None and len(self.dataset_paths) == 0:
+            # find all folders in the patent_dataset_path
+            self.dataset_paths = [
+                os.path.join(parent_dir, folder)
+                for folder in os.listdir(parent_dir)
+                if os.path.isdir(os.path.join(parent_dir, folder))
+            ]
+        else:
+            # make sure they exist
+            for dataset_path in self.dataset_paths:
+                if not os.path.exists(dataset_path):
+                    raise ValueError(f"Dataset path does not exist: {dataset_path}")
+
+        print(f"Found {len(self.dataset_paths)} dataset paths")
+
+        self.image_processor: ImageProcessor = self.get_image_processor()
+
+    def get_image_processor(self):
+        if self.caption_method.startswith('llava'):
+            return LLaVAImageProcessor(device=self.device)
+        elif self.caption_method.startswith('fuyu'):
+            return FuyuImageProcessor(device=self.device)
+        else:
+            raise ValueError(f"Unknown caption method: {self.caption_method}")
+
+    def process_image(self, img_path: str):
+        root_img_dir = os.path.dirname(os.path.dirname(img_path))
+        filename = os.path.basename(img_path)
+        filename_no_ext = os.path.splitext(filename)[0]
+        train_dir = os.path.join(root_img_dir, TRAIN_DIR)
+        train_img_path = os.path.join(train_dir, filename)
+        json_path = os.path.join(train_dir, f"{filename_no_ext}.json")
+
+        # check if json exists, if it does load it as image info
+        if os.path.exists(json_path):
+            with open(json_path, 'r') as f:
+                img_info = ImgInfo(**json.load(f))
+        else:
+            img_info = ImgInfo()
+
+        # always send steps first in case other processes need them
+        img_info.add_steps(copy.deepcopy(self.steps))
+        img_info.set_version(VERSION)
+        img_info.set_caption_method(self.caption_method)
+
+        image: Image = None
+        caption_image: Image = None
+
+        did_update_image = False
+
+        # trigger reprocess of steps
+        if self.force_reprocess_img:
+            img_info.trigger_image_reprocess()
+
+        # set the image as updated if it does not exist on disk
+        if not os.path.exists(train_img_path):
+            did_update_image = True
+            image = load_image(img_path)
+        if img_info.force_image_process:
+            did_update_image = True
+            image = load_image(img_path)
+
+        # go through the needed steps
+        for step in copy.deepcopy(img_info.state.steps_to_complete):
+            if step == 'caption':
+                # load image
+                if image is None:
+                    image = load_image(img_path)
+                if caption_image is None:
+                    caption_image = resize_to_max(image, 1024, 1024)
+
+                if not self.image_processor.is_loaded:
+                    print('Loading Model. Takes a while, especially the first time')
+                    self.image_processor.load_model()
+
+                img_info.caption = self.image_processor.generate_caption(
+                    image=caption_image,
+                    prompt=self.caption_prompt,
+                    replacements=self.caption_replacements
+                )
+                img_info.mark_step_complete(step)
+            elif step == 'caption_short':
+                # load image
+                if image is None:
+                    image = load_image(img_path)
+
+                if caption_image is None:
+                    caption_image = resize_to_max(image, 1024, 1024)
+
+                if not self.image_processor.is_loaded:
+                    print('Loading Model. Takes a while, especially the first time')
+                    self.image_processor.load_model()
+                img_info.caption_short = self.image_processor.generate_caption(
+                    image=caption_image,
+                    prompt=self.caption_short_prompt,
+                    replacements=self.caption_short_replacements
+                )
+                img_info.mark_step_complete(step)
+            elif step == 'contrast_stretch':
+                # load image
+                if image is None:
+                    image = load_image(img_path)
+                image = ImageOps.autocontrast(image, cutoff=(0.1, 0), preserve_tone=True)
+                did_update_image = True
+                img_info.mark_step_complete(step)
+            else:
+                raise ValueError(f"Unknown step: {step}")
+
+        os.makedirs(os.path.dirname(train_img_path), exist_ok=True)
+        if did_update_image:
+            image.save(train_img_path)
+
+        if img_info.is_dirty:
+            with open(json_path, 'w') as f:
+                json.dump(img_info.to_dict(), f, indent=4)
+
+        if self.dataset_master_config_file:
+            # add to master dict
+            self.master_dataset_dict[train_img_path] = img_info.to_dict()
+
+    def run(self):
+        super().run()
+        imgs_to_process = []
+        # find all images
+        for dataset_path in self.dataset_paths:
+            raw_dir = os.path.join(dataset_path, RAW_DIR)
+            raw_image_paths = get_img_paths(raw_dir)
+            for raw_image_path in raw_image_paths:
+                imgs_to_process.append(raw_image_path)
+
+        if len(imgs_to_process) == 0:
+            print(f"No images to process")
+        else:
+            print(f"Found {len(imgs_to_process)} to process")
+
+            for img_path in tqdm(imgs_to_process, desc="Processing images"):
+                try:
+                    self.process_image(img_path)
+                except Exception:
+                    # print full stack trace
+                    print(traceback.format_exc())
+                    continue
+                # self.process_image(img_path)
+
+        if self.dataset_master_config_file is not None:
+            # save it as json
+            with open(self.dataset_master_config_file, 'w') as f:
+                json.dump(self.master_dataset_dict, f, indent=4)
+
+        del self.image_processor
+        flush()
diff --git a/extensions_built_in/dataset_tools/SyncFromCollection.py b/extensions_built_in/dataset_tools/SyncFromCollection.py
new file mode 100644
index 00000000..e65a3584
--- /dev/null
+++ b/extensions_built_in/dataset_tools/SyncFromCollection.py
@@ -0,0 +1,131 @@
+import os
+import shutil
+from collections import OrderedDict
+import gc
+from typing import List
+
+import torch
+from tqdm import tqdm
+
+from .tools.dataset_tools_config_modules import DatasetSyncCollectionConfig, RAW_DIR, NEW_DIR
+from .tools.sync_tools import get_unsplash_images, get_pexels_images, get_local_image_file_names, download_image, \
+    get_img_paths
+from jobs.process import BaseExtensionProcess
+
+
+def flush():
+    torch.cuda.empty_cache()
+    gc.collect()
+
+
+class SyncFromCollection(BaseExtensionProcess):
+
+    def __init__(self, process_id: int, job, config: OrderedDict):
+        super().__init__(process_id, job, config)
+
+        self.min_width = config.get('min_width', 1024)
+        self.min_height = config.get('min_height', 1024)
+
+        # add our min_width and min_height to each dataset config if they don't exist
+        for dataset_config in config.get('dataset_sync', []):
+            if 'min_width' not in dataset_config:
+                dataset_config['min_width'] = self.min_width
+            if 'min_height' not in dataset_config:
+                dataset_config['min_height'] = self.min_height
+
+        self.dataset_configs: List[DatasetSyncCollectionConfig] = [
+            DatasetSyncCollectionConfig(**dataset_config)
+            for dataset_config in config.get('dataset_sync', [])
+        ]
+        print(f"Found {len(self.dataset_configs)} dataset configs")
+
+    def move_new_images(self, root_dir: str):
+        raw_dir = os.path.join(root_dir, RAW_DIR)
+        new_dir = os.path.join(root_dir, NEW_DIR)
+        new_images = get_img_paths(new_dir)
+
+        for img_path in new_images:
+            # move to raw
+            new_path = os.path.join(raw_dir, os.path.basename(img_path))
+            shutil.move(img_path, new_path)
+
+        # remove new dir
+        shutil.rmtree(new_dir)
+
+    def sync_dataset(self, config: DatasetSyncCollectionConfig):
+        if config.host == 'unsplash':
+            get_images = get_unsplash_images
+        elif config.host == 'pexels':
+            get_images = get_pexels_images
+        else:
+            raise ValueError(f"Unknown host: {config.host}")
+
+        results = {
+            'num_downloaded': 0,
+            'num_skipped': 0,
+            'bad': 0,
+            'total': 0,
+        }
+
+        photos = get_images(config)
+        raw_dir = os.path.join(config.directory, RAW_DIR)
+        new_dir = os.path.join(config.directory, NEW_DIR)
+        raw_images = get_local_image_file_names(raw_dir)
+        new_images = get_local_image_file_names(new_dir)
+
+        for photo in tqdm(photos, desc=f"{config.host}-{config.collection_id}"):
+            try:
+                if photo.filename not in raw_images and photo.filename not in new_images:
+                    download_image(photo, new_dir, min_width=self.min_width, min_height=self.min_height)
+                    results['num_downloaded'] += 1
+                else:
+                    results['num_skipped'] += 1
+            except Exception as e:
+                print(f" - BAD({photo.id}): {e}")
+                results['bad'] += 1
+                continue
+            results['total'] += 1
+
+        return results
+
+    def print_results(self, results):
+        print(
+            f" - new:{results['num_downloaded']}, old:{results['num_skipped']}, bad:{results['bad']} total:{results['total']}")
+
+    def run(self):
+        super().run()
+        print(f"Syncing {len(self.dataset_configs)} datasets")
+        all_results = None
+        failed_datasets = []
+        for dataset_config in tqdm(self.dataset_configs, desc="Syncing datasets", leave=True):
+            try:
+                results = self.sync_dataset(dataset_config)
+                if all_results is None:
+                    all_results = {**results}
+                else:
+                    for key, value in results.items():
+                        all_results[key] += value
+
+                self.print_results(results)
+            except Exception as e:
+                print(f" - FAILED: {e}")
+                if 'response' in e.__dict__:
+                    error = f"{e.response.status_code}: {e.response.text}"
+                    print(f"   - {error}")
+                    failed_datasets.append({'dataset': dataset_config, 'error': error})
+                else:
+                    failed_datasets.append({'dataset': dataset_config, 'error': str(e)})
+                continue
+
+        print("Moving new images to raw")
+        for dataset_config in self.dataset_configs:
+            self.move_new_images(dataset_config.directory)
+
+        print("Done syncing datasets")
+        self.print_results(all_results)
+
+        if len(failed_datasets) > 0:
+            print(f"Failed to sync {len(failed_datasets)} datasets")
+            for failed in failed_datasets:
+                print(f" - {failed['dataset'].host}-{failed['dataset'].collection_id}")
+                print(f"   - ERR: {failed['error']}")
diff --git a/extensions_built_in/dataset_tools/__init__.py b/extensions_built_in/dataset_tools/__init__.py
new file mode 100644
index 00000000..b86d3cf5
--- /dev/null
+++ b/extensions_built_in/dataset_tools/__init__.py
@@ -0,0 +1,43 @@
+from toolkit.extension import Extension
+
+
+class DatasetToolsExtension(Extension):
+    """Extension entry that exposes the ``DatasetTools`` process."""
+
+    # presumably the identifier used to select this extension in a job config - TODO confirm
+    uid = "dataset_tools"
+
+    # name is the name of the extension for printing
+    name = "Dataset Tools"
+
+    # This is where your process class is loaded
+    # keep your imports in here so they don't slow down the rest of the program
+    @classmethod
+    def get_process(cls):
+        """Import and return the ``DatasetTools`` process class."""
+        # import your process class here so it is only loaded when needed and return it
+        from .DatasetTools import DatasetTools
+        return DatasetTools
+
+
+class SyncFromCollectionExtension(Extension):
+    """Extension entry that exposes the ``SyncFromCollection`` process."""
+
+    uid = "sync_from_collection"
+    name = "Sync from Collection"
+
+    @classmethod
+    def get_process(cls):
+        """Import and return the ``SyncFromCollection`` process class."""
+        # import your process class here so it is only loaded when needed and return it
+        from .SyncFromCollection import SyncFromCollection
+        return SyncFromCollection
+    
+    
+class SuperTaggerExtension(Extension):
+    """Extension entry that exposes the ``SuperTagger`` process."""
+
+    uid = "super_tagger"
+    name = "Super Tagger"
+
+    @classmethod
+    def get_process(cls):
+        """Import and return the ``SuperTagger`` process class."""
+        # import your process class here so it is only loaded when needed and return it
+        from .SuperTagger import SuperTagger
+        return SuperTagger
+
+
+AI_TOOLKIT_EXTENSIONS = [
+    SyncFromCollectionExtension, DatasetToolsExtension, SuperTaggerExtension
+]
diff --git a/extensions_built_in/dataset_tools/tools/caption.py b/extensions_built_in/dataset_tools/tools/caption.py
new file mode 100644
index 00000000..370786a8
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/caption.py
@@ -0,0 +1,53 @@
+
+caption_manipulation_steps = ['caption', 'caption_short']
+
+default_long_prompt = 'caption this image. describe every single thing in the image in detail. Do not include any unnecessary words in your description for the sake of good grammar. I want many short statements that serve the single purpose of giving the most thorough description if items as possible in the smallest, comma separated way possible. be sure to describe people\'s moods, clothing, the environment, lighting, colors, and everything.'
+default_short_prompt = 'caption this image in less than ten words'
+
+# (search, replacement) pairs applied by clean_caption() to the lowercased caption.
+# A search text starting with "*" is treated specially there: when the caption
+# starts with the text after the "*", the whole caption is cleared.
+default_replacements = [
+    ("the image features", ""),
+    ("the image shows", ""),
+    ("the image depicts", ""),
+    ("the image is", ""),
+    ("in this image", ""),
+    ("in the image", ""),
+]
+
+
+def clean_caption(cap, replacements=None):
+    if replacements is None:
+        replacements = default_replacements
+
+    # remove any newlines
+    cap = cap.replace("\n", ", ")
+    cap = cap.replace("\r", ", ")
+    cap = cap.replace(".", ",")
+    cap = cap.replace("\"", "")
+
+    # remove unicode characters
+    cap = cap.encode('ascii', 'ignore').decode('ascii')
+
+    # make lowercase
+    cap = cap.lower()
+    # remove any extra spaces
+    cap = " ".join(cap.split())
+
+    for replacement in replacements:
+        if replacement[0].startswith('*'):
+            # we are removing all text if it starts with this and the rest matches
+            search_text = replacement[0][1:]
+            if cap.startswith(search_text):
+                cap = ""
+        else:
+            cap = cap.replace(replacement[0].lower(), replacement[1].lower())
+
+    cap_list = cap.split(",")
+    # trim whitespace
+    cap_list = [c.strip() for c in cap_list]
+    # remove empty strings
+    cap_list = [c for c in cap_list if c != ""]
+    # remove duplicates
+    cap_list = list(dict.fromkeys(cap_list))
+    # join back together
+    cap = ", ".join(cap_list)
+    return cap
\ No newline at end of file
diff --git a/extensions_built_in/dataset_tools/tools/dataset_tools_config_modules.py b/extensions_built_in/dataset_tools/tools/dataset_tools_config_modules.py
new file mode 100644
index 00000000..60c69dbb
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/dataset_tools_config_modules.py
@@ -0,0 +1,187 @@
+import json
+from typing import Literal, Type, TYPE_CHECKING
+
+# Image hosts a collection can be synced from.
+Host: Type = Literal['unsplash', 'pexels']
+
+# Sub-folder names used inside each dataset directory.
+RAW_DIR = "raw"  # source images
+NEW_DIR = "_tmp"  # staging folder for fresh downloads before they are moved into RAW_DIR
+TRAIN_DIR = "train"  # processed images and their per-image .json files
+DEPTH_DIR = "depth"  # NOTE(review): not referenced in the code visible here - confirm usage
+
+from .image_tools import Step, img_manipulation_steps
+from .caption import caption_manipulation_steps
+
+
+class DatasetSyncCollectionConfig:
+    def __init__(self, **kwargs):
+        self.host: Host = kwargs.get('host', None)
+        self.collection_id: str = kwargs.get('collection_id', None)
+        self.directory: str = kwargs.get('directory', None)
+        self.api_key: str = kwargs.get('api_key', None)
+        self.min_width: int = kwargs.get('min_width', 1024)
+        self.min_height: int = kwargs.get('min_height', 1024)
+
+        if self.host is None:
+            raise ValueError("host is required")
+        if self.collection_id is None:
+            raise ValueError("collection_id is required")
+        if self.directory is None:
+            raise ValueError("directory is required")
+        if self.api_key is None:
+            raise ValueError(f"api_key is required: {self.host}:{self.collection_id}")
+
+
+class ImageState:
+    def __init__(self, **kwargs):
+        self.steps_complete: list[Step] = kwargs.get('steps_complete', [])
+        self.steps_to_complete: list[Step] = kwargs.get('steps_to_complete', [])
+
+    def to_dict(self):
+        return {
+            'steps_complete': self.steps_complete
+        }
+
+
+class Rect:
+    def __init__(self, **kwargs):
+        self.x = kwargs.get('x', 0)
+        self.y = kwargs.get('y', 0)
+        self.width = kwargs.get('width', 0)
+        self.height = kwargs.get('height', 0)
+
+    def to_dict(self):
+        return {
+            'x': self.x,
+            'y': self.y,
+            'width': self.width,
+            'height': self.height
+        }
+
+
+class ImgInfo:
+    def __init__(self, **kwargs):
+        self.version: int = kwargs.get('version', None)
+        self.caption: str = kwargs.get('caption', None)
+        self.caption_short: str = kwargs.get('caption_short', None)
+        self.poi = [Rect(**poi) for poi in kwargs.get('poi', [])]
+        self.state = ImageState(**kwargs.get('state', {}))
+        self.caption_method = kwargs.get('caption_method', None)
+        self.other_captions = kwargs.get('other_captions', {})
+        self._upgrade_state()
+        self.force_image_process: bool = False
+        self._requested_steps: list[Step] = []
+
+        self.is_dirty: bool = False
+
+    def _upgrade_state(self):
+        # upgrades older states
+        if self.caption is not None and 'caption' not in self.state.steps_complete:
+            self.mark_step_complete('caption')
+            self.is_dirty = True
+        if self.caption_short is not None and 'caption_short' not in self.state.steps_complete:
+            self.mark_step_complete('caption_short')
+            self.is_dirty = True
+        if self.caption_method is None and self.caption is not None:
+            # added caption method in version 2. Was all llava before that
+            self.caption_method = 'llava:default'
+            self.is_dirty = True
+
+    def to_dict(self):
+        return {
+            'version': self.version,
+            'caption_method': self.caption_method,
+            'caption': self.caption,
+            'caption_short': self.caption_short,
+            'poi': [poi.to_dict() for poi in self.poi],
+            'state': self.state.to_dict(),
+            'other_captions': self.other_captions
+        }
+
+    def mark_step_complete(self, step: Step):
+        if step not in self.state.steps_complete:
+            self.state.steps_complete.append(step)
+        if step in self.state.steps_to_complete:
+            self.state.steps_to_complete.remove(step)
+        self.is_dirty = True
+
+    def add_step(self, step: Step):
+        if step not in self.state.steps_to_complete and step not in self.state.steps_complete:
+            self.state.steps_to_complete.append(step)
+
+    def trigger_image_reprocess(self):
+        if self._requested_steps is None:
+            raise Exception("Must call add_steps before trigger_image_reprocess")
+        steps = self._requested_steps
+        # remove all image manipulationf from steps_to_complete
+        for step in img_manipulation_steps:
+            if step in self.state.steps_to_complete:
+                self.state.steps_to_complete.remove(step)
+            if step in self.state.steps_complete:
+                self.state.steps_complete.remove(step)
+        self.force_image_process = True
+        self.is_dirty = True
+        # we want to keep the order passed in process file
+        for step in steps:
+            if step in img_manipulation_steps:
+                self.add_step(step)
+
+    def add_steps(self, steps: list[Step]):
+        self._requested_steps = [step for step in steps]
+        for stage in steps:
+            self.add_step(stage)
+
+        # update steps if we have any img processes not complete, we have to reprocess them all
+        # if any steps_to_complete are in img_manipulation_steps
+
+        is_manipulating_image = any([step in img_manipulation_steps for step in self.state.steps_to_complete])
+        order_has_changed = False
+
+        if not is_manipulating_image:
+            # check to see if order has changed. No need to if already redoing it. Will detect if ones are removed
+            target_img_manipulation_order = [step for step in steps if step in img_manipulation_steps]
+            current_img_manipulation_order = [step for step in self.state.steps_complete if
+                                              step in img_manipulation_steps]
+            if target_img_manipulation_order != current_img_manipulation_order:
+                order_has_changed = True
+
+        if is_manipulating_image or order_has_changed:
+            self.trigger_image_reprocess()
+
+    def set_caption_method(self, method: str):
+        if self._requested_steps is None:
+            raise Exception("Must call add_steps before set_caption_method")
+        if self.caption_method != method:
+            self.is_dirty = True
+            # move previous caption method to other_captions
+            if self.caption_method is not None and self.caption is not None or self.caption_short is not None:
+                self.other_captions[self.caption_method] = {
+                    'caption': self.caption,
+                    'caption_short': self.caption_short,
+                }
+            self.caption_method = method
+            self.caption = None
+            self.caption_short = None
+            # see if we have a caption from the new method
+            if method in self.other_captions:
+                self.caption = self.other_captions[method].get('caption', None)
+                self.caption_short = self.other_captions[method].get('caption_short', None)
+            else:
+                self.trigger_new_caption()
+
+    def trigger_new_caption(self):
+        self.caption = None
+        self.caption_short = None
+        self.is_dirty = True
+        # check to see if we have any steps in the complete list and move them to the to_complete list
+        for step in self.state.steps_complete:
+            if step in caption_manipulation_steps:
+                self.state.steps_complete.remove(step)
+                self.state.steps_to_complete.append(step)
+
+    def to_json(self):
+        return json.dumps(self.to_dict())
+
+    def set_version(self, version: int):
+        if self.version != version:
+            self.is_dirty = True
+        self.version = version
diff --git a/extensions_built_in/dataset_tools/tools/fuyu_utils.py b/extensions_built_in/dataset_tools/tools/fuyu_utils.py
new file mode 100644
index 00000000..407da10c
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/fuyu_utils.py
@@ -0,0 +1,66 @@
+from transformers import  CLIPImageProcessor, BitsAndBytesConfig, AutoTokenizer
+
+from .caption import default_long_prompt, default_short_prompt, default_replacements, clean_caption
+import torch
+from PIL import Image
+
+
+class FuyuImageProcessor:
+    def __init__(self, device='cuda'):
+        from transformers import FuyuProcessor, FuyuForCausalLM
+        self.device = device
+        self.model: FuyuForCausalLM = None
+        self.processor: FuyuProcessor = None
+        self.dtype = torch.bfloat16
+        self.tokenizer: AutoTokenizer
+        self.is_loaded = False
+
+    def load_model(self):
+        from transformers import FuyuProcessor, FuyuForCausalLM
+        model_path = "adept/fuyu-8b"
+        kwargs = {"device_map": self.device}
+        kwargs['load_in_4bit'] = True
+        kwargs['quantization_config'] = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=self.dtype,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4'
+        )
+        self.processor = FuyuProcessor.from_pretrained(model_path)
+        self.model = FuyuForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
+        self.is_loaded = True
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = FuyuForCausalLM.from_pretrained(model_path, torch_dtype=self.dtype, **kwargs)
+        self.processor = FuyuProcessor(image_processor=FuyuImageProcessor(), tokenizer=self.tokenizer)
+
+    def generate_caption(
+            self, image: Image,
+            prompt: str = default_long_prompt,
+            replacements=default_replacements,
+            max_new_tokens=512
+    ):
+        # prepare inputs for the model
+        # text_prompt = f"{prompt}\n"
+
+        # image = image.convert('RGB')
+        model_inputs = self.processor(text=prompt, images=[image])
+        model_inputs = {k: v.to(dtype=self.dtype if torch.is_floating_point(v) else v.dtype, device=self.device) for k, v in
+                        model_inputs.items()}
+
+        generation_output = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)
+        prompt_len = model_inputs["input_ids"].shape[-1]
+        output = self.tokenizer.decode(generation_output[0][prompt_len:], skip_special_tokens=True)
+        output = clean_caption(output, replacements=replacements)
+        return output
+
+        # inputs = self.processor(text=text_prompt, images=image, return_tensors="pt")
+        # for k, v in inputs.items():
+        #     inputs[k] = v.to(self.device)
+
+        # # autoregressively generate text
+        # generation_output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
+        # generation_text = self.processor.batch_decode(generation_output[:, -max_new_tokens:], skip_special_tokens=True)
+        # output = generation_text[0]
+        #
+        # return clean_caption(output, replacements=replacements)
diff --git a/extensions_built_in/dataset_tools/tools/image_tools.py b/extensions_built_in/dataset_tools/tools/image_tools.py
new file mode 100644
index 00000000..d36073c0
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/image_tools.py
@@ -0,0 +1,49 @@
+from typing import Literal, Type, TYPE_CHECKING, Union
+
+import cv2
+import numpy as np
+from PIL import Image, ImageOps
+
+# Names of the processing steps that can be requested for an image.
+Step: Type = Literal['caption', 'caption_short', 'create_mask', 'contrast_stretch']
+
+# Steps that change the pixels of the image itself (as opposed to caption steps).
+img_manipulation_steps = ['contrast_stretch']
+
+# Image file extensions; NOTE(review): presumably the extensions treated as images - confirm against callers
+img_ext = ['.jpg', '.jpeg', '.png', '.webp']
+
+# Imported for type checking only, so the heavy model modules are not loaded at runtime.
+if TYPE_CHECKING:
+    from .llava_utils import LLaVAImageProcessor
+    from .fuyu_utils import FuyuImageProcessor
+
+# Either caption backend; the names are forward references because of the guard above.
+ImageProcessor = Union['LLaVAImageProcessor', 'FuyuImageProcessor']
+
+
+def pil_to_cv2(image):
+    """Convert a PIL image to a cv2 image."""
+    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+
+
+def cv2_to_pil(image):
+    """Convert a cv2 image to a PIL image."""
+    return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+
+def load_image(img_path: str):
+    image = Image.open(img_path).convert('RGB')
+    try:
+        # transpose with exif data
+        image = ImageOps.exif_transpose(image)
+    except Exception as e:
+        pass
+    return image
+
+
+def resize_to_max(image, max_width=1024, max_height=1024):
+    width, height = image.size
+    if width <= max_width and height <= max_height:
+        return image
+
+    scale = min(max_width / width, max_height / height)
+    width = int(width * scale)
+    height = int(height * scale)
+
+    return image.resize((width, height), Image.LANCZOS)
diff --git a/extensions_built_in/dataset_tools/tools/llava_utils.py b/extensions_built_in/dataset_tools/tools/llava_utils.py
new file mode 100644
index 00000000..9ba38d66
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/llava_utils.py
@@ -0,0 +1,85 @@
+
+from .caption import default_long_prompt, default_short_prompt, default_replacements, clean_caption
+
+import torch
+from PIL import Image, ImageOps
+
+from transformers import AutoTokenizer, BitsAndBytesConfig, CLIPImageProcessor
+
+img_ext = ['.jpg', '.jpeg', '.png', '.webp']
+
+
+class LLaVAImageProcessor:
+    """Captions images with the `4bit/llava-v1.5-13b-3GB` LLaVA checkpoint, loaded 4-bit quantized."""
+
+    def __init__(self, device='cuda'):
+        try:
+            from llava.model import LlavaLlamaForCausalLM
+        except ImportError:
+            print("You need to manually install llava -> pip install --no-deps  git+https://github.com/haotian-liu/LLaVA.git")
+            raise
+        self.device = device
+        self.model: LlavaLlamaForCausalLM = None
+        self.tokenizer: AutoTokenizer = None
+        self.image_processor: CLIPImageProcessor = None
+        self.is_loaded = False
+
+    def load_model(self):
+        """Load the quantized model, its tokenizer and the vision tower's image processor, then set `is_loaded`."""
+        from llava.model import LlavaLlamaForCausalLM
+
+        model_path = "4bit/llava-v1.5-13b-3GB"
+        kwargs = {"device_map": self.device}
+        kwargs['load_in_4bit'] = True
+        kwargs['quantization_config'] = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4'
+        )
+        self.model = LlavaLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
+        vision_tower = self.model.get_vision_tower()
+        if not vision_tower.is_loaded:
+            vision_tower.load_model()
+        vision_tower.to(device=self.device)
+        self.image_processor = vision_tower.image_processor
+        self.is_loaded = True
+
+    def generate_caption(
+            self, image: Image,
+            prompt: str = default_long_prompt,
+            replacements=default_replacements,
+            max_new_tokens=512
+    ):
+        """Return the cleaned caption the model generates for `image` in reply to `prompt`."""
+        from llava.conversation import conv_templates, SeparatorStyle
+        from llava.utils import disable_torch_init
+        from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
+        from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria
+        disable_torch_init()
+        conv_mode = "llava_v0"
+        conv = conv_templates[conv_mode].copy()
+        roles = conv.roles
+        # inputs must be on the model's device (`self.device`), not whatever the default CUDA device is
+        image_tensor = self.image_processor.preprocess([image], return_tensors='pt')['pixel_values'].half().to(self.device)
+
+        inp = f"{roles[0]}: {prompt}"
+        inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + inp
+        conv.append_message(conv.roles[0], inp)
+        conv.append_message(conv.roles[1], None)
+        raw_prompt = conv.get_prompt()
+        input_ids = tokenizer_image_token(raw_prompt, self.tokenizer, IMAGE_TOKEN_INDEX,
+                                          return_tensors='pt').unsqueeze(0).to(self.device)
+        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
+        stopping_criteria = KeywordsStoppingCriteria([stop_str], self.tokenizer, input_ids)
+        with torch.inference_mode():
+            output_ids = self.model.generate(
+                input_ids, images=image_tensor, do_sample=True, temperature=0.1,
+                max_new_tokens=max_new_tokens, use_cache=True, stopping_criteria=[stopping_criteria],
+                top_p=0.8
+            )
+        outputs = self.tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
+        conv.messages[-1][-1] = outputs
+        output = outputs.rsplit('</s>', 1)[0]
+        return clean_caption(output, replacements=replacements)
diff --git a/extensions_built_in/dataset_tools/tools/sync_tools.py b/extensions_built_in/dataset_tools/tools/sync_tools.py
new file mode 100644
index 00000000..143cc6bb
--- /dev/null
+++ b/extensions_built_in/dataset_tools/tools/sync_tools.py
@@ -0,0 +1,279 @@
+import os
+import requests
+import tqdm
+from typing import List, Optional, TYPE_CHECKING
+
+
+def img_root_path(img_id: str):
+    return os.path.dirname(os.path.dirname(img_id))  # the path two directory levels above `img_id`
+
+
+if TYPE_CHECKING:
+    from .dataset_tools_config_modules import DatasetSyncCollectionConfig
+
+img_exts = ['.jpg', '.jpeg', '.webp', '.png']
+
+class Photo:
+    """A remote photo discovered in a provider collection.
+
+    Holds the provider-side `id` (always stored as a string), the provider
+    name (`host`), the `width` and `height` reported for the photo, the `url`
+    to download it from and the `filename` to save it under.
+    """
+
+    def __init__(self, id, host, width, height, url, filename):
+        # `id` shadows the builtin but is part of the keyword interface callers use
+        self.id = str(id)
+        self.host = host
+        self.width = width
+        self.height = height
+        self.url = url
+        self.filename = filename
+
+
+def get_desired_size(img_width: int, img_height: int, min_width: int, min_height: int):
+    """Scale an image size uniformly based on its shorter side.
+
+    Landscape images use the factor `min_height / img_height`; all others use
+    `min_width / img_width`. Returns `(new_width, new_height)`, each truncated
+    to an int.
+    """
+    is_landscape = img_width > img_height
+    scale = min_height / img_height if is_landscape else min_width / img_width
+    return int(img_width * scale), int(img_height * scale)
+
+
+def get_pexels_images(config: 'DatasetSyncCollectionConfig') -> List[Photo]:
+    """List every photo in a Pexels collection as `Photo` objects.
+
+    Follows the API's `next_page` links until none is returned. Each photo's
+    download url requests the size computed by `get_desired_size` from
+    `config.min_width` / `config.min_height`.
+    """
+    auth_headers = {"Authorization": f"{config.api_key}"}
+    page_url = f"https://api.pexels.com/v1/collections/{config.collection_id}?page=1&per_page=80&type=photos"
+    media_items = []
+    while page_url:
+        response = requests.get(page_url, headers=auth_headers)
+        response.raise_for_status()
+        payload = response.json()
+        media_items.extend(payload['media'])
+        # an absent or empty `next_page` means this was the last page
+        page_url = payload.get('next_page')
+
+    photos = []
+    for item in media_items:
+        original_url = item['src']['original']
+        new_width, new_height = get_desired_size(item['width'], item['height'], config.min_width, config.min_height)
+        photos.append(Photo(
+            id=item['id'],
+            host="pexels",
+            width=item['width'],
+            height=item['height'],
+            url=f"{original_url}?auto=compress&cs=tinysrgb&h={new_height}&w={new_width}",
+            filename=os.path.basename(original_url),
+        ))
+
+    return photos
+
+
+def get_unsplash_images(config: 'DatasetSyncCollectionConfig') -> List[Photo]:
+    """List every photo in an Unsplash collection as `Photo` objects.
+
+    Fetches page 1, reads the last page number from the response's `Link`
+    header and then fetches the remaining pages (30 photos per page). A
+    response without a `rel="last"` link is treated as the only page instead
+    of raising. Each photo's download url requests the width computed by
+    `get_desired_size`, and its local file name is `<id>.jpg`.
+
+    Raises `requests.HTTPError` when any page request fails.
+    """
+    from urllib.parse import parse_qs, urlparse  # function-scope: only this parser needs them
+
+    headers = {
+        # "Authorization": f"Client-ID {UNSPLASH_ACCESS_KEY}"
+        "Authorization": f"Client-ID {config.api_key}"
+    }
+    # headers['Authorization'] = f"Bearer {token}"
+
+    photos_url = f"https://api.unsplash.com/collections/{config.collection_id}/photos"
+
+    response = requests.get(f"{photos_url}?page=1&per_page=30", headers=headers)
+    response.raise_for_status()
+    all_images = response.json()
+
+    # `response.links` is requests' parsed view of the Link header, keyed by rel, e.g. for
+    # 'Link': '<https://api.unsplash.com/collections/mIPWwLdfct8/photos?page=82>; rel="last", <https://api.unsplash.com/collections/mIPWwLdfct8/photos?page=2>; rel="next"'
+    # it maps 'last' to {'url': '...photos?page=82', 'rel': 'last'}
+    last_page = 1
+    last_link = response.links.get('last')
+    if last_link is not None:
+        # the page number is the `page` query parameter of the last-page url
+        last_page = int(parse_qs(urlparse(last_link['url']).query)['page'][0])
+
+    if last_page > 1:
+        # page 1 was fetched above, so we don't need to get it again
+        for page in tqdm.tqdm(range(2, last_page + 1)):
+            response = requests.get(f"{photos_url}?page={page}&per_page=30", headers=headers)
+            response.raise_for_status()
+            all_images.extend(response.json())
+
+    photos = []
+    for image in all_images:
+        # only the width is sent to Unsplash; the computed height is not used
+        new_width, _ = get_desired_size(image['width'], image['height'], config.min_width, config.min_height)
+        url = f"{image['urls']['raw']}&w={new_width}"
+        filename = f"{image['id']}.jpg"
+
+        photos.append(Photo(
+            id=image['id'],
+            host="unsplash",
+            width=image['width'],
+            height=image['height'],
+            url=url,
+            filename=filename
+        ))
+
+    return photos
+
+
+def get_img_paths(dir_path: str):
+    """Return the full paths of the image files directly inside `dir_path`.
+
+    Creates the directory if missing; a file is an image when its lower-cased extension is in `img_exts`.
+    """
+    os.makedirs(dir_path, exist_ok=True)
+    return [os.path.join(dir_path, name) for name in os.listdir(dir_path)
+            if os.path.splitext(name)[1].lower() in img_exts]
+
+
+def get_local_image_ids(dir_path: str):
+    """Return the set of image ids in `dir_path`: each image file name up to its first '.'."""
+    os.makedirs(dir_path, exist_ok=True)
+    # assumes local files are named after the provider's photo id, e.g. 'abc123.jpg'
+    return {os.path.basename(path).split('.')[0] for path in get_img_paths(dir_path)}
+
+
+def get_local_image_file_names(dir_path: str):
+    """Return the set of image file names (base name including extension) found in `dir_path`."""
+    os.makedirs(dir_path, exist_ok=True)
+    # get_img_paths yields full paths; keep only the final path component
+    return {os.path.basename(path) for path in get_img_paths(dir_path)}
+
+
+def download_image(photo: Photo, dir_path: str, min_width: int = 1024, min_height: int = 1024):
+    """Download `photo.url` into `dir_path`, saved under `photo.filename`.
+
+    Raises ValueError (before any request is made) when the photo is narrower
+    than `min_width` or shorter than `min_height`.
+    """
+    if photo.width < min_width or photo.height < min_height:
+        raise ValueError(f"Skipping {photo.id} because it is too small: {photo.width}x{photo.height}")
+
+    img_response = requests.get(photo.url)
+    img_response.raise_for_status()
+    os.makedirs(dir_path, exist_ok=True)
+    with open(os.path.join(dir_path, photo.filename), 'wb') as file:
+        file.write(img_response.content)
+
+
+def update_caption(img_path: str):
+    """Convert the image's sidecar `.txt` caption into a `.json` caption file.
+
+    No-op when the `.json` exists; a missing `.txt` yields an empty caption and nothing is deleted.
+    """
+    import json  # function-scope import so the module's import block stays untouched
+    base_path = os.path.splitext(img_path)[0]
+    json_path, txt_path = f"{base_path}.json", f"{base_path}.txt"
+    if os.path.exists(json_path):
+        return  # we have a json file (todo add poi and what not)
+    caption, has_txt = "", os.path.exists(txt_path)
+    if has_txt:
+        with open(txt_path, 'r') as file:
+            caption = file.read()
+    # json.dumps escapes quotes, backslashes and newlines that a hand-built string would leave raw
+    with open(json_path, 'w') as file:
+        file.write(json.dumps({"caption": caption}, ensure_ascii=False))
+    if has_txt:
+        os.remove(txt_path)
+
+
+# def equalize_img(img_path: str):
+#     input_path = img_path
+#     output_path = os.path.join(img_root_path(img_path), COLOR_CORRECTED_DIR, os.path.basename(img_path))
+#     os.makedirs(os.path.dirname(output_path), exist_ok=True)
+#     process_img(
+#         img_path=input_path,
+#         output_path=output_path,
+#         equalize=True,
+#         max_size=2056,
+#         white_balance=False,
+#         gamma_correction=False,
+#         strength=0.6,
+#     )
+
+
+# def annotate_depth(img_path: str):
+#     # make fake args
+#     args = argparse.Namespace()
+#     args.annotator = "midas"
+#     args.res = 1024
+#
+#     img = cv2.imread(img_path)
+#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+#
+#     output = annotate(img, args)
+#
+#     output = output.astype('uint8')
+#     output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
+#
+#     os.makedirs(os.path.dirname(img_path), exist_ok=True)
+#     output_path = os.path.join(img_root_path(img_path), DEPTH_DIR, os.path.basename(img_path))
+#
+#     cv2.imwrite(output_path, output)
+
+
+# def invert_depth(img_path: str):
+#     img = cv2.imread(img_path)
+#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+#     # invert the colors
+#     img = cv2.bitwise_not(img)
+#
+#     os.makedirs(os.path.dirname(img_path), exist_ok=True)
+#     output_path = os.path.join(img_root_path(img_path), INVERTED_DEPTH_DIR, os.path.basename(img_path))
+#     cv2.imwrite(output_path, img)
+
+
+    #
+    # # update our list of raw images
+    # raw_images = get_img_paths(raw_dir)
+    #
+    # # update raw captions
+    # for image_id in tqdm.tqdm(raw_images, desc="Updating raw captions"):
+    #     update_caption(image_id)
+    #
+    # # equalize images
+    # for img_path in tqdm.tqdm(raw_images, desc="Equalizing images"):
+    #     if img_path not in eq_images:
+    #         equalize_img(img_path)
+    #
+    # # update our list of eq images
+    # eq_images = get_img_paths(eq_dir)
+    # # update eq captions
+    # for image_id in tqdm.tqdm(eq_images, desc="Updating eq captions"):
+    #     update_caption(image_id)
+    #
+    # # annotate depth
+    # depth_dir = os.path.join(root_dir, DEPTH_DIR)
+    # depth_images = get_img_paths(depth_dir)
+    # for img_path in tqdm.tqdm(eq_images, desc="Annotating depth"):
+    #     if img_path not in depth_images:
+    #         annotate_depth(img_path)
+    #
+    # depth_images = get_img_paths(depth_dir)
+    #
+    # # invert depth
+    # inv_depth_dir = os.path.join(root_dir, INVERTED_DEPTH_DIR)
+    # inv_depth_images = get_img_paths(inv_depth_dir)
+    # for img_path in tqdm.tqdm(depth_images, desc="Inverting depth"):
+    #     if img_path not in inv_depth_images:
+    #         invert_depth(img_path)
diff --git a/extensions_built_in/image_reference_slider_trainer/ImageReferenceSliderTrainerProcess.py b/extensions_built_in/image_reference_slider_trainer/ImageReferenceSliderTrainerProcess.py
index 9e28a7c4..11fa8a9e 100644
--- a/extensions_built_in/image_reference_slider_trainer/ImageReferenceSliderTrainerProcess.py
+++ b/extensions_built_in/image_reference_slider_trainer/ImageReferenceSliderTrainerProcess.py
@@ -16,6 +16,7 @@ from toolkit import train_tools
 import torch
 from jobs.process import BaseSDTrainProcess
 import random
+from toolkit.basic import value_map
 
 
 def flush():
@@ -91,11 +92,18 @@ class ImageReferenceSliderTrainerProcess(BaseSDTrainProcess):
                 network_neg_weight = network_neg_weight.item()
 
             # get an array of random floats between -weight_jitter and weight_jitter
+            loss_jitter_multiplier = 1.0
             weight_jitter = self.slider_config.weight_jitter
             if weight_jitter > 0.0:
                 jitter_list = random.uniform(-weight_jitter, weight_jitter)
+                orig_network_pos_weight = network_pos_weight
                 network_pos_weight += jitter_list
                 network_neg_weight += (jitter_list * -1.0)
+                # penalize the loss for its distance from network_pos_weight
+                # a jitter_list of abs(3.0) on a weight of 5.0 is a 60% jitter
+                # so the loss_jitter_multiplier needs to be 0.4
+                loss_jitter_multiplier = value_map(abs(jitter_list), 0.0, weight_jitter, 1.0, 0.0)
+
 
             # if items in network_weight list are tensors, convert them to floats
 
@@ -146,38 +154,33 @@ class ImageReferenceSliderTrainerProcess(BaseSDTrainProcess):
             timesteps = torch.cat([timesteps, timesteps], dim=0)
             network_multiplier = [network_pos_weight * 1.0, network_neg_weight * -1.0]
 
-        flush()
-
-        loss_float = None
-        loss_mirror_float = None
-
         self.optimizer.zero_grad()
         noisy_latents.requires_grad = False
 
         # if training text encoder enable grads, else do context of no grad
         with torch.set_grad_enabled(self.train_config.train_text_encoder):
-            # text encoding
-            embedding_list = []
-            # embed the prompts
+            # fix issue with them being tuples sometimes
+            prompt_list = []
             for prompt in prompts:
-                embedding = self.sd.encode_prompt(prompt).to(self.device_torch, dtype=dtype)
-                embedding_list.append(embedding)
-            conditional_embeds = concat_prompt_embeds(embedding_list)
+                if isinstance(prompt, tuple):
+                    prompt = prompt[0]
+                prompt_list.append(prompt)
+            conditional_embeds = self.sd.encode_prompt(prompt_list).to(self.device_torch, dtype=dtype)
             conditional_embeds = concat_prompt_embeds([conditional_embeds, conditional_embeds])
 
-        if self.model_config.is_xl:
-            # todo also allow for setting this for low ram in general, but sdxl spikes a ton on back prop
-            network_multiplier_list = network_multiplier
-            noisy_latent_list = torch.chunk(noisy_latents, 2, dim=0)
-            noise_list = torch.chunk(noise, 2, dim=0)
-            timesteps_list = torch.chunk(timesteps, 2, dim=0)
-            conditional_embeds_list = split_prompt_embeds(conditional_embeds)
-        else:
-            network_multiplier_list = [network_multiplier]
-            noisy_latent_list = [noisy_latents]
-            noise_list = [noise]
-            timesteps_list = [timesteps]
-            conditional_embeds_list = [conditional_embeds]
+        # if self.model_config.is_xl:
+        #     # todo also allow for setting this for low ram in general, but sdxl spikes a ton on back prop
+        #     network_multiplier_list = network_multiplier
+        #     noisy_latent_list = torch.chunk(noisy_latents, 2, dim=0)
+        #     noise_list = torch.chunk(noise, 2, dim=0)
+        #     timesteps_list = torch.chunk(timesteps, 2, dim=0)
+        #     conditional_embeds_list = split_prompt_embeds(conditional_embeds)
+        # else:
+        network_multiplier_list = [network_multiplier]
+        noisy_latent_list = [noisy_latents]
+        noise_list = [noise]
+        timesteps_list = [timesteps]
+        conditional_embeds_list = [conditional_embeds]
 
         losses = []
         # allow to chunk it out to save vram
@@ -205,20 +208,17 @@ class ImageReferenceSliderTrainerProcess(BaseSDTrainProcess):
                 loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none")
                 loss = loss.mean([1, 2, 3])
 
-                # todo add snr gamma here
                 if self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001:
                     # add min_snr_gamma
                     loss = apply_snr_weight(loss, timesteps, noise_scheduler, self.train_config.min_snr_gamma)
 
-                loss = loss.mean()
-                loss_slide_float = loss.item()
+                loss = loss.mean() * loss_jitter_multiplier
 
                 loss_float = loss.item()
                 losses.append(loss_float)
 
                 # back propagate loss to free ram
                 loss.backward()
-                flush()
 
         # apply gradients
         optimizer.step()
diff --git a/extensions_built_in/sd_trainer/SDTrainer.py b/extensions_built_in/sd_trainer/SDTrainer.py
index 4edbf26b..69e51fc6 100644
--- a/extensions_built_in/sd_trainer/SDTrainer.py
+++ b/extensions_built_in/sd_trainer/SDTrainer.py
@@ -1,11 +1,21 @@
 from collections import OrderedDict
-from torch.utils.data import DataLoader
-from toolkit.prompt_utils import concat_prompt_embeds, split_prompt_embeds
+from typing import Union, Literal, List
+from diffusers import T2IAdapter
+
+from toolkit import train_tools
+from toolkit.basic import value_map, adain, get_mean_std
+from toolkit.config_modules import GuidanceConfig
+from toolkit.data_transfer_object.data_loader import DataLoaderBatchDTO, FileItemDTO
+from toolkit.image_utils import show_tensors, show_latents
+from toolkit.ip_adapter import IPAdapter
+from toolkit.prompt_utils import PromptEmbeds
 from toolkit.stable_diffusion_model import StableDiffusion, BlankNetwork
-from toolkit.train_tools import get_torch_dtype, apply_snr_weight
+from toolkit.train_tools import get_torch_dtype, apply_snr_weight, add_all_snr_to_noise_scheduler, \
+    apply_learnable_snr_gos, LearnableSNRGamma
 import gc
 import torch
 from jobs.process import BaseSDTrainProcess
+from torchvision import transforms
 
 
 def flush():
@@ -13,101 +23,618 @@ def flush():
     gc.collect()
 
 
+adapter_transforms = transforms.Compose([
+    transforms.ToTensor(),
+])
+
+
 class SDTrainer(BaseSDTrainProcess):
 
     def __init__(self, process_id: int, job, config: OrderedDict, **kwargs):
         super().__init__(process_id, job, config, **kwargs)
+        self.assistant_adapter: Union['T2IAdapter', None]
+        self.do_prior_prediction = False
+        self.do_long_prompts = False
+        if self.train_config.inverted_mask_prior:
+            self.do_prior_prediction = True
 
     def before_model_load(self):
         pass
 
+    def before_dataset_load(self):
+        """Load the frozen T2I adapter named by `adapter_assist_name_or_path`, if one is set."""
+        self.assistant_adapter = None
+        if self.train_config.adapter_assist_name_or_path is not None:
+            adapter_path = self.train_config.adapter_assist_name_or_path
+
+            # dont name this adapter since we are not training it
+            self.assistant_adapter = T2IAdapter.from_pretrained(
+                adapter_path, torch_dtype=get_torch_dtype(self.train_config.dtype), variant="fp16"
+            ).to(self.device_torch)
+            self.assistant_adapter.eval()
+            self.assistant_adapter.requires_grad_(False)
+            flush()
+
     def hook_before_train_loop(self):
-        self.sd.vae.eval()
-        self.sd.vae.to(self.device_torch)
-
-        # textual inversion
-        if self.embedding is not None:
-            # keep original embeddings as reference
-            self.orig_embeds_params = self.sd.text_encoder.get_input_embeddings().weight.data.clone()
-            # set text encoder to train. Not sure if this is necessary but diffusers example did it
-            self.sd.text_encoder.train()
-
-    def hook_train_loop(self, batch):
-        dtype = get_torch_dtype(self.train_config.dtype)
-        noisy_latents, noise, timesteps, conditioned_prompts, imgs = self.process_general_training_batch(batch)
-
-        self.optimizer.zero_grad()
-        flush()
-
-        # text encoding
-        grad_on_text_encoder = False
-        if self.train_config.train_text_encoder:
-            grad_on_text_encoder = True
-
-        if self.embedding:
-            grad_on_text_encoder = True
-
-        # have a blank network so we can wrap it in a context and set multipliers without checking every time
-        if self.network is not None:
-            network = self.network
+        # move vae to device if we did not cache latents
+        if not self.is_latents_cached:
+            self.sd.vae.eval()
+            self.sd.vae.to(self.device_torch)
         else:
-            network = BlankNetwork()
+            # offload it. Already cached
+            self.sd.vae.to('cpu')
+            flush()
+        add_all_snr_to_noise_scheduler(self.sd.noise_scheduler, self.device_torch)
 
-        # activate network if it exits
-        with network:
-            with torch.set_grad_enabled(grad_on_text_encoder):
-                embedding_list = []
-                # embed the prompts
-                for prompt in conditioned_prompts:
-                    embedding = self.sd.encode_prompt(prompt).to(self.device_torch, dtype=dtype)
-                    embedding_list.append(embedding)
-                conditional_embeds = concat_prompt_embeds(embedding_list)
+    # you can expand these in a child class to make customization easier
+    def calculate_loss(
+            self,
+            noise_pred: torch.Tensor,
+            noise: torch.Tensor,
+            noisy_latents: torch.Tensor,
+            timesteps: torch.Tensor,
+            batch: 'DataLoaderBatchDTO',
+            mask_multiplier: Union[torch.Tensor, float] = 1.0,
+            prior_pred: Union[torch.Tensor, None] = None,
+            **kwargs
+    ):
+        loss_target = self.train_config.loss_target
 
-            noise_pred = self.sd.predict_noise(
-                latents=noisy_latents.to(self.device_torch, dtype=dtype),
-                conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype),
-                timestep=timesteps,
-                guidance_scale=1.0,
-            )
-        # 9.18 gb
-        noise = noise.to(self.device_torch, dtype=dtype)
+        prior_mask_multiplier = None
+        target_mask_multiplier = None
 
-        if self.sd.prediction_type == 'v_prediction':
+        if self.train_config.match_noise_norm:
+            # match the norm of the noise
+            noise_norm = torch.linalg.vector_norm(noise, ord=2, dim=(1, 2, 3), keepdim=True)
+            noise_pred_norm = torch.linalg.vector_norm(noise_pred, ord=2, dim=(1, 2, 3), keepdim=True)
+            noise_pred = noise_pred * (noise_norm / noise_pred_norm)
+
+        if self.train_config.inverted_mask_prior:
+            # we need to make the noise prediction be a masked blending of noise and prior_pred
+            prior_mask_multiplier = 1.0 - mask_multiplier
+            # target_mask_multiplier = mask_multiplier
+            # mask_multiplier = 1.0
+            target = noise
+            # target = (noise * mask_multiplier) + (prior_pred * prior_mask_multiplier)
+            # set masked multiplier to 1.0 so we dont double apply it
+            # mask_multiplier = 1.0
+        elif prior_pred is not None:
+            # matching adapter prediction
+            target = prior_pred
+        elif self.sd.prediction_type == 'v_prediction':
             # v-parameterization training
             target = self.sd.noise_scheduler.get_velocity(noisy_latents, noise, timesteps)
         else:
             target = noise
 
-        loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none")
+        pred = noise_pred
+
+        ignore_snr = False
+
+        if loss_target == 'source' or loss_target == 'unaugmented':
+            # ignore_snr = True
+            if batch.sigmas is None:
+                raise ValueError("Batch sigmas is None. This should not happen")
+
+            # src https://github.com/huggingface/diffusers/blob/324d18fba23f6c9d7475b0ff7c777685f7128d40/examples/t2i_adapter/train_t2i_adapter_sdxl.py#L1190
+            denoised_latents = noise_pred * (-batch.sigmas) + noisy_latents
+            weighing = batch.sigmas ** -2.0
+            if loss_target == 'source':
+                # denoise the latent and compare to the latent in the batch
+                target = batch.latents
+            elif loss_target == 'unaugmented':
+                # we have to encode images into latents for now
+                # we also denoise as the unaugmented tensor is not a noisy differential
+                with torch.no_grad():
+                    unaugmented_latents = self.sd.encode_images(batch.unaugmented_tensor)
+                    unaugmented_latents = unaugmented_latents * self.train_config.latent_multiplier
+                    target = unaugmented_latents.detach()
+
+                # Get the target for loss depending on the prediction type
+                if self.sd.noise_scheduler.config.prediction_type == "epsilon":
+                    target = target  # we are computing loss against denoise latents
+                elif self.sd.noise_scheduler.config.prediction_type == "v_prediction":
+                    target = self.sd.noise_scheduler.get_velocity(target, noise, timesteps)
+                else:
+                    raise ValueError(f"Unknown prediction type {self.sd.noise_scheduler.config.prediction_type}")
+
+            # mse loss without reduction
+            loss_per_element = (weighing.float() * (denoised_latents.float() - target.float()) ** 2)
+            loss = loss_per_element
+        else:
+            loss = torch.nn.functional.mse_loss(pred.float(), target.float(), reduction="none")
+
+        # multiply by our mask
+        loss = loss * mask_multiplier
+
+        if self.train_config.inverted_mask_prior:
+            # add a loss on the unmasked areas against the prior, for unmasked regularization
+            prior_loss = torch.nn.functional.mse_loss(
+                prior_pred.float(),
+                pred.float(),
+                reduction="none"
+            )
+            prior_loss = prior_loss * prior_mask_multiplier * self.train_config.inverted_mask_prior_multiplier
+            loss = loss + prior_loss
+
         loss = loss.mean([1, 2, 3])
 
-        if self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001:
+        if self.train_config.learnable_snr_gos:
+            # add snr_gamma
+            loss = apply_learnable_snr_gos(loss, timesteps, self.snr_gos)
+        elif self.train_config.snr_gamma is not None and self.train_config.snr_gamma > 0.000001 and not ignore_snr:
+            # add snr_gamma
+            loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler, self.train_config.snr_gamma, fixed=True)
+        elif self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001 and not ignore_snr:
             # add min_snr_gamma
             loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler, self.train_config.min_snr_gamma)
 
         loss = loss.mean()
+        return loss
 
-        # back propagate loss to free ram
+    def preprocess_batch(self, batch: 'DataLoaderBatchDTO'):
+        return batch  # no-op by default: the batch is returned unchanged
+
+    def get_guided_loss(
+            self,
+            noisy_latents: torch.Tensor,
+            conditional_embeds: PromptEmbeds,
+            match_adapter_assist: bool,
+            network_weight_list: list,
+            timesteps: torch.Tensor,
+            pred_kwargs: dict,
+            batch: 'DataLoaderBatchDTO',
+            noise: torch.Tensor,
+            **kwargs
+    ):
+        with torch.no_grad():
+            # Perform targeted guidance (working title)
+            conditional_noisy_latents = noisy_latents  # target images
+            dtype = get_torch_dtype(self.train_config.dtype)
+
+            if batch.unconditional_latents is not None:
+                # unconditional latents are the "neutral" images. Add noise here identical to
+                # the noise added to the conditional latents, at the same timesteps
+                unconditional_noisy_latents = self.sd.noise_scheduler.add_noise(
+                    batch.unconditional_latents, noise, timesteps
+                )
+
+            # calculate the differential between our conditional (target image) and our unconditional (neutral image)
+            target_differential_noise = unconditional_noisy_latents - conditional_noisy_latents
+            target_differential_noise = target_differential_noise.detach()
+
+            # add the target differential to the target latents as if it were noise with the scheduler, scaled to
+            # the current timestep. Scaling the noise here is important as it scales our guidance to the current
+            # timestep. This is the key to making the guidance work.
+            guidance_latents = self.sd.noise_scheduler.add_noise(
+                conditional_noisy_latents,
+                target_differential_noise,
+                timesteps
+            )
+
+            # Disable the LoRA network so we can predict parent network knowledge without it
+            self.network.is_active = False
+            self.sd.unet.eval()
+
+            # Predict noise to get a baseline of what the parent network wants to do with the latents + noise.
+            # This acts as our control to preserve the unaltered parts of the image.
+            baseline_prediction = self.sd.predict_noise(
+                latents=guidance_latents.to(self.device_torch, dtype=dtype).detach(),
+                conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype).detach(),
+                timestep=timesteps,
+                guidance_scale=1.0,
+                **pred_kwargs  # adapter residuals in here
+            ).detach()
+
+        # turn the LoRA network back on.
+        self.sd.unet.train()
+        self.network.is_active = True
+        self.network.multiplier = network_weight_list
+
+        # do our prediction with LoRA active on the scaled guidance latents
+        prediction = self.sd.predict_noise(
+            latents=guidance_latents.to(self.device_torch, dtype=dtype).detach(),
+            conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype).detach(),
+            timestep=timesteps,
+            guidance_scale=1.0,
+            **pred_kwargs  # adapter residuals in here
+        )
+
+        # remove the baseline prediction from our prediction to get the differential between the two
+        # all that should be left is the differential between the conditional and unconditional images
+        pred_differential_noise = prediction - baseline_prediction
+
+        # for loss, we target ONLY the unscaled differential between our conditional and unconditional latents
+        # not the timestep scaled noise that was added. This is the diffusion training process.
+        # This will guide the network to make identical predictions it previously did for everything EXCEPT our
+        # differential between the conditional and unconditional images (target)
+        loss = torch.nn.functional.mse_loss(
+            pred_differential_noise.float(),
+            target_differential_noise.float(),
+            reduction="none"
+        )
+
+        loss = loss.mean([1, 2, 3])
+        loss = self.apply_snr(loss, timesteps)
+        loss = loss.mean()
         loss.backward()
-        flush()
 
-        # apply gradients
-        self.optimizer.step()
-        self.optimizer.zero_grad()
-        self.lr_scheduler.step()
+        # detach it so parent class can run backward on no grads without throwing error
+        loss = loss.detach()
+        loss.requires_grad_(True)
+
+        return loss
+
+    def get_prior_prediction(  # no-grad noise prediction with self.network deactivated; returns the detached prediction
+            self,
+            noisy_latents: torch.Tensor,
+            conditional_embeds: PromptEmbeds,
+            match_adapter_assist: bool,
+            network_weight_list: list,  # only referenced by the commented-out multiplier lines below
+            timesteps: torch.Tensor,
+            pred_kwargs: dict,
+            batch: 'DataLoaderBatchDTO',  # not read in this method
+            noise: torch.Tensor,  # not read in this method
+            **kwargs
+    ):
+        # do a prediction here with the network deactivated (is_active = False) so its output can be matched later
+        with torch.no_grad():
+            dtype = get_torch_dtype(self.train_config.dtype)
+            # don't use network on this
+            # self.network.multiplier = 0.0
+            was_network_active = self.network.is_active  # NOTE(review): assumes self.network is not None
+            self.network.is_active = False
+            self.sd.unet.eval()
+            prior_pred = self.sd.predict_noise(
+                latents=noisy_latents.to(self.device_torch, dtype=dtype).detach(),
+                conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype).detach(),
+                timestep=timesteps,
+                guidance_scale=1.0,
+                **pred_kwargs  # adapter residuals in here
+            )
+            self.sd.unet.train()
+            prior_pred = prior_pred.detach()
+            # remove the residuals as we won't use them on prediction when matching control
+            if match_adapter_assist and 'down_block_additional_residuals' in pred_kwargs:
+                del pred_kwargs['down_block_additional_residuals']  # mutates the caller's dict in place
+            # restore network
+            # self.network.multiplier = network_weight_list
+            self.network.is_active = was_network_active
+        return prior_pred
+
+    def before_unet_predict(self):  # hook: hook_train_loop calls this just before the guided-loss / unet prediction step
+        pass  # intentionally empty
+
+    def after_unet_predict(self):  # hook: hook_train_loop calls this after predict_noise (non-guided branch only)
+        pass  # intentionally empty
+
+    def end_of_training_loop(self):  # hook: hook_train_loop calls this right before returning its loss dict
+        pass  # intentionally empty
+
+    def hook_train_loop(self, batch: 'DataLoaderBatchDTO'):  # one training step: encode, predict, loss, backward, optimizer/scheduler step; returns OrderedDict with 'loss'
+        self.timer.start('preprocess_batch')
+        batch = self.preprocess_batch(batch)
+        dtype = get_torch_dtype(self.train_config.dtype)
+        noisy_latents, noise, timesteps, conditioned_prompts, imgs = self.process_general_training_batch(batch)
+        network_weight_list = batch.get_network_weight_list()
+        if self.train_config.single_item_batching:
+            network_weight_list = network_weight_list + network_weight_list
+
+        has_adapter_img = batch.control_tensor is not None
+
+        match_adapter_assist = False
+
+
+        # check if we are matching the adapter assistant
+        if self.assistant_adapter:
+            if self.train_config.match_adapter_chance == 1.0:
+                match_adapter_assist = True
+            elif self.train_config.match_adapter_chance > 0.0:
+                match_adapter_assist = torch.rand(
+                    (1,), device=self.device_torch, dtype=dtype
+                ) < self.train_config.match_adapter_chance  # result is a 1-element bool tensor, not a Python bool
+
+        self.timer.stop('preprocess_batch')
+
+        with torch.no_grad():
+            loss_multiplier = torch.ones((noisy_latents.shape[0], 1, 1, 1), device=self.device_torch, dtype=dtype)
+            for idx, file_item in enumerate(batch.file_items):
+                if file_item.is_reg:
+                    loss_multiplier[idx] = loss_multiplier[idx] * self.train_config.reg_weight
+
+
+            adapter_images = None
+            sigmas = None  # only referenced by the commented-out code below
+            if has_adapter_img and (self.adapter or self.assistant_adapter):
+                with self.timer('get_adapter_images'):
+                    # todo move this to data loader
+                    if batch.control_tensor is not None:
+                        adapter_images = batch.control_tensor.to(self.device_torch, dtype=dtype).detach()
+                        # match in channels
+                        if self.assistant_adapter is not None:
+                            in_channels = self.assistant_adapter.config.in_channels
+                            if adapter_images.shape[1] != in_channels:
+                                # we need to match the channels
+                                adapter_images = adapter_images[:, :in_channels, :, :]
+                    else:  # not reachable here: has_adapter_img already implies control_tensor is not None
+                        raise NotImplementedError("Adapter images now must be loaded with dataloader")
+                    # not 100% sure what this does. But they do it here
+                    # https://github.com/huggingface/diffusers/blob/38a664a3d61e27ab18cd698231422b3c38d6eebf/examples/t2i_adapter/train_t2i_adapter_sdxl.py#L1170
+                    # sigmas = self.get_sigmas(timesteps, len(noisy_latents.shape), noisy_latents.dtype)
+                    # noisy_latents = noisy_latents / ((sigmas ** 2 + 1) ** 0.5)
+
+            mask_multiplier = torch.ones((noisy_latents.shape[0], 1, 1, 1), device=self.device_torch, dtype=dtype)
+            if batch.mask_tensor is not None:
+                with self.timer('get_mask_multiplier'):
+                    # upsampling is not supported for bfloat16, so interpolate in float16 and cast back afterwards
+                    mask_multiplier = batch.mask_tensor.to(self.device_torch, dtype=torch.float16).detach()
+                    # scale down to the size of the latents, mask multiplier shape(bs, 1, width, height), noisy_latents shape(bs, channels, width, height)
+                    mask_multiplier = torch.nn.functional.interpolate(
+                        mask_multiplier, size=(noisy_latents.shape[2], noisy_latents.shape[3])
+                    )
+                    # expand to match latents
+                    mask_multiplier = mask_multiplier.expand(-1, noisy_latents.shape[1], -1, -1)
+                    mask_multiplier = mask_multiplier.to(self.device_torch, dtype=dtype).detach()
+
+        def get_adapter_multiplier():  # returns 1.0, or a random scale passed through value_map into the chosen min/max strength
+            if self.adapter and isinstance(self.adapter, T2IAdapter):
+                # training a t2i adapter, not using as assistant.
+                return 1.0
+            elif match_adapter_assist:
+                # training a texture. We want it high
+                adapter_strength_min = 0.9
+                adapter_strength_max = 1.0
+            else:
+                # training with assistance, we want it low
+                adapter_strength_min = 0.4
+                adapter_strength_max = 0.7
+                # adapter_strength_min = 0.9
+                # adapter_strength_max = 1.1
+
+            adapter_conditioning_scale = torch.rand(
+                (1,), device=self.device_torch, dtype=dtype
+            )
+
+            adapter_conditioning_scale = value_map(
+                adapter_conditioning_scale,
+                0.0,
+                1.0,
+                adapter_strength_min,
+                adapter_strength_max
+            )
+            return adapter_conditioning_scale
+
+        # flush()
+        with self.timer('grad_setup'):
+
+            # text encoding
+            grad_on_text_encoder = False
+            if self.train_config.train_text_encoder:
+                grad_on_text_encoder = True
+
+            if self.embedding:
+                grad_on_text_encoder = True
+
+            # have a blank network so we can wrap it in a context and set multipliers without checking every time
+            if self.network is not None:
+                network = self.network
+            else:
+                network = BlankNetwork()
+
+            # set the weights
+            network.multiplier = network_weight_list
+            self.optimizer.zero_grad(set_to_none=True)
+
+        # activate network if it exists
+
+        prompts_1 = conditioned_prompts
+        prompts_2 = None
+        if self.train_config.short_and_long_captions_encoder_split and self.sd.is_xl:
+            prompts_1 = batch.get_caption_short_list()
+            prompts_2 = conditioned_prompts
+
+            # make the batch splits
+        if self.train_config.single_item_batching:  # each item gets its own forward/backward below; the reported loss is the last item's
+            if self.model_config.refiner_name_or_path is not None:
+                raise ValueError("Single item batching is not supported when training the refiner")
+            batch_size = noisy_latents.shape[0]
+            # chunk/split everything
+            noisy_latents_list = torch.chunk(noisy_latents, batch_size, dim=0)
+            noise_list = torch.chunk(noise, batch_size, dim=0)
+            timesteps_list = torch.chunk(timesteps, batch_size, dim=0)
+            conditioned_prompts_list = [[prompt] for prompt in prompts_1]
+            if imgs is not None:
+                imgs_list = torch.chunk(imgs, batch_size, dim=0)
+            else:
+                imgs_list = [None for _ in range(batch_size)]
+            if adapter_images is not None:
+                adapter_images_list = torch.chunk(adapter_images, batch_size, dim=0)
+            else:
+                adapter_images_list = [None for _ in range(batch_size)]
+            mask_multiplier_list = torch.chunk(mask_multiplier, batch_size, dim=0)
+            if prompts_2 is None:
+                prompt_2_list = [None for _ in range(batch_size)]
+            else:
+                prompt_2_list = [[prompt] for prompt in prompts_2]
+
+        else:
+            noisy_latents_list = [noisy_latents]
+            noise_list = [noise]
+            timesteps_list = [timesteps]
+            conditioned_prompts_list = [prompts_1]
+            imgs_list = [imgs]
+            adapter_images_list = [adapter_images]
+            mask_multiplier_list = [mask_multiplier]
+            if prompts_2 is None:
+                prompt_2_list = [None]
+            else:
+                prompt_2_list = [prompts_2]
+
+        for noisy_latents, noise, timesteps, conditioned_prompts, imgs, adapter_images, mask_multiplier, prompt_2 in zip(
+                noisy_latents_list,
+                noise_list,
+                timesteps_list,
+                conditioned_prompts_list,
+                imgs_list,
+                adapter_images_list,
+                mask_multiplier_list,
+                prompt_2_list
+        ):
+            if self.train_config.negative_prompt is not None:
+                # add negative prompt
+                conditioned_prompts = conditioned_prompts + [self.train_config.negative_prompt for x in
+                                                             range(len(conditioned_prompts))]
+                if prompt_2 is not None:
+                    prompt_2 = prompt_2 + [self.train_config.negative_prompt for x in range(len(prompt_2))]
+
+            with network:
+                with self.timer('encode_prompt'):
+                    if grad_on_text_encoder:
+                        with torch.set_grad_enabled(True):
+                            conditional_embeds = self.sd.encode_prompt(
+                                conditioned_prompts, prompt_2,
+                                dropout_prob=self.train_config.prompt_dropout_prob,
+                                long_prompts=self.do_long_prompts).to(
+                                self.device_torch,
+                                dtype=dtype)
+                    else:
+                        with torch.set_grad_enabled(False):
+                            # make sure it is in eval mode
+                            if isinstance(self.sd.text_encoder, list):
+                                for te in self.sd.text_encoder:
+                                    te.eval()
+                            else:
+                                self.sd.text_encoder.eval()
+                            conditional_embeds = self.sd.encode_prompt(
+                                conditioned_prompts, prompt_2,
+                                dropout_prob=self.train_config.prompt_dropout_prob,
+                                long_prompts=self.do_long_prompts).to(
+                                self.device_torch,
+                                dtype=dtype)
+
+                        # detach the embeddings
+                        conditional_embeds = conditional_embeds.detach()
+
+                # flush()
+                pred_kwargs = {}
+                if has_adapter_img and (
+                        (self.adapter and isinstance(self.adapter, T2IAdapter)) or self.assistant_adapter):
+                    with torch.set_grad_enabled(self.adapter is not None):
+                        adapter = self.adapter if self.adapter else self.assistant_adapter
+                        adapter_multiplier = get_adapter_multiplier()
+                        with self.timer('encode_adapter'):
+                            down_block_additional_residuals = adapter(adapter_images)
+                            if self.assistant_adapter:
+                                # not training. detach
+                                down_block_additional_residuals = [
+                                    sample.to(dtype=dtype).detach() * adapter_multiplier for sample in
+                                    down_block_additional_residuals
+                                ]
+                            else:
+                                down_block_additional_residuals = [
+                                    sample.to(dtype=dtype) * adapter_multiplier for sample in
+                                    down_block_additional_residuals
+                                ]
+
+                            pred_kwargs['down_block_additional_residuals'] = down_block_additional_residuals
+
+                prior_pred = None
+                if (has_adapter_img and self.assistant_adapter and match_adapter_assist) or self.do_prior_prediction:
+                    with self.timer('prior predict'):
+                        prior_pred = self.get_prior_prediction(
+                            noisy_latents=noisy_latents,
+                            conditional_embeds=conditional_embeds,
+                            match_adapter_assist=match_adapter_assist,
+                            network_weight_list=network_weight_list,
+                            timesteps=timesteps,
+                            pred_kwargs=pred_kwargs,
+                            noise=noise,
+                            batch=batch,
+                        )
+
+                if has_adapter_img and self.adapter and isinstance(self.adapter, IPAdapter):
+                    with self.timer('encode_adapter'):
+                        with torch.no_grad():
+                            conditional_clip_embeds = self.adapter.get_clip_image_embeds_from_tensors(adapter_images)
+                        conditional_embeds = self.adapter(conditional_embeds, conditional_clip_embeds)
+
+                self.before_unet_predict()
+                # if the batch has unconditional latents, use the guided loss instead of the regular prediction + loss
+                if batch.unconditional_latents is not None:
+                    # do guided loss
+                    loss = self.get_guided_loss(
+                        noisy_latents=noisy_latents,
+                        conditional_embeds=conditional_embeds,
+                        match_adapter_assist=match_adapter_assist,
+                        network_weight_list=network_weight_list,
+                        timesteps=timesteps,
+                        pred_kwargs=pred_kwargs,
+                        batch=batch,
+                        noise=noise,
+                    )
+
+                else:
+                    with self.timer('predict_unet'):
+                        noise_pred = self.sd.predict_noise(
+                            latents=noisy_latents.to(self.device_torch, dtype=dtype),
+                            conditional_embeddings=conditional_embeds.to(self.device_torch, dtype=dtype),
+                            timestep=timesteps,
+                            guidance_scale=1.0,
+                            **pred_kwargs
+                        )
+                    self.after_unet_predict()
+
+                    with self.timer('calculate_loss'):
+                        noise = noise.to(self.device_torch, dtype=dtype).detach()
+                        loss = self.calculate_loss(
+                            noise_pred=noise_pred,
+                            noise=noise,
+                            noisy_latents=noisy_latents,
+                            timesteps=timesteps,
+                            batch=batch,
+                            mask_multiplier=mask_multiplier,
+                            prior_pred=prior_pred,
+                        )
+                # check if nan
+                if torch.isnan(loss):
+                    raise ValueError("loss is nan")
+
+                with self.timer('backward'):
+                    # todo we have multiplier separated. works for now as res are not in same batch, but need to change
+                    loss = loss * loss_multiplier.mean()
+                    # IMPORTANT if gradient checkpointing do not leave with network when doing backward
+                    # it will destroy the gradients. This is because the network is a context manager
+                    # and will change the multipliers back to 0.0 when exiting. They will be
+                    # 0.0 for the backward pass and the gradients will be 0.0
+                    # I spent weeks on fighting this. DON'T DO IT
+                    # with fsdp_overlap_step_with_backward():
+                    loss.backward()
+        # flush()
+
+        if not self.is_grad_accumulation_step:
+            torch.nn.utils.clip_grad_norm_(self.params, self.train_config.max_grad_norm)
+            # only step if we are not accumulating
+            with self.timer('optimizer_step'):
+                # apply gradients
+                self.optimizer.step()
+                self.optimizer.zero_grad(set_to_none=True)
+        else:
+            # gradient accumulation. Just a place for breakpoint
+            pass
+
+        # TODO Should we only step scheduler on grad step? If so, need to recalculate last step
+        with self.timer('scheduler_step'):
+            self.lr_scheduler.step()
 
         if self.embedding is not None:
-            # Let's make sure we don't update any embedding weights besides the newly added token
-            index_no_updates = torch.ones((len(self.sd.tokenizer),), dtype=torch.bool)
-            index_no_updates[
-            min(self.embedding.placeholder_token_ids): max(self.embedding.placeholder_token_ids) + 1] = False
-            with torch.no_grad():
-                self.sd.text_encoder.get_input_embeddings().weight[
-                    index_no_updates
-                ] = self.orig_embeds_params[index_no_updates]
+            with self.timer('restore_embeddings'):
+                # Let's make sure we don't update any embedding weights besides the newly added token
+                self.embedding.restore_embeddings()
 
         loss_dict = OrderedDict(
             {'loss': loss.item()}
         )
 
+        self.end_of_training_loop()
+
         return loss_dict
diff --git a/extensions_built_in/sd_trainer/config/train.example.yaml b/extensions_built_in/sd_trainer/config/train.example.yaml
index a563ee8c..793d5d55 100644
--- a/extensions_built_in/sd_trainer/config/train.example.yaml
+++ b/extensions_built_in/sd_trainer/config/train.example.yaml
@@ -19,7 +19,7 @@ config:
         max_step_saves_to_keep: 5 # only affects step counts
       datasets:
         - folder_path: "/path/to/dataset"
-          caption_type: "txt"
+          caption_ext: "txt"
           default_caption: "[trigger]"
           buckets: true
           resolution: 512
diff --git a/info.py b/info.py
index 81cffcad..9f2f0a97 100644
--- a/info.py
+++ b/info.py
@@ -3,6 +3,6 @@ from collections import OrderedDict
 v = OrderedDict()
 v["name"] = "ai-toolkit"
 v["repo"] = "https://github.com/ostris/ai-toolkit"
-v["version"] = "0.0.4"
+v["version"] = "0.1.0"
 
 software_meta = v
diff --git a/jobs/process/BaseProcess.py b/jobs/process/BaseProcess.py
index 7a7a69f5..f0644607 100644
--- a/jobs/process/BaseProcess.py
+++ b/jobs/process/BaseProcess.py
@@ -2,6 +2,8 @@ import copy
 import json
 from collections import OrderedDict
 
+from toolkit.timer import Timer
+
 
 class BaseProcess(object):
 
@@ -18,6 +20,9 @@ class BaseProcess(object):
         self.raw_process_config = config
         self.name = self.get_conf('name', self.job.name)
         self.meta = copy.deepcopy(self.job.meta)
+        self.timer: Timer = Timer(f'{self.name} Timer')
+        self.performance_log_every = self.get_conf('performance_log_every', 0)
+
         print(json.dumps(self.config, indent=4))
 
     def get_conf(self, key, default=None, required=False, as_type=None):
diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py
index bd8790d9..4a98f17b 100644
--- a/jobs/process/BaseSDTrainProcess.py
+++ b/jobs/process/BaseSDTrainProcess.py
@@ -1,29 +1,52 @@
+import copy
 import glob
+import inspect
+import json
+import shutil
 from collections import OrderedDict
 import os
-from typing import Union
+from typing import Union, List
 
+import numpy as np
+import yaml
+from diffusers import T2IAdapter
+from safetensors.torch import save_file, load_file
+# from lycoris.config import PRESET
 from torch.utils.data import DataLoader
+import torch
+import torch.backends.cuda
 
-from toolkit.data_loader import get_dataloader_from_datasets
+from toolkit.basic import value_map
+from toolkit.data_loader import get_dataloader_from_datasets, trigger_dataloader_setup_epoch
+from toolkit.data_transfer_object.data_loader import FileItemDTO, DataLoaderBatchDTO
 from toolkit.embedding import Embedding
+from toolkit.ip_adapter import IPAdapter
 from toolkit.lora_special import LoRASpecialNetwork
+from toolkit.lorm import convert_diffusers_unet_to_lorm, count_parameters, print_lorm_extract_details, \
+    lorm_ignore_if_contains, lorm_parameter_threshold, LORM_TARGET_REPLACE_MODULE
+from toolkit.lycoris_special import LycorisSpecialNetwork
+from toolkit.network_mixins import Network
 from toolkit.optimizer import get_optimizer
 from toolkit.paths import CONFIG_ROOT
+from toolkit.progress_bar import ToolkitProgressBar
+from toolkit.sampler import get_sampler
+from toolkit.saving import save_t2i_from_diffusers, load_t2i_model, save_ip_adapter_from_diffusers, \
+    load_ip_adapter_model
 
 from toolkit.scheduler import get_lr_scheduler
+from toolkit.sd_device_states_presets import get_train_sd_device_state_preset
 from toolkit.stable_diffusion_model import StableDiffusion
 
 from jobs.process import BaseTrainProcess
-from toolkit.metadata import get_meta_for_safetensors, load_metadata_from_safetensors, add_base_model_info_to_meta
-from toolkit.train_tools import get_torch_dtype
+from toolkit.metadata import get_meta_for_safetensors, load_metadata_from_safetensors, add_base_model_info_to_meta, \
+    parse_metadata_from_safetensors
+from toolkit.train_tools import get_torch_dtype, LearnableSNRGamma, apply_learnable_snr_gos, apply_snr_weight
 import gc
 
-import torch
 from tqdm import tqdm
 
 from toolkit.config_modules import SaveConfig, LogingConfig, SampleConfig, NetworkConfig, TrainConfig, ModelConfig, \
-    GenerateImageConfig, EmbeddingConfig, DatasetConfig
+    GenerateImageConfig, EmbeddingConfig, DatasetConfig, preprocess_dataset_raw_config, AdapterConfig, GuidanceConfig
 
 
 def flush():
@@ -41,6 +64,11 @@ class BaseSDTrainProcess(BaseTrainProcess):
         self.custom_pipeline = custom_pipeline
         self.step_num = 0
         self.start_step = 0
+        self.epoch_num = 0
+        # start at 1 so we can do a sample at the start
+        self.grad_accumulation_step = 1
+        # if true, then we do not do an optimizer step. We are accumulating gradients
+        self.is_grad_accumulation_step = False
         self.device = self.get_conf('device', self.job.device)
         self.device_torch = torch.device(self.device)
         network_config = self.get_conf('network', None)
@@ -60,18 +88,31 @@ class BaseSDTrainProcess(BaseTrainProcess):
             self.has_first_sample_requested = False
             self.first_sample_config = self.sample_config
         self.logging_config = LogingConfig(**self.get_conf('logging', {}))
-        self.optimizer = None
+        self.optimizer: torch.optim.Optimizer = None
         self.lr_scheduler = None
         self.data_loader: Union[DataLoader, None] = None
         self.data_loader_reg: Union[DataLoader, None] = None
         self.trigger_word = self.get_conf('trigger_word', None)
 
+        self.guidance_config: Union[GuidanceConfig, None] = None
+        guidance_config_raw = self.get_conf('guidance', None)
+        if guidance_config_raw is not None:
+            self.guidance_config = GuidanceConfig(**guidance_config_raw)
+
+        # store if all are cached. Allows us to not load vae if we don't need to
+        self.is_latents_cached = True
         raw_datasets = self.get_conf('datasets', None)
+        if raw_datasets is not None and len(raw_datasets) > 0:
+            raw_datasets = preprocess_dataset_raw_config(raw_datasets)
         self.datasets = None
         self.datasets_reg = None
+        self.params = []
         if raw_datasets is not None and len(raw_datasets) > 0:
             for raw_dataset in raw_datasets:
                 dataset = DatasetConfig(**raw_dataset)
+                is_caching = dataset.cache_latents or dataset.cache_latents_to_disk
+                if not is_caching:
+                    self.is_latents_cached = False
                 if dataset.is_reg:
                     if self.datasets_reg is None:
                         self.datasets_reg = []
@@ -86,18 +127,55 @@ class BaseSDTrainProcess(BaseTrainProcess):
         if embedding_raw is not None:
             self.embed_config = EmbeddingConfig(**embedding_raw)
 
-        self.sd = StableDiffusion(
-            device=self.device,
-            model_config=self.model_config,
-            dtype=self.train_config.dtype,
-            custom_pipeline=self.custom_pipeline,
-        )
+        # t2i adapter
+        self.adapter_config = None
+        adapter_raw = self.get_conf('adapter', None)
+        if adapter_raw is not None:
+            self.adapter_config = AdapterConfig(**adapter_raw)
+            # sdxl adapters end in _xl. Only full_adapter_xl for now
+            if self.model_config.is_xl and not self.adapter_config.adapter_type.endswith('_xl'):
+                self.adapter_config.adapter_type += '_xl'
 
         # to hold network if there is one
-        self.network = None
-        self.embedding = None
+        self.network: Union[Network, None] = None
+        self.adapter: Union[T2IAdapter, IPAdapter, None] = None
+        self.embedding: Union[Embedding, None] = None
+
+        is_training_adapter = self.adapter_config is not None and self.adapter_config.train
+
+        self.do_lorm = self.get_conf('do_lorm', False)
+        self.lorm_extract_mode = self.get_conf('lorm_extract_mode', 'ratio')
+        self.lorm_extract_mode_param = self.get_conf('lorm_extract_mode_param', 0.25)
+        # 'ratio', 0.25)
+
+        # get the device state preset based on what we are training
+        self.train_device_state_preset = get_train_sd_device_state_preset(
+            device=self.device_torch,
+            train_unet=self.train_config.train_unet,
+            train_text_encoder=self.train_config.train_text_encoder,
+            cached_latents=self.is_latents_cached,
+            train_lora=self.network_config is not None,
+            train_adapter=is_training_adapter,
+            train_embedding=self.embed_config is not None,
+            train_refiner=self.train_config.train_refiner,
+        )
+
+        # fine_tuning here is for training actual SD network, not LoRA, embeddings, etc. it is (Dreambooth, etc)
+        self.is_fine_tuning = True
+        if self.network_config is not None or is_training_adapter or self.embed_config is not None:
+            self.is_fine_tuning = False
+
+        self.named_lora = False
+        if self.embed_config is not None or is_training_adapter:
+            self.named_lora = True
+        self.snr_gos: Union[LearnableSNRGamma, None] = None
+
+    def post_process_generate_image_config_list(self, generate_image_config_list: List[GenerateImageConfig]):
+        # pass-through by default; override in subclass. sample() sends the returned list to self.sd.generate_images
+        return generate_image_config_list
 
     def sample(self, step=None, is_first=False):
+        flush()
         sample_folder = os.path.join(self.save_root, 'samples')
         gen_img_config_list = []
 
@@ -113,7 +191,7 @@ class BaseSDTrainProcess(BaseTrainProcess):
                 # zero-pad 9 digits
                 step_num = f"_{str(step).zfill(9)}"
 
-            filename = f"[time]_{step_num}_[count].png"
+            filename = f"[time]_{step_num}_[count].{self.sample_config.ext}"
 
             output_path = os.path.join(sample_folder, filename)
 
@@ -124,13 +202,17 @@ class BaseSDTrainProcess(BaseTrainProcess):
             # ie test123 will become test123 test123_1 test123_2 etc. Do not add this yourself here
             if self.embedding is not None:
                 prompt = self.embedding.inject_embedding_to_prompt(
-                    prompt,
+                    prompt, add_if_not_present=False
                 )
             if self.trigger_word is not None:
                 prompt = self.sd.inject_trigger_into_prompt(
-                    prompt, self.trigger_word
+                    prompt, self.trigger_word, add_if_not_present=False
                 )
 
+            extra_args = {}
+            if self.adapter_config is not None and self.adapter_config.test_img_path is not None:
+                extra_args['adapter_image_path'] = self.adapter_config.test_img_path
+
             gen_img_config_list.append(GenerateImageConfig(
                 prompt=prompt,  # it will autoparse the prompt
                 width=sample_config.width,
@@ -142,10 +224,17 @@ class BaseSDTrainProcess(BaseTrainProcess):
                 num_inference_steps=sample_config.sample_steps,
                 network_multiplier=sample_config.network_multiplier,
                 output_path=output_path,
+                output_ext=sample_config.ext,
+                adapter_conditioning_scale=sample_config.adapter_conditioning_scale,
+                refiner_start_at=sample_config.refiner_start_at,
+                **extra_args
             ))
 
+        # post process
+        gen_img_config_list = self.post_process_generate_image_config_list(gen_img_config_list)
+
         # send to be generated
-        self.sd.generate_images(gen_img_config_list)
+        self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler)
 
     def update_training_metadata(self):
         o_dict = OrderedDict({
@@ -167,31 +256,86 @@ class BaseSDTrainProcess(BaseTrainProcess):
         )
         o_dict['ss_output_name'] = self.job.name
 
+        if self.trigger_word is not None:
+            # just so auto1111 will pick it up
+            o_dict['ss_tag_frequency'] = {
+                f"1_{self.trigger_word}": {
+                    f"{self.trigger_word}": 1
+                }
+            }
+
         self.add_meta(o_dict)
 
     def get_training_info(self):
         info = OrderedDict({
-            'step': self.step_num + 1
+            'step': self.step_num,
+            'epoch': self.epoch_num,
         })
         return info
 
     def clean_up_saves(self):
         # remove old saves
         # get latest saved step
+        latest_item = None
         if os.path.exists(self.save_root):
-            latest_file = None
-            # pattern is {job_name}_{zero_filles_step}.safetensors but NOT {job_name}.safetensors
-            pattern = f"{self.job.name}_*.safetensors"
-            files = glob.glob(os.path.join(self.save_root, pattern))
-            if len(files) > self.save_config.max_step_saves_to_keep:
-                # remove all but the latest max_step_saves_to_keep
-                files.sort(key=os.path.getctime)
-                for file in files[:-self.save_config.max_step_saves_to_keep]:
-                    self.print(f"Removing old save: {file}")
-                    os.remove(file)
-            return latest_file
-        else:
-            return None
+            # pattern is {job_name}_{zero_filled_step} for both files and directories
+            pattern = f"{self.job.name}_*"
+            items = glob.glob(os.path.join(self.save_root, pattern))
+            # Separate files and directories
+            safetensors_files = [f for f in items if f.endswith('.safetensors')]
+            pt_files = [f for f in items if f.endswith('.pt')]
+            directories = [d for d in items if os.path.isdir(d) and not d.endswith('.safetensors')]
+            embed_files = []
+            # do embedding files
+            if self.embed_config is not None:
+                embed_pattern = f"{self.embed_config.trigger}_*"
+                embed_items = glob.glob(os.path.join(self.save_root, embed_pattern))
+                # will end in safetensors or pt
+                embed_files = [f for f in embed_items if f.endswith('.safetensors') or f.endswith('.pt')]
+
+            # Sort the lists by creation time if they are not empty
+            if safetensors_files:
+                safetensors_files.sort(key=os.path.getctime)
+            if pt_files:
+                pt_files.sort(key=os.path.getctime)
+            if directories:
+                directories.sort(key=os.path.getctime)
+            if embed_files:
+                embed_files.sort(key=os.path.getctime)
+
+            # Combine and sort the lists
+            combined_items = safetensors_files + directories + pt_files
+            combined_items.sort(key=os.path.getctime)
+
+            # Use slicing with a check to avoid 'NoneType' error
+            safetensors_to_remove = safetensors_files[
+                                    :-self.save_config.max_step_saves_to_keep] if safetensors_files else []
+            pt_files_to_remove = pt_files[:-self.save_config.max_step_saves_to_keep] if pt_files else []
+            directories_to_remove = directories[:-self.save_config.max_step_saves_to_keep] if directories else []
+            embeddings_to_remove = embed_files[:-self.save_config.max_step_saves_to_keep] if embed_files else []
+
+            items_to_remove = safetensors_to_remove + pt_files_to_remove + directories_to_remove + embeddings_to_remove
+
+            # remove all but the latest max_step_saves_to_keep
+            # items_to_remove = combined_items[:-self.save_config.max_step_saves_to_keep]
+
+            for item in items_to_remove:
+                self.print(f"Removing old save: {item}")
+                if os.path.isdir(item):
+                    shutil.rmtree(item)
+                else:
+                    os.remove(item)
+                # see if a yaml file with same name exists
+                yaml_file = os.path.splitext(item)[0] + ".yaml"
+                if os.path.exists(yaml_file):
+                    os.remove(yaml_file)
+            if combined_items:
+                latest_item = combined_items[-1]
+        return latest_item
+
+    def post_save_hook(self, save_path):
+        # override in subclass
+        pass
 
     def save(self, step=None):
         if not os.path.exists(self.save_root):
@@ -207,35 +351,119 @@ class BaseSDTrainProcess(BaseTrainProcess):
         file_path = os.path.join(self.save_root, filename)
         # prepare meta
         save_meta = get_meta_for_safetensors(self.meta, self.job.name)
-        if self.network is not None:
-            prev_multiplier = self.network.multiplier
-            self.network.multiplier = 1.0
-            if self.network_config.normalize:
-                # apply the normalization
-                self.network.apply_stored_normalizer()
-            self.network.save_weights(
-                file_path,
-                dtype=get_torch_dtype(self.save_config.dtype),
-                metadata=save_meta
-            )
-            self.network.multiplier = prev_multiplier
-        elif self.embedding is not None:
-            # set current step
-            self.embedding.step = self.step_num
-            # change filename to pt if that is set
-            if self.embed_config.save_format == "pt":
-                # replace extension
-                file_path = os.path.splitext(file_path)[0] + ".pt"
-            self.embedding.save(file_path)
+        if not self.is_fine_tuning:
+            if self.network is not None:
+                lora_name = self.job.name
+                if self.named_lora:
+                    # add the _LoRA suffix to the name
+                    lora_name += '_LoRA'
+
+                filename = f'{lora_name}{step_num}.safetensors'
+                file_path = os.path.join(self.save_root, filename)
+                prev_multiplier = self.network.multiplier
+                self.network.multiplier = 1.0
+
+                # if we are doing embedding training as well, add that
+                embedding_dict = self.embedding.state_dict() if self.embedding else None
+                self.network.save_weights(
+                    file_path,
+                    dtype=get_torch_dtype(self.save_config.dtype),
+                    metadata=save_meta,
+                    extra_state_dict=embedding_dict
+                )
+                self.network.multiplier = prev_multiplier
+                # if we have an embedding as well, pair it with the network
+
+            # even if added to lora, still save the trigger version
+            if self.embedding is not None:
+                emb_filename = f'{self.embed_config.trigger}{step_num}.safetensors'
+                emb_file_path = os.path.join(self.save_root, emb_filename)
+                # for combo, above will get it
+                # set current step
+                self.embedding.step = self.step_num
+                # change filename to pt if that is set
+                if self.embed_config.save_format == "pt":
+                    # replace extension
+                    emb_file_path = os.path.splitext(emb_file_path)[0] + ".pt"
+                self.embedding.save(emb_file_path)
+
+            if self.adapter is not None and self.adapter_config.train:
+                adapter_name = self.job.name
+                if self.network_config is not None or self.embedding is not None:
+                    # add the adapter type suffix (_t2i or _ip) to the name
+                    if self.adapter_config.type == 't2i':
+                        adapter_name += '_t2i'
+                    else:
+                        adapter_name += '_ip'
+
+                filename = f'{adapter_name}{step_num}.safetensors'
+                file_path = os.path.join(self.save_root, filename)
+                # save adapter
+                state_dict = self.adapter.state_dict()
+                if self.adapter_config.type == 't2i':
+                    save_t2i_from_diffusers(
+                        state_dict,
+                        output_file=file_path,
+                        meta=save_meta,
+                        dtype=get_torch_dtype(self.save_config.dtype)
+                    )
+                else:
+                    save_ip_adapter_from_diffusers(
+                        state_dict,
+                        output_file=file_path,
+                        meta=save_meta,
+                        dtype=get_torch_dtype(self.save_config.dtype)
+                    )
         else:
-            self.sd.save(
-                file_path,
-                save_meta,
-                get_torch_dtype(self.save_config.dtype)
-            )
+            if self.save_config.save_format == "diffusers":
+                # saving as a folder path
+                file_path = file_path.replace('.safetensors', '')
+                # convert it back to normal object
+                save_meta = parse_metadata_from_safetensors(save_meta)
+
+            if self.sd.refiner_unet and self.train_config.train_refiner:
+                # save refiner
+                refiner_name = self.job.name + '_refiner'
+                filename = f'{refiner_name}{step_num}.safetensors'
+                file_path = os.path.join(self.save_root, filename)
+                self.sd.save_refiner(
+                    file_path,
+                    save_meta,
+                    get_torch_dtype(self.save_config.dtype)
+                )
+            if self.train_config.train_unet or self.train_config.train_text_encoder:
+                self.sd.save(
+                    file_path,
+                    save_meta,
+                    get_torch_dtype(self.save_config.dtype)
+                )
+
+        # save learnable params as json if we have them
+        if self.snr_gos:
+            json_data = {
+                'offset_1': self.snr_gos.offset_1.item(),
+                'offset_2': self.snr_gos.offset_2.item(),
+                'scale': self.snr_gos.scale.item(),
+                'gamma': self.snr_gos.gamma.item(),
+            }
+            path_to_save = file_path = os.path.join(self.save_root, 'learnable_snr.json')
+            with open(path_to_save, 'w') as f:
+                json.dump(json_data, f, indent=4)
+
+        # save optimizer
+        if self.optimizer is not None:
+            try:
+                filename = f'optimizer.pt'
+                file_path = os.path.join(self.save_root, filename)
+                torch.save(self.optimizer.state_dict(), file_path)
+            except Exception as e:
+                print(e)
+                print("Could not save optimizer")
 
         self.print(f"Saved to {file_path}")
         self.clean_up_saves()
+        self.post_save_hook(file_path)
+        flush()
 
     # Called before the model is loaded
     def hook_before_model_load(self):
@@ -252,97 +480,346 @@ class BaseSDTrainProcess(BaseTrainProcess):
     def before_dataset_load(self):
         pass
 
+    def get_params(self):
+        # you can extend this in subclass to get params
+        # otherwise params will be gathered through normal means
+        return None
+
     def hook_train_loop(self, batch):
         # return loss
         return 0.0
 
-    def get_latest_save_path(self):
+    def get_latest_save_path(self, name=None, post=''):
+        if name == None:
+            name = self.job.name
         # get latest saved step
+        latest_path = None
         if os.path.exists(self.save_root):
-            latest_file = None
-            # pattern is {job_name}_{zero_filles_step}.safetensors or {job_name}.safetensors
-            pattern = f"{self.job.name}*.safetensors"
-            files = glob.glob(os.path.join(self.save_root, pattern))
-            if len(files) > 0:
-                latest_file = max(files, key=os.path.getctime)
-            # try pt
-            pattern = f"{self.job.name}*.pt"
-            files = glob.glob(os.path.join(self.save_root, pattern))
-            if len(files) > 0:
-                latest_file = max(files, key=os.path.getctime)
-            return latest_file
+            # Define patterns for both files and directories
+            patterns = [
+                f"{name}*{post}.safetensors",
+                f"{name}*{post}.pt",
+                f"{name}*{post}"
+            ]
+            # Search for both files and directories
+            paths = []
+            for pattern in patterns:
+                paths.extend(glob.glob(os.path.join(self.save_root, pattern)))
+
+            # Filter out non-existent paths, then pick the most recently created one
+            if paths:
+                paths = [p for p in paths if os.path.exists(p)]
+                # remove false positives
+                if '_LoRA' not in name:
+                    paths = [p for p in paths if '_LoRA' not in p]
+                if '_refiner' not in name:
+                    paths = [p for p in paths if '_refiner' not in p]
+                if '_t2i' not in name:
+                    paths = [p for p in paths if '_t2i' not in p]
+
+                if len(paths) > 0:
+                    latest_path = max(paths, key=os.path.getctime)
+
+        return latest_path
+
+    def load_training_state_from_metadata(self, path):
+        # if path is folder, then it is diffusers
+        if os.path.isdir(path):
+            meta_path = os.path.join(path, 'aitk_meta.yaml')
+            # load it
+            with open(meta_path, 'r') as f:
+                meta = yaml.load(f, Loader=yaml.FullLoader)
         else:
-            return None
+            meta = load_metadata_from_safetensors(path)
+        # resume step/epoch from 'training_info' in the metadata unless start_step is set in the config
+        if 'training_info' in meta and 'step' in meta['training_info'] and self.train_config.start_step is None:
+            self.step_num = meta['training_info']['step']
+            if 'epoch' in meta['training_info']:
+                self.epoch_num = meta['training_info']['epoch']
+            self.start_step = self.step_num
+            print(f"Found step {self.step_num} in metadata, starting from there")
 
     def load_weights(self, path):
         if self.network is not None:
-            self.network.load_weights(path)
-            meta = load_metadata_from_safetensors(path)
+            extra_weights = self.network.load_weights(path)
+            self.load_training_state_from_metadata(path)
+            return extra_weights
+        else:
+            print("load_weights not implemented for non-network models")
+            return None
+
+    def apply_snr(self, seperated_loss, timesteps):
+        if self.train_config.learnable_snr_gos:
+            # add snr_gamma
+            seperated_loss = apply_learnable_snr_gos(seperated_loss, timesteps, self.snr_gos)
+        elif self.train_config.snr_gamma is not None and self.train_config.snr_gamma > 0.000001:
+            # add snr_gamma
+            seperated_loss = apply_snr_weight(seperated_loss, timesteps, self.sd.noise_scheduler, self.train_config.snr_gamma, fixed=True)
+        elif self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001:
+            # add min_snr_gamma
+            seperated_loss = apply_snr_weight(seperated_loss, timesteps, self.sd.noise_scheduler, self.train_config.min_snr_gamma)
+
+        return seperated_loss
+
+    def load_lorm(self):
+        latest_save_path = self.get_latest_save_path()
+        if latest_save_path is not None:
+            # hacky way to reload weights for now
+            # todo, do this
+            state_dict = load_file(latest_save_path, device=self.device)
+            self.sd.unet.load_state_dict(state_dict)
+
+            meta = load_metadata_from_safetensors(latest_save_path)
             # if 'training_info' in Orderdict keys
             if 'training_info' in meta and 'step' in meta['training_info']:
                 self.step_num = meta['training_info']['step']
+                if 'epoch' in meta['training_info']:
+                    self.epoch_num = meta['training_info']['epoch']
                 self.start_step = self.step_num
                 print(f"Found step {self.step_num} in metadata, starting from there")
 
-        else:
-            print("load_weights not implemented for non-network models")
+    # def get_sigmas(self, timesteps, n_dim=4, dtype=torch.float32):
+    #     self.sd.noise_scheduler.set_timesteps(1000, device=self.device_torch)
+    #     sigmas = self.sd.noise_scheduler.sigmas.to(device=self.device_torch, dtype=dtype)
+    #     schedule_timesteps = self.sd.noise_scheduler.timesteps.to(self.device_torch, )
+    #     timesteps = timesteps.to(self.device_torch, )
+    #
+    #     # step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+    #     step_indices = [t for t in timesteps]
+    #
+    #     sigma = sigmas[step_indices].flatten()
+    #     while len(sigma.shape) < n_dim:
+    #         sigma = sigma.unsqueeze(-1)
+    #     return sigma
 
-    def process_general_training_batch(self, batch):
+    def load_additional_training_modules(self, params):
+        # override in subclass
+        return params
+
+    def get_sigmas(self, timesteps, n_dim=4, dtype=torch.float32):
+        sigmas = self.sd.noise_scheduler.sigmas.to(device=self.device, dtype=dtype)
+        schedule_timesteps = self.sd.noise_scheduler.timesteps.to(self.device)
+        timesteps = timesteps.to(self.device)
+
+        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+
+        sigma = sigmas[step_indices].flatten()
+        while len(sigma.shape) < n_dim:
+            sigma = sigma.unsqueeze(-1)
+        return sigma
+
+    def process_general_training_batch(self, batch: 'DataLoaderBatchDTO'):
         with torch.no_grad():
-            imgs, prompts, dataset_config = batch
+            with self.timer('prepare_prompt'):
+                prompts = batch.get_caption_list()
+                is_reg_list = batch.get_is_reg_list()
 
-            # convert the 0 or 1 for is reg to a bool list
-            if isinstance(dataset_config, list):
-                is_reg_list = [x.get('is_reg', 0) for x in dataset_config]
-            else:
-                is_reg_list = dataset_config.get('is_reg', [0 for _ in range(imgs.shape[0])])
-            if isinstance(is_reg_list, torch.Tensor):
-                is_reg_list = is_reg_list.numpy().tolist()
-            is_reg_list = [bool(x) for x in is_reg_list]
+                is_any_reg = any([is_reg for is_reg in is_reg_list])
 
-            conditioned_prompts = []
+                do_double = self.train_config.short_and_long_captions and not is_any_reg
 
-            for prompt, is_reg in zip(prompts, is_reg_list):
+                if self.train_config.short_and_long_captions and do_double:
+                    # don't do this when the batch contains regularization (reg) items. No point
 
-                # make sure the embedding is in the prompts
-                if self.embedding is not None:
-                    prompt = self.embedding.inject_embedding_to_prompt(
-                        prompt,
-                        expand_token=True,
-                        add_if_not_present=True,
+                    # double batch and add short captions to the end
+                    prompts = prompts + batch.get_caption_short_list()
+                    is_reg_list = is_reg_list + is_reg_list
+                if self.model_config.refiner_name_or_path is not None and self.train_config.train_unet:
+                    prompts = prompts + prompts
+                    is_reg_list = is_reg_list + is_reg_list
+
+                conditioned_prompts = []
+
+                for prompt, is_reg in zip(prompts, is_reg_list):
+
+                    # make sure the embedding is in the prompts
+                    if self.embedding is not None:
+                        prompt = self.embedding.inject_embedding_to_prompt(
+                            prompt,
+                            expand_token=True,
+                            add_if_not_present=not is_reg,
+                        )
+
+                    # make sure trigger is in the prompts if not a regularization run
+                    if self.trigger_word is not None:
+                        prompt = self.sd.inject_trigger_into_prompt(
+                            prompt,
+                            trigger=self.trigger_word,
+                            add_if_not_present=not is_reg,
+                        )
+                    conditioned_prompts.append(prompt)
+
+            with self.timer('prepare_latents'):
+                dtype = get_torch_dtype(self.train_config.dtype)
+                imgs = None
+                if batch.tensor is not None:
+                    imgs = batch.tensor
+                    imgs = imgs.to(self.device_torch, dtype=dtype)
+                if batch.latents is not None:
+                    latents = batch.latents.to(self.device_torch, dtype=dtype)
+                    batch.latents = latents
+                else:
+                    latents = self.sd.encode_images(imgs)
+                    batch.latents = latents
+
+                if batch.unconditional_tensor is not None and batch.unconditional_latents is None:
+                    unconditional_imgs = batch.unconditional_tensor
+                    unconditional_imgs = unconditional_imgs.to(self.device_torch, dtype=dtype)
+                    unconditional_latents = self.sd.encode_images(unconditional_imgs)
+                    batch.unconditional_latents = unconditional_latents * self.train_config.latent_multiplier
+
+                unaugmented_latents = None
+                if self.train_config.loss_target == 'differential_noise':
+                    # we determine noise from the differential of the latents
+                    unaugmented_latents = self.sd.encode_images(batch.unaugmented_tensor)
+
+            batch_size = len(batch.file_items)
+            min_noise_steps = self.train_config.min_denoising_steps
+            max_noise_steps = self.train_config.max_denoising_steps
+            if self.model_config.refiner_name_or_path is not None:
+                # if we are not training the unet, then we are only doing refiner and do not need to double up
+                if self.train_config.train_unet:
+                    max_noise_steps = round(self.train_config.max_denoising_steps * self.model_config.refiner_start_at)
+                    do_double = True
+                else:
+                    min_noise_steps = round(self.train_config.max_denoising_steps * self.model_config.refiner_start_at)
+                    do_double = False
+
+            with self.timer('prepare_noise'):
+                num_train_timesteps = self.sd.noise_scheduler.config['num_train_timesteps']
+
+                if self.train_config.noise_scheduler in ['custom_lcm']:
+                    # we store this value on our custom one
+                    self.sd.noise_scheduler.set_timesteps(
+                        self.sd.noise_scheduler.train_timesteps, device=self.device_torch
+                    )
+                elif self.train_config.noise_scheduler in ['lcm']:
+                    self.sd.noise_scheduler.set_timesteps(
+                        num_train_timesteps, device=self.device_torch, original_inference_steps=num_train_timesteps
+                    )
+                else:
+                    self.sd.noise_scheduler.set_timesteps(
+                        num_train_timesteps, device=self.device_torch
                     )
 
-                # make sure trigger is in the prompts if not a regularization run
-                if self.trigger_word is not None and not is_reg:
-                    prompt = self.sd.inject_trigger_into_prompt(
-                        prompt,
-                        trigger=self.trigger_word,
-                        add_if_not_present=True,
+                # if self.train_config.timestep_sampling == 'style' or self.train_config.timestep_sampling == 'content':
+                if self.train_config.content_or_style in ['style', 'content']:
+                    # this is from diffusers training code
+                    # Cubic sampling for favoring later or earlier timesteps
+                    # For more details about why cubic sampling is used for content / structure,
+                    # refer to section 3.4 of https://arxiv.org/abs/2302.08453
+
+                    # for content / structure, it is best to favor earlier timesteps
+                    # for style, it is best to favor later timesteps
+
+                    orig_timesteps = torch.rand((batch_size,), device=latents.device)
+
+                    if self.train_config.content_or_style == 'content':
+                        timestep_indices = orig_timesteps ** 3 * self.sd.noise_scheduler.config['num_train_timesteps']
+                    elif self.train_config.content_or_style == 'style':
+                        timestep_indices = (1 - orig_timesteps ** 3) * self.sd.noise_scheduler.config['num_train_timesteps']
+
+                    timestep_indices = value_map(
+                        timestep_indices,
+                        0,
+                        self.sd.noise_scheduler.config['num_train_timesteps'] - 1,
+                        min_noise_steps,
+                        max_noise_steps - 1
+                    )
+                    timestep_indices = timestep_indices.long().clamp(
+                        min_noise_steps + 1,
+                        max_noise_steps - 1
                     )
-                conditioned_prompts.append(prompt)
 
-            batch_size = imgs.shape[0]
+                elif self.train_config.content_or_style == 'balanced':
+                    if min_noise_steps == max_noise_steps:
+                        timestep_indices = torch.ones((batch_size,), device=self.device_torch) * min_noise_steps
+                    else:
+                        # todo, some schedulers use indices, others use timesteps. Not sure what to do here
+                        timestep_indices = torch.randint(
+                            min_noise_steps + 1,
+                            max_noise_steps - 1,
+                            (batch_size,),
+                            device=self.device_torch
+                        )
+                    timestep_indices = timestep_indices.long()
+                else:
+                    raise ValueError(f"Unknown content_or_style {self.train_config.content_or_style}")
 
-            dtype = get_torch_dtype(self.train_config.dtype)
-            imgs = imgs.to(self.device_torch, dtype=dtype)
-            latents = self.sd.encode_images(imgs)
+                # convert the timestep_indices to a timestep
+                timesteps = [self.sd.noise_scheduler.timesteps[x.item()] for x in timestep_indices]
+                timesteps = torch.stack(timesteps, dim=0)
 
-            self.sd.noise_scheduler.set_timesteps(
-                self.train_config.max_denoising_steps, device=self.device_torch
-            )
+                # get noise
+                noise = self.sd.get_latent_noise(
+                    height=latents.shape[2],
+                    width=latents.shape[3],
+                    batch_size=batch_size,
+                    noise_offset=self.train_config.noise_offset
+                ).to(self.device_torch, dtype=dtype)
 
-            timesteps = torch.randint(0, self.train_config.max_denoising_steps, (batch_size,), device=self.device_torch)
-            timesteps = timesteps.long()
+                if self.train_config.loss_target == 'differential_noise':
+                    differential = latents - unaugmented_latents
+                    # add noise to differential
+                    # noise = noise + differential
+                    noise = noise + (differential * 0.5)
+                    # noise = value_map(differential, 0, torch.abs(differential).max(), 0, torch.abs(noise).max())
+                    latents = unaugmented_latents
 
-            # get noise
-            noise = self.sd.get_latent_noise(
-                pixel_height=imgs.shape[2],
-                pixel_width=imgs.shape[3],
-                batch_size=batch_size,
-                noise_offset=self.train_config.noise_offset
-            ).to(self.device_torch, dtype=dtype)
+                noise_multiplier = self.train_config.noise_multiplier
 
-            noisy_latents = self.sd.noise_scheduler.add_noise(latents, noise, timesteps)
+                noise = noise * noise_multiplier
+
+                latents = latents * self.train_config.latent_multiplier
+
+                if batch.unconditional_latents is not None:
+                    batch.unconditional_latents = batch.unconditional_latents * self.train_config.latent_multiplier
+
+                noisy_latents = self.sd.noise_scheduler.add_noise(latents, noise, timesteps)
+
+                # determine scaled noise
+                # todo do we need to scale this or does it always predict full intensity
+                # noise = noisy_latents - latents
+
+                # https://github.com/huggingface/diffusers/blob/324d18fba23f6c9d7475b0ff7c777685f7128d40/examples/t2i_adapter/train_t2i_adapter_sdxl.py#L1170C17-L1171C77
+                if self.train_config.loss_target == 'source' or self.train_config.loss_target == 'unaugmented':
+                    sigmas = self.get_sigmas(timesteps, len(noisy_latents.shape), noisy_latents.dtype)
+                    # add it to the batch
+                    batch.sigmas = sigmas
+                    # todo is this for sdxl? find out where this came from originally
+                    # noisy_latents = noisy_latents / ((sigmas ** 2 + 1) ** 0.5)
+
+            def double_up_tensor(tensor: torch.Tensor):
+                if tensor is None:
+                    return None
+                return torch.cat([tensor, tensor], dim=0)
+
+            if do_double:
+                if self.model_config.refiner_name_or_path:
+                    # apply refiner double up
+                    refiner_timesteps = torch.randint(
+                        max_noise_steps,
+                        self.train_config.max_denoising_steps,
+                        (batch_size,),
+                        device=self.device_torch
+                    )
+                    refiner_timesteps = refiner_timesteps.long()
+                    # add our new timesteps on to end
+                    timesteps = torch.cat([timesteps, refiner_timesteps], dim=0)
+
+                    refiner_noisy_latents = self.sd.noise_scheduler.add_noise(latents, noise, refiner_timesteps)
+                    noisy_latents = torch.cat([noisy_latents, refiner_noisy_latents], dim=0)
+
+                else:
+                    # just double it
+                    noisy_latents = double_up_tensor(noisy_latents)
+                    timesteps = double_up_tensor(timesteps)
+
+                noise = double_up_tensor(noise)
+                # prompts are already updated above
+                imgs = double_up_tensor(imgs)
+                batch.mask_tensor = double_up_tensor(batch.mask_tensor)
+                batch.control_tensor = double_up_tensor(batch.control_tensor)
 
             # remove grads for these
             noisy_latents.requires_grad = False
@@ -352,26 +829,100 @@ class BaseSDTrainProcess(BaseTrainProcess):
 
         return noisy_latents, noise, timesteps, conditioned_prompts, imgs
 
+    def setup_adapter(self):
+        """Create the T2I or IP adapter, resume it from the latest save if one exists, and attach it to ``self.sd``."""
+        is_t2i = self.adapter_config.type == 't2i'
+        suffix = 't2i' if is_t2i else 'ip'
+        adapter_name = self.name
+        if self.network_config is not None:
+            adapter_name = f"{adapter_name}_{suffix}"
+        latest_save_path = self.get_latest_save_path(adapter_name)
+
+        dtype = get_torch_dtype(self.train_config.dtype)
+        if is_t2i:
+            # with nothing to resume from, start from the pretrained adapter named in
+            # the config (if any); otherwise build an untrained one from the config
+            if latest_save_path is None and self.adapter_config.name_or_path is not None:
+                self.adapter = T2IAdapter.from_pretrained(
+                    self.adapter_config.name_or_path,
+                    torch_dtype=dtype,
+                    variant="fp16",
+                    # NOTE(review): assumes the checkpoint publishes fp16 variant weights - confirm
+                )
+            else:
+                self.adapter = T2IAdapter(
+                    in_channels=self.adapter_config.in_channels,
+                    channels=self.adapter_config.channels,
+                    num_res_blocks=self.adapter_config.num_res_blocks,
+                    downscale_factor=self.adapter_config.downscale_factor,
+                    adapter_type=self.adapter_config.adapter_type,
+                )
+        else:
+            self.adapter = IPAdapter(
+                sd=self.sd,
+                adapter_config=self.adapter_config,
+            )
+        self.adapter.to(self.device_torch, dtype=dtype)
+        if latest_save_path is not None:
+            # resume: replace the freshly built weights with the latest saved ones
+            print(f"Loading adapter from {latest_save_path}")
+            if is_t2i:
+                loaded_state_dict = load_t2i_model(
+                    latest_save_path,
+                    self.device,
+                    dtype=dtype
+                )
+            else:
+                loaded_state_dict = load_ip_adapter_model(
+                    latest_save_path,
+                    self.device,
+                    dtype=dtype
+                )
+            self.adapter.load_state_dict(loaded_state_dict)
+            if self.adapter_config.train:
+                self.load_training_state_from_metadata(latest_save_path)
+        # hand the adapter to the SD wrapper so the model can use it
+        self.sd.adapter = self.adapter
+
     def run(self):
+        # torch.autograd.set_detect_anomaly(True)
         # run base process run
         BaseTrainProcess.run(self)
-        ### HOOk ###
-        self.before_dataset_load()
-        # load datasets if passed in the root process
-        if self.datasets is not None:
-            self.data_loader = get_dataloader_from_datasets(self.datasets, self.train_config.batch_size)
-        if self.datasets_reg is not None:
-            self.data_loader_reg = get_dataloader_from_datasets(self.datasets_reg, self.train_config.batch_size)
+        params = []
 
         ### HOOK ###
         self.hook_before_model_load()
+        model_config_to_load = copy.deepcopy(self.model_config)
+
+        if self.is_fine_tuning:
+            # get the latest checkpoint
+            # check to see if we have a latest save
+            latest_save_path = self.get_latest_save_path()
+
+            if latest_save_path is not None:
+                print(f"#### IMPORTANT RESUMING FROM {latest_save_path} ####")
+                model_config_to_load.name_or_path = latest_save_path
+                self.load_training_state_from_metadata(latest_save_path)
+
+        # get the noise scheduler
+        sampler = get_sampler(self.train_config.noise_scheduler)
+
+        if self.train_config.train_refiner and self.model_config.refiner_name_or_path is not None and self.network_config is None:
+            previous_refiner_save = self.get_latest_save_path(self.job.name + '_refiner')
+            if previous_refiner_save is not None:
+                model_config_to_load.refiner_name_or_path = previous_refiner_save
+                self.load_training_state_from_metadata(previous_refiner_save)
+
+        self.sd = StableDiffusion(
+            device=self.device,
+            model_config=model_config_to_load,
+            dtype=self.train_config.dtype,
+            custom_pipeline=self.custom_pipeline,
+            noise_scheduler=sampler,
+        )
         # run base sd process run
         self.sd.load_model()
 
-        if self.train_config.gradient_checkpointing:
-            # may get disabled elsewhere
-            self.sd.unet.enable_gradient_checkpointing()
-
         dtype = get_torch_dtype(self.train_config.dtype)
 
         # model is loaded from BaseSDProcess
@@ -382,139 +933,309 @@ class BaseSDTrainProcess(BaseTrainProcess):
         noise_scheduler = self.sd.noise_scheduler
 
         if self.train_config.xformers:
-            vae.set_use_memory_efficient_attention_xformers(True)
+            vae.enable_xformers_memory_efficient_attention()
             unet.enable_xformers_memory_efficient_attention()
+            if isinstance(text_encoder, list):
+                for te in text_encoder:
+                    # if it has it
+                    if hasattr(te, 'enable_xformers_memory_efficient_attention'):
+                        te.enable_xformers_memory_efficient_attention()
+        if self.train_config.sdp:
+            torch.backends.cuda.enable_math_sdp(True)
+            torch.backends.cuda.enable_flash_sdp(True)
+            torch.backends.cuda.enable_mem_efficient_sdp(True)
+
         if self.train_config.gradient_checkpointing:
             unet.enable_gradient_checkpointing()
-            # if isinstance(text_encoder, list):
-            #     for te in text_encoder:
-            #         te.enable_gradient_checkpointing()
-            # else:
-            #     text_encoder.enable_gradient_checkpointing()
+            if isinstance(text_encoder, list):
+                for te in text_encoder:
+                    if hasattr(te, 'enable_gradient_checkpointing'):
+                        te.enable_gradient_checkpointing()
+                    if hasattr(te, "gradient_checkpointing_enable"):
+                        te.gradient_checkpointing_enable()
+            else:
+                if hasattr(text_encoder, 'enable_gradient_checkpointing'):
+                    text_encoder.enable_gradient_checkpointing()
+                if hasattr(text_encoder, "gradient_checkpointing_enable"):
+                    text_encoder.gradient_checkpointing_enable()
 
+        if self.sd.refiner_unet is not None:
+            self.sd.refiner_unet.to(self.device_torch, dtype=dtype)
+            self.sd.refiner_unet.requires_grad_(False)
+            self.sd.refiner_unet.eval()
+            if self.train_config.xformers:
+                self.sd.refiner_unet.enable_xformers_memory_efficient_attention()
+            if self.train_config.gradient_checkpointing:
+                self.sd.refiner_unet.enable_gradient_checkpointing()
+
+        if isinstance(text_encoder, list):
+            for te in text_encoder:
+                te.requires_grad_(False)
+                te.eval()
+        else:
+            text_encoder.requires_grad_(False)
+            text_encoder.eval()
         unet.to(self.device_torch, dtype=dtype)
         unet.requires_grad_(False)
         unet.eval()
         vae = vae.to(torch.device('cpu'), dtype=dtype)
         vae.requires_grad_(False)
         vae.eval()
-
-        if self.network_config is not None:
-            self.network = LoRASpecialNetwork(
-                text_encoder=text_encoder,
-                unet=unet,
-                lora_dim=self.network_config.linear,
-                multiplier=1.0,
-                alpha=self.network_config.linear_alpha,
-                train_unet=self.train_config.train_unet,
-                train_text_encoder=self.train_config.train_text_encoder,
-                conv_lora_dim=self.network_config.conv,
-                conv_alpha=self.network_config.conv_alpha,
+        if self.train_config.learnable_snr_gos:
+            self.snr_gos = LearnableSNRGamma(
+                self.sd.noise_scheduler, device=self.device_torch
             )
+            # check to see if previous settings exist
+            path_to_load = os.path.join(self.save_root, 'learnable_snr.json')
+            if os.path.exists(path_to_load):
+                with open(path_to_load, 'r') as f:
+                    json_data = json.load(f)
+                    if 'offset' in json_data:
+                        # legacy
+                        self.snr_gos.offset_2.data = torch.tensor(json_data['offset'], device=self.device_torch)
+                    else:
+                        self.snr_gos.offset_1.data = torch.tensor(json_data['offset_1'], device=self.device_torch)
+                        self.snr_gos.offset_2.data = torch.tensor(json_data['offset_2'], device=self.device_torch)
+                    self.snr_gos.scale.data = torch.tensor(json_data['scale'], device=self.device_torch)
+                    self.snr_gos.gamma.data = torch.tensor(json_data['gamma'], device=self.device_torch)
 
-            self.network.force_to(self.device_torch, dtype=dtype)
-            # give network to sd so it can use it
-            self.sd.network = self.network
+        flush()
 
-            self.network.apply_to(
-                text_encoder,
-                unet,
-                self.train_config.train_text_encoder,
-                self.train_config.train_unet
-            )
+        ### HOOK ###
+        self.before_dataset_load()
+        # load datasets if passed in the root process
+        if self.datasets is not None:
+            self.data_loader = get_dataloader_from_datasets(self.datasets, self.train_config.batch_size, self.sd)
+        if self.datasets_reg is not None:
+            self.data_loader_reg = get_dataloader_from_datasets(self.datasets_reg, self.train_config.batch_size,
+                                                                self.sd)
+        if not self.is_fine_tuning:
+            if self.network_config is not None:
+                # TODO should we completely switch to LycorisSpecialNetwork?
+                network_kwargs = {}
+                is_lycoris = False
+                is_lorm = self.network_config.type.lower() == 'lorm'
+                # default to the plain LoRA network; 'locon' / 'lycoris' types use the LyCORIS network instead
+                NetworkClass = LoRASpecialNetwork
+                if self.network_config.type.lower() == 'locon' or self.network_config.type.lower() == 'lycoris':
+                    NetworkClass = LycorisSpecialNetwork
+                    is_lycoris = True
 
-            self.network.prepare_grad_etc(text_encoder, unet)
+                if is_lorm:
+                    network_kwargs['ignore_if_contains'] = lorm_ignore_if_contains
+                    network_kwargs['parameter_threshold'] = lorm_parameter_threshold
+                    network_kwargs['target_lin_modules'] = LORM_TARGET_REPLACE_MODULE
 
-            params = self.network.prepare_optimizer_params(
-                text_encoder_lr=self.train_config.lr,
-                unet_lr=self.train_config.lr,
-                default_lr=self.train_config.lr
-            )
+                # if is_lycoris:
+                #     preset = PRESET['full']
+                # NetworkClass.apply_preset(preset)
 
-            if self.train_config.gradient_checkpointing:
-                self.network.enable_gradient_checkpointing()
+                self.network = NetworkClass(
+                    text_encoder=text_encoder,
+                    unet=unet,
+                    lora_dim=self.network_config.linear,
+                    multiplier=1.0,
+                    alpha=self.network_config.linear_alpha,
+                    train_unet=self.train_config.train_unet,
+                    train_text_encoder=self.train_config.train_text_encoder,
+                    conv_lora_dim=self.network_config.conv,
+                    conv_alpha=self.network_config.conv_alpha,
+                    is_sdxl=self.model_config.is_xl,
+                    is_v2=self.model_config.is_v2,
+                    dropout=self.network_config.dropout,
+                    use_text_encoder_1=self.model_config.use_text_encoder_1,
+                    use_text_encoder_2=self.model_config.use_text_encoder_2,
+                    use_bias=is_lorm,
+                    is_lorm=is_lorm,
+                    network_config=self.network_config,
+                    **network_kwargs
+                )
 
-            # set the network to normalize if we are
-            self.network.is_normalizing = self.network_config.normalize
+                self.network.force_to(self.device_torch, dtype=dtype)
+                # give network to sd so it can use it
+                self.sd.network = self.network
+                self.network._update_torch_multiplier()
 
-            latest_save_path = self.get_latest_save_path()
-            if latest_save_path is not None:
-                self.print(f"#### IMPORTANT RESUMING FROM {latest_save_path} ####")
-                self.print(f"Loading from {latest_save_path}")
-                self.load_weights(latest_save_path)
-                self.network.multiplier = 1.0
-        elif self.embed_config is not None:
-            self.embedding = Embedding(
-                sd=self.sd,
-                embed_config=self.embed_config
-            )
-            latest_save_path = self.get_latest_save_path()
-            # load last saved weights
-            if latest_save_path is not None:
-                self.embedding.load_embedding_from_file(latest_save_path, self.device_torch)
+                self.network.apply_to(
+                    text_encoder,
+                    unet,
+                    self.train_config.train_text_encoder,
+                    self.train_config.train_unet
+                )
 
-            # resume state from embedding
-            self.step_num = self.embedding.step
+                if is_lorm:
+                    self.network.is_lorm = True
+                    # make sure it is on the right device
+                    self.sd.unet.to(self.sd.device, dtype=dtype)
+                    original_unet_param_count = count_parameters(self.sd.unet)
+                    self.network.setup_lorm()
+                    new_unet_param_count = original_unet_param_count - self.network.calculate_lorem_parameter_reduction()
 
-            # set trainable params
-            params = self.embedding.get_trainable_params()
+                    print_lorm_extract_details(
+                        start_num_params=original_unet_param_count,
+                        end_num_params=new_unet_param_count,
+                        num_replaced=len(self.network.get_all_modules()),
+                    )
 
-        else:
-            params = []
-            # assume dreambooth/finetune
-            if self.train_config.train_text_encoder:
-                if self.sd.is_xl:
-                    for te in text_encoder:
-                        te.requires_grad_(True)
-                        te.train()
-                        params += te.parameters()
-                else:
-                    text_encoder.requires_grad_(True)
-                    text_encoder.train()
-                    params += text_encoder.parameters()
-            if self.train_config.train_unet:
-                unet.requires_grad_(True)
-                unet.train()
-                params += unet.parameters()
+                self.network.prepare_grad_etc(text_encoder, unet)
+                flush()
 
+                # LyCORIS doesn't take default_lr, so only pass the lr kwargs the network's signature declares
+                config = {
+                    'text_encoder_lr': self.train_config.lr,
+                    'unet_lr': self.train_config.lr,
+                }
+                sig = inspect.signature(self.network.prepare_optimizer_params)
+                if 'default_lr' in sig.parameters:
+                    config['default_lr'] = self.train_config.lr
+                if 'learning_rate' in sig.parameters:
+                    config['learning_rate'] = self.train_config.lr
+                params_net = self.network.prepare_optimizer_params(
+                    **config
+                )
+
+                params += params_net
+
+                if self.train_config.gradient_checkpointing:
+                    self.network.enable_gradient_checkpointing()
+
+                lora_name = self.name
+                # need to adapt name so they are not mixed up
+                if self.named_lora:
+                    lora_name = f"{lora_name}_LoRA"
+
+                latest_save_path = self.get_latest_save_path(lora_name)
+                extra_weights = None
+                if latest_save_path is not None:
+                    self.print(f"#### IMPORTANT RESUMING FROM {latest_save_path} ####")
+                    self.print(f"Loading from {latest_save_path}")
+                    extra_weights = self.load_weights(latest_save_path)
+                    self.network.multiplier = 1.0
+
+            if self.embed_config is not None:
+                # we are doing embedding training as well
+                self.embedding = Embedding(
+                    sd=self.sd,
+                    embed_config=self.embed_config
+                )
+                latest_save_path = self.get_latest_save_path(self.embed_config.trigger)
+                # load last saved weights
+                if latest_save_path is not None:
+                    self.embedding.load_embedding_from_file(latest_save_path, self.device_torch)
+
+                # self.step_num = self.embedding.step
+                # self.start_step = self.step_num
+                params.append({
+                    'params': self.embedding.get_trainable_params(),
+                    'lr': self.train_config.embedding_lr
+                })
+
+                flush()
+
+            if self.adapter_config is not None:
+                self.setup_adapter()
+                # set trainable params
+                params.append({
+                    'params': self.adapter.parameters(),
+                    'lr': self.train_config.adapter_lr
+                })
+                flush()
+
+            params = self.load_additional_training_modules(params)
+
+        else:  # fine-tuning path: the base model's own weights are optimized (an adapter may still be attached below)
+            # set the device state preset before getting params
+            self.sd.set_device_state(self.train_device_state_preset)
+
+            # params = self.get_params()
+            if len(params) == 0:
+                # will only return savable weights and ones with grad
+                params = self.sd.prepare_optimizer_params(
+                    unet=self.train_config.train_unet,
+                    text_encoder=self.train_config.train_text_encoder,
+                    text_encoder_lr=self.train_config.lr,
+                    unet_lr=self.train_config.lr,
+                    default_lr=self.train_config.lr,
+                    refiner=self.train_config.train_refiner and self.sd.refiner_unet is not None,
+                    refiner_lr=self.train_config.refiner_lr,
+                )
+            # we may be using it for prompt injections
+            if self.adapter_config is not None:
+                self.setup_adapter()
+        flush()
         ### HOOK ###
         params = self.hook_add_extra_train_params(params)
+        self.params = []
+
+        for param in params:
+            if isinstance(param, dict):
+                self.params += param['params']
+            else:
+                self.params.append(param)
+
+        if self.train_config.start_step is not None:
+            self.step_num = self.train_config.start_step
+            self.start_step = self.step_num
 
         optimizer_type = self.train_config.optimizer.lower()
-        optimizer = get_optimizer(params, optimizer_type, learning_rate=self.train_config.lr,
+        optimizer = get_optimizer(self.params, optimizer_type, learning_rate=self.train_config.lr,
                                   optimizer_params=self.train_config.optimizer_params)
         self.optimizer = optimizer
 
+        # resume the optimizer state if a previous run left one in the save root
+        optimizer_state_filename = f'optimizer.pt'
+        optimizer_state_file_path = os.path.join(self.save_root, optimizer_state_filename)
+        if os.path.exists(optimizer_state_file_path):
+            # try to load
+            # previous param groups
+            # previous_params = copy.deepcopy(optimizer.param_groups)
+            try:
+                print(f"Loading optimizer state from {optimizer_state_file_path}")
+                optimizer_state_dict = torch.load(optimizer_state_file_path)
+                optimizer.load_state_dict(optimizer_state_dict)
+            except Exception as e:
+                print(f"Failed to load optimizer state from {optimizer_state_file_path}")
+                print(e)
+
+            # Update the learning rates if they changed
+            # optimizer.param_groups = previous_params
+
+        lr_scheduler_params = self.train_config.lr_scheduler_params
+
+        # make sure it has the bare minimum. NOTE(review): this tests for 'max_iterations' but fills in 'total_iters' - confirm
+        if 'max_iterations' not in lr_scheduler_params:
+            lr_scheduler_params['total_iters'] = self.train_config.steps
+
         lr_scheduler = get_lr_scheduler(
             self.train_config.lr_scheduler,
             optimizer,
-            max_iterations=self.train_config.steps,
-            lr_min=self.train_config.lr / 100,
+            **lr_scheduler_params
         )
-
         self.lr_scheduler = lr_scheduler
 
+        flush()
         ### HOOK ###
         self.hook_before_train_loop()
 
-        if self.has_first_sample_requested:
+        if self.has_first_sample_requested and self.step_num <= 1:
             self.print("Generating first sample from first sample config")
             self.sample(0, is_first=True)
 
         # sample first
         if self.train_config.skip_first_sample:
             self.print("Skipping first sample due to config setting")
-        else:
+        elif self.step_num <= 1:
             self.print("Generating baseline samples before training")
-            self.sample(0)
+            self.sample(self.step_num)
 
-        self.progress_bar = tqdm(
+        self.progress_bar = ToolkitProgressBar(
             total=self.train_config.steps,
             desc=self.job.name,
             leave=True,
             initial=self.step_num,
             iterable=range(0, self.train_config.steps),
         )
+        self.progress_bar.pause()
 
         if self.data_loader is not None:
             dataloader = self.data_loader
@@ -533,37 +1254,99 @@ class BaseSDTrainProcess(BaseTrainProcess):
         # zero any gradients
         optimizer.zero_grad()
 
+        self.lr_scheduler.step(self.step_num)
+
+        self.sd.set_device_state(self.train_device_state_preset)
+        flush()
         # self.step_num = 0
-        for step in range(self.step_num, self.train_config.steps):
+
+        ###################################################################
+        # TRAIN LOOP
+        ###################################################################
+
+        start_step_num = self.step_num
+        did_first_flush = False
+        for step in range(start_step_num, self.train_config.steps):
+            self.step_num = step
+            # default to true so various things can turn it off
+            self.is_grad_accumulation_step = True
+            if self.train_config.free_u:
+                self.sd.pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.1, b2=1.2)
+            self.progress_bar.unpause()
             with torch.no_grad():
                 # if is even step and we have a reg dataset, use that
                 # todo improve this logic to send one of each through if we can buckets and batch size might be an issue
-                if step % 2 == 0 and dataloader_reg is not None:
+                is_reg_step = False
+                is_save_step = self.save_config.save_every and self.step_num % self.save_config.save_every == 0
+                is_sample_step = self.sample_config.sample_every and self.step_num % self.sample_config.sample_every == 0
+                # don't do a reg step on sample or save steps as we dont want to normalize on those
+                if step % 2 == 0 and dataloader_reg is not None and not is_save_step and not is_sample_step:
                     try:
-                        batch = next(dataloader_iterator_reg)
+                        with self.timer('get_batch:reg'):
+                            batch = next(dataloader_iterator_reg)
                     except StopIteration:
-                        # hit the end of an epoch, reset
-                        dataloader_iterator_reg = iter(dataloader_reg)
-                        batch = next(dataloader_iterator_reg)
+                        with self.timer('reset_batch:reg'):
+                            # hit the end of an epoch, reset
+                            self.progress_bar.pause()
+                            dataloader_iterator_reg = iter(dataloader_reg)
+                            trigger_dataloader_setup_epoch(dataloader_reg)
+
+                        with self.timer('get_batch:reg'):
+                            batch = next(dataloader_iterator_reg)
+                        self.progress_bar.unpause()
+                    is_reg_step = True
                 elif dataloader is not None:
                     try:
-                        batch = next(dataloader_iterator)
+                        with self.timer('get_batch'):
+                            batch = next(dataloader_iterator)
                     except StopIteration:
-                        # hit the end of an epoch, reset
-                        dataloader_iterator = iter(dataloader)
-                        batch = next(dataloader_iterator)
+                        with self.timer('reset_batch'):
+                            # hit the end of an epoch, reset
+                            self.progress_bar.pause()
+                            dataloader_iterator = iter(dataloader)
+                            trigger_dataloader_setup_epoch(dataloader)
+                            self.epoch_num += 1
+                            if self.train_config.gradient_accumulation_steps == -1:
+                                # if we are accumulating for an entire epoch, trigger a step
+                                self.is_grad_accumulation_step = False
+                                self.grad_accumulation_step = 0
+                        with self.timer('get_batch'):
+                            batch = next(dataloader_iterator)
+                        self.progress_bar.unpause()
                 else:
                     batch = None
 
-                # turn on normalization if we are using it and it is not on
-                if self.network is not None and self.network_config.normalize and not self.network.is_normalizing:
-                    self.network.is_normalizing = True
+                # if we are doing a reg step, always accumulate
+                if is_reg_step:
+                    self.is_grad_accumulation_step = True
 
+                # setup accumulation
+                if self.train_config.gradient_accumulation_steps == -1:
+                    # epoch is handling the accumulation, dont touch it
+                    pass
+                else:
+                    # determine if we are accumulating or not
+                    # since optimizer step happens in the loop, we trigger it a step early
+                    # since we cannot reprocess it before then
+                    optimizer_step_at = self.train_config.gradient_accumulation_steps
+                    is_optimizer_step = self.grad_accumulation_step >= optimizer_step_at
+                    self.is_grad_accumulation_step = not is_optimizer_step
+                    if is_optimizer_step:
+                        self.grad_accumulation_step = 0
+
+            # flush()
             ### HOOK ###
+            self.timer.start('train_loop')
             loss_dict = self.hook_train_loop(batch)
-            flush()
+            self.timer.stop('train_loop')
+            if not did_first_flush:
+                flush()
+                did_first_flush = True
+            # flush()
+            # setup the networks to gradient checkpointing and everything works
 
             with torch.no_grad():
+                # torch.cuda.empty_cache()
                 if self.train_config.optimizer.lower().startswith('dadaptation') or \
                         self.train_config.optimizer.lower().startswith('prodigy'):
                     learning_rate = (
@@ -579,37 +1362,66 @@ class BaseSDTrainProcess(BaseTrainProcess):
 
                 self.progress_bar.set_postfix_str(prog_bar_string)
 
+                # if the batch is a DataLoaderBatchDTO, then we need to clean it up
+                if isinstance(batch, DataLoaderBatchDTO):
+                    with self.timer('batch_cleanup'):
+                        batch.cleanup()
+
                 # don't do on first step
                 if self.step_num != self.start_step:
-                    # pause progress bar
-                    self.progress_bar.unpause()  # makes it so doesn't track time
-                    if self.sample_config.sample_every and self.step_num % self.sample_config.sample_every == 0:
+                    if is_sample_step:
+                        self.progress_bar.pause()
+                        flush()
                         # print above the progress bar
+                        if self.train_config.free_u:
+                            self.sd.pipeline.disable_freeu()
                         self.sample(self.step_num)
+                        self.progress_bar.unpause()
 
-                    if self.save_config.save_every and self.step_num % self.save_config.save_every == 0:
+                    if is_save_step:
                         # print above the progress bar
+                        self.progress_bar.pause()
                         self.print(f"Saving at step {self.step_num}")
                         self.save(self.step_num)
+                        self.progress_bar.unpause()
 
                     if self.logging_config.log_every and self.step_num % self.logging_config.log_every == 0:
-                        # log to tensorboard
-                        if self.writer is not None:
-                            for key, value in loss_dict.items():
-                                self.writer.add_scalar(f"{key}", value, self.step_num)
-                            self.writer.add_scalar(f"lr", learning_rate, self.step_num)
-                    self.progress_bar.refresh()
+                        self.progress_bar.pause()
+                        with self.timer('log_to_tensorboard'):
+                            # log to tensorboard
+                            if self.writer is not None:
+                                for key, value in loss_dict.items():
+                                    self.writer.add_scalar(f"{key}", value, self.step_num)
+                                self.writer.add_scalar(f"lr", learning_rate, self.step_num)
+                            self.progress_bar.unpause()
+
+                    if self.performance_log_every > 0 and self.step_num % self.performance_log_every == 0:
+                        self.progress_bar.pause()
+                        # print the timers and clear them
+                        self.timer.print()
+                        self.timer.reset()
+                        self.progress_bar.unpause()
 
                 # sets progress bar to match out step
                 self.progress_bar.update(step - self.progress_bar.n)
-                # end of step
-                self.step_num = step
 
-                # apply network normalizer if we are using it
-                if self.network is not None and self.network.is_normalizing:
-                    self.network.apply_stored_normalizer()
+                #############################
+                # End of step
+                #############################
 
-        self.sample(self.step_num + 1)
+                # update various steps
+                self.step_num = step + 1
+                self.grad_accumulation_step += 1
+
+
+        ###################################################################
+        ##  END TRAIN LOOP
+        ###################################################################
+
+        self.progress_bar.close()
+        if self.train_config.free_u:
+            self.sd.pipeline.disable_freeu()
+        self.sample(self.step_num)
         print("")
         self.save()
 
diff --git a/jobs/process/BaseTrainProcess.py b/jobs/process/BaseTrainProcess.py
index f594704c..d1885de2 100644
--- a/jobs/process/BaseTrainProcess.py
+++ b/jobs/process/BaseTrainProcess.py
@@ -1,8 +1,10 @@
+import random
 from datetime import datetime
 import os
 from collections import OrderedDict
 from typing import TYPE_CHECKING, Union
 
+import torch
 import yaml
 
 from jobs.process.BaseProcess import BaseProcess
@@ -28,9 +30,18 @@ class BaseTrainProcess(BaseProcess):
         self.job: Union['TrainJob', 'BaseJob', 'ExtensionJob']
         self.progress_bar: 'tqdm' = None
 
+        self.training_seed = self.get_conf('training_seed', self.job.training_seed if hasattr(self.job, 'training_seed') else None)
+        # if training seed is set, use it
+        if self.training_seed is not None:
+            torch.manual_seed(self.training_seed)
+            if torch.cuda.is_available():
+                torch.cuda.manual_seed(self.training_seed)
+            random.seed(self.training_seed)
+
         self.progress_bar = None
         self.writer = None
-        self.training_folder = self.get_conf('training_folder', self.job.training_folder if hasattr(self.job, 'training_folder') else None)
+        self.training_folder = self.get_conf('training_folder',
+                                             self.job.training_folder if hasattr(self.job, 'training_folder') else None)
         self.save_root = os.path.join(self.training_folder, self.name)
         self.step = 0
         self.first_step = 0
@@ -62,8 +73,7 @@ class BaseTrainProcess(BaseProcess):
             self.writer = SummaryWriter(summary_dir)
 
     def save_training_config(self):
-        timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
         os.makedirs(self.save_root, exist_ok=True)
-        save_dif = os.path.join(self.save_root, f'process_config_{timestamp}.yaml')
+        save_dif = os.path.join(self.save_root, f'config.yaml')
         with open(save_dif, 'w') as f:
-            yaml.dump(self.raw_process_config, f)
+            yaml.dump(self.job.raw_config, f)
diff --git a/jobs/process/GenerateProcess.py b/jobs/process/GenerateProcess.py
index a005ae0a..13acca2d 100644
--- a/jobs/process/GenerateProcess.py
+++ b/jobs/process/GenerateProcess.py
@@ -98,7 +98,7 @@ class GenerateProcess(BaseProcess):
                 add_prompt_file=self.generate_config.prompt_file
             ))
         # generate images
-        self.sd.generate_images(prompt_image_configs)
+        self.sd.generate_images(prompt_image_configs, sampler=self.generate_config.sampler)
 
         print("Done generating images")
         # cleanup
diff --git a/jobs/process/TrainESRGANProcess.py b/jobs/process/TrainESRGANProcess.py
index 477d1921..4ff3a69d 100644
--- a/jobs/process/TrainESRGANProcess.py
+++ b/jobs/process/TrainESRGANProcess.py
@@ -3,10 +3,12 @@ import glob
 import os
 import time
 from collections import OrderedDict
+from typing import List, Optional
 
 from PIL import Image
 from PIL.ImageOps import exif_transpose
-# from basicsr.archs.rrdbnet_arch import RRDBNet
+
+from toolkit.basic import flush
 from toolkit.models.RRDB import RRDBNet as ESRGAN, esrgan_safetensors_keys
 from safetensors.torch import save_file, load_file
 from torch.utils.data import DataLoader, ConcatDataset
@@ -67,9 +69,10 @@ class TrainESRGANProcess(BaseTrainProcess):
         self.augmentations = self.get_conf('augmentations', {})
         self.torch_dtype = get_torch_dtype(self.dtype)
         if self.torch_dtype == torch.bfloat16:
-            self.esrgan_dtype = torch.float16
+            self.esrgan_dtype = torch.float32
         else:
             self.esrgan_dtype = torch.float32
+
         self.vgg_19 = None
         self.style_weight_scalers = []
         self.content_weight_scalers = []
@@ -232,6 +235,7 @@ class TrainESRGANProcess(BaseTrainProcess):
                 pattern_size=self.zoom,
                 dtype=self.torch_dtype
             ).to(self.device, dtype=self.torch_dtype)
+            self._pattern_loss = self._pattern_loss.to(self.device, dtype=self.torch_dtype)
         loss = torch.mean(self._pattern_loss(pred, target))
         return loss
 
@@ -269,13 +273,63 @@ class TrainESRGANProcess(BaseTrainProcess):
         if self.use_critic:
             self.critic.save(step)
 
-    def sample(self, step=None):
+    def sample(self, step=None, batch: Optional[List[torch.Tensor]] = None):
         sample_folder = os.path.join(self.save_root, 'samples')
         if not os.path.exists(sample_folder):
             os.makedirs(sample_folder, exist_ok=True)
+        batch_sample_folder = os.path.join(self.save_root, 'samples_batch')
+
+        batch_targets = None
+        batch_inputs = None
+        if batch is not None and not os.path.exists(batch_sample_folder):
+            os.makedirs(batch_sample_folder, exist_ok=True)
 
         self.model.eval()
 
+        def process_and_save(img, target_img, save_path):
+            """Run one input through the model and save a comparison strip.
+
+            img is moved to self.device / self.esrgan_dtype and passed through
+            self.model. The clamped input, the clamped output and target_img are
+            resized to a common size and pasted left to right into one RGB image
+            written to save_path. target_img may be a tensor or a PIL image.
+            """
+
+            def to_pil(tensor):
+                # always cast to float32: no significant overhead, bfloat16 compatible
+                array = tensor.cpu().permute(0, 2, 3, 1).squeeze(0).float().numpy()
+                # convert to pillow image
+                return Image.fromarray((array * 255).astype(np.uint8))
+
+            model_input = img.to(self.device, dtype=self.esrgan_dtype)
+            # output = (output / 2 + 0.5).clamp(0, 1)
+            prediction = to_pil(self.model(model_input).clamp(0, 1))
+            model_input = to_pil(model_input.clamp(0, 1))
+
+            if isinstance(target_img, torch.Tensor):
+                # tensor targets go through the same conversion (without a clamp)
+                target_img = to_pil(target_img)
+
+            # upscale to size * self.upscale_sample while maintaining pixels
+            side = self.resolution * self.upscale_sample
+            prediction = prediction.resize((side, side), resample=Image.NEAREST)
+            model_input = model_input.resize((side, side), resample=Image.NEAREST)
+            width, height = prediction.size
+
+            # bring all three panels to the same size before stacking them
+            panels = [
+                model_input.resize((width, height)),
+                prediction.resize((width, height)),
+                target_img.resize((width, height)),
+            ]
+
+            # paste left to right: input, model output, target
+            output_img = Image.new('RGB', (width * 3, height))
+            for column, panel in enumerate(panels):
+                output_img.paste(panel, (width * column, 0))
+
+            output_img.save(save_path)
+
         with torch.no_grad():
             for i, img_url in enumerate(self.sample_sources):
                 img = exif_transpose(Image.open(img_url))
@@ -295,30 +349,6 @@ class TrainESRGANProcess(BaseTrainProcess):
 
                 img = IMAGE_TRANSFORMS(img).unsqueeze(0).to(self.device, dtype=self.esrgan_dtype)
                 img = img
-                output = self.model(img)
-                # output = (output / 2 + 0.5).clamp(0, 1)
-                output = output.clamp(0, 1)
-                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
-                output = output.cpu().permute(0, 2, 3, 1).squeeze(0).float().numpy()
-
-                # convert to pillow image
-                output = Image.fromarray((output * 255).astype(np.uint8))
-
-                # upscale to size * self.upscale_sample while maintaining pixels
-                output = output.resize(
-                    (self.resolution * self.upscale_sample, self.resolution * self.upscale_sample),
-                    resample=Image.NEAREST
-                )
-
-                width, height = output.size
-
-                # stack input image and decoded image
-                target_image = target_image.resize((width, height))
-                output = output.resize((width, height))
-
-                output_img = Image.new('RGB', (width * 2, height))
-                output_img.paste(target_image, (0, 0))
-                output_img.paste(output, (width, 0))
 
                 step_num = ''
                 if step is not None:
@@ -327,8 +357,24 @@ class TrainESRGANProcess(BaseTrainProcess):
                 seconds_since_epoch = int(time.time())
                 # zero-pad 2 digits
                 i_str = str(i).zfill(2)
-                filename = f"{seconds_since_epoch}{step_num}_{i_str}.png"
-                output_img.save(os.path.join(sample_folder, filename))
+                filename = f"{seconds_since_epoch}{step_num}_{i_str}.jpg"
+                process_and_save(img, target_image, os.path.join(sample_folder, filename))
+
+            if batch is not None:
+                batch_targets = batch[0].detach()
+                batch_inputs = batch[1].detach()
+                batch_targets = torch.chunk(batch_targets, batch_targets.shape[0], dim=0)
+                batch_inputs = torch.chunk(batch_inputs, batch_inputs.shape[0], dim=0)
+
+                for i in range(len(batch_inputs)):
+                    if step is not None:
+                        # zero-pad 9 digits
+                        step_num = f"_{str(step).zfill(9)}"
+                    seconds_since_epoch = int(time.time())
+                    # zero-pad 2 digits
+                    i_str = str(i).zfill(2)
+                    filename = f"{seconds_since_epoch}{step_num}_{i_str}.jpg"
+                    process_and_save(batch_inputs[i], batch_targets[i], os.path.join(batch_sample_folder, filename))
 
         self.model.train()
 
@@ -376,13 +422,14 @@ class TrainESRGANProcess(BaseTrainProcess):
     def run(self):
         super().run()
         self.load_datasets()
+        steps_per_step = (self.critic.num_critic_per_gen + 1)
 
-        max_step_epochs = self.max_steps // len(self.data_loader)
+        max_step_epochs = self.max_steps // (len(self.data_loader) // steps_per_step)
         num_epochs = self.epochs
         if num_epochs is None or num_epochs > max_step_epochs:
             num_epochs = max_step_epochs
 
-        max_epoch_steps = len(self.data_loader) * num_epochs
+        max_epoch_steps = len(self.data_loader) * num_epochs * steps_per_step
         num_steps = self.max_steps
         if num_steps is None or num_steps > max_epoch_steps:
             num_steps = max_epoch_steps
@@ -445,35 +492,60 @@ class TrainESRGANProcess(BaseTrainProcess):
         print("Generating baseline samples")
         self.sample(step=0)
         # range start at self.epoch_num go to self.epochs
+        critic_losses = []
         for epoch in range(self.epoch_num, self.epochs, 1):
             if self.step_num >= self.max_steps:
                 break
+            flush()
             for targets, inputs in self.data_loader:
                 if self.step_num >= self.max_steps:
                     break
                 with torch.no_grad():
-                    targets = targets.to(self.device, dtype=self.esrgan_dtype).clamp(0, 1)
-                    inputs = inputs.to(self.device, dtype=self.esrgan_dtype).clamp(0, 1)
+                    is_critic_only_step = False
+                    if self.use_critic and 1 / (self.critic.num_critic_per_gen + 1) < np.random.uniform():
+                        is_critic_only_step = True
 
-                pred = self.model(inputs)
+                    targets = targets.to(self.device, dtype=self.esrgan_dtype).clamp(0, 1).detach()
+                    inputs = inputs.to(self.device, dtype=self.esrgan_dtype).clamp(0, 1).detach()
 
-                pred = pred.to(self.device, dtype=self.torch_dtype).clamp(0, 1)
-                targets = targets.to(self.device, dtype=self.torch_dtype).clamp(0, 1)
+                optimizer.zero_grad()
+                # don't compute grads here on a critic-only step
+                do_grad = not is_critic_only_step
+                with torch.set_grad_enabled(do_grad):
+                    pred = self.model(inputs)
 
-                # Run through VGG19
-                if self.style_weight > 0 or self.content_weight > 0 or self.use_critic:
-                    stacked = torch.cat([pred, targets], dim=0)
-                    # stacked = (stacked / 2 + 0.5).clamp(0, 1)
-                    stacked = stacked.clamp(0, 1)
-                    self.vgg_19(stacked)
+                    pred = pred.to(self.device, dtype=self.torch_dtype).clamp(0, 1)
+                    targets = targets.to(self.device, dtype=self.torch_dtype).clamp(0, 1)
+                    if torch.isnan(pred).any():
+                        raise ValueError('pred has nan values')
+                    if torch.isnan(targets).any():
+                        raise ValueError('targets has nan values')
 
-                if self.use_critic:
+                    # Run through VGG19
+                    if self.style_weight > 0 or self.content_weight > 0 or self.use_critic:
+                        stacked = torch.cat([pred, targets], dim=0)
+                        # stacked = (stacked / 2 + 0.5).clamp(0, 1)
+                        stacked = stacked.clamp(0, 1)
+                        self.vgg_19(stacked)
+                        # make sure we don't have NaNs
+                        if torch.isnan(self.vgg19_pool_4.tensor).any():
+                            raise ValueError('vgg19_pool_4 has nan values')
+
+                if is_critic_only_step:
                     critic_d_loss = self.critic.step(self.vgg19_pool_4.tensor.detach())
+                    critic_losses.append(critic_d_loss)
+                    # don't do generator step
+                    continue
                 else:
-                    critic_d_loss = 0.0
+                    # doing a regular step
+                    if len(critic_losses) == 0:
+                        critic_d_loss = 0
+                    else:
+                        critic_d_loss = sum(critic_losses) / len(critic_losses)
 
                 style_loss = self.get_style_loss() * self.style_weight
                 content_loss = self.get_content_loss() * self.content_weight
+
                 mse_loss = self.get_mse_loss(pred, targets) * self.mse_weight
                 tv_loss = self.get_tv_loss(pred, targets) * self.tv_weight
                 pattern_loss = self.get_pattern_loss(pred, targets) * self.pattern_weight
@@ -483,10 +555,13 @@ class TrainESRGANProcess(BaseTrainProcess):
                     critic_gen_loss = torch.tensor(0.0, device=self.device, dtype=self.torch_dtype)
 
                 loss = style_loss + content_loss + mse_loss + tv_loss + critic_gen_loss + pattern_loss
+                # make sure the loss is not NaN
+                if torch.isnan(loss):
+                    raise ValueError('loss is nan')
 
                 # Backward pass and optimization
-                optimizer.zero_grad()
                 loss.backward()
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                 optimizer.step()
                 scheduler.step()
 
@@ -549,7 +624,7 @@ class TrainESRGANProcess(BaseTrainProcess):
                     if self.sample_every and self.step_num % self.sample_every == 0:
                         # print above the progress bar
                         self.print(f"Sampling at step {self.step_num}")
-                        self.sample(self.step_num)
+                        self.sample(self.step_num, batch=[targets, inputs])
 
                     if self.save_every and self.step_num % self.save_every == 0:
                         # print above the progress bar
diff --git a/jobs/process/TrainLoRAHack.py b/jobs/process/TrainLoRAHack.py
deleted file mode 100644
index 2a5a6539..00000000
--- a/jobs/process/TrainLoRAHack.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# ref:
-# - https://github.com/p1atdev/LECO/blob/main/train_lora.py
-import time
-from collections import OrderedDict
-import os
-
-from toolkit.config_modules import SliderConfig
-from toolkit.paths import REPOS_ROOT
-import sys
-
-sys.path.append(REPOS_ROOT)
-sys.path.append(os.path.join(REPOS_ROOT, 'leco'))
-from toolkit.train_tools import get_torch_dtype, apply_noise_offset
-import gc
-
-import torch
-from leco import train_util, model_util
-from leco.prompt_util import PromptEmbedsCache
-from .BaseSDTrainProcess import BaseSDTrainProcess, StableDiffusion
-
-
-def flush():
-    torch.cuda.empty_cache()
-    gc.collect()
-
-
-class LoRAHack:
-    def __init__(self, **kwargs):
-        self.type = kwargs.get('type', 'suppression')
-
-
-class TrainLoRAHack(BaseSDTrainProcess):
-    def __init__(self, process_id: int, job, config: OrderedDict):
-        super().__init__(process_id, job, config)
-        self.hack_config = LoRAHack(**self.get_conf('hack', {}))
-
-    def hook_before_train_loop(self):
-        # we don't need text encoder so move it to cpu
-        self.sd.text_encoder.to("cpu")
-        flush()
-        # end hook_before_train_loop
-
-        if self.hack_config.type == 'suppression':
-            # set all params to self.current_suppression
-            params = self.network.parameters()
-            for param in params:
-                # get random noise for each param
-                noise = torch.randn_like(param) - 0.5
-                # apply noise to param
-                param.data = noise * 0.001
-
-
-    def supress_loop(self):
-        dtype = get_torch_dtype(self.train_config.dtype)
-
-
-        loss_dict = OrderedDict(
-            {'sup': 0.0}
-        )
-        # increase noise
-        for param in self.network.parameters():
-            # get random noise for each param
-            noise = torch.randn_like(param) - 0.5
-            # apply noise to param
-            param.data = param.data + noise * 0.001
-
-
-
-        return loss_dict
-
-    def hook_train_loop(self, batch):
-        if self.hack_config.type == 'suppression':
-            return self.supress_loop()
-        else:
-            raise NotImplementedError(f'unknown hack type: {self.hack_config.type}')
-        # end hook_train_loop
diff --git a/jobs/process/TrainSliderProcess.py b/jobs/process/TrainSliderProcess.py
index 6e9aae48..9f18e6bc 100644
--- a/jobs/process/TrainSliderProcess.py
+++ b/jobs/process/TrainSliderProcess.py
@@ -1,9 +1,19 @@
+import copy
+import os
 import random
 from collections import OrderedDict
+from typing import Union
+
+from PIL import Image
+from diffusers import T2IAdapter
+from torchvision.transforms import transforms
 from tqdm import tqdm
 
+from toolkit.basic import value_map
 from toolkit.config_modules import SliderConfig
-from toolkit.train_tools import get_torch_dtype, apply_snr_weight
+from toolkit.data_transfer_object.data_loader import DataLoaderBatchDTO
+from toolkit.sd_device_states_presets import get_train_sd_device_state_preset
+from toolkit.train_tools import get_torch_dtype, apply_snr_weight, apply_learnable_snr_gos
 import gc
 from toolkit import train_tools
 from toolkit.prompt_utils import \
@@ -21,6 +31,11 @@ def flush():
     gc.collect()
 
 
+adapter_transforms = transforms.Compose([
+    transforms.ToTensor(),
+])
+
+
 class TrainSliderProcess(BaseSDTrainProcess):
     def __init__(self, process_id: int, job, config: OrderedDict):
         super().__init__(process_id, job, config)
@@ -42,6 +57,27 @@ class TrainSliderProcess(BaseSDTrainProcess):
             # trim targets
             self.slider_config.targets = self.slider_config.targets[:self.train_config.steps]
 
+        # get presets
+        self.eval_slider_device_state = get_train_sd_device_state_preset(
+            self.device_torch,
+            train_unet=False,
+            train_text_encoder=False,
+            cached_latents=self.is_latents_cached,
+            train_lora=False,
+            train_adapter=False,
+            train_embedding=False,
+        )
+
+        self.train_slider_device_state = get_train_sd_device_state_preset(
+            self.device_torch,
+            train_unet=self.train_config.train_unet,
+            train_text_encoder=False,
+            cached_latents=self.is_latents_cached,
+            train_lora=True,
+            train_adapter=False,
+            train_embedding=False,
+        )
+
     def before_model_load(self):
         pass
 
@@ -66,6 +102,7 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 # trim list to our max steps
 
         cache = PromptEmbedsCache()
+        print(f"Building prompt cache")
 
         # get encoded latents for our prompts
         with torch.no_grad():
@@ -175,31 +212,107 @@ class TrainSliderProcess(BaseSDTrainProcess):
             self.sd.vae.to(self.device_torch)
         # end hook_before_train_loop
 
-    def hook_train_loop(self, batch):
-        dtype = get_torch_dtype(self.train_config.dtype)
+    def before_dataset_load(self):
+        if self.slider_config.use_adapter == 'depth':
+            print(f"Loading T2I Adapter for depth")
+            # called before LoRA network is loaded but after model is loaded
+            # attach the adapter here so it is there before we load the network
+            adapter_path = 'TencentARC/t2iadapter_depth_sd15v2'
+            if self.model_config.is_xl:
+                adapter_path = 'TencentARC/t2i-adapter-depth-midas-sdxl-1.0'
 
+            print(f"Loading T2I Adapter from {adapter_path}")
 
-        # get a random pair
-        prompt_pair: EncodedPromptPair = self.prompt_pairs[
-            torch.randint(0, len(self.prompt_pairs), (1,)).item()
-        ]
-        # move to device and dtype
-        prompt_pair.to(self.device_torch, dtype=dtype)
+            # don't name this adapter since we are not training it
+            self.t2i_adapter = T2IAdapter.from_pretrained(
+                adapter_path, torch_dtype=get_torch_dtype(self.train_config.dtype), varient="fp16"
+            ).to(self.device_torch)
+            self.t2i_adapter.eval()
+            self.t2i_adapter.requires_grad_(False)
+            flush()
 
-        # get a random resolution
-        height, width = self.slider_config.resolutions[
-            torch.randint(0, len(self.slider_config.resolutions), (1,)).item()
-        ]
-        if self.train_config.gradient_checkpointing:
-            # may get disabled elsewhere
-            self.sd.unet.enable_gradient_checkpointing()
+    @torch.no_grad()
+    def get_adapter_images(self, batch: Union[None, 'DataLoaderBatchDTO']):
+        """Load the control image matching each batch item as one stacked tensor.
+
+        For every file item the adapter image dir is searched for a file with the
+        same stem (.jpg/.jpeg/.png/.webp); each hit is resized, center cropped to
+        the first item's crop size and run through adapter_transforms.
+        NOTE(review): items without a match are skipped silently - confirm.
+        """
+        img_ext_list = ['.jpg', '.jpeg', '.png', '.webp']
+        adapter_folder_path = self.slider_config.adapter_img_dir
+        adapter_images = []
+        for file_item in batch.file_items:
+            # splitext keeps dots inside the stem ("a.b.png" -> "a.b", not "a")
+            file_name_no_ext = os.path.splitext(os.path.basename(file_item.path))[0]
+            for ext in img_ext_list:
+                candidate = os.path.join(adapter_folder_path, file_name_no_ext + ext)
+                if os.path.exists(candidate):
+                    adapter_images.append(candidate)
+                    break
+        width, height = batch.file_items[0].crop_width, batch.file_items[0].crop_height
+        crop_fn = transforms.CenterCrop((height, width))
+        adapter_tensors = []
+        for adapter_image in adapter_images:
+            img: Image.Image = Image.open(adapter_image)
+            # scale so the shorter image side matches the batch, then center crop
+            if img.width > img.height:
+                new_height = height
+                new_width = int(img.width * (height / img.height))
+            else:
+                new_width = width
+                new_height = int(img.height * (width / img.width))
+
+            img = img.resize((new_width, new_height))
+            # crop the center to match batch, then convert to a tensor
+            adapter_tensors.append(adapter_transforms(crop_fn(img)))
+
+        # stack into one batch on the training device / dtype
+        adapter_tensors = torch.stack(adapter_tensors).to(
+            self.device_torch, dtype=get_torch_dtype(self.train_config.dtype)
+        )
+        return adapter_tensors
+
+    def hook_train_loop(self, batch: Union['DataLoaderBatchDTO', None]):
+        # set to eval mode
+        self.sd.set_device_state(self.eval_slider_device_state)
+        with torch.no_grad():
+            dtype = get_torch_dtype(self.train_config.dtype)
+
+            # get a random pair
+            prompt_pair: EncodedPromptPair = self.prompt_pairs[
+                torch.randint(0, len(self.prompt_pairs), (1,)).item()
+            ]
+            # move to device and dtype
+            prompt_pair.to(self.device_torch, dtype=dtype)
+
+            # get a random resolution
+            height, width = self.slider_config.resolutions[
+                torch.randint(0, len(self.slider_config.resolutions), (1,)).item()
+            ]
+            if self.train_config.gradient_checkpointing:
+                # may get disabled elsewhere
+                self.sd.unet.enable_gradient_checkpointing()
 
         noise_scheduler = self.sd.noise_scheduler
         optimizer = self.optimizer
         lr_scheduler = self.lr_scheduler
+
         loss_function = torch.nn.MSELoss()
 
+        pred_kwargs = {}
+
         def get_noise_pred(neg, pos, gs, cts, dn):
+            down_kwargs = copy.deepcopy(pred_kwargs)
+            if 'down_block_additional_residuals' in down_kwargs:
+                dbr_batch_size = down_kwargs['down_block_additional_residuals'][0].shape[0]
+                if dbr_batch_size != dn.shape[0]:
+                    amount_to_add = int(dn.shape[0] * 2 / dbr_batch_size)
+                    down_kwargs['down_block_additional_residuals'] = [
+                        torch.cat([sample.clone()] * amount_to_add) for sample in
+                        down_kwargs['down_block_additional_residuals']
+                    ]
             return self.sd.predict_noise(
                 latents=dn,
                 text_embeddings=train_tools.concat_prompt_embeddings(
@@ -209,9 +322,13 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 ),
                 timestep=cts,
                 guidance_scale=gs,
+                **down_kwargs
             )
 
         with torch.no_grad():
+            adapter_images = None
+            self.sd.unet.eval()
+
             # for a complete slider, the batch size is 4 to begin with now
             true_batch_size = prompt_pair.target_class.text_embeds.shape[0] * self.train_config.batch_size
             from_batch = False
@@ -219,9 +336,32 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 # traing from a batch of images, not generating ourselves
                 from_batch = True
                 noisy_latents, noise, timesteps, conditioned_prompts, imgs = self.process_general_training_batch(batch)
+                if self.slider_config.adapter_img_dir is not None:
+                    adapter_images = self.get_adapter_images(batch)
+                    adapter_strength_min = 0.9
+                    adapter_strength_max = 1.0
 
-                denoised_latent_chunks = [noisy_latents] * self.prompt_chunk_size
-                denoised_latents = torch.cat(denoised_latent_chunks, dim=0)
+                    def rand_strength(sample):
+                        adapter_conditioning_scale = torch.rand(
+                            (1,), device=self.device_torch, dtype=dtype
+                        )
+
+                        adapter_conditioning_scale = value_map(
+                            adapter_conditioning_scale,
+                            0.0,
+                            1.0,
+                            adapter_strength_min,
+                            adapter_strength_max
+                        )
+                        return sample.to(self.device_torch, dtype=dtype).detach() * adapter_conditioning_scale
+
+                    down_block_additional_residuals = self.t2i_adapter(adapter_images)
+                    down_block_additional_residuals = [
+                        rand_strength(sample) for sample in down_block_additional_residuals
+                    ]
+                    pred_kwargs['down_block_additional_residuals'] = down_block_additional_residuals
+
+                denoised_latents = torch.cat([noisy_latents] * self.prompt_chunk_size, dim=0)
                 current_timestep = timesteps
             else:
 
@@ -229,14 +369,11 @@ class TrainSliderProcess(BaseSDTrainProcess):
                     self.train_config.max_denoising_steps, device=self.device_torch
                 )
 
-                self.optimizer.zero_grad()
-
                 # ger a random number of steps
                 timesteps_to = torch.randint(
                     1, self.train_config.max_denoising_steps, (1,)
                 ).item()
 
-
                 # get noise
                 noise = self.sd.get_latent_noise(
                     pixel_height=height,
@@ -249,32 +386,60 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 latents = noise * self.sd.noise_scheduler.init_noise_sigma
                 latents = latents.to(self.device_torch, dtype=dtype)
 
-                with self.network:
-                    assert self.network.is_active
-                    # pass the multiplier list to the network
-                    self.network.multiplier = prompt_pair.multiplier_list
-                    denoised_latents = self.sd.diffuse_some_steps(
-                        latents,  # pass simple noise latents
-                        train_tools.concat_prompt_embeddings(
-                            prompt_pair.positive_target,  # unconditional
-                            prompt_pair.target_class,  # target
-                            self.train_config.batch_size,
-                        ),
-                        start_timesteps=0,
-                        total_timesteps=timesteps_to,
-                        guidance_scale=3,
-                    )
+                assert not self.network.is_active
+                self.sd.unet.eval()
+                # pass the multiplier list to the network
+                self.network.multiplier = prompt_pair.multiplier_list
+                denoised_latents = self.sd.diffuse_some_steps(
+                    latents,  # pass simple noise latents
+                    train_tools.concat_prompt_embeddings(
+                        prompt_pair.positive_target,  # unconditional
+                        prompt_pair.target_class,  # target
+                        self.train_config.batch_size,
+                    ),
+                    start_timesteps=0,
+                    total_timesteps=timesteps_to,
+                    guidance_scale=3,
+                )
+
 
                 noise_scheduler.set_timesteps(1000)
 
-                # split the latents into out prompt pair chunks
-                denoised_latent_chunks = torch.chunk(denoised_latents, self.prompt_chunk_size, dim=0)
-                denoised_latent_chunks = [x.detach() for x in denoised_latent_chunks]
-
                 current_timestep_index = int(timesteps_to * 1000 / self.train_config.max_denoising_steps)
                 current_timestep = noise_scheduler.timesteps[current_timestep_index]
 
+            # split the latents into our prompt pair chunks
+            denoised_latent_chunks = torch.chunk(denoised_latents, self.prompt_chunk_size, dim=0)
+            denoised_latent_chunks = [x.detach() for x in denoised_latent_chunks]
+
             # flush()  # 4.2GB to 3GB on 512x512
+            mask_multiplier = torch.ones((denoised_latents.shape[0], 1, 1, 1), device=self.device_torch, dtype=dtype)
+            has_mask = False
+            if batch and batch.mask_tensor is not None:
+                with self.timer('get_mask_multiplier'):
+                    # upsampling is not supported for bfloat16
+                    mask_multiplier = batch.mask_tensor.to(self.device_torch, dtype=torch.float16).detach()
+                    # scale down to the size of the latents, mask multiplier shape(bs, 1, width, height), noisy_latents shape(bs, channels, width, height)
+                    mask_multiplier = torch.nn.functional.interpolate(
+                        mask_multiplier, size=(noisy_latents.shape[2], noisy_latents.shape[3])
+                    )
+                    # expand to match latents
+                    mask_multiplier = mask_multiplier.expand(-1, noisy_latents.shape[1], -1, -1)
+                    mask_multiplier = mask_multiplier.to(self.device_torch, dtype=dtype).detach()
+                    has_mask = True
+
+            if has_mask:
+                unmasked_target = get_noise_pred(
+                    prompt_pair.positive_target,  # negative prompt
+                    prompt_pair.target_class,  # positive prompt
+                    1,
+                    current_timestep,
+                    denoised_latents
+                )
+                unmasked_target = unmasked_target.detach()
+                unmasked_target.requires_grad = False
+            else:
+                unmasked_target = None
 
             # 4.20 GB RAM for 512x512
             positive_latents = get_noise_pred(
@@ -286,7 +451,6 @@ class TrainSliderProcess(BaseSDTrainProcess):
             )
             positive_latents = positive_latents.detach()
             positive_latents.requires_grad = False
-            positive_latents_chunks = torch.chunk(positive_latents, self.prompt_chunk_size, dim=0)
 
             neutral_latents = get_noise_pred(
                 prompt_pair.positive_target,  # negative prompt
@@ -297,7 +461,6 @@ class TrainSliderProcess(BaseSDTrainProcess):
             )
             neutral_latents = neutral_latents.detach()
             neutral_latents.requires_grad = False
-            neutral_latents_chunks = torch.chunk(neutral_latents, self.prompt_chunk_size, dim=0)
 
             unconditional_latents = get_noise_pred(
                 prompt_pair.positive_target,  # negative prompt
@@ -308,13 +471,14 @@ class TrainSliderProcess(BaseSDTrainProcess):
             )
             unconditional_latents = unconditional_latents.detach()
             unconditional_latents.requires_grad = False
-            unconditional_latents_chunks = torch.chunk(unconditional_latents, self.prompt_chunk_size, dim=0)
 
             denoised_latents = denoised_latents.detach()
 
-        flush()  # 4.2GB to 3GB on 512x512
+        self.sd.set_device_state(self.train_slider_device_state)
+        self.sd.unet.train()
+        # start accumulating gradients
+        self.optimizer.zero_grad(set_to_none=True)
 
-        # 4.20 GB RAM for 512x512
         anchor_loss_float = None
         if len(self.anchor_pairs) > 0:
             with torch.no_grad():
@@ -369,9 +533,34 @@ class TrainSliderProcess(BaseSDTrainProcess):
             del anchor_target_noise
             # move anchor back to cpu
             anchor.to("cpu")
-            flush()
 
-        prompt_pair_chunks = split_prompt_pairs(prompt_pair, self.prompt_chunk_size)
+        with torch.no_grad():
+            if self.slider_config.low_ram:
+                prompt_pair_chunks = split_prompt_pairs(prompt_pair.detach(), self.prompt_chunk_size)
+                denoised_latent_chunks = denoised_latent_chunks  # just to have it in one place
+                positive_latents_chunks = torch.chunk(positive_latents.detach(), self.prompt_chunk_size, dim=0)
+                neutral_latents_chunks = torch.chunk(neutral_latents.detach(), self.prompt_chunk_size, dim=0)
+                unconditional_latents_chunks = torch.chunk(
+                    unconditional_latents.detach(),
+                    self.prompt_chunk_size,
+                    dim=0
+                )
+                mask_multiplier_chunks = torch.chunk(mask_multiplier, self.prompt_chunk_size, dim=0)
+                if unmasked_target is not None:
+                    unmasked_target_chunks = torch.chunk(unmasked_target, self.prompt_chunk_size, dim=0)
+                else:
+                    unmasked_target_chunks = [None for _ in range(self.prompt_chunk_size)]
+            else:
+                # run through in one instance
+                prompt_pair_chunks = [prompt_pair.detach()]
+                denoised_latent_chunks = [torch.cat(denoised_latent_chunks, dim=0).detach()]
+                positive_latents_chunks = [positive_latents.detach()]
+                neutral_latents_chunks = [neutral_latents.detach()]
+                unconditional_latents_chunks = [unconditional_latents.detach()]
+                mask_multiplier_chunks = [mask_multiplier]
+                unmasked_target_chunks = [unmasked_target]
+
+            # flush()
         assert len(prompt_pair_chunks) == len(denoised_latent_chunks)
         # 3.28 GB RAM for 512x512
         with self.network:
@@ -381,13 +570,17 @@ class TrainSliderProcess(BaseSDTrainProcess):
                     denoised_latent_chunk, \
                     positive_latents_chunk, \
                     neutral_latents_chunk, \
-                    unconditional_latents_chunk \
+                    unconditional_latents_chunk, \
+                    mask_multiplier_chunk, \
+                    unmasked_target_chunk \
                     in zip(
                 prompt_pair_chunks,
                 denoised_latent_chunks,
                 positive_latents_chunks,
                 neutral_latents_chunks,
                 unconditional_latents_chunks,
+                mask_multiplier_chunks,
+                unmasked_target_chunks
             ):
                 self.network.multiplier = prompt_pair_chunk.multiplier_list
                 target_latents = get_noise_pred(
@@ -421,17 +614,43 @@ class TrainSliderProcess(BaseSDTrainProcess):
 
                 # 16.15 GB RAM for 512x512 -> 4.20GB RAM for 512x512 with new grad_checkpointing
                 loss = torch.nn.functional.mse_loss(target_latents.float(), offset_neutral.float(), reduction="none")
+
+                # do inverted mask to preserve non masked
+                if has_mask and unmasked_target_chunk is not None:
+                    loss = loss * mask_multiplier_chunk
+                    # match the mask unmasked_target_chunk
+                    mask_target_loss = torch.nn.functional.mse_loss(
+                        target_latents.float(),
+                        unmasked_target_chunk.float(),
+                        reduction="none"
+                    )
+                    mask_target_loss = mask_target_loss * (1.0 - mask_multiplier_chunk)
+                    loss += mask_target_loss
+
                 loss = loss.mean([1, 2, 3])
 
+                if self.train_config.learnable_snr_gos:
+                    if from_batch:
+                        # match batch size
+                        loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler,
+                                                self.train_config.min_snr_gamma)
+                    else:
+                        # match batch size
+                        timesteps_index_list = [current_timestep_index for _ in range(target_latents.shape[0])]
+                        # add snr_gamma
+                        loss = apply_learnable_snr_gos(loss, timesteps_index_list, self.snr_gos)
                 if self.train_config.min_snr_gamma is not None and self.train_config.min_snr_gamma > 0.000001:
                     if from_batch:
                         # match batch size
-                        loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler, self.train_config.min_snr_gamma)
+                        loss = apply_snr_weight(loss, timesteps, self.sd.noise_scheduler,
+                                                self.train_config.min_snr_gamma)
                     else:
                         # match batch size
                         timesteps_index_list = [current_timestep_index for _ in range(target_latents.shape[0])]
                         # add min_snr_gamma
-                        loss = apply_snr_weight(loss, timesteps_index_list, noise_scheduler, self.train_config.min_snr_gamma)
+                        loss = apply_snr_weight(loss, timesteps_index_list, noise_scheduler,
+                                                self.train_config.min_snr_gamma)
+
 
                 loss = loss.mean() * prompt_pair_chunk.weight
 
@@ -440,7 +659,7 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 del target_latents
                 del offset_neutral
                 del loss
-                flush()
+                # flush()
 
         optimizer.step()
         lr_scheduler.step()
@@ -457,7 +676,7 @@ class TrainSliderProcess(BaseSDTrainProcess):
         )
         # move back to cpu
         prompt_pair.to("cpu")
-        flush()
+        # flush()
 
         # reset network
         self.network.multiplier = 1.0
diff --git a/jobs/process/__init__.py b/jobs/process/__init__.py
index 766c6b11..387be088 100644
--- a/jobs/process/__init__.py
+++ b/jobs/process/__init__.py
@@ -7,7 +7,6 @@ from .TrainVAEProcess import TrainVAEProcess
 from .BaseMergeProcess import BaseMergeProcess
 from .TrainSliderProcess import TrainSliderProcess
 from .TrainSliderProcessOld import TrainSliderProcessOld
-from .TrainLoRAHack import TrainLoRAHack
 from .TrainSDRescaleProcess import TrainSDRescaleProcess
 from .ModRescaleLoraProcess import ModRescaleLoraProcess
 from .GenerateProcess import GenerateProcess
diff --git a/jobs/process/models/vgg19_critic.py b/jobs/process/models/vgg19_critic.py
index a5ef92be..8cf438bf 100644
--- a/jobs/process/models/vgg19_critic.py
+++ b/jobs/process/models/vgg19_critic.py
@@ -154,28 +154,28 @@ class Critic:
         # train critic here
         self.model.train()
         self.model.requires_grad_(True)
+        self.optimizer.zero_grad()
 
         critic_losses = []
-        for i in range(self.num_critic_per_gen):
-            inputs = vgg_output.detach()
-            inputs = inputs.to(self.device, dtype=self.torch_dtype)
-            self.optimizer.zero_grad()
+        inputs = vgg_output.detach()
+        inputs = inputs.to(self.device, dtype=self.torch_dtype)
+        self.optimizer.zero_grad()
 
-            vgg_pred, vgg_target = torch.chunk(inputs, 2, dim=0)
+        vgg_pred, vgg_target = torch.chunk(inputs, 2, dim=0)
 
-            stacked_output = self.model(inputs)
-            out_pred, out_target = torch.chunk(stacked_output, 2, dim=0)
+        stacked_output = self.model(inputs).float()
+        out_pred, out_target = torch.chunk(stacked_output, 2, dim=0)
 
-            # Compute gradient penalty
-            gradient_penalty = get_gradient_penalty(self.model, vgg_target, vgg_pred, self.device)
+        # Compute gradient penalty
+        gradient_penalty = get_gradient_penalty(self.model, vgg_target, vgg_pred, self.device)
 
-            # Compute WGAN-GP critic loss
-            critic_loss = -(torch.mean(out_target) - torch.mean(out_pred)) + self.lambda_gp * gradient_penalty
-            critic_loss.backward()
-            self.optimizer.zero_grad()
-            self.optimizer.step()
-            self.scheduler.step()
-            critic_losses.append(critic_loss.item())
+        # Compute WGAN-GP critic loss
+        critic_loss = -(torch.mean(out_target) - torch.mean(out_pred)) + self.lambda_gp * gradient_penalty
+        critic_loss.backward()
+        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+        self.optimizer.step()
+        self.scheduler.step()
+        critic_losses.append(critic_loss.item())
 
         # avg loss
         loss = np.mean(critic_losses)
diff --git a/repositories/batch_annotator b/repositories/batch_annotator
new file mode 160000
index 00000000..420e142f
--- /dev/null
+++ b/repositories/batch_annotator
@@ -0,0 +1 @@
+Subproject commit 420e142f6ad3cc14b3ea0500affc2c6c7e7544bf
diff --git a/repositories/ipadapter b/repositories/ipadapter
new file mode 160000
index 00000000..d8ab37c4
--- /dev/null
+++ b/repositories/ipadapter
@@ -0,0 +1 @@
+Subproject commit d8ab37c421c1ab95d15abe094e8266a6d01e26ef
diff --git a/requirements.txt b/requirements.txt
index a11b6211..150b3dfb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 torch
 torchvision
 safetensors
-diffusers
-transformers
-lycoris_lora
+diffusers==0.21.3
+git+https://github.com/huggingface/transformers.git
+lycoris-lora==1.8.3
 flatten_json
 pyyaml
 oyaml
@@ -15,4 +15,10 @@ accelerate
 toml
 albumentations
 pydantic
-omegaconf
\ No newline at end of file
+omegaconf
+k-diffusion
+open_clip_torch
+timm
+prodigyopt
+controlnet_aux==0.0.7
+python-dotenv
\ No newline at end of file
diff --git a/run.py b/run.py
index 91b69bc8..8c767ec5 100644
--- a/run.py
+++ b/run.py
@@ -1,8 +1,22 @@
 import os
 import sys
 from typing import Union, OrderedDict
+from dotenv import load_dotenv
+# Load the .env file if it exists
+load_dotenv()
 
 sys.path.insert(0, os.getcwd())
+# must come before ANY torch or fastai imports
+# import toolkit.cuda_malloc
+
+# turn off diffusers telemetry until I can figure out how to make it opt-in
+os.environ['DISABLE_TELEMETRY'] = 'YES'
+
+# check if we have DEBUG_TOOLKIT in env
+if os.environ.get("DEBUG_TOOLKIT", "0") == "1":
+    # enable autograd anomaly detection
+    import torch
+    torch.autograd.set_detect_anomaly(True)
 import argparse
 from toolkit.job import get_job
 
diff --git a/scripts/convert_cog.py b/scripts/convert_cog.py
new file mode 100644
index 00000000..ba4f6e73
--- /dev/null
+++ b/scripts/convert_cog.py
@@ -0,0 +1,142 @@
+import json
+from collections import OrderedDict
+import os
+import torch
+from safetensors import safe_open
+from safetensors.torch import save_file
+
+device = torch.device('cpu')
+
+# [diffusers] -> kohya
+embedding_mapping = {
+    'text_encoders_0': 'clip_l',
+    'text_encoders_1': 'clip_g'
+}
+
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+KEYMAP_ROOT = os.path.join(PROJECT_ROOT, 'toolkit', 'keymaps')
+sdxl_keymap_path = os.path.join(KEYMAP_ROOT, 'stable_diffusion_locon_sdxl.json')
+
+# load keymap
+with open(sdxl_keymap_path, 'r') as f:
+    ldm_diffusers_keymap = json.load(f)['ldm_diffusers_keymap']
+
+# invert the item / key pairs
+diffusers_ldm_keymap = {v: k for k, v in ldm_diffusers_keymap.items()}
+
+
+def get_ldm_key(diffuser_key):
+    """Map a diffusers lora key to its ldm key using the inverted keymap.
+
+    The key is first rewritten into the ``lora_unet_*`` naming used by the
+    keymap file, then looked up.
+
+    Raises:
+        KeyError: if the rewritten key is not in the keymap.
+    """
+    diffuser_key = f"lora_unet_{diffuser_key.replace('.', '_')}"
+    diffuser_key = diffuser_key.replace('_lora_down_weight', '.lora_down.weight')
+    diffuser_key = diffuser_key.replace('_lora_up_weight', '.lora_up.weight')
+    diffuser_key = diffuser_key.replace('_alpha', '.alpha')
+    diffuser_key = diffuser_key.replace('_processor_to_', '_to_')
+    diffuser_key = diffuser_key.replace('_to_out.', '_to_out_0.')
+    if diffuser_key not in diffusers_ldm_keymap:
+        raise KeyError(f"Key {diffuser_key} not found in keymap")
+    return diffusers_ldm_keymap[diffuser_key]
+
+
+def convert_cog(lora_path, embedding_path):
+    """Load a lora and an embedding safetensors file and rename their keys.
+
+    Args:
+        lora_path: path to the safetensors file holding the lora weights.
+        embedding_path: path to the safetensors file holding the embeddings.
+
+    Returns:
+        ``(lora_state_dict, embedding_state_dict)``. Every
+        ``.lora_down.weight`` entry gets a matching ``.alpha`` entry set to
+        the detected lora rank.
+
+    Raises:
+        KeyError: if an embedding or lora key has no known mapping.
+        ValueError: if the lora rank is inconsistent or cannot be detected.
+    """
+    embedding_state_dict = OrderedDict()
+    lora_state_dict = OrderedDict()
+
+    with safe_open(embedding_path, framework="pt", device='cpu') as f:
+        for key in f.keys():
+            embedding_state_dict[embedding_mapping[key]] = f.get_tensor(key)
+
+    with safe_open(lora_path, framework="pt", device='cpu') as f:
+        keys = list(f.keys())
+        lora_rank = None
+        # keep the counter outside the loop so the early exit can trigger
+        num_checked = 0
+
+        # get the lora dim first. Check first 3 linear layers just to be safe
+        for key in keys:
+            tensor = f.get_tensor(key)
+            if len(tensor.shape) != 2:
+                continue
+            this_dim = min(tensor.shape)
+            if lora_rank is None:
+                lora_rank = this_dim
+            elif lora_rank != this_dim:
+                raise ValueError(f"lora rank is not consistent, got {tensor.shape}")
+            num_checked += 1
+            if num_checked >= 3:
+                break
+
+        for key in keys:
+            new_key = get_ldm_key(key)
+            tensor = f.get_tensor(key)
+            if new_key.endswith('.lora_down.weight'):
+                if lora_rank is None:
+                    raise ValueError("could not detect the lora rank, no 2D weights found")
+                alpha_key = new_key.replace('.lora_down.weight', '.alpha')
+                # diffusers stores no alpha and uses an alpha multiplier of 1,
+                # so write an alpha equal to the detected lora rank
+                lora_state_dict[alpha_key] = torch.ones(1).to(tensor.device, tensor.dtype) * lora_rank
+
+            lora_state_dict[new_key] = tensor
+
+    return lora_state_dict, embedding_state_dict
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'lora_path',
+        type=str,
+        help='Path to lora file'
+    )
+    parser.add_argument(
+        'embedding_path',
+        type=str,
+        help='Path to embedding file'
+    )
+
+    parser.add_argument(
+        '--lora_output',
+        type=str,
+        default="lora_output",
+    )
+
+    parser.add_argument(
+        '--embedding_output',
+        type=str,
+        default="embedding_output",
+    )
+
+    args = parser.parse_args()
+
+    lora_state_dict, embedding_state_dict = convert_cog(args.lora_path, args.embedding_path)
+
+    # save them
+    save_file(lora_state_dict, args.lora_output)
+    save_file(embedding_state_dict, args.embedding_output)
+    print(f"Saved lora to {args.lora_output}")
+    print(f"Saved embedding to {args.embedding_output}")
diff --git a/scripts/make_diffusers_model.py b/scripts/make_diffusers_model.py
new file mode 100644
index 00000000..1ec6c93a
--- /dev/null
+++ b/scripts/make_diffusers_model.py
@@ -0,0 +1,57 @@
+import argparse
+from collections import OrderedDict
+
+import torch
+
+from toolkit.config_modules import ModelConfig
+from toolkit.stable_diffusion_model import StableDiffusion
+
+# CLI: positional input / output paths plus flags describing the model type
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    'input_path',
+    type=str,
+    help='Path to original sdxl model'
+)
+parser.add_argument(
+    'output_path',
+    type=str,
+    help='output path'
+)
+parser.add_argument('--sdxl', action='store_true', help='is sdxl model')
+parser.add_argument('--refiner', action='store_true', help='is refiner model')  # NOTE(review): parsed but never read below
+parser.add_argument('--ssd', action='store_true', help='is ssd model')
+parser.add_argument('--sd2', action='store_true', help='is sd 2 model')
+
+args = parser.parse_args()
+device = torch.device('cpu')
+dtype = torch.float32
+
+print(f"Loading model from {args.input_path}")
+
+# build the model config from the CLI flags and load the model on cpu in float32
+diffusers_model_config = ModelConfig(
+        name_or_path=args.input_path,
+        is_xl=args.sdxl,
+        is_v2=args.sd2,
+        is_ssd=args.ssd,
+        dtype=dtype,
+    )
+diffusers_sd = StableDiffusion(
+    model_config=diffusers_model_config,
+    device=device,
+    dtype=dtype,
+)
+diffusers_sd.load_model()
+
+
+print(f"Loaded model from {args.input_path}")
+# NOTE(review): this script loads no lora before fusing - confirm the call below is needed
+diffusers_sd.pipeline.fuse_lora()
+
+meta = OrderedDict()  # empty metadata passed to save()
+
+diffusers_sd.save(args.output_path, meta=meta)
+
+
+print(f"Saved to {args.output_path}")
diff --git a/scripts/make_lcm_sdxl_model.py b/scripts/make_lcm_sdxl_model.py
new file mode 100644
index 00000000..20e95ce7
--- /dev/null
+++ b/scripts/make_lcm_sdxl_model.py
@@ -0,0 +1,71 @@
+"""Fuse the matching LCM lora into a model and save the fused model."""
+import argparse
+from collections import OrderedDict
+
+import torch
+
+from toolkit.config_modules import ModelConfig
+from toolkit.stable_diffusion_model import StableDiffusion
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    'input_path',
+    type=str,
+    help='Path to original sdxl model'
+)
+parser.add_argument(
+    'output_path',
+    type=str,
+    help='output path'
+)
+parser.add_argument('--sdxl', action='store_true', help='is sdxl model')
+parser.add_argument('--refiner', action='store_true', help='is refiner model')
+parser.add_argument('--ssd', action='store_true', help='is ssd model')
+parser.add_argument('--sd2', action='store_true', help='is sd 2 model')
+
+args = parser.parse_args()
+device = torch.device('cpu')
+dtype = torch.float32
+
+print(f"Loading model from {args.input_path}")
+
+# pick the LCM lora for the model type. This must stay one if / elif chain:
+# a separate `if` for --sdxl would let the trailing else override its choice
+if args.refiner:
+    adapter_id = "latent-consistency/lcm-lora-sdxl"
+elif args.ssd:
+    adapter_id = "latent-consistency/lcm-lora-ssd-1b"
+elif args.sdxl:
+    adapter_id = "latent-consistency/lcm-lora-sdxl"
+else:
+    adapter_id = "latent-consistency/lcm-lora-sdv1-5"
+
+
+diffusers_model_config = ModelConfig(
+        name_or_path=args.input_path,
+        is_xl=args.sdxl,
+        is_v2=args.sd2,
+        is_ssd=args.ssd,
+        dtype=dtype,
+    )
+diffusers_sd = StableDiffusion(
+    model_config=diffusers_model_config,
+    device=device,
+    dtype=dtype,
+)
+diffusers_sd.load_model()
+
+
+print(f"Loaded model from {args.input_path}")
+
+# load the selected LCM lora, then fuse it into the pipeline before saving
+diffusers_sd.pipeline.load_lora_weights(adapter_id)
+diffusers_sd.pipeline.fuse_lora()
+
+meta = OrderedDict()
+
+diffusers_sd.save(args.output_path, meta=meta)
+
+
+print(f"Saved to {args.output_path}")
diff --git a/scripts/train_dreambooth.py b/scripts/train_dreambooth.py
deleted file mode 100644
index 8442ddeb..00000000
--- a/scripts/train_dreambooth.py
+++ /dev/null
@@ -1,547 +0,0 @@
-import gc
-import time
-import argparse
-import itertools
-import math
-import os
-from multiprocessing import Value
-
-from tqdm import tqdm
-import torch
-from accelerate.utils import set_seed
-import diffusers
-from diffusers import DDPMScheduler
-
-import library.train_util as train_util
-import library.config_util as config_util
-from library.config_util import (
-    ConfigSanitizer,
-    BlueprintGenerator,
-)
-import custom_tools.train_tools as train_tools
-import library.custom_train_functions as custom_train_functions
-from library.custom_train_functions import (
-    apply_snr_weight,
-    get_weighted_text_embeddings,
-    prepare_scheduler_for_custom_training,
-    pyramid_noise_like,
-    apply_noise_offset,
-    scale_v_prediction_loss_like_noise_prediction,
-)
-
-# perlin_noise,
-
-PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-SD_SCRIPTS_ROOT = os.path.join(PROJECT_ROOT, "repositories", "sd-scripts")
-
-
-def train(args):
-    train_util.verify_training_args(args)
-    train_util.prepare_dataset_args(args, False)
-
-    cache_latents = args.cache_latents
-
-    if args.seed is not None:
-        set_seed(args.seed)  # 乱数系列を初期化する
-
-    tokenizer = train_util.load_tokenizer(args)
-
-    # データセットを準備する
-    if args.dataset_class is None:
-        blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, False, True))
-        if args.dataset_config is not None:
-            print(f"Load dataset config from {args.dataset_config}")
-            user_config = config_util.load_user_config(args.dataset_config)
-            ignored = ["train_data_dir", "reg_data_dir"]
-            if any(getattr(args, attr) is not None for attr in ignored):
-                print(
-                    "ignore following options because config file is found: {0} / 設定ファイルが利用されるため以下のオプションは無視されます: {0}".format(
-                        ", ".join(ignored)
-                    )
-                )
-        else:
-            user_config = {
-                "datasets": [
-                    {"subsets": config_util.generate_dreambooth_subsets_config_by_subdirs(args.train_data_dir, args.reg_data_dir)}
-                ]
-            }
-
-        blueprint = blueprint_generator.generate(user_config, args, tokenizer=tokenizer)
-        train_dataset_group = config_util.generate_dataset_group_by_blueprint(blueprint.dataset_group)
-    else:
-        train_dataset_group = train_util.load_arbitrary_dataset(args, tokenizer)
-
-    current_epoch = Value("i", 0)
-    current_step = Value("i", 0)
-    ds_for_collater = train_dataset_group if args.max_data_loader_n_workers == 0 else None
-    collater = train_util.collater_class(current_epoch, current_step, ds_for_collater)
-
-    if args.no_token_padding:
-        train_dataset_group.disable_token_padding()
-
-    if args.debug_dataset:
-        train_util.debug_dataset(train_dataset_group)
-        return
-
-    if cache_latents:
-        assert (
-            train_dataset_group.is_latent_cacheable()
-        ), "when caching latents, either color_aug or random_crop cannot be used / latentをキャッシュするときはcolor_augとrandom_cropは使えません"
-
-    # replace captions with names
-    if args.name_replace is not None:
-        print(f"Replacing captions [name] with '{args.name_replace}'")
-
-        train_dataset_group = train_tools.replace_filewords_in_dataset_group(
-            train_dataset_group, args
-        )
-
-    # acceleratorを準備する
-    print("prepare accelerator")
-
-    if args.gradient_accumulation_steps > 1:
-        print(
-            f"gradient_accumulation_steps is {args.gradient_accumulation_steps}. accelerate does not support gradient_accumulation_steps when training multiple models (U-Net and Text Encoder), so something might be wrong"
-        )
-        print(
-            f"gradient_accumulation_stepsが{args.gradient_accumulation_steps}に設定されています。accelerateは複数モデル（U-NetおよびText Encoder）の学習時にgradient_accumulation_stepsをサポートしていないため結果は未知数です"
-        )
-
-    accelerator, unwrap_model = train_util.prepare_accelerator(args)
-
-    # mixed precisionに対応した型を用意しておき適宜castする
-    weight_dtype, save_dtype = train_util.prepare_dtype(args)
-
-    # モデルを読み込む
-    text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype, accelerator)
-
-    # verify load/save model formats
-    if load_stable_diffusion_format:
-        src_stable_diffusion_ckpt = args.pretrained_model_name_or_path
-        src_diffusers_model_path = None
-    else:
-        src_stable_diffusion_ckpt = None
-        src_diffusers_model_path = args.pretrained_model_name_or_path
-
-    if args.save_model_as is None:
-        save_stable_diffusion_format = load_stable_diffusion_format
-        use_safetensors = args.use_safetensors
-    else:
-        save_stable_diffusion_format = args.save_model_as.lower() == "ckpt" or args.save_model_as.lower() == "safetensors"
-        use_safetensors = args.use_safetensors or ("safetensors" in args.save_model_as.lower())
-
-    # モデルに xformers とか memory efficient attention を組み込む
-    train_util.replace_unet_modules(unet, args.mem_eff_attn, args.xformers)
-
-    # 学習を準備する
-    if cache_latents:
-        vae.to(accelerator.device, dtype=weight_dtype)
-        vae.requires_grad_(False)
-        vae.eval()
-        with torch.no_grad():
-            train_dataset_group.cache_latents(vae, args.vae_batch_size, args.cache_latents_to_disk, accelerator.is_main_process)
-        vae.to("cpu")
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        gc.collect()
-
-        accelerator.wait_for_everyone()
-
-    # 学習を準備する：モデルを適切な状態にする
-    train_text_encoder = args.stop_text_encoder_training is None or args.stop_text_encoder_training >= 0
-    unet.requires_grad_(True)  # 念のため追加
-    text_encoder.requires_grad_(train_text_encoder)
-    if not train_text_encoder:
-        print("Text Encoder is not trained.")
-
-    if args.gradient_checkpointing:
-        unet.enable_gradient_checkpointing()
-        text_encoder.gradient_checkpointing_enable()
-
-    if not cache_latents:
-        vae.requires_grad_(False)
-        vae.eval()
-        vae.to(accelerator.device, dtype=weight_dtype)
-
-    # 学習に必要なクラスを準備する
-    print("prepare optimizer, data loader etc.")
-    if train_text_encoder:
-        trainable_params = itertools.chain(unet.parameters(), text_encoder.parameters())
-    else:
-        trainable_params = unet.parameters()
-
-    _, _, optimizer = train_util.get_optimizer(args, trainable_params)
-
-    # dataloaderを準備する
-    # DataLoaderのプロセス数：0はメインプロセスになる
-    n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1)  # cpu_count-1 ただし最大で指定された数まで
-    train_dataloader = torch.utils.data.DataLoader(
-        train_dataset_group,
-        batch_size=1,
-        shuffle=True,
-        collate_fn=collater,
-        num_workers=n_workers,
-        persistent_workers=args.persistent_data_loader_workers,
-    )
-
-    # 学習ステップ数を計算する
-    if args.max_train_epochs is not None:
-        args.max_train_steps = args.max_train_epochs * math.ceil(
-            len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
-        )
-        print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
-
-    # データセット側にも学習ステップを送信
-    train_dataset_group.set_max_train_steps(args.max_train_steps)
-
-    if args.stop_text_encoder_training is None:
-        args.stop_text_encoder_training = args.max_train_steps + 1  # do not stop until end
-
-    # lr schedulerを用意する TODO gradient_accumulation_stepsの扱いが何かおかしいかもしれない。後で確認する
-    lr_scheduler = train_util.get_scheduler_fix(args, optimizer, accelerator.num_processes)
-
-    # 実験的機能：勾配も含めたfp16学習を行う　モデル全体をfp16にする
-    if args.full_fp16:
-        assert (
-            args.mixed_precision == "fp16"
-        ), "full_fp16 requires mixed precision='fp16' / full_fp16を使う場合はmixed_precision='fp16'を指定してください。"
-        print("enable full fp16 training.")
-        unet.to(weight_dtype)
-        text_encoder.to(weight_dtype)
-
-    # acceleratorがなんかよろしくやってくれるらしい
-    if train_text_encoder:
-        unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
-            unet, text_encoder, optimizer, train_dataloader, lr_scheduler
-        )
-    else:
-        unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
-
-    # transform DDP after prepare
-    text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
-
-    if not train_text_encoder:
-        text_encoder.to(accelerator.device, dtype=weight_dtype)  # to avoid 'cpu' vs 'cuda' error
-
-    # 実験的機能：勾配も含めたfp16学習を行う　PyTorchにパッチを当ててfp16でのgrad scaleを有効にする
-    if args.full_fp16:
-        train_util.patch_accelerator_for_fp16_training(accelerator)
-
-    # resumeする
-    train_util.resume_from_local_or_hf_if_specified(accelerator, args)
-
-    # epoch数を計算する
-    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
-    num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
-    if (args.save_n_epoch_ratio is not None) and (args.save_n_epoch_ratio > 0):
-        args.save_every_n_epochs = math.floor(num_train_epochs / args.save_n_epoch_ratio) or 1
-
-    # 学習する
-    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
-    print("running training / 学習開始")
-    print(f"  num train images * repeats / 学習画像の数×繰り返し回数: {train_dataset_group.num_train_images}")
-    print(f"  num reg images / 正則化画像の数: {train_dataset_group.num_reg_images}")
-    print(f"  num batches per epoch / 1epochのバッチ数: {len(train_dataloader)}")
-    print(f"  num epochs / epoch数: {num_train_epochs}")
-    print(f"  batch size per device / バッチサイズ: {args.train_batch_size}")
-    print(f"  total train batch size (with parallel & distributed & accumulation) / 総バッチサイズ（並列学習、勾配合計含む）: {total_batch_size}")
-    print(f"  gradient ccumulation steps / 勾配を合計するステップ数 = {args.gradient_accumulation_steps}")
-    print(f"  total optimization steps / 学習ステップ数: {args.max_train_steps}")
-
-    progress_bar = tqdm(range(args.max_train_steps), smoothing=0, disable=not accelerator.is_local_main_process, desc="steps")
-    global_step = 0
-
-    noise_scheduler = DDPMScheduler(
-        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000, clip_sample=False
-    )
-    prepare_scheduler_for_custom_training(noise_scheduler, accelerator.device)
-
-    if accelerator.is_main_process:
-        accelerator.init_trackers("dreambooth" if args.log_tracker_name is None else args.log_tracker_name)
-
-    if args.sample_first or args.sample_only:
-        # Do initial sample before starting training
-        train_tools.sample_images(accelerator, args, 0, global_step, accelerator.device, vae, tokenizer,
-                                 text_encoder, unet, force_sample=True)
-
-    if args.sample_only:
-        return
-    loss_list = []
-    loss_total = 0.0
-    for epoch in range(num_train_epochs):
-        print(f"\nepoch {epoch+1}/{num_train_epochs}")
-        current_epoch.value = epoch + 1
-
-        # 指定したステップ数までText Encoderを学習する：epoch最初の状態
-        unet.train()
-        # train==True is required to enable gradient_checkpointing
-        if args.gradient_checkpointing or global_step < args.stop_text_encoder_training:
-            text_encoder.train()
-
-        for step, batch in enumerate(train_dataloader):
-            current_step.value = global_step
-            # 指定したステップ数でText Encoderの学習を止める
-            if global_step == args.stop_text_encoder_training:
-                print(f"stop text encoder training at step {global_step}")
-                if not args.gradient_checkpointing:
-                    text_encoder.train(False)
-                text_encoder.requires_grad_(False)
-
-            with accelerator.accumulate(unet):
-                with torch.no_grad():
-                    # latentに変換
-                    if cache_latents:
-                        latents = batch["latents"].to(accelerator.device)
-                    else:
-                        latents = vae.encode(batch["images"].to(dtype=weight_dtype)).latent_dist.sample()
-                    latents = latents * 0.18215
-                b_size = latents.shape[0]
-
-                # Sample noise that we'll add to the latents
-                if args.train_noise_seed is not None:
-                    torch.manual_seed(args.train_noise_seed)
-                    torch.cuda.manual_seed(args.train_noise_seed)
-                    # make same seed for each item in the batch by stacking them
-                    single_noise = torch.randn_like(latents[0])
-                    noise = torch.stack([single_noise for _ in range(b_size)])
-                    noise = noise.to(latents.device)
-                elif args.seed_lock:
-                    noise = train_tools.get_noise_from_latents(latents)
-                else:
-                    noise = torch.randn_like(latents, device=latents.device)
-
-                if args.noise_offset:
-                    noise = apply_noise_offset(latents, noise, args.noise_offset, args.adaptive_noise_scale)
-                elif args.multires_noise_iterations:
-                    noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount)
-                # elif args.perlin_noise:
-                #     noise = perlin_noise(noise, latents.device, args.perlin_noise)  # only shape of noise is used currently
-
-                # Get the text embedding for conditioning
-                with torch.set_grad_enabled(global_step < args.stop_text_encoder_training):
-                    if args.weighted_captions:
-                        encoder_hidden_states = get_weighted_text_embeddings(
-                            tokenizer,
-                            text_encoder,
-                            batch["captions"],
-                            accelerator.device,
-                            args.max_token_length // 75 if args.max_token_length else 1,
-                            clip_skip=args.clip_skip,
-                        )
-                    else:
-                        input_ids = batch["input_ids"].to(accelerator.device)
-                        encoder_hidden_states = train_util.get_hidden_states(
-                            args, input_ids, tokenizer, text_encoder, None if not args.full_fp16 else weight_dtype
-                        )
-
-                # Sample a random timestep for each image
-                timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device)
-                timesteps = timesteps.long()
-
-                # Add noise to the latents according to the noise magnitude at each timestep
-                # (this is the forward diffusion process)
-                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
-
-                # Predict the noise residual
-                with accelerator.autocast():
-                    noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample
-
-                if args.v_parameterization:
-                    # v-parameterization training
-                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
-                else:
-                    target = noise
-
-                loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none")
-                loss = loss.mean([1, 2, 3])
-
-                loss_weights = batch["loss_weights"]  # 各sampleごとのweight
-                loss = loss * loss_weights
-
-                if args.min_snr_gamma:
-                    loss = apply_snr_weight(loss, timesteps, noise_scheduler, args.min_snr_gamma)
-                if args.scale_v_pred_loss_like_noise_pred:
-                    loss = scale_v_prediction_loss_like_noise_prediction(loss, timesteps, noise_scheduler)
-
-                loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし
-
-                accelerator.backward(loss)
-                if accelerator.sync_gradients and args.max_grad_norm != 0.0:
-                    if train_text_encoder:
-                        params_to_clip = itertools.chain(unet.parameters(), text_encoder.parameters())
-                    else:
-                        params_to_clip = unet.parameters()
-                    accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm)
-
-                optimizer.step()
-                lr_scheduler.step()
-                optimizer.zero_grad(set_to_none=True)
-
-            # Checks if the accelerator has performed an optimization step behind the scenes
-            if accelerator.sync_gradients:
-                progress_bar.update(1)
-                global_step += 1
-
-                train_util.sample_images(
-                    accelerator, args, None, global_step, accelerator.device, vae, tokenizer, text_encoder, unet
-                )
-
-                # 指定ステップごとにモデルを保存
-                if args.save_every_n_steps is not None and global_step % args.save_every_n_steps == 0:
-                    accelerator.wait_for_everyone()
-                    if accelerator.is_main_process:
-                        src_path = src_stable_diffusion_ckpt if save_stable_diffusion_format else src_diffusers_model_path
-                        train_util.save_sd_model_on_epoch_end_or_stepwise(
-                            args,
-                            False,
-                            accelerator,
-                            src_path,
-                            save_stable_diffusion_format,
-                            use_safetensors,
-                            save_dtype,
-                            epoch,
-                            num_train_epochs,
-                            global_step,
-                            unwrap_model(text_encoder),
-                            unwrap_model(unet),
-                            vae,
-                        )
-
-            current_loss = loss.detach().item()
-            if args.logging_dir is not None:
-                logs = {"loss": current_loss, "lr": float(lr_scheduler.get_last_lr()[0])}
-                if args.optimizer_type.lower().startswith("DAdapt".lower()) or args.optimizer_type.lower() == "Prodigy".lower():  # tracking d*lr value
-                    logs["lr/d*lr"] = (
-                        lr_scheduler.optimizers[0].param_groups[0]["d"] * lr_scheduler.optimizers[0].param_groups[0]["lr"]
-                    )
-                accelerator.log(logs, step=global_step)
-
-            if epoch == 0:
-                loss_list.append(current_loss)
-            else:
-                loss_total -= loss_list[step]
-                loss_list[step] = current_loss
-            loss_total += current_loss
-            avr_loss = loss_total / len(loss_list)
-            logs = {"loss": avr_loss}  # , "lr": lr_scheduler.get_last_lr()[0]}
-            progress_bar.set_postfix(**logs)
-
-            if global_step >= args.max_train_steps:
-                break
-
-        if args.logging_dir is not None:
-            logs = {"loss/epoch": loss_total / len(loss_list)}
-            accelerator.log(logs, step=epoch + 1)
-
-        accelerator.wait_for_everyone()
-
-        if args.save_every_n_epochs is not None:
-            if accelerator.is_main_process:
-                # checking for saving is in util
-                src_path = src_stable_diffusion_ckpt if save_stable_diffusion_format else src_diffusers_model_path
-                train_util.save_sd_model_on_epoch_end_or_stepwise(
-                    args,
-                    True,
-                    accelerator,
-                    src_path,
-                    save_stable_diffusion_format,
-                    use_safetensors,
-                    save_dtype,
-                    epoch,
-                    num_train_epochs,
-                    global_step,
-                    unwrap_model(text_encoder),
-                    unwrap_model(unet),
-                    vae,
-                )
-
-        train_util.sample_images(accelerator, args, epoch + 1, global_step, accelerator.device, vae, tokenizer, text_encoder, unet)
-
-    is_main_process = accelerator.is_main_process
-    if is_main_process:
-        unet = unwrap_model(unet)
-        text_encoder = unwrap_model(text_encoder)
-
-    accelerator.end_training()
-
-    if args.save_state and is_main_process:
-        train_util.save_state_on_train_end(args, accelerator)
-
-    del accelerator  # この後メモリを使うのでこれは消す
-
-    if is_main_process:
-        src_path = src_stable_diffusion_ckpt if save_stable_diffusion_format else src_diffusers_model_path
-        train_util.save_sd_model_on_train_end(
-            args, src_path, save_stable_diffusion_format, use_safetensors, save_dtype, epoch, global_step, text_encoder, unet, vae
-        )
-        print("model saved.")
-
-
-def setup_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser()
-
-    train_util.add_sd_models_arguments(parser)
-    train_util.add_dataset_arguments(parser, True, False, True)
-    train_util.add_training_arguments(parser, True)
-    train_util.add_sd_saving_arguments(parser)
-    train_util.add_optimizer_arguments(parser)
-    config_util.add_config_arguments(parser)
-    custom_train_functions.add_custom_train_arguments(parser)
-
-    parser.add_argument(
-        "--no_token_padding",
-        action="store_true",
-        help="disable token padding (same as Diffuser's DreamBooth) / トークンのpaddingを無効にする（Diffusers版DreamBoothと同じ動作）",
-    )
-    parser.add_argument(
-        "--stop_text_encoder_training",
-        type=int,
-        default=None,
-        help="steps to stop text encoder training, -1 for no training / Text Encoderの学習を止めるステップ数、-1で最初から学習しない",
-    )
-
-    parser.add_argument(
-        "--sample_first",
-        action="store_true",
-        help="Sample first interval before training",
-        default=False
-    )
-
-    parser.add_argument(
-        "--name_replace",
-        type=str,
-        help="Replaces [name] in prompts. Used is sampling, training, and regs",
-        default=None
-    )
-
-    parser.add_argument(
-        "--train_noise_seed",
-        type=int,
-        help="Use custom seed for training noise",
-        default=None
-    )
-
-    parser.add_argument(
-        "--sample_only",
-        action="store_true",
-        help="Only generate samples. Used for generating training data with specific seeds to alter during training",
-        default=False
-    )
-
-    parser.add_argument(
-        "--seed_lock",
-        action="store_true",
-        help="Locks the seed to the latent images so the same latent will always have the same noise",
-        default=False
-    )
-
-    return parser
-
-
-if __name__ == "__main__":
-    parser = setup_parser()
-
-    args = parser.parse_args()
-    args = train_util.read_config_from_file(args, parser)
-
-    train(args)
diff --git a/testing/generate_lora_mapping.py b/testing/generate_lora_mapping.py
new file mode 100644
index 00000000..e632d2a6
--- /dev/null
+++ b/testing/generate_lora_mapping.py
@@ -0,0 +1,130 @@
+from collections import OrderedDict
+
+import torch
+from safetensors.torch import load_file
+import argparse
+import os
+import json
+
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+keymap_path = os.path.join(PROJECT_ROOT, 'toolkit', 'keymaps', 'stable_diffusion_sdxl.json')
+
+# load keymap
+with open(keymap_path, 'r') as f:
+    keymap = json.load(f)
+
+lora_keymap = OrderedDict()
+
+# convert keymap to lora key naming
+for ldm_key, diffusers_key in keymap['ldm_diffusers_keymap'].items():
+    if ldm_key.endswith('.bias') or diffusers_key.endswith('.bias'):
+        # skip it
+        continue
+    # sdxl has same te for locon with kohya and ours
+    if ldm_key.startswith('conditioner'):
+        #skip it
+        continue
+    # ignore vae
+    if ldm_key.startswith('first_stage_model'):
+        continue
+    ldm_key = ldm_key.replace('model.diffusion_model.', 'lora_unet_')
+    ldm_key = ldm_key.replace('.weight', '')
+    ldm_key = ldm_key.replace('.', '_')
+
+    diffusers_key = diffusers_key.replace('unet_', 'lora_unet_')
+    diffusers_key = diffusers_key.replace('.weight', '')
+    diffusers_key = diffusers_key.replace('.', '_')
+
+    lora_keymap[f"{ldm_key}.alpha"] = f"{diffusers_key}.alpha"
+    lora_keymap[f"{ldm_key}.lora_down.weight"] = f"{diffusers_key}.lora_down.weight"
+    lora_keymap[f"{ldm_key}.lora_up.weight"] = f"{diffusers_key}.lora_up.weight"
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("input", help="input file")
+parser.add_argument("input2", help="input2 file")
+
+args = parser.parse_args()
+
+# name = args.name
+# if args.sdxl:
+#     name += '_sdxl'
+# elif args.sd2:
+#     name += '_sd2'
+# else:
+#     name += '_sd1'
+name = 'stable_diffusion_locon_sdxl'
+
+locon_save = load_file(args.input)
+our_save = load_file(args.input2)
+
+our_extra_keys = list(set(our_save.keys()) - set(locon_save.keys()))
+locon_extra_keys = list(set(locon_save.keys()) - set(our_save.keys()))
+
+print(f"we have {len(our_extra_keys)} extra keys")
+print(f"locon has {len(locon_extra_keys)} extra keys")
+
+save_dtype = torch.float16
+print(f"our extra keys: {our_extra_keys}")
+print(f"locon extra keys: {locon_extra_keys}")
+
+
+def export_state_dict(our_save):
+    converted_state_dict = OrderedDict()
+    for key, value in our_save.items():
+        # text encoders share keys for some reason
+        if key.startswith('lora_te'):
+            converted_state_dict[key] = value.detach().to('cpu', dtype=save_dtype)
+        else:
+            converted_key = key
+            for ldm_key, diffusers_key in lora_keymap.items():
+                if converted_key == diffusers_key:
+                    converted_key = ldm_key
+
+            converted_state_dict[converted_key] = value.detach().to('cpu', dtype=save_dtype)
+    return converted_state_dict
+
+def import_state_dict(loaded_state_dict):
+    converted_state_dict = OrderedDict()
+    for key, value in loaded_state_dict.items():
+        if key.startswith('lora_te'):
+            converted_state_dict[key] = value.detach().to('cpu', dtype=save_dtype)
+        else:
+            converted_key = key
+            for ldm_key, diffusers_key in lora_keymap.items():
+                if converted_key == ldm_key:
+                    converted_key = diffusers_key
+
+            converted_state_dict[converted_key] = value.detach().to('cpu', dtype=save_dtype)
+    return converted_state_dict
+
+
+# check it again
+converted_state_dict = export_state_dict(our_save)
+converted_extra_keys = list(set(converted_state_dict.keys()) - set(locon_save.keys()))
+locon_extra_keys = list(set(locon_save.keys()) - set(converted_state_dict.keys()))
+
+
+print(f"we have {len(converted_extra_keys)} extra keys")
+print(f"locon has {len(locon_extra_keys)} extra keys")
+
+print(f"our extra keys: {converted_extra_keys}")
+
+# convert back
+cycle_state_dict = import_state_dict(converted_state_dict)
+cycle_extra_keys = list(set(cycle_state_dict.keys()) - set(our_save.keys()))
+our_extra_keys = list(set(our_save.keys()) - set(cycle_state_dict.keys()))
+
+print(f"we have {len(our_extra_keys)} extra keys")
+print(f"cycle has {len(cycle_extra_keys)} extra keys")
+
+# save keymap
+to_save = OrderedDict()
+to_save['ldm_diffusers_keymap'] = lora_keymap
+
+with open(os.path.join(PROJECT_ROOT, 'toolkit', 'keymaps', f'{name}.json'), 'w') as f:
+    json.dump(to_save, f, indent=4)
+
+
+
diff --git a/testing/generate_weight_mappings.py b/testing/generate_weight_mappings.py
index 17425a58..c063a9f7 100644
--- a/testing/generate_weight_mappings.py
+++ b/testing/generate_weight_mappings.py
@@ -6,6 +6,8 @@ import os
 # add project root to sys path
 import sys
 
+from diffusers import DiffusionPipeline, StableDiffusionXLPipeline
+
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import torch
@@ -50,6 +52,8 @@ parser.add_argument(
 
 parser.add_argument('--name', type=str, default='stable_diffusion', help='name for mapping to make')
 parser.add_argument('--sdxl', action='store_true', help='is sdxl model')
+parser.add_argument('--refiner', action='store_true', help='is refiner model')
+parser.add_argument('--ssd', action='store_true', help='is ssd model')
 parser.add_argument('--sd2', action='store_true', help='is sd 2 model')
 
 args = parser.parse_args()
@@ -60,24 +64,76 @@ find_matches = False
 
 print(f'Loading diffusers model')
 
-diffusers_model_config = ModelConfig(
-    name_or_path=file_path,
-    is_xl=args.sdxl,
-    is_v2=args.sd2,
-    dtype=dtype,
-)
-diffusers_sd = StableDiffusion(
-    model_config=diffusers_model_config,
-    device=device,
-    dtype=dtype,
-)
-diffusers_sd.load_model()
-# delete things we dont need
-del diffusers_sd.tokenizer
-flush()
+ignore_ldm_begins_with = []
+
+diffusers_file_path = file_path
+if args.ssd:
+    diffusers_file_path = "segmind/SSD-1B"
+
+# if args.refiner:
+#     diffusers_file_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
+
+diffusers_file_path = file_path if len(args.file_1) == 1 else args.file_1[1]
+
+if not args.refiner:
+
+    diffusers_model_config = ModelConfig(
+        name_or_path=diffusers_file_path,
+        is_xl=args.sdxl,
+        is_v2=args.sd2,
+        is_ssd=args.ssd,
+        dtype=dtype,
+    )
+    diffusers_sd = StableDiffusion(
+        model_config=diffusers_model_config,
+        device=device,
+        dtype=dtype,
+    )
+    diffusers_sd.load_model()
+    # delete things we dont need
+    del diffusers_sd.tokenizer
+    flush()
+
+    print(f'Loading ldm model')
+    diffusers_state_dict = diffusers_sd.state_dict()
+else:
+    # refiner wont work directly with stable diffusion
+    # so we need to load the model and then load the state dict
+    diffusers_pipeline = StableDiffusionXLPipeline.from_single_file(
+        diffusers_file_path,
+        torch_dtype=torch.float16,
+        use_safetensors=True,
+        variant="fp16",
+    ).to(device)
+    # diffusers_pipeline = StableDiffusionXLPipeline.from_single_file(
+    #     file_path,
+    #     torch_dtype=torch.float16,
+    #     use_safetensors=True,
+    #     variant="fp16",
+    # ).to(device)
+
+    SD_PREFIX_VAE = "vae"
+    SD_PREFIX_UNET = "unet"
+    SD_PREFIX_REFINER_UNET = "refiner_unet"
+    SD_PREFIX_TEXT_ENCODER = "te"
+
+    SD_PREFIX_TEXT_ENCODER1 = "te0"
+    SD_PREFIX_TEXT_ENCODER2 = "te1"
+
+    diffusers_state_dict = OrderedDict()
+    for k, v in diffusers_pipeline.vae.state_dict().items():
+        new_key = k if k.startswith(f"{SD_PREFIX_VAE}") else f"{SD_PREFIX_VAE}_{k}"
+        diffusers_state_dict[new_key] = v
+    for k, v in diffusers_pipeline.text_encoder_2.state_dict().items():
+        new_key = k if k.startswith(f"{SD_PREFIX_TEXT_ENCODER2}_") else f"{SD_PREFIX_TEXT_ENCODER2}_{k}"
+        diffusers_state_dict[new_key] = v
+    for k, v in diffusers_pipeline.unet.state_dict().items():
+        new_key = k if k.startswith(f"{SD_PREFIX_UNET}_") else f"{SD_PREFIX_UNET}_{k}"
+        diffusers_state_dict[new_key] = v
+
+    # add ignore ones as we are only going to focus on unet and copy the rest
+    # ignore_ldm_begins_with = ["conditioner.", "first_stage_model."]
 
-print(f'Loading ldm model')
-diffusers_state_dict = diffusers_sd.state_dict()
 diffusers_dict_keys = list(diffusers_state_dict.keys())
 
 ldm_state_dict = load_file(file_path)
@@ -93,18 +149,26 @@ total_keys = len(ldm_dict_keys)
 matched_ldm_keys = []
 matched_diffusers_keys = []
 
-error_margin = 1e-4
+error_margin = 1e-8
+
+tmp_merge_key = "TMP___MERGE"
 
 te_suffix = ''
 proj_pattern_weight = None
 proj_pattern_bias = None
 text_proj_layer = None
-if args.sdxl:
+if args.sdxl or args.ssd:
     te_suffix = '1'
     ldm_res_block_prefix = "conditioner.embedders.1.model.transformer.resblocks"
     proj_pattern_weight = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight"
     proj_pattern_bias = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias"
     text_proj_layer = "conditioner.embedders.1.model.text_projection"
+if args.refiner:
+    te_suffix = '1'
+    ldm_res_block_prefix = "conditioner.embedders.0.model.transformer.resblocks"
+    proj_pattern_weight = r"conditioner\.embedders\.0\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight"
+    proj_pattern_bias = r"conditioner\.embedders\.0\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias"
+    text_proj_layer = "conditioner.embedders.0.model.text_projection"
 if args.sd2:
     te_suffix = ''
     ldm_res_block_prefix = "cond_stage_model.model.transformer.resblocks"
@@ -112,10 +176,13 @@ if args.sd2:
     proj_pattern_bias = r"cond_stage_model\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias"
     text_proj_layer = "cond_stage_model.model.text_projection"
 
-if args.sdxl or args.sd2:
+if args.sdxl or args.sd2 or args.ssd or args.refiner:
     if "conditioner.embedders.1.model.text_projection" in ldm_dict_keys:
         # d_model = int(checkpoint[prefix + "text_projection"].shape[0]))
         d_model = int(ldm_state_dict["conditioner.embedders.1.model.text_projection"].shape[0])
+    elif "conditioner.embedders.0.model.text_projection" in ldm_dict_keys:
+        # d_model = int(checkpoint[prefix + "text_projection"].shape[0]))
+        d_model = int(ldm_state_dict["conditioner.embedders.0.model.text_projection"].shape[0])
     else:
         d_model = 1024
 
@@ -139,7 +206,7 @@ if args.sdxl or args.sd2:
                     f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight")
                 # make diffusers convertable_dict
                 diffusers_state_dict[
-                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight"] = new_val
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.{tmp_merge_key}.weight"] = new_val
 
                 # add operator
                 ldm_operator_map[ldm_key] = {
@@ -148,7 +215,6 @@ if args.sdxl or args.sd2:
                         f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight",
                         f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight",
                     ],
-                    "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight"
                 }
 
                 # text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :]
@@ -189,7 +255,7 @@ if args.sdxl or args.sd2:
                 matched_diffusers_keys.append(f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias")
                 # make diffusers convertable_dict
                 diffusers_state_dict[
-                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias"] = new_val
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.{tmp_merge_key}.bias"] = new_val
 
                 # add operator
                 ldm_operator_map[ldm_key] = {
@@ -198,7 +264,6 @@ if args.sdxl or args.sd2:
                         f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias",
                         f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias",
                     ],
-                    # "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias"
                 }
 
                 # add diffusers operators
@@ -237,11 +302,11 @@ for ldm_key in ldm_dict_keys:
         diffusers_reduced_shape_tuple = get_reduced_shape(diffusers_shape_tuple)
 
         # That was easy. Same key
-        if ldm_key == diffusers_key:
-            ldm_diffusers_keymap[ldm_key] = diffusers_key
-            matched_ldm_keys.append(ldm_key)
-            matched_diffusers_keys.append(diffusers_key)
-            break
+        # if ldm_key == diffusers_key:
+        #     ldm_diffusers_keymap[ldm_key] = diffusers_key
+        #     matched_ldm_keys.append(ldm_key)
+        #     matched_diffusers_keys.append(diffusers_key)
+        #     break
 
         # if we already have this key mapped, skip it
         if diffusers_key in matched_diffusers_keys:
@@ -266,7 +331,7 @@ for ldm_key in ldm_dict_keys:
             did_reduce_diffusers = True
 
         # check to see if they match within a margin of error
-        mse = torch.nn.functional.mse_loss(ldm_weight, diffusers_weight)
+        mse = torch.nn.functional.mse_loss(ldm_weight.float(), diffusers_weight.float())
         if mse < error_margin:
             ldm_diffusers_keymap[ldm_key] = diffusers_key
             matched_ldm_keys.append(ldm_key)
@@ -289,6 +354,10 @@ pbar.close()
 name = args.name
 if args.sdxl:
     name += '_sdxl'
+elif args.ssd:
+    name += '_ssd'
+elif args.refiner:
+    name += '_refiner'
 elif args.sd2:
     name += '_sd2'
 else:
@@ -359,13 +428,35 @@ for key in unmatched_ldm_keys:
 save_file(remaining_ldm_values, os.path.join(KEYMAPS_FOLDER, f'{name}_ldm_base.safetensors'))
 print(f'Saved remaining ldm values to {os.path.join(KEYMAPS_FOLDER, f"{name}_ldm_base.safetensors")}')
 
+# do cleanup of some left overs and bugs
+to_remove = []
+for ldm_key, diffusers_key in ldm_diffusers_keymap.items():
+    # get rid of tmp merge keys used for slicing
+    if tmp_merge_key in diffusers_key or tmp_merge_key in ldm_key:
+        to_remove.append(ldm_key)
+
+for key in to_remove:
+    del ldm_diffusers_keymap[key]
+
+to_remove = []
+# remove identical shape mappings. Not sure why they exist but they do
+for ldm_key, shape_list in ldm_diffusers_shape_map.items():
+    # remove identical shape mappings. Not sure why they exist but they do
+    # convert to json string to make it easier to compare
+    ldm_shape = json.dumps(shape_list[0])
+    diffusers_shape = json.dumps(shape_list[1])
+    if ldm_shape == diffusers_shape:
+        to_remove.append(ldm_key)
+
+for key in to_remove:
+    del ldm_diffusers_shape_map[key]
+
 dest_path = os.path.join(KEYMAPS_FOLDER, f'{name}.json')
 save_obj = OrderedDict()
 save_obj["ldm_diffusers_keymap"] = ldm_diffusers_keymap
 save_obj["ldm_diffusers_shape_map"] = ldm_diffusers_shape_map
 save_obj["ldm_diffusers_operator_map"] = ldm_operator_map
 save_obj["diffusers_ldm_operator_map"] = diffusers_operator_map
-
 with open(dest_path, 'w') as f:
     f.write(json.dumps(save_obj, indent=4))
 
diff --git a/testing/test_bucket_dataloader.py b/testing/test_bucket_dataloader.py
index 85a21ef7..e8208107 100644
--- a/testing/test_bucket_dataloader.py
+++ b/testing/test_bucket_dataloader.py
@@ -1,37 +1,107 @@
-from torch.utils.data import ConcatDataset, DataLoader
-from tqdm import tqdm
-# make sure we can import from the toolkit
+import time
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from torchvision import transforms
 import sys
 import os
+import cv2
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from toolkit.data_loader import AiToolkitDataset, get_dataloader_from_datasets
+from toolkit.paths import SD_SCRIPTS_ROOT
+
+from toolkit.image_utils import show_img
+
+sys.path.append(SD_SCRIPTS_ROOT)
+
+from library.model_util import load_vae
+from toolkit.data_transfer_object.data_loader import DataLoaderBatchDTO
+from toolkit.data_loader import AiToolkitDataset, get_dataloader_from_datasets, \
+    trigger_dataloader_setup_epoch
 from toolkit.config_modules import DatasetConfig
 import argparse
 
 parser = argparse.ArgumentParser()
 parser.add_argument('dataset_folder', type=str, default='input')
+parser.add_argument('--epochs', type=int, default=1)
+
 
 args = parser.parse_args()
 
 dataset_folder = args.dataset_folder
-resolution = 512
+resolution = 1024
 bucket_tolerance = 64
-batch_size = 4
+batch_size = 1
+
+
+##
 
 dataset_config = DatasetConfig(
-    folder_path=dataset_folder,
+    dataset_path=dataset_folder,
     resolution=resolution,
-    caption_type='txt',
+    caption_ext='json',
     default_caption='default',
     buckets=True,
     bucket_tolerance=bucket_tolerance,
+    poi='person',
+    augmentations=[
+        {
+            'method': 'RandomBrightnessContrast',
+            'brightness_limit': (-0.3, 0.3),
+            'contrast_limit': (-0.3, 0.3),
+            'brightness_by_max': False,
+            'p': 1.0
+        },
+        {
+            'method': 'HueSaturationValue',
+            'hue_shift_limit': (-0, 0),
+            'sat_shift_limit': (-40, 40),
+            'val_shift_limit': (-40, 40),
+            'p': 1.0
+        },
+        # {
+        #     'method': 'RGBShift',
+        #     'r_shift_limit': (-20, 20),
+        #     'g_shift_limit': (-20, 20),
+        #     'b_shift_limit': (-20, 20),
+        #     'p': 1.0
+        # },
+    ]
+
+
 )
 
-dataloader = get_dataloader_from_datasets([dataset_config], batch_size=batch_size)
+dataloader: DataLoader = get_dataloader_from_datasets([dataset_config], batch_size=batch_size)
+
 
 # run through an epoch ang check sizes
-for batch in dataloader:
-    print(list(batch[0].shape))
+dataloader_iterator = iter(dataloader)
+for epoch in range(args.epochs):
+    for batch in dataloader:
+        batch: 'DataLoaderBatchDTO'
+        img_batch = batch.tensor
+
+        chunks = torch.chunk(img_batch, batch_size, dim=0)
+        # put them so they are side by side
+        big_img = torch.cat(chunks, dim=3)
+        big_img = big_img.squeeze(0)
+
+        min_val = big_img.min()
+        max_val = big_img.max()
+
+        big_img = (big_img / 2 + 0.5).clamp(0, 1)
+
+        # convert to image
+        img = transforms.ToPILImage()(big_img)
+
+        show_img(img)
+
+        time.sleep(1.0)
+    # if not last epoch
+    if epoch < args.epochs - 1:
+        trigger_dataloader_setup_epoch(dataloader)
+
+cv2.destroyAllWindows()
 
 print('done')
diff --git a/testing/test_model_load_save.py b/testing/test_model_load_save.py
index dd4883cc..87bdfb3e 100644
--- a/testing/test_model_load_save.py
+++ b/testing/test_model_load_save.py
@@ -3,6 +3,8 @@ import os
 # add project root to sys path
 import sys
 
+from tqdm import tqdm
+
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import torch
@@ -20,6 +22,8 @@ from toolkit.stable_diffusion_model import StableDiffusion
 # you probably wont need this. Unless they change them.... again... again
 # on second thought, you probably will
 
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
 device = torch.device('cpu')
 dtype = torch.float32
 
@@ -109,7 +113,26 @@ keys_in_both.sort()
 
 if len(keys_not_in_state_dict_2) == 0 and len(keys_not_in_state_dict_1) == 0:
     print("All keys match!")
-    exit(0)
+    print("Checking values...")
+    mismatch_keys = []
+    loss = torch.nn.MSELoss()
+    tolerance = 1e-6
+    for key in tqdm(keys_in_both):
+        if loss(state_dict_file_1[key], state_dict_file_2[key]) > tolerance:
+            print(f"Values for key {key} don't match!")
+            print(f"Loss: {loss(state_dict_file_1[key], state_dict_file_2[key])}")
+            mismatch_keys.append(key)
+
+    if len(mismatch_keys) == 0:
+        print("All values match!")
+    else:
+        print("Some valued font match!")
+        print(mismatch_keys)
+        mismatched_path = os.path.join(project_root, 'config', 'mismatch.json')
+        with open(mismatched_path, 'w') as f:
+            f.write(json.dumps(mismatch_keys, indent=4))
+        exit(0)
+
 else:
     print("Keys don't match!, generating info...")
 
@@ -132,17 +155,17 @@ for key in keys_not_in_state_dict_2:
 
 # print(json_data)
 
-project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
 json_save_path = os.path.join(project_root, 'config', 'keys.json')
 json_matched_save_path = os.path.join(project_root, 'config', 'matched.json')
 json_duped_save_path = os.path.join(project_root, 'config', 'duped.json')
 state_dict_1_filename = os.path.basename(args.file_1[0])
-state_dict_2_filename = os.path.basename(args.file_2[0])
+# state_dict_2_filename = os.path.basename(args.file_2[0])
 # save key names for each in own file
 with open(os.path.join(project_root, 'config', f'{state_dict_1_filename}.json'), 'w') as f:
     f.write(json.dumps(state_dict_1_keys, indent=4))
 
-with open(os.path.join(project_root, 'config', f'{state_dict_2_filename}.json'), 'w') as f:
+with open(os.path.join(project_root, 'config', f'{state_dict_1_filename}_loop.json'), 'w') as f:
     f.write(json.dumps(state_dict_2_keys, indent=4))
 
 with open(json_save_path, 'w') as f:
diff --git a/toolkit/basic.py b/toolkit/basic.py
index 248ffc4e..6a70bf61 100644
--- a/toolkit/basic.py
+++ b/toolkit/basic.py
@@ -1,4 +1,50 @@
+import gc
+
+import torch
 
 
 def value_map(inputs, min_in, max_in, min_out, max_out):
     return (inputs - min_in) * (max_out - min_out) / (max_in - min_in) + min_out
+
+
+def flush(garbage_collect=True):
+    torch.cuda.empty_cache()  # release unused cached CUDA memory
+    if garbage_collect:  # the Python garbage-collection pass is optional
+        gc.collect()
+
+
+def get_mean_std(tensor):
+    """Return the (mean, std) of ``tensor`` over dims 2 and 3, kept for broadcasting.
+
+    A 3-dimensional tensor gets a leading dimension added first; any rank other
+    than 3 or 4 raises ``Exception``. The std is ``sqrt(variance + 1e-5)``.
+    """
+    rank = len(tensor.shape)
+    if rank not in (3, 4):
+        raise Exception("Expected tensor of shape (batch_size, channels, width, height)")
+    batched = tensor.unsqueeze(0) if rank == 3 else tensor
+    mean = torch.mean(batched, dim=[2, 3], keepdim=True)
+    variance = torch.var(batched, dim=[2, 3], keepdim=True)
+    return mean, torch.sqrt(variance + 1e-5)
+
+
+def adain(content_features, style_features):
+    """Adaptive instance normalization: restyle content with the style's statistics.
+
+    Both inputs are reduced over dims 2 and 3 (kept for broadcasting), so each
+    must have at least four dimensions; the intended layout is
+    (batch_size, channels, width, height).
+    """
+    spatial_dims = [2, 3]
+    eps = 1e-5  # added to the variance before taking the square root
+
+    # statistics of the content features
+    c_mean = torch.mean(content_features, dim=spatial_dims, keepdim=True)
+    c_std = torch.sqrt(torch.var(content_features, dim=spatial_dims, keepdim=True) + eps)
+
+    # statistics of the style features
+    s_mean = torch.mean(style_features, dim=spatial_dims, keepdim=True)
+    s_std = torch.sqrt(torch.var(style_features, dim=spatial_dims, keepdim=True) + eps)
+
+    # normalize the content, then scale and shift it with the style statistics
+    return (content_features - c_mean) / c_std * s_std + s_mean
diff --git a/toolkit/buckets.py b/toolkit/buckets.py
new file mode 100644
index 00000000..be69085b
--- /dev/null
+++ b/toolkit/buckets.py
@@ -0,0 +1,127 @@
+from typing import Type, List, Union, TypedDict
+
+
+class BucketResolution(TypedDict):
+    width: int  # bucket width in pixels
+    height: int  # bucket height in pixels
+
+
+# resolutions SDXL was trained on with a 1024x1024 base resolution
+resolutions_1024: List[BucketResolution] = [
+    # SDXL Base resolution
+    {"width": 1024, "height": 1024},
+    # SDXL Resolutions, widescreen
+    {"width": 2048, "height": 512},
+    {"width": 1984, "height": 512},
+    {"width": 1920, "height": 512},
+    {"width": 1856, "height": 512},
+    {"width": 1792, "height": 576},
+    {"width": 1728, "height": 576},
+    {"width": 1664, "height": 576},
+    {"width": 1600, "height": 640},
+    {"width": 1536, "height": 640},
+    {"width": 1472, "height": 704},
+    {"width": 1408, "height": 704},
+    {"width": 1344, "height": 704},
+    {"width": 1344, "height": 768},
+    {"width": 1280, "height": 768},
+    {"width": 1216, "height": 832},
+    {"width": 1152, "height": 832},
+    {"width": 1152, "height": 896},
+    {"width": 1088, "height": 896},
+    {"width": 1088, "height": 960},
+    {"width": 1024, "height": 960},
+    # SDXL Resolutions, portrait
+    {"width": 960, "height": 1024},
+    {"width": 960, "height": 1088},
+    {"width": 896, "height": 1088},
+    {"width": 896, "height": 1152},  # 2:3
+    {"width": 832, "height": 1152},
+    {"width": 832, "height": 1216},
+    {"width": 768, "height": 1280},
+    {"width": 768, "height": 1344},
+    {"width": 704, "height": 1408},
+    {"width": 704, "height": 1472},
+    {"width": 640, "height": 1536},
+    {"width": 640, "height": 1600},
+    {"width": 576, "height": 1664},
+    {"width": 576, "height": 1728},
+    {"width": 576, "height": 1792},
+    {"width": 512, "height": 1856},
+    {"width": 512, "height": 1920},
+    {"width": 512, "height": 1984},
+    {"width": 512, "height": 2048},
+]
+
+
+def get_bucket_sizes(resolution: int = 512, divisibility: int = 8) -> List[BucketResolution]:
+    """Scale the ``resolutions_1024`` bucket table to ``resolution``.
+
+    Every width and height is multiplied by ``resolution / 1024``, truncated to
+    an int and then rounded down to a multiple of ``divisibility``.
+    """
+    scaler = resolution / 1024
+
+    def snap(side: int) -> int:
+        scaled = int(side * scaler)
+        return scaled - (scaled % divisibility)
+
+    return [
+        {"width": snap(bucket["width"]), "height": snap(bucket["height"])}
+        for bucket in resolutions_1024
+    ]
+
+
+def get_resolution(width, height):
+    """Return the side of the square holding ``width * height`` pixels, truncated to int."""
+    pixel_count = width * height
+    side = pixel_count ** 0.5
+    return int(side)
+
+
+def get_bucket_for_image_size(
+        width: int,
+        height: int,
+        bucket_size_list: List[BucketResolution] = None,
+        resolution: Union[int, None] = None,
+        divisibility: int = 8
+) -> BucketResolution:
+    """Pick the bucket that best fits a ``width`` x ``height`` image.
+
+    When ``bucket_size_list`` is None the buckets come from ``get_bucket_sizes``;
+    the resolution used is capped at the image's own square-equivalent
+    resolution. An exact size match wins outright; otherwise the bucket with
+    the fewest cropped pixels after a covering resize is returned.
+
+    Raises ``ValueError`` when no bucket could be selected.
+    """
+    if bucket_size_list is None:
+        # never build buckets for more pixels than the image actually has
+        real_resolution = get_resolution(width, height)
+        if resolution is None:
+            resolution = real_resolution
+        resolution = min(resolution, real_resolution)
+        bucket_size_list = get_bucket_sizes(resolution=resolution, divisibility=divisibility)
+
+    # an exact size match needs no cropping at all
+    exact = next((b for b in bucket_size_list
+                  if b["width"] == width and b["height"] == height), None)
+    if exact is not None:
+        return exact
+
+    best_bucket = None
+    fewest_removed = float("inf")
+    for candidate in bucket_size_list:
+        # the larger ratio is used so the resized image has to be cropped as little as possible
+        scale = max(candidate["width"] / width, candidate["height"] / height)
+        scaled_w = int(width * scale)
+        scaled_h = int(height * scale)
+        # overhang past the bucket on each axis, i.e. what a crop would discard
+        removed = (scaled_w - candidate["width"]) * scaled_h + (scaled_h - candidate["height"]) * scaled_w
+        if removed < fewest_removed:
+            fewest_removed = removed
+            best_bucket = candidate
+
+    if best_bucket is None:
+        raise ValueError("No suitable bucket found")
+    return best_bucket
diff --git a/toolkit/config.py b/toolkit/config.py
index 493996cd..30a6d538 100644
--- a/toolkit/config.py
+++ b/toolkit/config.py
@@ -17,6 +17,24 @@ def get_cwd_abs_path(path):
     return path
 
 
+def replace_env_vars_in_string(s: str) -> str:
+    """Expand every ``${VAR_NAME}`` placeholder in ``s`` from ``os.environ``.
+
+    Raises ``ValueError`` naming the first placeholder whose variable is unset.
+    """
+
+    def lookup(placeholder):
+        var_name = placeholder.group(1)
+        try:
+            return os.environ[var_name]
+        except KeyError:
+            raise ValueError(
+                f"Environment variable {var_name} not set. Please ensure it's defined before proceeding."
+            ) from None
+
+    return re.sub(r'\$\{([^}]+)\}', lookup, s)
+
+
 def preprocess_config(config: OrderedDict, name: str = None):
     if "job" not in config:
         raise ValueError("config file must have a job key")
@@ -81,13 +99,14 @@ def get_config(
         raise ValueError(f"Could not find config file {config_file_path}")
 
     # if we found it, check if it is a json or yaml file
-    if real_config_path.endswith('.json') or real_config_path.endswith('.jsonc'):
-        with open(real_config_path, 'r', encoding='utf-8') as f:
-            config = json.load(f, object_pairs_hook=OrderedDict)
-    elif real_config_path.endswith('.yaml') or real_config_path.endswith('.yml'):
-        with open(real_config_path, 'r', encoding='utf-8') as f:
-            config = yaml.load(f, Loader=fixed_loader)
-    else:
-        raise ValueError(f"Config file {config_file_path} must be a json or yaml file")
+    with open(real_config_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+        content_with_env_replaced = replace_env_vars_in_string(content)
+        if real_config_path.endswith('.json') or real_config_path.endswith('.jsonc'):
+            config = json.loads(content_with_env_replaced, object_pairs_hook=OrderedDict)
+        elif real_config_path.endswith('.yaml') or real_config_path.endswith('.yml'):
+            config = yaml.load(content_with_env_replaced, Loader=fixed_loader)
+        else:
+            raise ValueError(f"Config file {config_file_path} must be a json or yaml file")
 
     return preprocess_config(config, name)
diff --git a/toolkit/config_modules.py b/toolkit/config_modules.py
index 5005bee4..8fdb2670 100644
--- a/toolkit/config_modules.py
+++ b/toolkit/config_modules.py
@@ -1,14 +1,25 @@
 import os
 import time
-from typing import List, Optional, Literal
+from typing import List, Optional, Literal, Union
 import random
 
+import torch
+
+from toolkit.prompt_utils import PromptEmbeds
+
+ImgExt = Literal['jpg', 'png', 'webp']
+
+SaveFormat = Literal['safetensors', 'diffusers']
+
 
 class SaveConfig:
     def __init__(self, **kwargs):
         self.save_every: int = kwargs.get('save_every', 1000)
         self.dtype: str = kwargs.get('save_dtype', 'float16')
         self.max_step_saves_to_keep: int = kwargs.get('max_step_saves_to_keep', 5)
+        self.save_format: SaveFormat = kwargs.get('save_format', 'safetensors')
+        if self.save_format not in ['safetensors', 'diffusers']:
+            raise ValueError(f"save_format must be safetensors or diffusers, got {self.save_format}")
 
 
 class LogingConfig:
@@ -20,6 +31,7 @@ class LogingConfig:
 
 class SampleConfig:
     def __init__(self, **kwargs):
+        self.sampler: str = kwargs.get('sampler', 'ddpm')
         self.sample_every: int = kwargs.get('sample_every', 100)
         self.width: int = kwargs.get('width', 512)
         self.height: int = kwargs.get('height', 512)
@@ -31,11 +43,59 @@ class SampleConfig:
         self.sample_steps = kwargs.get('sample_steps', 20)
         self.network_multiplier = kwargs.get('network_multiplier', 1)
         self.guidance_rescale = kwargs.get('guidance_rescale', 0.0)
+        self.ext: ImgExt = kwargs.get('format', 'jpg')
+        self.adapter_conditioning_scale = kwargs.get('adapter_conditioning_scale', 1.0)
+        self.refiner_start_at = kwargs.get('refiner_start_at', 0.5)  # step to start using refiner on sample if it exists
+
+
+class LormModuleSettingsConfig:
+    def __init__(self, **kwargs):
+        self.contains: str = kwargs.get('contains', '4nt$3')  # NOTE(review): default presumably matches no module name — confirm
+        self.extract_mode: str = kwargs.get('extract_mode', 'ratio')
+        # min num parameters to attach to
+        self.parameter_threshold: int = kwargs.get('parameter_threshold', 0)
+        self.extract_mode_param: Union[int, float] = kwargs.get('extract_mode_param', 0.25)
+
+
+class LoRMConfig:
+    """Global LoRM extraction settings plus optional per-module overrides."""
+
+    def __init__(self, **kwargs):
+        self.extract_mode: str = kwargs.get('extract_mode', 'ratio')
+        self.do_conv: bool = kwargs.get('do_conv', False)
+        self.extract_mode_param: dict = kwargs.get('extract_mode_param', 0.25)
+        self.parameter_threshold: int = kwargs.get('parameter_threshold', 0)
+        # every override starts from the global values and replaces only the keys it sets
+        self.module_settings: List[LormModuleSettingsConfig] = [
+            LormModuleSettingsConfig(**{**self._global_settings(), **override})
+            for override in kwargs.get('module_settings', [])
+        ]
+
+    def _global_settings(self):
+        # the global values in the kwargs form LormModuleSettingsConfig accepts
+        return {
+            'extract_mode': self.extract_mode,
+            'extract_mode_param': self.extract_mode_param,
+            'parameter_threshold': self.parameter_threshold,
+        }
+
+    def get_config_for_module(self, block_name):
+        """Return the first override whose ``contains`` pieces all occur in ``block_name``."""
+        for setting in self.module_settings:
+            # pieces are '|'-separated; a second pass retries with '.' swapped for '_'
+            for pattern in (setting.contains, setting.contains.replace('.', '_')):
+                if all(piece in block_name for piece in pattern.split('|')):
+                    return setting
+        # nothing matched: fall back to the global values
+        return LormModuleSettingsConfig(**self._global_settings())
+
+
+NetworkType = Literal['lora', 'locon', 'lorm']
 
 
 class NetworkConfig:
     def __init__(self, **kwargs):
-        self.type: str = kwargs.get('type', 'lora')
+        self.type: NetworkType = kwargs.get('type', 'lora')
         rank = kwargs.get('rank', None)
         linear = kwargs.get('linear', None)
         if rank is not None:
@@ -48,7 +108,37 @@ class NetworkConfig:
         self.alpha: float = kwargs.get('alpha', 1.0)
         self.linear_alpha: float = kwargs.get('linear_alpha', self.alpha)
         self.conv_alpha: float = kwargs.get('conv_alpha', self.conv)
-        self.normalize = kwargs.get('normalize', False)
+        self.dropout: Union[float, None] = kwargs.get('dropout', None)
+
+        self.lorm_config: Union[LoRMConfig, None] = None
+        lorm = kwargs.get('lorm', None)
+        if lorm is not None:
+            self.lorm_config: LoRMConfig = LoRMConfig(**lorm)
+
+        if self.type == 'lorm':
+            # set linear to arbitrary values so it makes them
+            self.linear = 4
+            self.rank = 4
+            if self.lorm_config is not None and self.lorm_config.do_conv:  # None when no 'lorm' section was given
+                self.conv = 4
+
+
+AdapterTypes = Literal['t2i', 'ip', 'ip+']
+
+
+class AdapterConfig:
+    def __init__(self, **kwargs):
+        self.type: AdapterTypes = kwargs.get('type', 't2i')  # t2i, ip, ip+
+        self.in_channels: int = kwargs.get('in_channels', 3)
+        self.channels: List[int] = kwargs.get('channels', [320, 640, 1280, 1280])
+        self.num_res_blocks: int = kwargs.get('num_res_blocks', 2)
+        self.downscale_factor: int = kwargs.get('downscale_factor', 8)
+        self.adapter_type: str = kwargs.get('adapter_type', 'full_adapter')
+        self.image_dir: Optional[str] = kwargs.get('image_dir', None)
+        self.test_img_path: Optional[str] = kwargs.get('test_img_path', None)
+        self.train: bool = kwargs.get('train', False)
+        self.image_encoder_path: Optional[str] = kwargs.get('image_encoder_path', None)
+        self.name_or_path = kwargs.get('name_or_path', None)
 
 
 class EmbeddingConfig:
@@ -59,26 +149,94 @@ class EmbeddingConfig:
         self.save_format = kwargs.get('save_format', 'safetensors')
 
 
+ContentOrStyleType = Literal['balanced', 'style', 'content']
+LossTarget = Literal['noise', 'source', 'unaugmented', 'differential_noise']
+
+
 class TrainConfig:
     def __init__(self, **kwargs):
         self.noise_scheduler = kwargs.get('noise_scheduler', 'ddpm')
+        self.content_or_style: ContentOrStyleType = kwargs.get('content_or_style', 'balanced')
         self.steps: int = kwargs.get('steps', 1000)
         self.lr = kwargs.get('lr', 1e-6)
+        self.unet_lr = kwargs.get('unet_lr', self.lr)
+        self.text_encoder_lr = kwargs.get('text_encoder_lr', self.lr)
+        self.refiner_lr = kwargs.get('refiner_lr', self.lr)
+        self.embedding_lr = kwargs.get('embedding_lr', self.lr)
+        self.adapter_lr = kwargs.get('adapter_lr', self.lr)
         self.optimizer = kwargs.get('optimizer', 'adamw')
+        self.optimizer_params = kwargs.get('optimizer_params', {})
         self.lr_scheduler = kwargs.get('lr_scheduler', 'constant')
-        self.max_denoising_steps: int = kwargs.get('max_denoising_steps', 50)
+        self.lr_scheduler_params = kwargs.get('lr_scheduler_params', {})
+        self.min_denoising_steps: int = kwargs.get('min_denoising_steps', 0)
+        self.max_denoising_steps: int = kwargs.get('max_denoising_steps', 1000)
         self.batch_size: int = kwargs.get('batch_size', 1)
         self.dtype: str = kwargs.get('dtype', 'fp32')
         self.xformers = kwargs.get('xformers', False)
+        self.sdp = kwargs.get('sdp', False)
         self.train_unet = kwargs.get('train_unet', True)
         self.train_text_encoder = kwargs.get('train_text_encoder', True)
+        self.train_refiner = kwargs.get('train_refiner', True)
         self.min_snr_gamma = kwargs.get('min_snr_gamma', None)
+        self.snr_gamma = kwargs.get('snr_gamma', None)
+        # trains a gamma, offset, and scale to adjust loss to adapt to timestep differentials
+        # this should balance the learning rate across all timesteps over time
+        self.learnable_snr_gos = kwargs.get('learnable_snr_gos', False)
         self.noise_offset = kwargs.get('noise_offset', 0.0)
-        self.optimizer_params = kwargs.get('optimizer_params', {})
         self.skip_first_sample = kwargs.get('skip_first_sample', False)
         self.gradient_checkpointing = kwargs.get('gradient_checkpointing', True)
         self.weight_jitter = kwargs.get('weight_jitter', 0.0)
         self.merge_network_on_save = kwargs.get('merge_network_on_save', False)
+        self.max_grad_norm = kwargs.get('max_grad_norm', 1.0)
+        self.start_step = kwargs.get('start_step', None)
+        self.free_u = kwargs.get('free_u', False)
+        self.adapter_assist_name_or_path: Optional[str] = kwargs.get('adapter_assist_name_or_path', None)
+        self.noise_multiplier = kwargs.get('noise_multiplier', 1.0)
+        self.img_multiplier = kwargs.get('img_multiplier', 1.0)
+        self.latent_multiplier = kwargs.get('latent_multiplier', 1.0)
+        self.negative_prompt = kwargs.get('negative_prompt', None)
+        # multiplier applied to loss on regularization images
+        self.reg_weight = kwargs.get('reg_weight', 1.0)
+
+        # dropout that happens before encoding. It functions independently per text encoder
+        self.prompt_dropout_prob = kwargs.get('prompt_dropout_prob', 0.0)
+
+        # match the norm of the noise before computing loss. This will help the model maintain its
+        # current understanding of the brightness of images.
+
+        self.match_noise_norm = kwargs.get('match_noise_norm', False)
+
+        # set to -1 to accumulate gradients for entire epoch
+        # warning, only do this with a small dataset or you will run out of memory
+        self.gradient_accumulation_steps = kwargs.get('gradient_accumulation_steps', 1)
+
+        # short long captions will double your batch size. This only works when a dataset is
+        # prepared with a json caption file that has both short and long captions in it. It will
+        # Double up every image and run it through with both short and long captions. The idea
+        # is that the network will learn how to generate good images with both short and long captions
+        self.short_and_long_captions = kwargs.get('short_and_long_captions', False)
+        # if above is NOT true, this will make it so the long caption goes to te2 and the short caption goes to te1 for sdxl only
+        self.short_and_long_captions_encoder_split = kwargs.get('short_and_long_captions_encoder_split', False)
+
+        # basically gradient accumulation but we run just 1 item through the network
+        # and accumulate gradients. This can be used as basic gradient accumulation but is very helpful
+        # for training tricks that increase batch size but need a single gradient step
+        self.single_item_batching = kwargs.get('single_item_batching', False)
+
+        match_adapter_assist = kwargs.get('match_adapter_assist', False)
+        self.match_adapter_chance = kwargs.get('match_adapter_chance', 0.0)
+        self.loss_target: LossTarget = kwargs.get('loss_target',
+                                                  'noise')  # noise, source, unaugmented, differential_noise
+
+        # When a mask is passed in a dataset, and this is true,
+        # we will predict noise without the LoRa network and use the prediction as a target for
+        # the unmasked region. It is unmasked regularization basically
+        self.inverted_mask_prior = kwargs.get('inverted_mask_prior', False)
+        self.inverted_mask_prior_multiplier = kwargs.get('inverted_mask_prior_multiplier', 0.5)
+
+        # legacy
+        if match_adapter_assist and self.match_adapter_chance == 0.0:
+            self.match_adapter_chance = 1.0
 
 
 class ModelConfig:
@@ -86,17 +244,27 @@ class ModelConfig:
         self.name_or_path: str = kwargs.get('name_or_path', None)
         self.is_v2: bool = kwargs.get('is_v2', False)
         self.is_xl: bool = kwargs.get('is_xl', False)
+        self.is_ssd: bool = kwargs.get('is_ssd', False)
         self.is_v_pred: bool = kwargs.get('is_v_pred', False)
         self.dtype: str = kwargs.get('dtype', 'float16')
         self.vae_path = kwargs.get('vae_path', None)
+        self.refiner_name_or_path = kwargs.get('refiner_name_or_path', None)
+        self._original_refiner_name_or_path = self.refiner_name_or_path
+        self.refiner_start_at = kwargs.get('refiner_start_at', 0.5)
 
         # only for SDXL models for now
         self.use_text_encoder_1: bool = kwargs.get('use_text_encoder_1', True)
         self.use_text_encoder_2: bool = kwargs.get('use_text_encoder_2', True)
 
+        self.experimental_xl: bool = kwargs.get('experimental_xl', False)
+
         if self.name_or_path is None:
             raise ValueError('name_or_path must be specified')
 
+        if self.is_ssd:
+            # set is_xl to true since it is mostly the same architecture
+            self.is_xl = True
+
 
 class ReferenceDatasetConfig:
     def __init__(self, **kwargs):
@@ -126,6 +294,14 @@ class SliderTargetConfig:
         self.shuffle: bool = kwargs.get('shuffle', False)
 
 
+class GuidanceConfig:
+    def __init__(self, **kwargs):
+        self.target_class: str = kwargs.get('target_class', '')
+        self.guidance_scale: float = kwargs.get('guidance_scale', 1.0)
+        self.positive_prompt: str = kwargs.get('positive_prompt', '')
+        self.negative_prompt: str = kwargs.get('negative_prompt', '')
+
+
 class SliderConfigAnchors:
     def __init__(self, **kwargs):
         self.prompt = kwargs.get('prompt', '')
@@ -143,28 +319,41 @@ class SliderConfig:
         self.prompt_file: str = kwargs.get('prompt_file', None)
         self.prompt_tensors: str = kwargs.get('prompt_tensors', None)
         self.batch_full_slide: bool = kwargs.get('batch_full_slide', True)
+        self.use_adapter: bool = kwargs.get('use_adapter', None)  # depth
+        self.adapter_img_dir = kwargs.get('adapter_img_dir', None)
+        self.low_ram = kwargs.get('low_ram', False)
 
         # expand targets if shuffling
         from toolkit.prompt_utils import get_slider_target_permutations
         self.targets: List[SliderTargetConfig] = []
         targets = [SliderTargetConfig(**target) for target in targets]
         # do permutations if shuffle is true
+        print(f"Building slider targets")
         for target in targets:
             if target.shuffle:
-                target_permutations = get_slider_target_permutations(target)
+                target_permutations = get_slider_target_permutations(target, max_permutations=8)
                 self.targets = self.targets + target_permutations
             else:
                 self.targets.append(target)
+        print(f"Built {len(self.targets)} slider targets (with permutations)")
 
 
 class DatasetConfig:
-    caption_type: Literal["txt", "caption"] = 'txt'
+    """
+    Dataset config for sd-datasets
+
+    """
 
     def __init__(self, **kwargs):
         self.type = kwargs.get('type', 'image')  # sd, slider, reference
+        # will be legacy
         self.folder_path: str = kwargs.get('folder_path', None)
+        # can be json or folder path
+        self.dataset_path: str = kwargs.get('dataset_path', None)
+
         self.default_caption: str = kwargs.get('default_caption', None)
-        self.caption_type: str = kwargs.get('caption_type', None)
+        self.random_triggers: List[str] = kwargs.get('random_triggers', [])
+        self.caption_ext: str = kwargs.get('caption_ext', None)
         self.random_scale: bool = kwargs.get('random_scale', False)
         self.random_crop: bool = kwargs.get('random_crop', False)
         self.resolution: int = kwargs.get('resolution', 512)
@@ -172,6 +361,66 @@ class DatasetConfig:
         self.buckets: bool = kwargs.get('buckets', False)
         self.bucket_tolerance: int = kwargs.get('bucket_tolerance', 64)
         self.is_reg: bool = kwargs.get('is_reg', False)
+        self.network_weight: float = float(kwargs.get('network_weight', 1.0))
+        self.token_dropout_rate: float = float(kwargs.get('token_dropout_rate', 0.0))
+        self.shuffle_tokens: bool = kwargs.get('shuffle_tokens', False)
+        self.caption_dropout_rate: float = float(kwargs.get('caption_dropout_rate', 0.0))
+        self.flip_x: bool = kwargs.get('flip_x', False)
+        self.flip_y: bool = kwargs.get('flip_y', False)
+        self.augments: List[str] = kwargs.get('augments', [])
+        self.control_path: str = kwargs.get('control_path', None)  # depth maps, etc
+        self.alpha_mask: bool = kwargs.get('alpha_mask', False)  # if true, will use alpha channel as mask
+        self.mask_path: str = kwargs.get('mask_path',
+                                         None)  # focus mask (black and white. White has higher loss than black)
+        self.unconditional_path: str = kwargs.get('unconditional_path', None)  # path where matching unconditional images are located
+        self.invert_mask: bool = kwargs.get('invert_mask', False)  # invert mask
+        self.mask_min_value: float = kwargs.get('mask_min_value', 0.01)  # min value for . 0 - 1
+        self.poi: Union[str, None] = kwargs.get('poi',
+                                                None)  # if one is set and in json data, will be used as auto crop scale point of interes
+        self.num_repeats: int = kwargs.get('num_repeats', 1)  # number of times to repeat dataset
+        # cache latents will store them in memory
+        self.cache_latents: bool = kwargs.get('cache_latents', False)
+        # cache latents to disk will store them on disk. If both are true, it will save to disk, but keep in memory
+        self.cache_latents_to_disk: bool = kwargs.get('cache_latents_to_disk', False)
+
+        # https://albumentations.ai/docs/api_reference/augmentations/transforms
+        # augmentations are returned as a separate image and cannot currently be cached
+        self.augmentations: List[dict] = kwargs.get('augmentations', None)
+        self.shuffle_augmentations: bool = kwargs.get('shuffle_augmentations', False)
+
+        has_augmentations = self.augmentations is not None and len(self.augmentations) > 0
+
+        if (len(self.augments) > 0 or has_augmentations) and (self.cache_latents or self.cache_latents_to_disk):
+            print(f"WARNING: Augments are not supported with caching latents. Setting cache_latents to False")
+            self.cache_latents = False
+            self.cache_latents_to_disk = False
+
+        # legacy compatibility
+        legacy_caption_type = kwargs.get('caption_type', None)
+        if legacy_caption_type:
+            self.caption_ext = legacy_caption_type
+        self.caption_type = self.caption_ext
+
+
+def preprocess_dataset_raw_config(raw_config: List[dict]) -> List[dict]:
+    """
+    Expand each dataset whose 'resolution' is a list into one entry per resolution,
+    so multi-resolution datasets do not have to be written out by hand.
+
+    A missing 'resolution' is treated as 512. Each output entry is a shallow copy
+    of its source dict, so nested values are shared with the input.
+    :param raw_config: raw dataset config dicts
+    :return: new list with exactly one resolution per entry, in input order
+    """
+    expanded = []
+    for dataset in raw_config:
+        resolution = dataset.get('resolution', 512)
+        resolutions = resolution if isinstance(resolution, list) else [resolution]
+        for res in resolutions:
+            entry = dataset.copy()
+            entry['resolution'] = res
+            expanded.append(entry)
+    return expanded
 
 
 class GenerateImageConfig:
@@ -191,9 +440,14 @@ class GenerateImageConfig:
             # the tag [time] will be replaced with milliseconds since epoch
             output_path: str = None,  # full image path
             output_folder: str = None,  # folder to save image in if output_path is not specified
-            output_ext: str = 'png',  # extension to save image as if output_path is not specified
+            output_ext: ImgExt = 'png',  # extension to save image as if output_path is not specified
             output_tail: str = '',  # tail to add to output filename
             add_prompt_file: bool = False,  # add a prompt file with generated image
+            adapter_image_path: str = None,  # path to adapter image
+            adapter_conditioning_scale: float = 1.0,  # scale for adapter conditioning
+            latents: Union[torch.Tensor | None] = None,  # input latent to start with,
+            extra_kwargs: dict = None,  # extra data to save with prompt file
+            refiner_start_at: float = 0.5,  # start at this percentage of a step. 0.0 to 1.0 . 1.0 is the end
     ):
         self.width: int = width
         self.height: int = height
@@ -204,6 +458,7 @@ class GenerateImageConfig:
         self.prompt_2: str = prompt_2
         self.negative_prompt: str = negative_prompt
         self.negative_prompt_2: str = negative_prompt_2
+        self.latents: Union[torch.Tensor | None] = latents
 
         self.output_path: str = output_path
         self.seed: int = seed
@@ -216,6 +471,10 @@ class GenerateImageConfig:
         self.add_prompt_file: bool = add_prompt_file
         self.output_tail: str = output_tail
         self.gen_time: int = int(time.time() * 1000)
+        self.adapter_image_path: str = adapter_image_path
+        self.adapter_conditioning_scale: float = adapter_conditioning_scale
+        self.extra_kwargs = extra_kwargs if extra_kwargs is not None else {}
+        self.refiner_start_at = refiner_start_at
 
         # prompt string will override any settings above
         self._process_prompt_string()
@@ -370,3 +629,15 @@ class GenerateImageConfig:
                         self.network_multiplier = float(content)
                     elif flag == 'gr':
                         self.guidance_rescale = float(content)
+                    elif flag == 'a':
+                        self.adapter_conditioning_scale = float(content)
+                    elif flag == 'ref':
+                        self.refiner_start_at = float(content)
+
+    def post_process_embeddings(
+            self,
+            conditional_prompt_embeds: PromptEmbeds,
+            unconditional_prompt_embeds: Optional[PromptEmbeds] = None,
+    ):
+        # Hook called after the prompt embeds are encoded. Currently a no-op; it exists so the embeds can be overridden here in the future.
+        pass
diff --git a/toolkit/cuda_malloc.py b/toolkit/cuda_malloc.py
new file mode 100644
index 00000000..239b9666
--- /dev/null
+++ b/toolkit/cuda_malloc.py
@@ -0,0 +1,93 @@
+# ref comfy ui
+import os
+import importlib.util
+
+
+# Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
+def get_gpu_names():  # returns a set of display-device description strings; always an empty set when os.name != 'nt'
+    if os.name == 'nt':
+        import ctypes
+
+        # Define necessary C structures and types
+        class DISPLAY_DEVICEA(ctypes.Structure):  # ctypes layout handed to EnumDisplayDevicesA below
+            _fields_ = [
+                ('cb', ctypes.c_ulong),
+                ('DeviceName', ctypes.c_char * 32),
+                ('DeviceString', ctypes.c_char * 128),
+                ('StateFlags', ctypes.c_ulong),
+                ('DeviceID', ctypes.c_char * 128),
+                ('DeviceKey', ctypes.c_char * 128)
+            ]
+
+        # Load user32.dll
+        user32 = ctypes.windll.user32
+
+        # Call EnumDisplayDevicesA
+        def enum_display_devices():
+            device_info = DISPLAY_DEVICEA()
+            device_info.cb = ctypes.sizeof(device_info)  # cb is set to the struct size before the first call
+            device_index = 0
+            gpu_names = set()
+
+            while user32.EnumDisplayDevicesA(None, device_index, ctypes.byref(device_info), 0):  # stops at the first falsy return
+                device_index += 1
+                gpu_names.add(device_info.DeviceString.decode('utf-8'))  # stored in a set, so repeated names collapse
+            return gpu_names
+
+        return enum_display_devices()
+    else:
+        return set()  # no enumeration is attempted on other platforms
+
+
+blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950",
+             "GeForce 945M",
+             "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745",
+             "Quadro K620",
+             "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
+             "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000",
+             "Quadro M5500", "Quadro M6000",
+             "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M",
+             "GeForce GTX 1650", "GeForce GTX 1630"
+             }  # model-name fragments; cuda_malloc_supported() returns False when an NVIDIA device name contains one of them
+
+
+def cuda_malloc_supported():  # False if any NVIDIA device name contains a blacklisted model fragment, otherwise True
+    try:
+        names = get_gpu_names()
+    except Exception:  # best effort: a failed lookup is treated as "no devices"; KeyboardInterrupt/SystemExit still propagate
+        names = set()
+    for x in names:
+        if "NVIDIA" in x:  # only names containing "NVIDIA" are checked against the blacklist
+            for b in blacklist:
+                if b in x:  # substring match against the device description
+                    return False
+    return True
+
+
+cuda_malloc = False  # set to True below only when torch >= 2 is found and cuda_malloc_supported() agrees
+
+if not cuda_malloc:
+    try:
+        version = ""
+        torch_spec = importlib.util.find_spec("torch")  # locate the torch package without importing it
+        for folder in torch_spec.submodule_search_locations:
+            ver_file = os.path.join(folder, "version.py")
+            if os.path.isfile(ver_file):
+                spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
+                module = importlib.util.module_from_spec(spec)
+                spec.loader.exec_module(module)  # executes only version.py, under a throwaway module name
+                version = module.__version__
+        if int(version.split('.')[0]) >= 2:  # enable by default for torch 2.0 and up; parse the whole major component, not its first digit
+            cuda_malloc = cuda_malloc_supported()
+    except Exception:  # best effort: torch missing or an unreadable version leaves cuda_malloc False
+        pass
+
+if cuda_malloc:
+    env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None)
+    if env_var is None:
+        env_var = "backend:cudaMallocAsync"
+    else:
+        env_var += ",backend:cudaMallocAsync"  # append to the value that is already set
+
+    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var
+    print("CUDA Malloc Async Enabled")
diff --git a/toolkit/data_loader.py b/toolkit/data_loader.py
index cc50c0d9..4d62b72d 100644
--- a/toolkit/data_loader.py
+++ b/toolkit/data_loader.py
@@ -1,10 +1,13 @@
+import copy
+import json
 import os
 import random
-from typing import List
+import traceback
+from functools import lru_cache
+from typing import List, TYPE_CHECKING
 
 import cv2
 import numpy as np
-import torch
 from PIL import Image
 from PIL.ImageOps import exif_transpose
 from torchvision import transforms
@@ -12,9 +15,13 @@ from torch.utils.data import Dataset, DataLoader, ConcatDataset
 from tqdm import tqdm
 import albumentations as A
 
-from toolkit import image_utils
-from toolkit.config_modules import DatasetConfig
-from toolkit.dataloader_mixins import CaptionMixin, BucketsMixin
+from toolkit.buckets import get_bucket_for_image_size, BucketResolution
+from toolkit.config_modules import DatasetConfig, preprocess_dataset_raw_config
+from toolkit.dataloader_mixins import CaptionMixin, BucketsMixin, LatentCachingMixin, Augments
+from toolkit.data_transfer_object.data_loader import FileItemDTO, DataLoaderBatchDTO
+
+if TYPE_CHECKING:
+    from toolkit.stable_diffusion_model import StableDiffusion
 
 
 class ImageDataset(Dataset, CaptionMixin):
@@ -27,7 +34,7 @@ class ImageDataset(Dataset, CaptionMixin):
         self.include_prompt = self.get_config('include_prompt', False)
         self.default_prompt = self.get_config('default_prompt', '')
         if self.include_prompt:
-            self.caption_type = self.get_config('caption_type', 'txt')
+            self.caption_type = self.get_config('caption_ext', 'txt')
         else:
             self.caption_type = None
         # we always random crop if random scale is enabled
@@ -48,6 +55,8 @@ class ImageDataset(Dataset, CaptionMixin):
             else:
                 bad_count += 1
 
+        self.file_list = new_file_list
+
         print(f"  -  Found {len(self.file_list)} images")
         print(f"  -  Found {bad_count} images that are too small")
         assert len(self.file_list) > 0, f"no images found in {self.path}"
@@ -85,7 +94,10 @@ class ImageDataset(Dataset, CaptionMixin):
                     scale_size = self.resolution
                 else:
                     scale_size = random.randint(self.resolution, int(min_img_size))
-                img = img.resize((scale_size, scale_size), Image.BICUBIC)
+                scaler = scale_size / min_img_size
+                scale_width = int((img.width + 5) * scaler)
+                scale_height = int((img.height + 5) * scaler)
+                img = img.resize((scale_width, scale_height), Image.BICUBIC)
             img = transforms.RandomCrop(self.resolution)(img)
         else:
             img = transforms.CenterCrop(min_img_size)(img)
@@ -100,21 +112,7 @@ class ImageDataset(Dataset, CaptionMixin):
             return img
 
 
-class Augments:
-    def __init__(self, **kwargs):
-        self.method_name = kwargs.get('method', None)
-        self.params = kwargs.get('params', {})
 
-        # convert kwargs enums for cv2
-        for key, value in self.params.items():
-            if isinstance(value, str):
-                # split the string
-                split_string = value.split('.')
-                if len(split_string) == 2 and split_string[0] == 'cv2':
-                    if hasattr(cv2, split_string[1]):
-                        self.params[key] = getattr(cv2, split_string[1].upper())
-                    else:
-                        raise ValueError(f"invalid cv2 enum: {split_string[1]}")
 
 
 class AugmentedImageDataset(ImageDataset):
@@ -265,6 +263,38 @@ class PairedImageDataset(Dataset):
             img1 = exif_transpose(Image.open(img_path)).convert('RGB')
             img_path = img_path_or_tuple[1]
             img2 = exif_transpose(Image.open(img_path)).convert('RGB')
+
+            # always use # 2 (pos)
+            bucket_resolution = get_bucket_for_image_size(
+                width=img2.width,
+                height=img2.height,
+                resolution=self.size,
+                # divisibility=self.
+            )
+
+            # images will be same base dimension, but may be trimmed. We need to shrink and then central crop
+            if bucket_resolution['width'] > bucket_resolution['height']:
+                img1_scale_to_height = bucket_resolution["height"]
+                img1_scale_to_width = int(img1.width * (bucket_resolution["height"] / img1.height))
+                img2_scale_to_height = bucket_resolution["height"]
+                img2_scale_to_width = int(img2.width * (bucket_resolution["height"] / img2.height))
+            else:
+                img1_scale_to_width = bucket_resolution["width"]
+                img1_scale_to_height = int(img1.height * (bucket_resolution["width"] / img1.width))
+                img2_scale_to_width = bucket_resolution["width"]
+                img2_scale_to_height = int(img2.height * (bucket_resolution["width"] / img2.width))
+
+            img1_crop_height = bucket_resolution["height"]
+            img1_crop_width = bucket_resolution["width"]
+            img2_crop_height = bucket_resolution["height"]
+            img2_crop_width = bucket_resolution["width"]
+
+            # scale then center crop images
+            img1 = img1.resize((img1_scale_to_width, img1_scale_to_height), Image.BICUBIC)
+            img1 = transforms.CenterCrop((img1_crop_height, img1_crop_width))(img1)
+            img2 = img2.resize((img2_scale_to_width, img2_scale_to_height), Image.BICUBIC)
+            img2 = transforms.CenterCrop((img2_crop_height, img2_crop_width))(img2)
+
             # combine them side by side
             img = Image.new('RGB', (img1.width + img2.width, max(img1.height, img2.height)))
             img.paste(img1, (0, 0))
@@ -272,52 +302,45 @@ class PairedImageDataset(Dataset):
         else:
             img_path = img_path_or_tuple
             img = exif_transpose(Image.open(img_path)).convert('RGB')
+            height = self.size
+            # determine width to keep aspect ratio
+            width = int(img.size[0] * height / img.size[1])
+
+            # Downscale the source image first
+            img = img.resize((width, height), Image.BICUBIC)
 
         prompt = self.get_prompt_item(index)
-
-        height = self.size
-        # determine width to keep aspect ratio
-        width = int(img.size[0] * height / img.size[1])
-
-        # Downscale the source image first
-        img = img.resize((width, height), Image.BICUBIC)
         img = self.transform(img)
 
         return img, prompt, (self.neg_weight, self.pos_weight)
 
 
-printed_messages = []
+class AiToolkitDataset(LatentCachingMixin, BucketsMixin, CaptionMixin, Dataset):
 
-
-def print_once(msg):
-    global printed_messages
-    if msg not in printed_messages:
-        print(msg)
-        printed_messages.append(msg)
-
-
-class FileItem:
-    def __init__(self, **kwargs):
-        self.path = kwargs.get('path', None)
-        self.width = kwargs.get('width', None)
-        self.height = kwargs.get('height', None)
-        # we scale first, then crop
-        self.scale_to_width = kwargs.get('scale_to_width', self.width)
-        self.scale_to_height = kwargs.get('scale_to_height', self.height)
-        # crop values are from scaled size
-        self.crop_x = kwargs.get('crop_x', 0)
-        self.crop_y = kwargs.get('crop_y', 0)
-        self.crop_width = kwargs.get('crop_width', self.scale_to_width)
-        self.crop_height = kwargs.get('crop_height', self.scale_to_height)
-
-
-class AiToolkitDataset(Dataset, CaptionMixin, BucketsMixin):
-
-    def __init__(self, dataset_config: 'DatasetConfig', batch_size=1):
+    def __init__(
+            self,
+            dataset_config: 'DatasetConfig',
+            batch_size=1,
+            sd: 'StableDiffusion' = None,
+    ):
         super().__init__()
         self.dataset_config = dataset_config
-        self.folder_path = dataset_config.folder_path
-        self.caption_type = dataset_config.caption_type
+        folder_path = dataset_config.folder_path
+        self.dataset_path = dataset_config.dataset_path
+        if self.dataset_path is None:
+            self.dataset_path = folder_path
+
+        self.is_caching_latents = dataset_config.cache_latents or dataset_config.cache_latents_to_disk
+        self.is_caching_latents_to_memory = dataset_config.cache_latents
+        self.is_caching_latents_to_disk = dataset_config.cache_latents_to_disk
+        self.epoch_num = 0
+
+        self.sd = sd
+
+        if self.sd is None and self.is_caching_latents:
+            raise ValueError(f"sd is required for caching latents")
+
+        self.caption_type = dataset_config.caption_ext
         self.default_caption = dataset_config.default_caption
         self.random_scale = dataset_config.random_scale
         self.scale = dataset_config.scale
@@ -325,176 +348,211 @@ class AiToolkitDataset(Dataset, CaptionMixin, BucketsMixin):
         # we always random crop if random scale is enabled
         self.random_crop = self.random_scale if self.random_scale else dataset_config.random_crop
         self.resolution = dataset_config.resolution
-        self.file_list: List['FileItem'] = []
+        self.caption_dict = None
+        self.file_list: List['FileItemDTO'] = []
 
-        # get the file list
-        file_list = [
-            os.path.join(self.folder_path, file) for file in os.listdir(self.folder_path) if
-            file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))
-        ]
+        # check if dataset_path is a folder or json
+        if os.path.isdir(self.dataset_path):
+            file_list = [
+                os.path.join(self.dataset_path, file) for file in os.listdir(self.dataset_path) if
+                file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))
+            ]
+        else:
+            # assume json
+            with open(self.dataset_path, 'r') as f:
+                self.caption_dict = json.load(f)
+                # keys are file paths
+                file_list = list(self.caption_dict.keys())
+
+        if self.dataset_config.num_repeats > 1:
+            # repeat the list
+            file_list = file_list * self.dataset_config.num_repeats
 
         # this might take a while
         print(f"  -  Preprocessing image dimensions")
         bad_count = 0
         for file in tqdm(file_list):
             try:
-                w, h = image_utils.get_image_size(file)
-            except image_utils.UnknownImageFormat:
-                print_once(f'Warning: Some images in the dataset cannot be fast read. ' + \
-                           f'This process is faster for png, jpeg')
-                img = Image.open(file)
-                h, w = img.size
-            if int(min(h, w) * self.scale) >= self.resolution:
-                self.file_list.append(
-                    FileItem(
-                        path=file,
-                        width=w,
-                        height=h,
-                        scale_to_width=int(w * self.scale),
-                        scale_to_height=int(h * self.scale),
-                    )
+                file_item = FileItemDTO(
+                    path=file,
+                    dataset_config=dataset_config
                 )
-            else:
+                self.file_list.append(file_item)
+            except Exception as e:
+                print(traceback.format_exc())
+                print(f"Error processing image: {file}")
+                print(e)
                 bad_count += 1
 
         print(f"  -  Found {len(self.file_list)} images")
-        print(f"  -  Found {bad_count} images that are too small")
-        assert len(self.file_list) > 0, f"no images found in {self.folder_path}"
+        # print(f"  -  Found {bad_count} images that are too small")
+        assert len(self.file_list) > 0, f"no images found in {self.dataset_path}"
 
-        if self.dataset_config.buckets:
-            # setup buckets
-            self.setup_buckets()
+        # handle x axis flips
+        if self.dataset_config.flip_x:
+            print("  -  adding x axis flips")
+            current_file_list = [x for x in self.file_list]
+            for file_item in current_file_list:
+                # create a copy that is flipped on the x axis
+                new_file_item = copy.deepcopy(file_item)
+                new_file_item.flip_x = True
+                self.file_list.append(new_file_item)
+
+        # handle y axis flips
+        if self.dataset_config.flip_y:
+            print("  -  adding y axis flips")
+            current_file_list = [x for x in self.file_list]
+            for file_item in current_file_list:
+                # create a copy that is flipped on the y axis
+                new_file_item = copy.deepcopy(file_item)
+                new_file_item.flip_y = True
+                self.file_list.append(new_file_item)
+
+        if self.dataset_config.flip_x or self.dataset_config.flip_y:
+            print(f"  -  Found {len(self.file_list)} images after adding flips")
 
         self.transform = transforms.Compose([
             transforms.ToTensor(),
             transforms.Normalize([0.5], [0.5]),  # normalize to [-1, 1]
         ])
 
+        self.setup_epoch()
+
+    def setup_epoch(self):  # per-epoch setup; branches on self.epoch_num, which is incremented on every call
+        if self.epoch_num == 0:
+            # initial setup
+            # do not call for now
+            if self.dataset_config.buckets:
+                # setup buckets
+                self.setup_buckets()
+            if self.is_caching_latents:
+                self.cache_latents_all_latents()  # only reached on the first call (epoch_num == 0)
+        else:
+            if self.dataset_config.poi is not None:
+                # handle cropping to a specific point of interest
+                # setup buckets every epoch
+                self.setup_buckets(quiet=True)
+        self.epoch_num += 1
+
     def __len__(self):
         if self.dataset_config.buckets:
             return len(self.batch_indices)
         return len(self.file_list)
 
-    def _get_single_item(self, index):
-        file_item = self.file_list[index]
-        # todo make sure this matches
-        img = exif_transpose(Image.open(file_item.path)).convert('RGB')
-        w, h = img.size
-        if w > h and file_item.scale_to_width < file_item.scale_to_height:
-            # throw error, they should match
-            raise ValueError(
-                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={file_item.scale_to_width}, file_item.scale_to_height={file_item.scale_to_height}, file_item.path={file_item.path}")
-        elif h > w and file_item.scale_to_height < file_item.scale_to_width:
-            # throw error, they should match
-            raise ValueError(
-                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={file_item.scale_to_width}, file_item.scale_to_height={file_item.scale_to_height}, file_item.path={file_item.path}")
-
-        # Downscale the source image first
-        img = img.resize((int(img.size[0] * self.scale), int(img.size[1] * self.scale)), Image.BICUBIC)
-        min_img_size = min(img.size)
-
-        if self.dataset_config.buckets:
-            # todo allow scaling and cropping, will be hard to add
-            # scale and crop based on file item
-            img = img.resize((file_item.scale_to_width, file_item.scale_to_height), Image.BICUBIC)
-            img = transforms.CenterCrop((file_item.crop_height, file_item.crop_width))(img)
-        else:
-            if self.random_crop:
-                if self.random_scale and min_img_size > self.resolution:
-                    if min_img_size < self.resolution:
-                        print(
-                            f"Unexpected values: min_img_size={min_img_size}, self.resolution={self.resolution}, image file={file_item.path}")
-                        scale_size = self.resolution
-                    else:
-                        scale_size = random.randint(self.resolution, int(min_img_size))
-                    img = img.resize((scale_size, scale_size), Image.BICUBIC)
-                img = transforms.RandomCrop(self.resolution)(img)
-            else:
-                img = transforms.CenterCrop(min_img_size)(img)
-                img = img.resize((self.resolution, self.resolution), Image.BICUBIC)
-
-        img = self.transform(img)
-
-        # todo convert it all
-        dataset_config_dict = {
-            "is_reg": 1 if self.dataset_config.is_reg else 0,
-        }
-
-        if self.caption_type is not None:
-            prompt = self.get_caption_item(index)
-            return img, prompt, dataset_config_dict
-        else:
-            return img, dataset_config_dict
+    def _get_single_item(self, index) -> 'FileItemDTO':
+        file_item = copy.deepcopy(self.file_list[index])  # load into a deep copy; the entry kept in self.file_list is not the object returned
+        file_item.load_and_process_image(self.transform)
+        file_item.load_caption(self.caption_dict)  # caption_dict stays None when dataset_path is a directory; otherwise it holds the loaded JSON
+        return file_item
 
     def __getitem__(self, item):
         if self.dataset_config.buckets:
+            # for buckets we collate ourselves for now
+            # todo allow a scheduler to dynamically make buckets
             # we collate ourselves
+            if len(self.batch_indices) - 1 < item:
+                # tried everything to solve this. No way to reset length when redoing things. Pick another index
+                item = random.randint(0, len(self.batch_indices) - 1)
             idx_list = self.batch_indices[item]
-            tensor_list = []
-            prompt_list = []
-            dataset_config_dict_list = []
-            for idx in idx_list:
-                if self.caption_type is not None:
-                    img, prompt, dataset_config_dict = self._get_single_item(idx)
-                    prompt_list.append(prompt)
-                    dataset_config_dict_list.append(dataset_config_dict)
-                else:
-                    img, dataset_config_dict = self._get_single_item(idx)
-                    dataset_config_dict_list.append(dataset_config_dict)
-                tensor_list.append(img.unsqueeze(0))
-
-            if self.caption_type is not None:
-                return torch.cat(tensor_list, dim=0), prompt_list, dataset_config_dict_list
-            else:
-                return torch.cat(tensor_list, dim=0), dataset_config_dict_list
+            return [self._get_single_item(idx) for idx in idx_list]
         else:
             # Dataloader is batching
             return self._get_single_item(item)
 
 
-def get_dataloader_from_datasets(dataset_options, batch_size=1):
-    # TODO do bucketing
+def get_dataloader_from_datasets(
+        dataset_options,
+        batch_size=1,
+        sd: 'StableDiffusion' = None,
+) -> DataLoader:
     if dataset_options is None or len(dataset_options) == 0:
         return None
 
     datasets = []
     has_buckets = False
+    is_caching_latents = False
+
+    dataset_config_list = []
+    # preprocess them all
     for dataset_option in dataset_options:
         if isinstance(dataset_option, DatasetConfig):
-            config = dataset_option
+            dataset_config_list.append(dataset_option)
         else:
-            config = DatasetConfig(**dataset_option)
+            # preprocess raw data
+            split_configs = preprocess_dataset_raw_config([dataset_option])
+            for x in split_configs:
+                dataset_config_list.append(DatasetConfig(**x))
+
+    for config in dataset_config_list:
+
         if config.type == 'image':
-            dataset = AiToolkitDataset(config, batch_size=batch_size)
+            dataset = AiToolkitDataset(config, batch_size=batch_size, sd=sd)
             datasets.append(dataset)
             if config.buckets:
                 has_buckets = True
+            if config.cache_latents or config.cache_latents_to_disk:
+                is_caching_latents = True
         else:
             raise ValueError(f"invalid dataset type: {config.type}")
 
     concatenated_dataset = ConcatDataset(datasets)
+
+    # todo build scheduler that can get buckets from all datasets that match
+    # todo and evenly distribute reg images
+
+    def dto_collation(batch: List['FileItemDTO']):
+        # create DTO batch
+        batch = DataLoaderBatchDTO(
+            file_items=batch
+        )
+        return batch
+
+    # check if is caching latents
+
+
     if has_buckets:
         # make sure they all have buckets
         for dataset in datasets:
             assert dataset.dataset_config.buckets, f"buckets not found on dataset {dataset.dataset_config.folder_path}, you either need all buckets or none"
 
-        def custom_collate_fn(batch):
-            # just return as is
-            return batch
-
         data_loader = DataLoader(
             concatenated_dataset,
-            batch_size=None,  # we batch in the dataloader
+            batch_size=None,  # we batch in the datasets for now
             drop_last=False,
             shuffle=True,
-            collate_fn=custom_collate_fn,  # Use the custom collate function
-            num_workers=2
+            collate_fn=dto_collation,  # Use the custom collate function
+            num_workers=4
         )
     else:
         data_loader = DataLoader(
             concatenated_dataset,
             batch_size=batch_size,
             shuffle=True,
-            num_workers=2
+            num_workers=4,
+            collate_fn=dto_collation
         )
     return data_loader
+
+
+def trigger_dataloader_setup_epoch(dataloader: DataLoader):  # calls setup_epoch() on the datasets behind the dataloader that define it
+    # hacky but needed because of different types of datasets and dataloaders
+    dataloader.len = None
+    if isinstance(dataloader.dataset, list):  # case 1: dataloader.dataset is a plain list of datasets
+        for dataset in dataloader.dataset:
+            if hasattr(dataset, 'datasets'):  # list entry that itself wraps sub-datasets
+                for sub_dataset in dataset.datasets:
+                    if hasattr(sub_dataset, 'setup_epoch'):
+                        sub_dataset.setup_epoch()
+                        sub_dataset.len = None
+            elif hasattr(dataset, 'setup_epoch'):
+                dataset.setup_epoch()
+                dataset.len = None
+    elif hasattr(dataloader.dataset, 'setup_epoch'):  # case 2: a single dataset that exposes setup_epoch itself
+        dataloader.dataset.setup_epoch()
+        dataloader.dataset.len = None
+    elif hasattr(dataloader.dataset, 'datasets'):  # case 3: a wrapper exposing .datasets (presumably the ConcatDataset built in get_dataloader_from_datasets)
+        dataloader.dataset.len = None
+        for sub_dataset in dataloader.dataset.datasets:
+            if hasattr(sub_dataset, 'setup_epoch'):
+                sub_dataset.setup_epoch()
+                sub_dataset.len = None
diff --git a/toolkit/data_transfer_object/data_loader.py b/toolkit/data_transfer_object/data_loader.py
new file mode 100644
index 00000000..f71add54
--- /dev/null
+++ b/toolkit/data_transfer_object/data_loader.py
@@ -0,0 +1,202 @@
+from typing import TYPE_CHECKING, List, Union
+import torch
+import random
+
+from PIL import Image
+from PIL.ImageOps import exif_transpose
+
+from toolkit import image_utils
+from toolkit.dataloader_mixins import CaptionProcessingDTOMixin, ImageProcessingDTOMixin, LatentCachingFileItemDTOMixin, \
+    ControlFileItemDTOMixin, ArgBreakMixin, PoiFileItemDTOMixin, MaskFileItemDTOMixin, AugmentationFileItemDTOMixin, \
+    UnconditionalFileItemDTOMixin
+
+if TYPE_CHECKING:
+    from toolkit.config_modules import DatasetConfig
+
+printed_messages = []
+
+
+def print_once(msg):  # print msg only if an equal message has not already gone through this function
+    global printed_messages
+    if msg not in printed_messages:
+        print(msg)
+        printed_messages.append(msg)  # remember it so later calls with the same message stay silent
+
+
+class FileItemDTO(  # one dataset image: path, dimensions, scale/crop/flip settings and the loaded tensor
+    LatentCachingFileItemDTOMixin,
+    CaptionProcessingDTOMixin,
+    ImageProcessingDTOMixin,
+    ControlFileItemDTOMixin,
+    MaskFileItemDTOMixin,
+    AugmentationFileItemDTOMixin,
+    UnconditionalFileItemDTOMixin,
+    PoiFileItemDTOMixin,
+    ArgBreakMixin,
+):
+    def __init__(self, *args, **kwargs):  # reads 'path' and 'dataset_config' from kwargs; the remaining keys are optional overrides
+        self.path = kwargs.get('path', None)
+        self.dataset_config: 'DatasetConfig' = kwargs.get('dataset_config', None)
+        # read width and height: fast header read first, full PIL open as the fallback
+        try:
+            w, h = image_utils.get_image_size(self.path)
+        except image_utils.UnknownImageFormat:
+            print_once(f'Warning: Some images in the dataset cannot be fast read. ' + \
+                       f'This process is faster for png, jpeg')
+            with Image.open(self.path) as img:  # close the file handle once the size is known
+                w, h = exif_transpose(img).size  # PIL reports size as (width, height)
+        self.width: int = w
+        self.height: int = h
+        super().__init__(*args, **kwargs)  # width/height are assigned before the mixin initialisers run
+
+        # self.caption_path: str = kwargs.get('caption_path', None)
+        self.raw_caption: str = kwargs.get('raw_caption', None)
+        # we scale first, then crop
+        self.scale_to_width: int = kwargs.get('scale_to_width', int(self.width * self.dataset_config.scale))
+        self.scale_to_height: int = kwargs.get('scale_to_height', int(self.height * self.dataset_config.scale))
+        # crop values are from scaled size
+        self.crop_x: int = kwargs.get('crop_x', 0)
+        self.crop_y: int = kwargs.get('crop_y', 0)
+        self.crop_width: int = kwargs.get('crop_width', self.scale_to_width)
+        self.crop_height: int = kwargs.get('crop_height', self.scale_to_height)
+        self.flip_x: bool = kwargs.get('flip_x', False)
+        self.flip_y: bool = kwargs.get('flip_y', False)  # read its own key; previously this looked up 'flip_x'
+        self.augments: List[str] = self.dataset_config.augments
+
+        self.network_weight: float = self.dataset_config.network_weight
+        self.is_reg = self.dataset_config.is_reg
+        self.tensor: Union[torch.Tensor, None] = None  # starts empty; reset to None again by cleanup()
+
+    def cleanup(self):  # drop the tensor, then call the cleanup_* hooks (not defined in this class)
+        self.tensor = None
+        self.cleanup_latent()
+        self.cleanup_control()
+        self.cleanup_mask()
+        self.cleanup_unconditional()
+
+
+class DataLoaderBatchDTO:
+    def __init__(self, **kwargs):
+        """
+        Collate a list of loaded file items into batched tensors.
+
+        Keyword Args:
+            file_items: list of FileItemDTO objects that make up the batch. The
+                first item decides whether cached latents are used for all of them.
+
+        Attributes set here:
+            tensor: stacked image tensors, only when latents are not cached.
+            latents: stacked cached latents, only when latents are cached.
+            control_tensor, mask_tensor, unaugmented_tensor, unconditional_tensor:
+                stacked per-item tensors when at least one item provides one. Items
+                missing the tensor are filled with zeros shaped like the first one
+                found. Left as None when no item provides one.
+            unconditional_latents, sigmas: initialized to None.
+
+        Raises:
+            Exception: any error hit while collating is printed and re-raised.
+        """
+        try:
+            self.file_items: List['FileItemDTO'] = kwargs.get('file_items', None)
+            is_latents_cached = self.file_items[0].is_latent_cached
+            self.tensor: Union[torch.Tensor, None] = None
+            self.latents: Union[torch.Tensor, None] = None
+            self.control_tensor: Union[torch.Tensor, None] = None
+            self.mask_tensor: Union[torch.Tensor, None] = None
+            self.unaugmented_tensor: Union[torch.Tensor, None] = None
+            self.unconditional_tensor: Union[torch.Tensor, None] = None
+            self.unconditional_latents: Union[torch.Tensor, None] = None
+            self.sigmas: Union[torch.Tensor, None] = None  # can be added elsewhere and passed along training code
+            if is_latents_cached:
+                # if we have encoded latents, we concatenate them
+                self.latents = torch.cat([x.get_latent().unsqueeze(0) for x in self.file_items])
+            else:
+                # only return a tensor if latents are not cached
+                self.tensor = torch.cat([x.tensor.unsqueeze(0) for x in self.file_items])
+            # if any have a control tensor, we concatenate them
+            if any([x.control_tensor is not None for x in self.file_items]):
+                # find one to use as a base
+                base_control_tensor = next(x.control_tensor for x in self.file_items if x.control_tensor is not None)
+                control_tensors = []
+                for x in self.file_items:
+                    if x.control_tensor is None:
+                        control_tensors.append(torch.zeros_like(base_control_tensor))
+                    else:
+                        control_tensors.append(x.control_tensor)
+                self.control_tensor = torch.cat([x.unsqueeze(0) for x in control_tensors])
+
+            if any([x.mask_tensor is not None for x in self.file_items]):
+                # find one to use as a base
+                base_mask_tensor = next(x.mask_tensor for x in self.file_items if x.mask_tensor is not None)
+                mask_tensors = []
+                for x in self.file_items:
+                    if x.mask_tensor is None:
+                        mask_tensors.append(torch.zeros_like(base_mask_tensor))
+                    else:
+                        mask_tensors.append(x.mask_tensor)
+                self.mask_tensor = torch.cat([x.unsqueeze(0) for x in mask_tensors])
+
+            # add unaugmented tensors for ones with augments
+            if any([x.unaugmented_tensor is not None for x in self.file_items]):
+                # find one to use as a base
+                base_unaugmented_tensor = next(x.unaugmented_tensor for x in self.file_items if x.unaugmented_tensor is not None)
+                unaugmented_tensor = []
+                for x in self.file_items:
+                    if x.unaugmented_tensor is None:
+                        unaugmented_tensor.append(torch.zeros_like(base_unaugmented_tensor))
+                    else:
+                        unaugmented_tensor.append(x.unaugmented_tensor)
+                self.unaugmented_tensor = torch.cat([x.unsqueeze(0) for x in unaugmented_tensor])
+
+            # add unconditional tensors
+            if any([x.unconditional_tensor is not None for x in self.file_items]):
+                # find one to use as a base; it must come from an item that has an unconditional tensor
+                base_unconditional_tensor = next(x.unconditional_tensor for x in self.file_items if x.unconditional_tensor is not None)
+                unconditional_tensor = []
+                for x in self.file_items:
+                    if x.unconditional_tensor is None:
+                        unconditional_tensor.append(torch.zeros_like(base_unconditional_tensor))
+                    else:
+                        unconditional_tensor.append(x.unconditional_tensor)
+                self.unconditional_tensor = torch.cat([x.unsqueeze(0) for x in unconditional_tensor])
+        except Exception as e:
+            print(e)
+            raise e
+
+    def get_is_reg_list(self):
+        return [item.is_reg for item in self.file_items]  # one is_reg flag per file item, in batch order
+
+    def get_network_weight_list(self):
+        return [item.network_weight for item in self.file_items]  # one network_weight per file item, in batch order
+
+    def get_caption_list(
+            self,
+            trigger=None,
+            to_replace_list=None,
+            add_if_not_present=True
+    ):
+        # one caption per file item, in batch order; the arguments are forwarded unchanged
+        caption_kwargs = dict(
+            trigger=trigger, to_replace_list=to_replace_list, add_if_not_present=add_if_not_present
+        )
+        return [item.get_caption(**caption_kwargs) for item in self.file_items]
+
+    def get_caption_short_list(
+            self,
+            trigger=None,
+            to_replace_list=None,
+            add_if_not_present=True
+    ):
+        # same as get_caption_list, but every item is asked for its short caption
+        caption_kwargs = dict(
+            trigger=trigger, to_replace_list=to_replace_list,
+            add_if_not_present=add_if_not_present, short_caption=True
+        )
+        return [item.get_caption(**caption_kwargs) for item in self.file_items]
+
+    def cleanup(self):
+        # drop the batch-level tensors, then let every file item release its own data
+        del self.latents, self.tensor, self.control_tensor
+        for item in self.file_items:
+            item.cleanup()
+        # NOTE(review): mask / unaugmented / unconditional tensors are not released here
diff --git a/toolkit/dataloader_mixins.py b/toolkit/dataloader_mixins.py
index fea29421..4842556a 100644
--- a/toolkit/dataloader_mixins.py
+++ b/toolkit/dataloader_mixins.py
@@ -1,9 +1,76 @@
+import base64
+import hashlib
+import json
+import math
 import os
-from typing import TYPE_CHECKING, List, Dict
+import random
+from collections import OrderedDict
+from typing import TYPE_CHECKING, List, Dict, Union
+
+import cv2
+import numpy as np
+import torch
+from safetensors.torch import load_file, save_file
+from tqdm import tqdm
+
+from toolkit.basic import flush, value_map
+from toolkit.buckets import get_bucket_for_image_size, get_resolution
+from toolkit.metadata import get_meta_for_safetensors
+from toolkit.prompt_utils import inject_trigger_into_prompt
+from torchvision import transforms
+from PIL import Image, ImageFilter, ImageOps
+from PIL.ImageOps import exif_transpose
+import albumentations as A
+
+from toolkit.train_tools import get_torch_dtype
+
+if TYPE_CHECKING:
+    from toolkit.data_loader import AiToolkitDataset
+    from toolkit.data_transfer_object.data_loader import FileItemDTO
+
+
+# def get_associated_caption_from_img_path(img_path):
+# https://demo.albumentations.ai/
+class Augments:
+    def __init__(self, **kwargs):
+        # one augmentation step from the dataset config: a method name plus its keyword params
+        self.method_name = kwargs.get('method', None)
+        self.params = kwargs.get('params', {})
+        # convert "cv2.<NAME>" strings into the matching cv2 attribute (updates params in place)
+        for key, value in self.params.items():
+            if isinstance(value, str):
+                split_string = value.split('.')
+                if len(split_string) == 2 and split_string[0] == 'cv2':
+                    # validate and fetch the same upper-cased name so the two lookups cannot disagree
+                    enum_name = split_string[1].upper()
+                    if not hasattr(cv2, enum_name):
+                        raise ValueError(f"invalid cv2 enum: {split_string[1]}")
+                    self.params[key] = getattr(cv2, enum_name)
+
+
+transforms_dict = {  # torchvision transforms selectable by name; ImageProcessingDTOMixin looks up each entry of self.augments here
+    'ColorJitter': transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.03),
+    'RandomEqualize': transforms.RandomEqualize(p=0.2),
+}
+
+caption_ext_list = ['txt', 'json', 'caption']  # caption file extensions tried in this order by CaptionMixin.get_caption_item
+
+
+def clean_caption(caption):
+    """
+    Normalize a raw caption into a single comma separated line.
+
+    Line breaks become separators, each piece is stripped and empty pieces are dropped.
+    """
+    flattened = caption.replace('\n', ', ').replace('\r', ', ')
+    # strip every comma separated piece lazily
+    stripped = (piece.strip() for piece in flattened.split(','))
+    # keep only the non-empty pieces and join them back together
+    return ', '.join(piece for piece in stripped if piece)
 
 
 class CaptionMixin:
-    def get_caption_item(self, index):
+    def get_caption_item(self: 'AiToolkitDataset', index):
         if not hasattr(self, 'caption_type'):
             raise Exception('caption_type not found on class instance')
         if not hasattr(self, 'file_list'):
@@ -13,29 +80,31 @@ class CaptionMixin:
             img_path = img_path_or_tuple[0] if isinstance(img_path_or_tuple[0], str) else img_path_or_tuple[0].path
             # check if either has a prompt file
             path_no_ext = os.path.splitext(img_path)[0]
-            prompt_path = path_no_ext + '.txt'
-            if not os.path.exists(prompt_path):
-                img_path = img_path_or_tuple[1] if isinstance(img_path_or_tuple[1], str) else img_path_or_tuple[1].path
-                path_no_ext = os.path.splitext(img_path)[0]
-                prompt_path = path_no_ext + '.txt'
+            prompt_path = None
+            for ext in caption_ext_list:
+                prompt_path = path_no_ext + '.' + ext
+                if os.path.exists(prompt_path):
+                    break
         else:
             img_path = img_path_or_tuple if isinstance(img_path_or_tuple, str) else img_path_or_tuple.path
             # see if prompt file exists
             path_no_ext = os.path.splitext(img_path)[0]
-            prompt_path = path_no_ext + '.txt'
+            prompt_path = None
+            for ext in caption_ext_list:
+                prompt_path = path_no_ext + '.' + ext
+                if os.path.exists(prompt_path):
+                    break
 
         if os.path.exists(prompt_path):
             with open(prompt_path, 'r', encoding='utf-8') as f:
                 prompt = f.read()
-                # remove any newlines
-                prompt = prompt.replace('\n', ', ')
-                # remove new lines for all operating systems
-                prompt = prompt.replace('\r', ', ')
-                prompt_split = prompt.split(',')
-                # remove empty strings
-                prompt_split = [p.strip() for p in prompt_split if p.strip()]
-                # join back together
-                prompt = ', '.join(prompt_split)
+                # check if is json
+                if prompt_path.endswith('.json'):
+                    prompt = json.loads(prompt)
+                    if 'caption' in prompt:
+                        prompt = prompt['caption']
+
+                prompt = clean_caption(prompt)
         else:
             prompt = ''
             # get default_prompt if it exists on the class instance
@@ -48,7 +117,7 @@ class CaptionMixin:
 
 if TYPE_CHECKING:
     from toolkit.config_modules import DatasetConfig
-    from toolkit.data_loader import FileItem
+    from toolkit.data_transfer_object.data_loader import FileItemDTO
 
 
 class Bucket:
@@ -63,86 +132,911 @@ class BucketsMixin:
         self.buckets: Dict[str, Bucket] = {}
         self.batch_indices: List[List[int]] = []
 
-    def build_batch_indices(self):
+    def build_batch_indices(self: 'AiToolkitDataset'):
+        self.batch_indices = []  # start from an empty list so repeated calls do not accumulate batches
         for key, bucket in self.buckets.items():
             for start_idx in range(0, len(bucket.file_list_idx), self.batch_size):
                 end_idx = min(start_idx + self.batch_size, len(bucket.file_list_idx))
                 batch = bucket.file_list_idx[start_idx:end_idx]
                 self.batch_indices.append(batch)
 
-    def setup_buckets(self):
+    def shuffle_buckets(self: 'AiToolkitDataset'):
+        for bucket in self.buckets.values():  # reorder each bucket's file indices in place
+            random.shuffle(bucket.file_list_idx)
+
+    def setup_buckets(self: 'AiToolkitDataset', quiet=False):
         if not hasattr(self, 'file_list'):
             raise Exception(f'file_list not found on class instance {self.__class__.__name__}')
         if not hasattr(self, 'dataset_config'):
             raise Exception(f'dataset_config not found on class instance {self.__class__.__name__}')
 
+        if self.epoch_num > 0 and self.dataset_config.poi is None:
+            # no need to rebuild buckets for now
+            # todo handle random cropping for buckets
+            return
+        self.buckets = {}  # clear it
+
         config: 'DatasetConfig' = self.dataset_config
         resolution = config.resolution
         bucket_tolerance = config.bucket_tolerance
-        file_list: List['FileItem'] = self.file_list
-
-        # make sure out resolution is divisible by bucket_tolerance
-        if resolution % bucket_tolerance != 0:
-            # reduce it to the nearest divisible number
-            resolution = resolution - (resolution % bucket_tolerance)
+        file_list: List['FileItemDTO'] = self.file_list
 
         # for file_item in enumerate(file_list):
         for idx, file_item in enumerate(file_list):
-            width = file_item.crop_width
-            height = file_item.crop_height
+            file_item: 'FileItemDTO' = file_item  # self-assignment that only adds the annotation
+            width = int(file_item.width * file_item.dataset_config.scale)  # source size after the dataset scale factor
+            height = int(file_item.height * file_item.dataset_config.scale)
 
-            # determine new size, smallest dimension should be equal to resolution
-            # the other dimension should be the same ratio it is now (bigger)
-            new_width = resolution
-            new_height = resolution
-            if width > height:
-                # scale width to match new resolution,
-                new_width = int(width * (resolution / height))
-                file_item.crop_width = new_width
-                file_item.scale_to_width = new_width
-                file_item.crop_height = resolution
-                file_item.scale_to_height = resolution
-                # make sure new_width is divisible by bucket_tolerance
-                if new_width % bucket_tolerance != 0:
-                    # reduce it to the nearest divisible number
-                    reduction = new_width % bucket_tolerance
-                    file_item.crop_width = new_width - reduction
-                    new_width = file_item.crop_width
-                    # adjust the new x position so we evenly crop
-                    file_item.crop_x = int(file_item.crop_x + (reduction / 2))
-            elif height > width:
-                # scale height to match new resolution
-                new_height = int(height * (resolution / width))
-                file_item.crop_height = new_height
-                file_item.scale_to_height = new_height
-                file_item.scale_to_width = resolution
-                file_item.crop_width = resolution
-                # make sure new_height is divisible by bucket_tolerance
-                if new_height % bucket_tolerance != 0:
-                    # reduce it to the nearest divisible number
-                    reduction = new_height % bucket_tolerance
-                    file_item.crop_height = new_height - reduction
-                    new_height = file_item.crop_height
-                    # adjust the new x position so we evenly crop
-                    file_item.crop_y = int(file_item.crop_y + (reduction / 2))
-            else:
-                # square image
-                file_item.crop_height = resolution
-                file_item.scale_to_height = resolution
-                file_item.scale_to_width = resolution
-                file_item.crop_width = resolution
+            did_process_poi = False
+            if file_item.has_point_of_interest:
+                # Attempt to process the poi if we can. It won't process if the image is smaller than the resolution
+                did_process_poi = file_item.setup_poi_bucket()
+            if not did_process_poi:
+                bucket_resolution = get_bucket_for_image_size(
+                    width, height,
+                    resolution=resolution,
+                    divisibility=bucket_tolerance
+                )
+
+                # Calculate scale factors for width and height
+                width_scale_factor = bucket_resolution["width"] / width
+                height_scale_factor = bucket_resolution["height"] / height
+
+                # Use the maximum of the scale factors to ensure both dimensions are scaled above the bucket resolution
+                max_scale_factor = max(width_scale_factor, height_scale_factor)
+
+                # round up
+                file_item.scale_to_width = int(math.ceil(width * max_scale_factor))
+                file_item.scale_to_height = int(math.ceil(height * max_scale_factor))
+
+                file_item.crop_height = bucket_resolution["height"]
+                file_item.crop_width = bucket_resolution["width"]
+
+                new_width = bucket_resolution["width"]
+                new_height = bucket_resolution["height"]
+
+                if self.dataset_config.random_crop:
+                    # random crop
+                    crop_x = random.randint(0, file_item.scale_to_width - new_width)
+                    crop_y = random.randint(0, file_item.scale_to_height - new_height)
+                    file_item.crop_x = crop_x
+                    file_item.crop_y = crop_y
+                else:
+                    # do central crop
+                    file_item.crop_x = int((file_item.scale_to_width - new_width) / 2)
+                    file_item.crop_y = int((file_item.scale_to_height - new_height) / 2)
+
+                if file_item.crop_y < 0 or file_item.crop_x < 0:
+                    print('debug')  # NOTE(review): leftover debug print; it only signals a negative crop offset
 
             # check if bucket exists, if not, create it
-            bucket_key = f'{new_width}x{new_height}'
+            bucket_key = f'{file_item.crop_width}x{file_item.crop_height}'
             if bucket_key not in self.buckets:
-                self.buckets[bucket_key] = Bucket(new_width, new_height)
+                self.buckets[bucket_key] = Bucket(file_item.crop_width, file_item.crop_height)
             self.buckets[bucket_key].file_list_idx.append(idx)
 
         # print the buckets
+        self.shuffle_buckets()  # randomize the file order inside every bucket before batching
         self.build_batch_indices()
-        print(f'Bucket sizes for {self.__class__.__name__}:')
-        for key, bucket in self.buckets.items():
-            print(f'{key}: {len(bucket.file_list_idx)} files')
-        print(f'{len(self.buckets)} buckets made')
+        if not quiet:
+            print(f'Bucket sizes for {self.dataset_path}:')
+            for key, bucket in self.buckets.items():
+                print(f'{key}: {len(bucket.file_list_idx)} files')
+            print(f'{len(self.buckets)} buckets made')
 
-        # file buckets made
+
+class CaptionProcessingDTOMixin:
+    """FileItemDTO mixin that loads the raw caption(s) of a file and builds training captions."""
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        # set unconditionally so load_caption / get_caption can always read them
+        self.raw_caption: str = None
+        self.raw_caption_short: str = None
+
+    # todo allow for loading from sd-scripts style dict
+    def load_caption(self: 'FileItemDTO', caption_dict: Union[dict, None]):
+        """
+        Fill raw_caption / raw_caption_short unless raw_caption is already set.
+
+        Sources, in order: caption_dict[self.path], the caption file next to the image
+        (extension from dataset_config.caption_ext), then dataset_config.default_caption.
+        """
+        if self.raw_caption is not None:
+            # we already loaded it
+            pass
+        elif caption_dict is not None and self.path in caption_dict and "caption" in caption_dict[self.path]:
+            self.raw_caption = caption_dict[self.path]["caption"]
+            if 'caption_short' in caption_dict[self.path]:
+                self.raw_caption_short = caption_dict[self.path]["caption_short"]
+        else:
+            # see if prompt file exists
+            path_no_ext = os.path.splitext(self.path)[0]
+            prompt_ext = self.dataset_config.caption_ext
+            prompt_path = f"{path_no_ext}.{prompt_ext}"
+            short_caption = None
+
+            if os.path.exists(prompt_path):
+                with open(prompt_path, 'r', encoding='utf-8') as f:
+                    prompt = f.read()
+                    if prompt_path.endswith('.json'):
+                        # replace any line endings with spaces for \n \r \r\n
+                        prompt = prompt.replace('\r\n', ' ')
+                        prompt = prompt.replace('\n', ' ')
+                        prompt = prompt.replace('\r', ' ')
+
+                        # read both keys from the parsed dict, never from the extracted caption string
+                        caption_data = json.loads(prompt)
+                        prompt = caption_data
+                        if isinstance(caption_data, dict):
+                            prompt = caption_data.get('caption', '')
+                            short_caption = caption_data.get('caption_short', None)
+                    prompt = clean_caption(prompt)
+                    if short_caption is not None:
+                        short_caption = clean_caption(short_caption)
+            else:
+                prompt = ''
+                if self.dataset_config.default_caption is not None:
+                    prompt = self.dataset_config.default_caption
+
+            if short_caption is None:
+                short_caption = self.dataset_config.default_caption
+            self.raw_caption = prompt
+            self.raw_caption_short = short_caption
+
+    def get_caption(
+            self: 'FileItemDTO',
+            trigger=None,
+            to_replace_list=None,
+            add_if_not_present=False,
+            short_caption=False
+    ):
+        """
+        Build the caption for this item: optional caption/token dropout and shuffling,
+        trigger injection via inject_trigger_into_prompt, then an optional random trigger.
+        Dropout is skipped for short captions. Returns the caption string ('' if dropped).
+        """
+        raw_caption = self.raw_caption_short if short_caption else self.raw_caption
+        if raw_caption is None:
+            raw_caption = ''
+        # handle dropout
+        if self.dataset_config.caption_dropout_rate > 0 and not short_caption:
+            if random.random() < self.dataset_config.caption_dropout_rate:
+                # drop the caption
+                return ''
+
+        # get tokens: split on commas, trim whitespace, remove empty strings
+        token_list = [x.strip() for x in raw_caption.split(',') if x.strip()]
+
+        if self.dataset_config.shuffle_tokens:
+            random.shuffle(token_list)
+
+        # handle token dropout
+        if self.dataset_config.token_dropout_rate > 0 and not short_caption:
+            new_token_list = []
+            for token in token_list:
+                # get a random float from 0 to 1
+                rand = random.random()
+                if rand > self.dataset_config.token_dropout_rate:
+                    # keep the token
+                    new_token_list.append(token)
+            token_list = new_token_list
+
+        # join back together
+        caption = ', '.join(token_list)
+        caption = inject_trigger_into_prompt(caption, trigger, to_replace_list, add_if_not_present)
+
+        if self.dataset_config.random_triggers and len(self.dataset_config.random_triggers) > 0:
+            # add random triggers
+            caption = caption + ', ' + random.choice(self.dataset_config.random_triggers)
+
+        if self.dataset_config.shuffle_tokens:
+            # shuffle again, since the trigger and random trigger were appended after the first shuffle
+            token_list = [x.strip() for x in caption.split(',') if x.strip()]
+            random.shuffle(token_list)
+            caption = ', '.join(token_list)
+
+        return caption
+
+
+class ImageProcessingDTOMixin:
+    """FileItemDTO mixin that loads the item's image (or cached latent) and prepares it for training."""
+    def load_and_process_image(
+            self: 'FileItemDTO',
+            transform: Union[None, transforms.Compose],
+            only_load_latents=False
+    ):
+        """
+        Load and prepare the image (orientation, flips, scale, crop, augments) into self.tensor.
+
+        With cached latents only the latent and the companion images are loaded. Without them,
+        companion images (control / mask / unconditional) are skipped when only_load_latents is True.
+        Raises ValueError on an orientation mismatch and re-raises image loading errors.
+        """
+        # if we are caching latents, just do that
+        if self.is_latent_cached:
+            self.get_latent()
+            if self.has_control_image:
+                self.load_control_image()
+            if self.has_mask_image:
+                self.load_mask_image()
+            if self.has_unconditional:
+                self.load_unconditional_image()
+            return
+        try:
+            img = Image.open(self.path)
+            img = exif_transpose(img)
+        except Exception as e:
+            print(f"Error: {e}")
+            print(f"Error loading image: {self.path}")
+            raise  # nothing below can run without an image, so surface the original error
+
+        if self.use_alpha_as_mask:
+            # drop the alpha channel by slicing so convert('RGB') cannot replace it with another color
+            np_img = np.array(img)
+            # strip off alpha
+            np_img = np_img[:, :, :3]
+            img = Image.fromarray(np_img)
+
+        img = img.convert('RGB')
+        w, h = img.size
+        landscape_mismatch = w > h and self.scale_to_width < self.scale_to_height
+        portrait_mismatch = h > w and self.scale_to_height < self.scale_to_width
+        if landscape_mismatch or portrait_mismatch:
+            raise ValueError(
+                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={self.scale_to_width}, file_item.scale_to_height={self.scale_to_height}, file_item.path={self.path}")
+
+        # transpose returns a new image, so assign it; NOTE(review): mask/unconditional loaders must flip the same way
+        if self.flip_x:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        if self.flip_y:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+        if self.dataset_config.buckets:
+            # scale and crop based on file item
+            img = img.resize((self.scale_to_width, self.scale_to_height), Image.BICUBIC)
+            # crop to x_crop, y_crop, x_crop + crop_width, y_crop + crop_height
+            if img.width < self.crop_x + self.crop_width or img.height < self.crop_y + self.crop_height:
+                # todo look into this. This still happens sometimes
+                print('size mismatch')
+            img = img.crop((
+                self.crop_x,
+                self.crop_y,
+                self.crop_x + self.crop_width,
+                self.crop_y + self.crop_height
+            ))
+        else:
+            # Downscale the source image first
+            # TODO this is not right
+            img = img.resize(
+                (int(img.size[0] * self.dataset_config.scale), int(img.size[1] * self.dataset_config.scale)),
+                Image.BICUBIC)
+            min_img_size = min(img.size)
+            if self.dataset_config.random_crop:
+                if self.dataset_config.random_scale and min_img_size > self.dataset_config.resolution:
+                    # min_img_size is strictly larger than the resolution here, so the range is valid
+                    scale_size = random.randint(self.dataset_config.resolution, int(min_img_size))
+                    scaler = scale_size / min_img_size
+                    scale_width = int((img.width + 5) * scaler)
+                    scale_height = int((img.height + 5) * scaler)
+                    img = img.resize((scale_width, scale_height), Image.BICUBIC)
+                img = transforms.RandomCrop(self.dataset_config.resolution)(img)
+            else:
+                img = transforms.CenterCrop(min_img_size)(img)
+                img = img.resize((self.dataset_config.resolution, self.dataset_config.resolution), Image.BICUBIC)
+
+        if self.augments is not None and len(self.augments) > 0:
+            # do augmentations
+            for augment in self.augments:
+                if augment in transforms_dict:
+                    img = transforms_dict[augment](img)
+
+        if self.has_augmentations:
+            # augmentations handles transforms
+            img = self.augment_image(img, transform=transform)
+        elif transform:
+            img = transform(img)
+
+        self.tensor = img
+        if not only_load_latents:
+            if self.has_control_image:
+                self.load_control_image()
+            if self.has_mask_image:
+                self.load_mask_image()
+            if self.has_unconditional:
+                self.load_unconditional_image()
+
+
+class ControlFileItemDTOMixin:
+    """FileItemDTO mixin that finds and loads the control image matching an item."""
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        self.has_control_image = False
+        self.control_path: Union[str, None] = None
+        self.control_tensor: Union[torch.Tensor, None] = None
+        dataset_config: 'DatasetConfig' = kwargs.get('dataset_config', None)
+        if dataset_config.control_path is not None:
+            # find the control image path
+            control_path = dataset_config.control_path
+            # we are using control images
+            img_path = kwargs.get('path', None)
+            img_ext_list = ['.jpg', '.jpeg', '.png', '.webp']
+            file_name_no_ext = os.path.splitext(os.path.basename(img_path))[0]
+            for ext in img_ext_list:
+                if os.path.exists(os.path.join(control_path, file_name_no_ext + ext)):
+                    self.control_path = os.path.join(control_path, file_name_no_ext + ext)
+                    self.has_control_image = True
+                    break
+
+    def load_control_image(self: 'FileItemDTO'):
+        """
+        Load the control image with the item's flips, scale and crop into self.control_tensor.
+
+        Raises ValueError on an orientation mismatch, Exception for non-bucket datasets,
+        and re-raises image loading errors.
+        """
+        try:
+            img = Image.open(self.control_path).convert('RGB')
+            img = exif_transpose(img)
+        except Exception as e:
+            print(f"Error: {e}")
+            print(f"Error loading image: {self.control_path}")
+            raise  # nothing below can run without an image, so surface the original error
+        w, h = img.size
+        landscape_mismatch = w > h and self.scale_to_width < self.scale_to_height
+        portrait_mismatch = h > w and self.scale_to_height < self.scale_to_width
+        if landscape_mismatch or portrait_mismatch:
+            raise ValueError(
+                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={self.scale_to_width}, file_item.scale_to_height={self.scale_to_height}, file_item.path={self.path}")
+
+        if self.flip_x:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)  # transpose returns a new image, so assign it
+        if self.flip_y:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+        if self.dataset_config.buckets:
+            # scale and crop based on file item
+            img = img.resize((self.scale_to_width, self.scale_to_height), Image.BICUBIC)
+            # crop
+            img = img.crop((
+                self.crop_x,
+                self.crop_y,
+                self.crop_x + self.crop_width,
+                self.crop_y + self.crop_height
+            ))
+        else:
+            raise Exception("Control images not supported for non-bucket datasets")
+
+        self.control_tensor = transforms.ToTensor()(img)
+
+    def cleanup_control(self: 'FileItemDTO'):
+        self.control_tensor = None
+
+
+class AugmentationFileItemDTOMixin:
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        self.has_augmentations = False
+        self.unaugmented_tensor: Union[torch.Tensor, None] = None
+        # self.augmentations: Union[None, List[Augments]] = None
+        self.dataset_config: 'DatasetConfig' = kwargs.get('dataset_config', None)
+        self.build_augmentation_transform()
+
+    def build_augmentation_transform(self: 'FileItemDTO'):
+        if self.dataset_config.augmentations is not None and len(self.dataset_config.augmentations) > 0:
+            self.has_augmentations = True
+            augmentations = [Augments(**aug) for aug in self.dataset_config.augmentations]
+
+            if self.dataset_config.shuffle_augmentations:
+                random.shuffle(augmentations)
+
+            augmentation_list = []
+            for aug in augmentations:
+                # make sure method name is valid
+                assert hasattr(A, aug.method_name), f"invalid augmentation method: {aug.method_name}"
+                # get the method
+                method = getattr(A, aug.method_name)
+                # add the method to the list
+                augmentation_list.append(method(**aug.params))
+
+            self.aug_transform = A.Compose(augmentation_list)
+
+    def augment_image(self: 'FileItemDTO', img: Image, transform: Union[None, transforms.Compose], ):
+        """Return the augmented version of ``img`` and store the unaugmented one on the item.
+
+        :param img: image to augment; the channel reversal below assumes a 3-channel
+            RGB layout (TODO confirm against callers).
+        :param transform: applied to both versions; ``transforms.ToTensor()`` is used
+            when it is None.
+        :return: the transformed augmented image.
+        """
+        # a shuffled pipeline is rebuilt on every call
+        if self.dataset_config.shuffle_augmentations:
+            self.build_augmentation_transform()
+
+        to_tensor = transforms.ToTensor() if transform is None else transform
+
+        # keep the untouched version on the item
+        self.unaugmented_tensor = to_tensor(img)
+
+        # the augmentation pipeline is fed a BGR array, so reverse the channel axis first
+        bgr_pixels = np.array(img)[:, :, ::-1].copy()
+        bgr_augmented = self.aug_transform(image=bgr_pixels)["image"]
+
+        # back to RGB, then to PIL so the same conversion can be applied
+        rgb_augmented = cv2.cvtColor(bgr_augmented, cv2.COLOR_BGR2RGB)
+        return to_tensor(Image.fromarray(rgb_augmented))
+
+    def cleanup_control(self: 'FileItemDTO'):
+        # drop the reference to the unaugmented tensor stored by augment_image
+        self.unaugmented_tensor = None
+
+
+class MaskFileItemDTOMixin:
+    """Mixin that locates and loads a mask image for a file item.
+
+    The mask is either the alpha channel of the image itself (``alpha_mask``) or a
+    file with the same base name found in ``dataset_config.mask_path``.
+    """
+
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        """Resolve the mask source from the ``dataset_config`` and ``path`` keyword arguments."""
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        self.has_mask_image = False
+        self.mask_path: Union[str, None] = None
+        self.mask_tensor: Union[torch.Tensor, None] = None
+        self.use_alpha_as_mask: bool = False
+        dataset_config: 'DatasetConfig' = kwargs.get('dataset_config', None)
+        self.mask_min_value = dataset_config.mask_min_value
+        if dataset_config.alpha_mask:
+            # the mask is taken from the alpha channel of the image itself
+            self.use_alpha_as_mask = True
+            self.mask_path = kwargs.get('path', None)
+            self.has_mask_image = True
+        elif dataset_config.mask_path is not None:
+            # look for a file with the same base name in the mask folder
+            mask_dir = dataset_config.mask_path
+            img_path = kwargs.get('path', None)
+            img_ext_list = ['.jpg', '.jpeg', '.png', '.webp']
+            file_name_no_ext = os.path.splitext(os.path.basename(img_path))[0]
+            for ext in img_ext_list:
+                candidate = os.path.join(mask_dir, file_name_no_ext + ext)
+                if os.path.exists(candidate):
+                    self.mask_path = candidate
+                    self.has_mask_image = True
+                    break
+
+    def load_mask_image(self: 'FileItemDTO'):
+        """Load the mask at ``self.mask_path`` into ``self.mask_tensor``.
+
+        The mask is optionally inverted, flipped according to ``flip_x`` / ``flip_y``,
+        blurred by a small random radius, converted to grayscale, resized and cropped
+        with the item's ``scale_to_*`` / ``crop_*`` values and finally passed through
+        ``value_map`` with input range 0..1 and output range ``mask_min_value``..1.
+
+        :raises ValueError: if the mask orientation disagrees with the scale-to sizes.
+        :raises Exception: if the file cannot be opened, or the dataset does not use buckets.
+        """
+        try:
+            img = Image.open(self.mask_path)
+            img = exif_transpose(img)
+        except Exception as e:
+            print(f"Error: {e}")
+            print(f"Error loading image: {self.mask_path}")
+            # nothing below can run without an image; re-raise the real error instead
+            # of failing later on an unbound local
+            raise
+
+        if self.use_alpha_as_mask:
+            # pipeline expects an rgb image so we need to put alpha in all channels
+            if img.mode != 'RGBA':
+                # guarantees a fourth channel to read the alpha from
+                img = img.convert('RGBA')
+            np_img = np.array(img)
+            np_img[:, :, :3] = np_img[:, :, 3:]
+
+            np_img = np_img[:, :, :3]
+            img = Image.fromarray(np_img)
+
+        img = img.convert('RGB')
+        if self.dataset_config.invert_mask:
+            img = ImageOps.invert(img)
+        w, h = img.size
+        landscape_mismatch = w > h and self.scale_to_width < self.scale_to_height
+        portrait_mismatch = h > w and self.scale_to_height < self.scale_to_width
+        if landscape_mismatch or portrait_mismatch:
+            # throw error, they should match
+            raise ValueError(
+                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={self.scale_to_width}, file_item.scale_to_height={self.scale_to_height}, file_item.path={self.path}")
+
+        # transpose returns a new image, so the result has to be kept
+        if self.flip_x:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        if self.flip_y:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+        # randomly apply a blur up to 0.5% of the size of the min (width, height)
+        min_size = min(img.width, img.height)
+        blur_radius = int(min_size * random.random() * 0.005)
+        img = img.filter(ImageFilter.GaussianBlur(radius=blur_radius))
+
+        # make grayscale
+        img = img.convert('L')
+
+        if self.dataset_config.buckets:
+            # scale and crop based on file item
+            img = img.resize((self.scale_to_width, self.scale_to_height), Image.BICUBIC)
+            img = img.crop((
+                self.crop_x,
+                self.crop_y,
+                self.crop_x + self.crop_width,
+                self.crop_y + self.crop_height
+            ))
+        else:
+            raise Exception("Mask images not supported for non-bucket datasets")
+
+        self.mask_tensor = transforms.ToTensor()(img)
+        self.mask_tensor = value_map(self.mask_tensor, 0, 1.0, self.mask_min_value, 1.0)
+
+    def cleanup_mask(self: 'FileItemDTO'):
+        """Release the loaded mask tensor."""
+        self.mask_tensor = None
+
+
+class UnconditionalFileItemDTOMixin:
+    """Mixin that locates and loads an "unconditional" companion image for a file item.
+
+    The companion is the file with the same base name found in
+    ``dataset_config.unconditional_path``.
+    """
+
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        """Resolve the companion path from the ``dataset_config`` and ``path`` keyword arguments."""
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        self.has_unconditional = False
+        self.unconditional_path: Union[str, None] = None
+        self.unconditional_tensor: Union[torch.Tensor, None] = None
+        self.unconditional_latent: Union[torch.Tensor, None] = None
+        self.unconditional_transforms = transforms.Compose(
+            [
+                transforms.ToTensor(),
+                transforms.Normalize([0.5], [0.5]),
+            ]
+        )
+        dataset_config: 'DatasetConfig' = kwargs.get('dataset_config', None)
+
+        if dataset_config.unconditional_path is not None:
+            # look for a file with the same base name in the unconditional folder
+            img_path = kwargs.get('path', None)
+            img_ext_list = ['.jpg', '.jpeg', '.png', '.webp']
+            file_name_no_ext = os.path.splitext(os.path.basename(img_path))[0]
+            for ext in img_ext_list:
+                candidate = os.path.join(dataset_config.unconditional_path, file_name_no_ext + ext)
+                if os.path.exists(candidate):
+                    self.unconditional_path = candidate
+                    self.has_unconditional = True
+                    break
+
+    def load_unconditional_image(self: 'FileItemDTO'):
+        """Load the image at ``self.unconditional_path`` into ``self.unconditional_tensor``.
+
+        The image is flipped according to ``flip_x`` / ``flip_y``, resized and cropped
+        with the item's ``scale_to_*`` / ``crop_*`` values and passed through
+        ``self.unconditional_transforms``.
+
+        :raises ValueError: if the image orientation disagrees with the scale-to sizes.
+        :raises Exception: if the file cannot be opened, or the dataset does not use buckets.
+        """
+        try:
+            img = Image.open(self.unconditional_path)
+            img = exif_transpose(img)
+        except Exception as e:
+            print(f"Error: {e}")
+            # report the path that was actually opened
+            print(f"Error loading image: {self.unconditional_path}")
+            # nothing below can run without an image; re-raise the real error instead
+            # of failing later on an unbound local
+            raise
+
+        img = img.convert('RGB')
+        w, h = img.size
+        landscape_mismatch = w > h and self.scale_to_width < self.scale_to_height
+        portrait_mismatch = h > w and self.scale_to_height < self.scale_to_width
+        if landscape_mismatch or portrait_mismatch:
+            # throw error, they should match
+            raise ValueError(
+                f"unexpected values: w={w}, h={h}, file_item.scale_to_width={self.scale_to_width}, file_item.scale_to_height={self.scale_to_height}, file_item.path={self.path}")
+
+        # transpose returns a new image, so the result has to be kept
+        if self.flip_x:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        if self.flip_y:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+        if self.dataset_config.buckets:
+            # scale and crop based on file item
+            img = img.resize((self.scale_to_width, self.scale_to_height), Image.BICUBIC)
+            img = img.crop((
+                self.crop_x,
+                self.crop_y,
+                self.crop_x + self.crop_width,
+                self.crop_y + self.crop_height
+            ))
+        else:
+            raise Exception("Unconditional images are not supported for non-bucket datasets")
+
+        self.unconditional_tensor = self.unconditional_transforms(img)
+
+    def cleanup_unconditional(self: 'FileItemDTO'):
+        """Release the loaded tensor and latent."""
+        self.unconditional_tensor = None
+        self.unconditional_latent = None
+
+
+class PoiFileItemDTOMixin:
+    # Point of interest bounding box. Allows for dynamic cropping without cropping out the main subject
+    # items in the poi will always be inside the image when random cropping
+    def __init__(self: 'FileItemDTO', *args, **kwargs):
+        """Read the point-of-interest box named by ``dataset_config.poi`` from the caption json.
+
+        The box defaults to the full image when the caption file has no usable entry
+        for that name. ``flip_x`` / ``flip_y`` keyword arguments mirror the box.
+
+        :raises Exception: if poi is combined with latent caching, if captions are not
+            json, or if the caption file does not exist.
+        """
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        # poi is a name of the box point of interest in the caption json file
+        dataset_config = kwargs.get('dataset_config', None)
+        path = kwargs.get('path', None)
+        self.poi: Union[str, None] = dataset_config.poi
+        self.has_point_of_interest = self.poi is not None
+        self.poi_x: Union[int, None] = None
+        self.poi_y: Union[int, None] = None
+        self.poi_width: Union[int, None] = None
+        self.poi_height: Union[int, None] = None
+
+        if self.poi is not None:
+            # make sure latent caching is off
+            if dataset_config.cache_latents or dataset_config.cache_latents_to_disk:
+                raise Exception(
+                    f"Error: poi is not supported when caching latents. Please set cache_latents and cache_latents_to_disk to False in the dataset config"
+                )
+            # make sure we are loading through json
+            if dataset_config.caption_ext != 'json':
+                raise Exception(
+                    f"Error: poi is only supported when using json captions. Please set caption_ext to json in the dataset config"
+                )
+            self.poi = self.poi.strip()
+            # get the caption path
+            file_path_no_ext = os.path.splitext(path)[0]
+            caption_path = file_path_no_ext + '.json'
+            if not os.path.exists(caption_path):
+                raise Exception(f"Error: caption file not found for poi: {caption_path}")
+            with open(caption_path, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+            # a missing or malformed 'poi' section is treated like a missing entry
+            # instead of raising KeyError
+            poi_section = json_data.get('poi') if isinstance(json_data, dict) else None
+            if not isinstance(poi_section, dict):
+                poi_section = {}
+            # poi has, x, y, width, height
+            # do full image if no poi
+            self.poi_x = 0
+            self.poi_y = 0
+            self.poi_width = self.width
+            self.poi_height = self.height
+            if self.poi not in poi_section:
+                print(f"Warning: poi not found in caption file: {caption_path}")
+            else:
+                try:
+                    poi = poi_section[self.poi]
+                    box = (int(poi['x']), int(poi['y']), int(poi['width']), int(poi['height']))
+                except (KeyError, TypeError, ValueError, OverflowError) as e:
+                    # keep the full-image box rather than a half-applied one
+                    print(f"Warning: invalid poi '{self.poi}' in caption file: {caption_path} ({e})")
+                else:
+                    self.poi_x, self.poi_y, self.poi_width, self.poi_height = box
+
+            # handle flipping
+            if kwargs.get('flip_x', False):
+                # flip the poi
+                self.poi_x = self.width - self.poi_x - self.poi_width
+            if kwargs.get('flip_y', False):
+                # flip the poi
+                self.poi_y = self.height - self.poi_y - self.poi_height
+
+    def setup_poi_bucket(self: 'FileItemDTO'):
+        """Pick a random crop that contains the point of interest and derive the bucket from it.
+
+        :return: False when normal bucketing should be used instead (the scaled image
+            resolution is not above the dataset resolution, or no large enough crop
+            was found after more than 100 attempts); True after ``scale_to_*`` and
+            ``crop_*`` have been set on the item.
+        """
+        initial_width = int(self.width * self.dataset_config.scale)
+        initial_height = int(self.height * self.dataset_config.scale)
+        # we are using poi, so we need to calculate the bucket based on the poi
+
+        # if img resolution is less than dataset resolution, just return and let the normal bucketing happen
+        img_resolution = get_resolution(initial_width, initial_height)
+        if img_resolution <= self.dataset_config.resolution:
+            return False  # will trigger normal bucketing
+
+        bucket_tolerance = self.dataset_config.bucket_tolerance
+        poi_x = int(self.poi_x * self.dataset_config.scale)
+        poi_y = int(self.poi_y * self.dataset_config.scale)
+        poi_width = int(self.poi_width * self.dataset_config.scale)
+        poi_height = int(self.poi_height * self.dataset_config.scale)
+
+        # loop to keep expanding until we are at the proper resolution. This is not ideal, we can probably handle it better
+        # each pass can only move the edges outwards, so the crop never shrinks
+        num_loops = 0
+        while True:
+            # crop left
+            if poi_x > 0:
+                poi_x = random.randint(0, poi_x)
+            else:
+                poi_x = 0
+
+            # crop right
+            cr_min = poi_x + poi_width
+            if cr_min < initial_width:
+                crop_right = random.randint(poi_x + poi_width, initial_width)
+            else:
+                crop_right = initial_width
+
+            poi_width = crop_right - poi_x
+
+            # crop top
+            if poi_y > 0:
+                poi_y = random.randint(0, poi_y)
+            else:
+                poi_y = 0
+
+            # crop bottom
+            if poi_y + poi_height < initial_height:
+                crop_bottom = random.randint(poi_y + poi_height, initial_height)
+            else:
+                crop_bottom = initial_height
+
+            poi_height = crop_bottom - poi_y
+            # now we have our random crop, but it may be smaller than resolution. Check and expand if needed
+            current_resolution = get_resolution(poi_width, poi_height)
+            if current_resolution >= self.dataset_config.resolution:
+                # We can break now
+                break
+            else:
+                num_loops += 1
+                if num_loops > 100:
+                    print(
+                        f"Warning: poi bucketing looped too many times. This should not happen. Please report this issue.")
+                    return False
+
+        new_width = poi_width
+        new_height = poi_height
+
+        bucket_resolution = get_bucket_for_image_size(
+            new_width, new_height,
+            resolution=self.dataset_config.resolution,
+            divisibility=bucket_tolerance
+        )
+
+        width_scale_factor = bucket_resolution["width"] / new_width
+        height_scale_factor = bucket_resolution["height"] / new_height
+        # Use the maximum of the scale factors to ensure both dimensions are scaled above the bucket resolution
+        max_scale_factor = max(width_scale_factor, height_scale_factor)
+
+        # the whole image is scaled; the crop origin is the scaled top-left of the random crop
+        self.scale_to_width = int(initial_width * max_scale_factor)
+        self.scale_to_height = int(initial_height * max_scale_factor)
+        self.crop_width = bucket_resolution['width']
+        self.crop_height = bucket_resolution['height']
+        self.crop_x = int(poi_x * max_scale_factor)
+        self.crop_y = int(poi_y * max_scale_factor)
+
+        return True
+
+
+class ArgBreakMixin:
+    # just stops super calls from hitting object
+    # (accepts and discards any arguments and does not call super().__init__)
+    def __init__(self, *args, **kwargs):
+        pass
+
+
+class LatentCachingFileItemDTOMixin:
+    """Per-item bookkeeping for an encoded latent kept in memory and/or on disk."""
+
+    def __init__(self, *args, **kwargs):
+        # forward to the next class in the MRO when there is one
+        if hasattr(super(), '__init__'):
+            super().__init__(*args, **kwargs)
+        self._encoded_latent: Union[torch.Tensor, None] = None
+        self._latent_path: Union[str, None] = None
+        self.is_latent_cached = False
+        self.is_caching_to_disk = False
+        self.is_caching_to_memory = False
+        self.latent_load_device = 'cpu'
+        # sd1 or sdxl or others
+        self.latent_space_version = 'sd1'
+        # todo, increment this if we change the latent format to invalidate cache
+        self.latent_version = 1
+
+    def get_latent_info_dict(self: 'FileItemDTO'):
+        """Return the ordered values that identify this item's latent (used for the hash and metadata)."""
+        info = OrderedDict()
+        info["filename"] = os.path.basename(self.path)
+        for attr in ("scale_to_width", "scale_to_height", "crop_x", "crop_y", "crop_width", "crop_height",
+                     "latent_space_version", "latent_version"):
+            info[attr] = getattr(self, attr)
+        # flips are appended only when set, so entries for unflipped items keep their old form
+        for flag in ("flip_x", "flip_y"):
+            if getattr(self, flag):
+                info[flag] = True
+        return info
+
+    def get_latent_path(self: 'FileItemDTO', recalculate=False):
+        """Return the cache file path, computing it on first use or when ``recalculate`` is set."""
+        if recalculate or self._latent_path is None:
+            # latents are stored in a `_latent_cache` folder next to the image
+            cache_dir = os.path.join(os.path.dirname(self.path), '_latent_cache')
+            stem = os.path.splitext(os.path.basename(self.path))[0]
+            # url-safe base64 of the md5 of the info dict, with the '=' padding removed
+            payload = json.dumps(self.get_latent_info_dict(), sort_keys=True).encode('utf-8')
+            digest = hashlib.md5(payload).digest()
+            hash_str = base64.urlsafe_b64encode(digest).decode('ascii').replace('=', '')
+            self._latent_path = os.path.join(cache_dir, f'{stem}_{hash_str}.safetensors')
+        return self._latent_path
+
+    def cleanup_latent(self):
+        """Drop the in-memory latent, or move it back to the cpu when caching to memory."""
+        if self._encoded_latent is None:
+            return
+        if self.is_caching_to_memory:
+            self._encoded_latent = self._encoded_latent.to('cpu')
+        else:
+            # we are caching on disk, don't save in memory
+            self._encoded_latent = None
+
+    def get_latent(self, device=None):
+        """Return the cached latent, reading it from disk on first access; None if nothing is cached.
+
+        ``device`` is accepted but not used: the file is always loaded onto the cpu.
+        """
+        if not self.is_latent_cached:
+            return None
+        if self._encoded_latent is None:
+            self._encoded_latent = load_file(self.get_latent_path(), device='cpu')['latent']
+        return self._encoded_latent
+
+
+class LatentCachingMixin:
+    """Dataset mixin that pre-computes the latent of every file item."""
+
+    def __init__(self: 'AiToolkitDataset', **kwargs):
+        # forward to the next class in the MRO when there is one
+        if hasattr(super(), '__init__'):
+            super().__init__(**kwargs)
+        self.latent_cache = {}
+
+    def cache_latents_all_latents(self: 'AiToolkitDataset'):
+        """Make sure every item in ``self.file_list`` has its latent cached.
+
+        Items whose cache file already exists are not re-encoded; the file is only read
+        back when latents are kept in memory. Every other item is loaded, encoded with
+        ``self.sd.encode_images`` and written to disk and/or kept in memory as
+        configured. The 'cache_latents' device state preset is applied before the loop
+        and the device state is restored after it.
+        """
+        print(f"Caching latents for {self.dataset_path}")
+        to_disk = self.is_caching_latents_to_disk
+        to_memory = self.is_caching_latents_to_memory
+
+        if to_disk:
+            print(" - Saving latents to disk")
+        if to_memory:
+            print(" - Keeping latents in memory")
+        # move sd items to cpu except for vae
+        self.sd.set_device_state_preset('cache_latents')
+
+        progress_label = 'Caching latents to disk' if to_disk else 'Caching latents'
+        for file_item in tqdm(self.file_list, desc=progress_label):
+            # the latent space version is part of the cache hash
+            file_item.latent_space_version = 'sdxl' if self.sd.is_xl else 'sd1'
+            file_item.is_caching_to_disk = to_disk
+            file_item.is_caching_to_memory = to_memory
+            file_item.latent_load_device = self.sd.device
+
+            latent_path = file_item.get_latent_path(recalculate=True)
+            if os.path.exists(latent_path):
+                # already on disk
+                if to_memory:
+                    stored = load_file(latent_path, device='cpu')
+                    file_item._encoded_latent = stored['latent'].to('cpu', dtype=self.sd.torch_dtype)
+            else:
+                # not on disk yet: load the image and encode it
+                file_item.load_and_process_image(self.transform, only_load_latents=True)
+                # add batch dimension
+                imgs = file_item.tensor.unsqueeze(0).to(self.sd.device_torch, dtype=self.sd.torch_dtype)
+                latent = self.sd.encode_images(imgs).squeeze(0)
+
+                if to_disk:
+                    tensors = OrderedDict([
+                        ('latent', latent.clone().detach().cpu()),
+                    ])
+                    meta = get_meta_for_safetensors(file_item.get_latent_info_dict())
+                    os.makedirs(os.path.dirname(latent_path), exist_ok=True)
+                    save_file(tensors, latent_path, metadata=meta)
+
+                if to_memory:
+                    file_item._encoded_latent = latent.to('cpu', dtype=self.sd.torch_dtype)
+
+                # release the image and latent before the next item
+                del imgs
+                del latent
+                del file_item.tensor
+
+                flush(garbage_collect=False)
+            file_item.is_latent_cached = True
+
+        # restore device state
+        self.sd.restore_device_state()
diff --git a/toolkit/embedding.py b/toolkit/embedding.py
index 3eb4483a..31ac4ce2 100644
--- a/toolkit/embedding.py
+++ b/toolkit/embedding.py
@@ -21,7 +21,8 @@ class Embedding:
     def __init__(
             self,
             sd: 'StableDiffusion',
-            embed_config: 'EmbeddingConfig'
+            embed_config: 'EmbeddingConfig',
+            state_dict: OrderedDict = None,
     ):
         self.name = embed_config.trigger
         self.sd = sd
@@ -38,74 +39,115 @@ class Embedding:
             additional_tokens.append(f"{self.embed_config.trigger}_{i}")
         placeholder_tokens += additional_tokens
 
-        num_added_tokens = self.sd.tokenizer.add_tokens(placeholder_tokens)
-        if num_added_tokens != self.embed_config.tokens:
-            raise ValueError(
-                f"The tokenizer already contains the token {self.embed_config.trigger}. Please pass a different"
-                " `placeholder_token` that is not already in the tokenizer."
-            )
+        # handle dual tokenizer
+        self.tokenizer_list = self.sd.tokenizer if isinstance(self.sd.tokenizer, list) else [self.sd.tokenizer]
+        self.text_encoder_list = self.sd.text_encoder if isinstance(self.sd.text_encoder, list) else [
+            self.sd.text_encoder]
 
-        # Convert the initializer_token, placeholder_token to ids
-        init_token_ids = self.sd.tokenizer.encode(self.embed_config.init_words, add_special_tokens=False)
-        # if length of token ids is more than number of orm embedding tokens fill with *
-        if len(init_token_ids) > self.embed_config.tokens:
-            init_token_ids = init_token_ids[:self.embed_config.tokens]
-        elif len(init_token_ids) < self.embed_config.tokens:
-            pad_token_id = self.sd.tokenizer.encode(["*"], add_special_tokens=False)
-            init_token_ids += pad_token_id * (self.embed_config.tokens - len(init_token_ids))
+        self.placeholder_token_ids = []
+        self.embedding_tokens = []
 
-        self.placeholder_token_ids = self.sd.tokenizer.convert_tokens_to_ids(placeholder_tokens)
+        print(f"Adding {placeholder_tokens} tokens to tokenizer")
+        print(f"Adding {self.embed_config.tokens} tokens to tokenizer")
 
-        # Resize the token embeddings as we are adding new special tokens to the tokenizer
-        # todo SDXL has 2 text encoders, need to do both for all of this
-        self.sd.text_encoder.resize_token_embeddings(len(self.sd.tokenizer))
+        for text_encoder, tokenizer in zip(self.text_encoder_list, self.tokenizer_list):
+            num_added_tokens = tokenizer.add_tokens(placeholder_tokens)
+            if num_added_tokens != self.embed_config.tokens:
+                raise ValueError(
+                    f"The tokenizer already contains the token {self.embed_config.trigger}. Please pass a different"
+                    f" `placeholder_token` that is not already in the tokenizer. Only added {num_added_tokens}"
+                )
 
-        # Initialise the newly added placeholder token with the embeddings of the initializer token
-        token_embeds = self.sd.text_encoder.get_input_embeddings().weight.data
-        with torch.no_grad():
-            for initializer_token_id, token_id in zip(init_token_ids, self.placeholder_token_ids):
-                token_embeds[token_id] = token_embeds[initializer_token_id].clone()
+            # Convert the initializer_token, placeholder_token to ids
+            init_token_ids = tokenizer.encode(self.embed_config.init_words, add_special_tokens=False)
+            # if length of token ids is more than number of orm embedding tokens fill with *
+            if len(init_token_ids) > self.embed_config.tokens:
+                init_token_ids = init_token_ids[:self.embed_config.tokens]
+            elif len(init_token_ids) < self.embed_config.tokens:
+                pad_token_id = tokenizer.encode(["*"], add_special_tokens=False)
+                init_token_ids += pad_token_id * (self.embed_config.tokens - len(init_token_ids))
 
-        # replace "[name] with this. on training. This is automatically generated in pipeline on inference
-        self.embedding_tokens = " ".join(self.sd.tokenizer.convert_ids_to_tokens(self.placeholder_token_ids))
+            placeholder_token_ids = tokenizer.encode(placeholder_tokens, add_special_tokens=False)
+            self.placeholder_token_ids.append(placeholder_token_ids)
 
-    # returns the string to have in the prompt to trigger the embedding
-    def get_embedding_string(self):
-        return self.embedding_tokens
+            # Resize the token embeddings as we are adding new special tokens to the tokenizer
+            text_encoder.resize_token_embeddings(len(tokenizer))
+
+            # Initialise the newly added placeholder token with the embeddings of the initializer token
+            token_embeds = text_encoder.get_input_embeddings().weight.data
+            with torch.no_grad():
+                for initializer_token_id, token_id in zip(init_token_ids, placeholder_token_ids):
+                    token_embeds[token_id] = token_embeds[initializer_token_id].clone()
+
+            # replace "[name] with this. on training. This is automatically generated in pipeline on inference
+            self.embedding_tokens.append(" ".join(tokenizer.convert_ids_to_tokens(placeholder_token_ids)))
+
+        # backup text encoder embeddings
+        self.orig_embeds_params = [x.get_input_embeddings().weight.data.clone() for x in self.text_encoder_list]
+
+    def restore_embeddings(self):
+        # Reset every embedding row outside the placeholder id range to its backed-up value,
+        # so that only the newly added tokens keep any updates.
+        per_encoder = zip(self.text_encoder_list, self.tokenizer_list,
+                          self.orig_embeds_params, self.placeholder_token_ids)
+        for text_encoder, tokenizer, orig_embeds, placeholder_token_ids in per_encoder:
+            # True marks a row that must be restored
+            frozen_rows = torch.ones((len(tokenizer),), dtype=torch.bool)
+            first_id, last_id = min(placeholder_token_ids), max(placeholder_token_ids)
+            frozen_rows[first_id: last_id + 1] = False
+            with torch.no_grad():
+                embedding_weight = text_encoder.get_input_embeddings().weight
+                embedding_weight[frozen_rows] = orig_embeds[frozen_rows]
 
     def get_trainable_params(self):
-        # todo only get this one as we could have more than one
-        return self.sd.text_encoder.get_input_embeddings().parameters()
+        params = []
+        for text_encoder in self.text_encoder_list:
+            params += text_encoder.get_input_embeddings().parameters()
+        return params
 
-    # make setter and getter for vec
-    @property
-    def vec(self):
+    def _get_vec(self, text_encoder_idx=0):
         # should we get params instead
         # create vector from token embeds
-        token_embeds = self.sd.text_encoder.get_input_embeddings().weight.data
+        token_embeds = self.text_encoder_list[text_encoder_idx].get_input_embeddings().weight.data
         # stack the tokens along batch axis adding that axis
         new_vector = torch.stack(
-            [token_embeds[token_id] for token_id in self.placeholder_token_ids],
+            [token_embeds[token_id] for token_id in self.placeholder_token_ids[text_encoder_idx]],
             dim=0
         )
         return new_vector
 
-    @vec.setter
-    def vec(self, new_vector):
+    def _set_vec(self, new_vector, text_encoder_idx=0):
         # shape is (1, 768) for SD 1.5 for 1 token
-        token_embeds = self.sd.text_encoder.get_input_embeddings().weight.data
+        token_embeds = self.text_encoder_list[text_encoder_idx].get_input_embeddings().weight.data
         for i in range(new_vector.shape[0]):
             # apply the weights to the placeholder tokens while preserving gradient
-            token_embeds[self.placeholder_token_ids[i]] = new_vector[i].clone()
-            x = 1
+            token_embeds[self.placeholder_token_ids[text_encoder_idx][i]] = new_vector[i].clone()
+
+    # vec / vec2 read and write the placeholder vectors of text encoder 0 / 1 via _get_vec and _set_vec
+    @property
+    def vec(self):
+        return self._get_vec(0)
+
+    @vec.setter
+    def vec(self, new_vector):
+        self._set_vec(new_vector, 0)
+
+    @property
+    def vec2(self):
+        return self._get_vec(1)
+
+    @vec2.setter
+    def vec2(self, new_vector):
+        self._set_vec(new_vector, 1)
 
     # diffusers automatically expands the token meaning test123 becomes test123 test123_1 test123_2 etc
     # however, on training we don't use that pipeline, so we have to do it ourselves
     def inject_embedding_to_prompt(self, prompt, expand_token=False, to_replace_list=None, add_if_not_present=True):
         output_prompt = prompt
-        default_replacements = [self.name, self.trigger, "[name]", "[trigger]", self.embedding_tokens]
+        embedding_tokens = self.embedding_tokens[0]  # shoudl be the same
+        default_replacements = ["[name]", "[trigger]"]
 
-        replace_with = self.embedding_tokens if expand_token else self.trigger
+        replace_with = embedding_tokens if expand_token else self.trigger
         if to_replace_list is None:
             to_replace_list = default_replacements
         else:
@@ -120,7 +162,7 @@ class Embedding:
             output_prompt = output_prompt.replace(to_replace, replace_with)
 
         # see how many times replace_with is in the prompt
-        num_instances = prompt.count(replace_with)
+        num_instances = output_prompt.count(replace_with)
 
         if num_instances == 0 and add_if_not_present:
             # add it to the beginning of the prompt
@@ -128,10 +170,21 @@ class Embedding:
 
         if num_instances > 1:
             print(
-                f"Warning: {self.name} token appears {num_instances} times in prompt {output_prompt}. This may cause issues.")
+                f"Warning: {replace_with} token appears {num_instances} times in prompt {output_prompt}. This may cause issues.")
 
         return output_prompt
 
+    def state_dict(self):
+        """Return the vectors to save: ``clip_l``/``clip_g`` for XL models, ``emb_params`` otherwise."""
+        tensors = OrderedDict()
+        if not self.sd.is_xl:
+            tensors['emb_params'] = self.vec
+            return tensors
+        # XL: vec and vec2 are stored under separate keys
+        tensors['clip_l'] = self.vec
+        tensors['clip_g'] = self.vec2
+        return tensors
+
     def save(self, filename):
         # todo check to see how to get the vector out of the embedding
 
@@ -145,13 +198,14 @@ class Embedding:
             "sd_checkpoint_name": None,
             "notes": None,
         }
+        # TODO we do not currently support this. Check how auto is doing it. Only safetensors supported for sdxl
         if filename.endswith('.pt'):
             torch.save(embedding_data, filename)
         elif filename.endswith('.bin'):
             torch.save(embedding_data, filename)
         elif filename.endswith('.safetensors'):
             # save the embedding as a safetensors file
-            state_dict = {"emb_params": self.vec}
+            state_dict = self.state_dict()
             # add all embedding data (except string_to_param), to metadata
             metadata = OrderedDict({k: json.dumps(v) for k, v in embedding_data.items() if k != "string_to_param"})
             metadata["string_to_param"] = {"*": "emb_params"}
@@ -163,6 +217,7 @@ class Embedding:
         path = os.path.realpath(file_path)
         filename = os.path.basename(path)
         name, ext = os.path.splitext(filename)
+        tensors = {}
         ext = ext.upper()
         if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
             _, second_ext = os.path.splitext(name)
@@ -170,10 +225,12 @@ class Embedding:
                 return
 
         if ext in ['.BIN', '.PT']:
+            # todo check this
+            if self.sd.is_xl:
+               raise Exception("XL not supported yet for bin, pt")
             data = torch.load(path, map_location="cpu")
         elif ext in ['.SAFETENSORS']:
             # rebuild the embedding from the safetensors file if it has it
-            tensors = {}
             with safetensors.torch.safe_open(path, framework="pt", device="cpu") as f:
                 metadata = f.metadata()
                 for k in f.keys():
@@ -195,26 +252,32 @@ class Embedding:
         else:
             return
 
-        # textual inversion embeddings
-        if 'string_to_param' in data:
-            param_dict = data['string_to_param']
-            if hasattr(param_dict, '_parameters'):
-                param_dict = getattr(param_dict,
-                                     '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
-            assert len(param_dict) == 1, 'embedding file has multiple terms in it'
-            emb = next(iter(param_dict.items()))[1]
-        # diffuser concepts
-        elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
-            assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
-
-            emb = next(iter(data.values()))
-            if len(emb.shape) == 1:
-                emb = emb.unsqueeze(0)
+        if self.sd.is_xl:
+            self.vec = tensors['clip_l'].detach().to(device, dtype=torch.float32)
+            self.vec2 = tensors['clip_g'].detach().to(device, dtype=torch.float32)
+            if 'step' in data:
+                self.step = int(data['step'])
         else:
-            raise Exception(
-                f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")
+            # textual inversion embeddings
+            if 'string_to_param' in data:
+                param_dict = data['string_to_param']
+                if hasattr(param_dict, '_parameters'):
+                    param_dict = getattr(param_dict,
+                                         '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
+                assert len(param_dict) == 1, 'embedding file has multiple terms in it'
+                emb = next(iter(param_dict.items()))[1]
+            # diffuser concepts
+            elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
+                assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
 
-        if 'step' in data:
-            self.step = int(data['step'])
+                emb = next(iter(data.values()))
+                if len(emb.shape) == 1:
+                    emb = emb.unsqueeze(0)
+            else:
+                raise Exception(
+                    f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")
 
-        self.vec = emb.detach().to(device, dtype=torch.float32)
+            if 'step' in data:
+                self.step = int(data['step'])
+
+            self.vec = emb.detach().to(device, dtype=torch.float32)
diff --git a/toolkit/image_utils.py b/toolkit/image_utils.py
index cb74ed4f..b4dbc38d 100644
--- a/toolkit/image_utils.py
+++ b/toolkit/image_utils.py
@@ -1,9 +1,16 @@
 # ref https://github.com/scardine/image_size/blob/master/get_image_size.py
+import atexit
 import collections
 import json
 import os
 import io
 import struct
+from typing import TYPE_CHECKING
+
+import cv2
+import numpy as np
+import torch
+from diffusers import AutoencoderTiny
 
 FILE_UNKNOWN = "Sorry, don't know how to get size for this file."
 
@@ -112,7 +119,7 @@ def get_image_metadata_from_bytesio(input, size, file_path=None):
         width = int(w)
         height = int(h)
     elif ((size >= 24) and data.startswith(b'\211PNG\r\n\032\n')
-            and (data[12:16] == b'IHDR')):
+          and (data[12:16] == b'IHDR')):
         # PNGs
         imgtype = PNG
         w, h = struct.unpack(">LL", data[16:24])
@@ -190,7 +197,7 @@ def get_image_metadata_from_bytesio(input, size, file_path=None):
             9: (4, boChar + "l"),  # SLONG
             10: (8, boChar + "ll"),  # SRATIONAL
             11: (4, boChar + "f"),  # FLOAT
-            12: (8, boChar + "d")   # DOUBLE
+            12: (8, boChar + "d")  # DOUBLE
         }
         ifdOffset = struct.unpack(boChar + "L", data[4:8])[0]
         try:
@@ -206,7 +213,7 @@ def get_image_metadata_from_bytesio(input, size, file_path=None):
                 input.seek(entryOffset)
                 tag = input.read(2)
                 tag = struct.unpack(boChar + "H", tag)[0]
-                if(tag == 256 or tag == 257):
+                if (tag == 256 or tag == 257):
                     # if type indicates that value fits into 4 bytes, value
                     # offset is not an offset but value itself
                     type = input.read(2)
@@ -229,7 +236,7 @@ def get_image_metadata_from_bytesio(input, size, file_path=None):
         except Exception as e:
             raise UnknownImageFormat(str(e))
     elif size >= 2:
-            # see http://en.wikipedia.org/wiki/ICO_(file_format)
+        # see http://en.wikipedia.org/wiki/ICO_(file_format)
         imgtype = 'ICO'
         input.seek(0)
         reserved = input.read(2)
@@ -350,13 +357,13 @@ def main(argv=None):
 
     prs.add_option('-v', '--verbose',
                    dest='verbose',
-                   action='store_true',)
+                   action='store_true', )
     prs.add_option('-q', '--quiet',
                    dest='quiet',
-                   action='store_true',)
+                   action='store_true', )
     prs.add_option('-t', '--test',
                    dest='run_tests',
-                   action='store_true',)
+                   action='store_true', )
 
     argv = list(argv) if argv is not None else sys.argv[1:]
     (opts, args) = prs.parse_args(args=argv)
@@ -417,6 +424,60 @@ def main(argv=None):
     return EX_OK
 
 
+is_window_shown = False
+
+
+def show_img(img, name='AI Toolkit'):
+    """Display an H x W x C array in an OpenCV window; raise KeyboardInterrupt on ESC."""
+    global is_window_shown
+    img = np.clip(img, 0, 255).astype(np.uint8)
+    cv2.imshow(name, img[:, :, ::-1])  # reverse the last (channel) axis for OpenCV
+    # flag the window before the ESC check can raise, so on_exit() still destroys it
+    is_window_shown = True
+    k = cv2.waitKey(10) & 0xFF
+    if k == 27:  # Esc key to stop
+        print('\nESC pressed, stopping')
+        raise KeyboardInterrupt
+
+
+
+def show_tensors(imgs: torch.Tensor, name='AI Toolkit'):
+    """Show a (B, C, H, W) batch, or a single (C, H, W) image, valued in -1..1, side by side."""
+    if len(imgs.shape) == 4:
+        img_list = torch.chunk(imgs, imgs.shape[0], dim=0)
+    else:
+        # a lone image needs a batch axis, otherwise dim=3 below does not exist
+        img_list = [imgs.unsqueeze(0)]
+    # put images side by side (concatenate along the width axis)
+    img = torch.cat(img_list, dim=3)
+    # img is -1 to 1, convert to 0 to 1
+    img = img / 2 + 0.5
+    img_numpy = img.to(torch.float32).detach().cpu().numpy()
+    img_numpy = np.clip(img_numpy, 0, 1) * 255
+    # move channels last: (1, C, H, W) -> (1, H, W, C)
+    img_numpy = img_numpy.transpose(0, 2, 3, 1)
+    img_numpy = img_numpy.astype(np.uint8)
+    show_img(img_numpy[0], name=name)
+
+
+def show_latents(latents: torch.Tensor, vae: 'AutoencoderTiny', name='AI Toolkit'):
+    """Decode latents with the given VAE and display the resulting images."""
+    if vae.device.type == 'cpu':  # compare the device type rather than a torch.device against a str
+        vae.to(latents.device)
+    latents = latents / vae.config['scaling_factor']
+    imgs = vae.decode(latents).sample
+    show_tensors(imgs, name=name)
+
+
+
+def on_exit():
+    if is_window_shown:
+        cv2.destroyAllWindows()
+
+
+atexit.register(on_exit)
+
 if __name__ == "__main__":
     import sys
-    sys.exit(main(argv=sys.argv[1:]))
\ No newline at end of file
+
+    sys.exit(main(argv=sys.argv[1:]))
diff --git a/toolkit/inversion_utils.py b/toolkit/inversion_utils.py
new file mode 100644
index 00000000..51a61d83
--- /dev/null
+++ b/toolkit/inversion_utils.py
@@ -0,0 +1,410 @@
+# ref https://huggingface.co/spaces/editing-images/ledits/blob/main/inversion_utils.py
+
+import torch
+import os
+from tqdm import tqdm
+
+from toolkit import train_tools
+from toolkit.prompt_utils import PromptEmbeds
+from toolkit.stable_diffusion_model import StableDiffusion
+
+
+def mu_tilde(model, xt, x0, timestep):
+    "mu_tilde(x_t, x_0) DDPM paper eq. 7"
+    prev_timestep = timestep - model.scheduler.config.num_train_timesteps // model.scheduler.num_inference_steps
+    alpha_prod_t_prev = model.scheduler.alphas_cumprod[
+        prev_timestep] if prev_timestep >= 0 else model.scheduler.final_alpha_cumprod
+    alpha_t = model.scheduler.alphas[timestep]
+    beta_t = 1 - alpha_t
+    alpha_bar = model.scheduler.alphas_cumprod[timestep]
+    return ((alpha_prod_t_prev ** 0.5 * beta_t) / (1 - alpha_bar)) * x0 + (
+            (alpha_t ** 0.5 * (1 - alpha_prod_t_prev)) / (1 - alpha_bar)) * xt
+
+
+def sample_xts_from_x0(sd: StableDiffusion, sample: torch.Tensor, num_inference_steps=50):
+    """
+    Samples from P(x_1:T|x_0)
+    """
+    # torch.manual_seed(43256465436)
+    alpha_bar = sd.noise_scheduler.alphas_cumprod
+    sqrt_one_minus_alpha_bar = (1 - alpha_bar) ** 0.5
+    alphas = sd.noise_scheduler.alphas
+    betas = 1 - alphas
+    # variance_noise_shape = (
+    #     num_inference_steps,
+    #     sd.unet.in_channels,
+    #     sd.unet.sample_size,
+    #     sd.unet.sample_size)
+    variance_noise_shape = list(sample.shape)
+    variance_noise_shape[0] = num_inference_steps
+
+    timesteps = sd.noise_scheduler.timesteps.to(sd.device)
+    t_to_idx = {int(v): k for k, v in enumerate(timesteps)}
+    xts = torch.zeros(variance_noise_shape).to(sample.device, dtype=torch.float16)
+    for t in reversed(timesteps):
+        idx = t_to_idx[int(t)]
+        xts[idx] = sample * (alpha_bar[t] ** 0.5) + torch.randn_like(sample, dtype=torch.float16) * sqrt_one_minus_alpha_bar[t]
+    xts = torch.cat([xts, sample], dim=0)
+
+    return xts
+
+
+def encode_text(model, prompts):
+    text_input = model.tokenizer(
+        prompts,
+        padding="max_length",
+        max_length=model.tokenizer.model_max_length,
+        truncation=True,
+        return_tensors="pt",
+    )
+    with torch.no_grad():
+        text_encoding = model.text_encoder(text_input.input_ids.to(model.device))[0]
+    return text_encoding
+
+
+def forward_step(sd: StableDiffusion, model_output, timestep, sample):
+    next_timestep = min(
+        sd.noise_scheduler.config['num_train_timesteps'] - 2,
+        timestep + sd.noise_scheduler.config['num_train_timesteps'] // sd.noise_scheduler.num_inference_steps
+    )
+
+    # 2. compute alphas, betas
+    alpha_prod_t = sd.noise_scheduler.alphas_cumprod[timestep]
+    # alpha_prod_t_next = self.scheduler.alphas_cumprod[next_timestep] if next_ltimestep >= 0 else self.scheduler.final_alpha_cumprod
+
+    beta_prod_t = 1 - alpha_prod_t
+
+    # 3. compute predicted original sample from predicted noise also called
+    # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+    pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+
+    # 5. TODO: simple noising implementation
+    next_sample = sd.noise_scheduler.add_noise(
+        pred_original_sample,
+        model_output,
+        torch.LongTensor([next_timestep]))
+    return next_sample
+
+
+def get_variance(sd: StableDiffusion, timestep):  # , prev_timestep):
+    prev_timestep = timestep - sd.noise_scheduler.config['num_train_timesteps'] // sd.noise_scheduler.num_inference_steps
+    alpha_prod_t = sd.noise_scheduler.alphas_cumprod[timestep]
+    alpha_prod_t_prev = sd.noise_scheduler.alphas_cumprod[
+        prev_timestep] if prev_timestep >= 0 else sd.noise_scheduler.final_alpha_cumprod
+    beta_prod_t = 1 - alpha_prod_t
+    beta_prod_t_prev = 1 - alpha_prod_t_prev
+    variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
+    return variance
+
+
+def get_time_ids_from_latents(sd: StableDiffusion, latents: torch.Tensor):
+    VAE_SCALE_FACTOR = 2 ** (len(sd.vae.config['block_out_channels']) - 1)
+    if sd.is_xl:
+        bs, ch, h, w = list(latents.shape)
+
+        height = h * VAE_SCALE_FACTOR
+        width = w * VAE_SCALE_FACTOR
+
+        dtype = latents.dtype
+        # just do it without any cropping nonsense
+        target_size = (height, width)
+        original_size = (height, width)
+        crops_coords_top_left = (0, 0)
+        add_time_ids = list(original_size + crops_coords_top_left + target_size)
+        add_time_ids = torch.tensor([add_time_ids])
+        add_time_ids = add_time_ids.to(latents.device, dtype=dtype)
+
+        batch_time_ids = torch.cat(
+            [add_time_ids for _ in range(bs)]
+        )
+        return batch_time_ids
+    else:
+        return None
+
+
+def inversion_forward_process(
+        sd: StableDiffusion,
+        sample: torch.Tensor,
+        conditional_embeddings: PromptEmbeds,
+        unconditional_embeddings: PromptEmbeds,
+        etas=None,
+        prog_bar=False,
+        cfg_scale=3.5,
+        num_inference_steps=50, eps=None
+):
+    current_num_timesteps = len(sd.noise_scheduler.timesteps)
+    sd.noise_scheduler.set_timesteps(num_inference_steps, device=sd.device)
+
+    timesteps = sd.noise_scheduler.timesteps.to(sd.device)
+    # variance_noise_shape = (
+    #     num_inference_steps,
+    #     sd.unet.in_channels,
+    #     sd.unet.sample_size,
+    #     sd.unet.sample_size
+    # )
+    variance_noise_shape = list(sample.shape)
+    variance_noise_shape[0] = num_inference_steps
+    if etas is None or (type(etas) in [int, float] and etas == 0):
+        eta_is_zero = True
+        zs = None
+    else:
+        eta_is_zero = False
+        if type(etas) in [int, float]: etas = [etas] * sd.noise_scheduler.num_inference_steps
+        xts = sample_xts_from_x0(sd, sample, num_inference_steps=num_inference_steps)
+        alpha_bar = sd.noise_scheduler.alphas_cumprod
+        zs = torch.zeros(size=variance_noise_shape, device=sd.device, dtype=torch.float16)
+
+    t_to_idx = {int(v): k for k, v in enumerate(timesteps)}
+    noisy_sample = sample
+    op = tqdm(reversed(timesteps), desc="Inverting...") if prog_bar else reversed(timesteps)
+
+    for timestep in op:
+        idx = t_to_idx[int(timestep)]
+        # 1. predict noise residual
+        if not eta_is_zero:
+            noisy_sample = xts[idx][None]
+
+        added_cond_kwargs = {}
+
+        with torch.no_grad():
+            text_embeddings = train_tools.concat_prompt_embeddings(
+                unconditional_embeddings,  # negative embedding
+                conditional_embeddings,  # positive embedding
+                1,  # batch size
+            )
+            if sd.is_xl:
+                add_time_ids = get_time_ids_from_latents(sd, noisy_sample)
+                # add extra for cfg
+                add_time_ids = torch.cat(
+                    [add_time_ids] * 2, dim=0
+                )
+
+                added_cond_kwargs = {
+                    "text_embeds": text_embeddings.pooled_embeds,
+                    "time_ids": add_time_ids,
+                }
+
+            # double up for cfg
+            latent_model_input = torch.cat(
+                [noisy_sample] * 2, dim=0
+            )
+
+            noise_pred = sd.unet(
+                latent_model_input,
+                timestep,
+                encoder_hidden_states=text_embeddings.text_embeds,
+                added_cond_kwargs=added_cond_kwargs,
+            ).sample
+
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+
+            # out = sd.unet.forward(noisy_sample, timestep=timestep, encoder_hidden_states=uncond_embedding)
+            # cond_out = sd.unet.forward(noisy_sample, timestep=timestep, encoder_hidden_states=text_embeddings)
+
+        noise_pred = noise_pred_uncond + cfg_scale * (noise_pred_text - noise_pred_uncond)
+
+        if eta_is_zero:
+            # 2. compute more noisy image and set x_t -> x_t+1
+            noisy_sample = forward_step(sd, noise_pred, timestep, noisy_sample)
+            xts = None
+
+        else:
+            xtm1 = xts[idx + 1][None]
+            # pred of x0
+            pred_original_sample = (noisy_sample - (1 - alpha_bar[timestep]) ** 0.5 * noise_pred) / alpha_bar[
+                timestep] ** 0.5
+
+            # direction to xt
+            prev_timestep = timestep - sd.noise_scheduler.config[
+                'num_train_timesteps'] // sd.noise_scheduler.num_inference_steps
+            alpha_prod_t_prev = sd.noise_scheduler.alphas_cumprod[
+                prev_timestep] if prev_timestep >= 0 else sd.noise_scheduler.final_alpha_cumprod
+
+            variance = get_variance(sd, timestep)
+            pred_sample_direction = (1 - alpha_prod_t_prev - etas[idx] * variance) ** (0.5) * noise_pred
+
+            mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+
+            z = (xtm1 - mu_xt) / (etas[idx] * variance ** 0.5)
+            zs[idx] = z
+
+            # correction to avoid error accumulation
+            xtm1 = mu_xt + (etas[idx] * variance ** 0.5) * z
+            xts[idx + 1] = xtm1
+
+    if not zs is None:
+        zs[-1] = torch.zeros_like(zs[-1])
+
+    # restore timesteps
+    sd.noise_scheduler.set_timesteps(current_num_timesteps, device=sd.device)
+
+    return noisy_sample, zs, xts
+
+
+#
+# def inversion_forward_process(
+#         model,
+#         sample,
+#         etas=None,
+#         prog_bar=False,
+#         prompt="",
+#         cfg_scale=3.5,
+#         num_inference_steps=50, eps=None
+# ):
+#     if not prompt == "":
+#         text_embeddings = encode_text(model, prompt)
+#     uncond_embedding = encode_text(model, "")
+#     timesteps = model.scheduler.timesteps.to(model.device)
+#     variance_noise_shape = (
+#         num_inference_steps,
+#         model.unet.in_channels,
+#         model.unet.sample_size,
+#         model.unet.sample_size)
+#     if etas is None or (type(etas) in [int, float] and etas == 0):
+#         eta_is_zero = True
+#         zs = None
+#     else:
+#         eta_is_zero = False
+#         if type(etas) in [int, float]: etas = [etas] * model.scheduler.num_inference_steps
+#         xts = sample_xts_from_x0(model, sample, num_inference_steps=num_inference_steps)
+#         alpha_bar = model.scheduler.alphas_cumprod
+#         zs = torch.zeros(size=variance_noise_shape, device=model.device, dtype=torch.float16)
+#
+#     t_to_idx = {int(v): k for k, v in enumerate(timesteps)}
+#     noisy_sample = sample
+#     op = tqdm(reversed(timesteps), desc="Inverting...") if prog_bar else reversed(timesteps)
+#
+#     for t in op:
+#         idx = t_to_idx[int(t)]
+#         # 1. predict noise residual
+#         if not eta_is_zero:
+#             noisy_sample = xts[idx][None]
+#
+#         with torch.no_grad():
+#             out = model.unet.forward(noisy_sample, timestep=t, encoder_hidden_states=uncond_embedding)
+#             if not prompt == "":
+#                 cond_out = model.unet.forward(noisy_sample, timestep=t, encoder_hidden_states=text_embeddings)
+#
+#         if not prompt == "":
+#             ## classifier free guidance
+#             noise_pred = out.sample + cfg_scale * (cond_out.sample - out.sample)
+#         else:
+#             noise_pred = out.sample
+#
+#         if eta_is_zero:
+#             # 2. compute more noisy image and set x_t -> x_t+1
+#             noisy_sample = forward_step(model, noise_pred, t, noisy_sample)
+#
+#         else:
+#             xtm1 = xts[idx + 1][None]
+#             # pred of x0
+#             pred_original_sample = (noisy_sample - (1 - alpha_bar[t]) ** 0.5 * noise_pred) / alpha_bar[t] ** 0.5
+#
+#             # direction to xt
+#             prev_timestep = t - model.scheduler.config.num_train_timesteps // model.scheduler.num_inference_steps
+#             alpha_prod_t_prev = model.scheduler.alphas_cumprod[
+#                 prev_timestep] if prev_timestep >= 0 else model.scheduler.final_alpha_cumprod
+#
+#             variance = get_variance(model, t)
+#             pred_sample_direction = (1 - alpha_prod_t_prev - etas[idx] * variance) ** (0.5) * noise_pred
+#
+#             mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+#
+#             z = (xtm1 - mu_xt) / (etas[idx] * variance ** 0.5)
+#             zs[idx] = z
+#
+#             # correction to avoid error accumulation
+#             xtm1 = mu_xt + (etas[idx] * variance ** 0.5) * z
+#             xts[idx + 1] = xtm1
+#
+#     if not zs is None:
+#         zs[-1] = torch.zeros_like(zs[-1])
+#
+#     return noisy_sample, zs, xts
+
+
+def reverse_step(model, model_output, timestep, sample, eta=0, variance_noise=None):
+    # 1. get previous step value (=t-1)
+    prev_timestep = timestep - model.scheduler.config.num_train_timesteps // model.scheduler.num_inference_steps
+    # 2. compute alphas, betas
+    alpha_prod_t = model.scheduler.alphas_cumprod[timestep]
+    alpha_prod_t_prev = model.scheduler.alphas_cumprod[
+        prev_timestep] if prev_timestep >= 0 else model.scheduler.final_alpha_cumprod
+    beta_prod_t = 1 - alpha_prod_t
+    # 3. compute predicted original sample from predicted noise also called
+    # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+    pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+    # 5. compute variance: "sigma_t(η)" -> see formula (16)
+    # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
+    # NOTE(review): get_variance reads `.noise_scheduler`, this function reads `model.scheduler` - confirm `model` has both
+    variance = get_variance(model, timestep)  # , prev_timestep)
+    std_dev_t = eta * variance ** (0.5)
+    # Take care of asymmetric reverse process (asyrp)
+    model_output_direction = model_output
+    # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+    # pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * model_output_direction
+    pred_sample_direction = (1 - alpha_prod_t_prev - eta * variance) ** (0.5) * model_output_direction
+    # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+    prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+    # 8. Add noise if eta > 0
+    if eta > 0:
+        if variance_noise is None:
+            variance_noise = torch.randn(model_output.shape, device=model.device, dtype=torch.float16)
+        sigma_z = eta * variance ** (0.5) * variance_noise
+        prev_sample = prev_sample + sigma_z
+
+    return prev_sample
+
+
+def inversion_reverse_process(
+        model,
+        xT,
+        etas=0,
+        prompts="",
+        cfg_scales=None,
+        prog_bar=False,
+        zs=None,
+        controller=None,
+        asyrp=False):
+    batch_size = len(prompts)
+
+    cfg_scales_tensor = torch.Tensor(cfg_scales).view(-1, 1, 1, 1).to(model.device, dtype=torch.float16)
+
+    text_embeddings = encode_text(model, prompts)
+    uncond_embedding = encode_text(model, [""] * batch_size)
+
+    if etas is None: etas = 0
+    if type(etas) in [int, float]: etas = [etas] * model.scheduler.num_inference_steps
+    assert len(etas) == model.scheduler.num_inference_steps
+    timesteps = model.scheduler.timesteps.to(model.device)
+
+    xt = xT.expand(batch_size, -1, -1, -1)
+    op = tqdm(timesteps[-zs.shape[0]:]) if prog_bar else timesteps[-zs.shape[0]:]
+
+    t_to_idx = {int(v): k for k, v in enumerate(timesteps[-zs.shape[0]:])}
+
+    for t in op:
+        idx = t_to_idx[int(t)]
+        ## Unconditional embedding
+        with torch.no_grad():
+            uncond_out = model.unet.forward(xt, timestep=t,
+                                            encoder_hidden_states=uncond_embedding)
+
+            ## Conditional embedding
+        if prompts:
+            with torch.no_grad():
+                cond_out = model.unet.forward(xt, timestep=t,
+                                              encoder_hidden_states=text_embeddings)
+
+        z = zs[idx] if not zs is None else None
+        z = z.expand(batch_size, -1, -1, -1)
+        if prompts:
+            ## classifier free guidance
+            noise_pred = uncond_out.sample + cfg_scales_tensor * (cond_out.sample - uncond_out.sample)
+        else:
+            noise_pred = uncond_out.sample
+        # 2. compute less noisy image and set x_t -> x_t-1
+        xt = reverse_step(model, noise_pred, t, xt, eta=etas[idx], variance_noise=z)
+        if controller is not None:
+            xt = controller.step_callback(xt)
+    return xt, zs
diff --git a/toolkit/ip_adapter.py b/toolkit/ip_adapter.py
new file mode 100644
index 00000000..f252af28
--- /dev/null
+++ b/toolkit/ip_adapter.py
@@ -0,0 +1,157 @@
+import torch
+import sys
+
+from PIL import Image
+from torch.nn import Parameter
+from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
+
+from toolkit.paths import REPOS_ROOT
+from toolkit.train_tools import get_torch_dtype
+
+sys.path.append(REPOS_ROOT)
+from typing import TYPE_CHECKING, Union, Iterator, Mapping, Any, Tuple, List
+from collections import OrderedDict
+from ipadapter.ip_adapter.attention_processor import AttnProcessor, IPAttnProcessor
+from ipadapter.ip_adapter.ip_adapter import ImageProjModel
+from ipadapter.ip_adapter.resampler import Resampler
+from toolkit.config_modules import AdapterConfig
+from toolkit.prompt_utils import PromptEmbeds
+import weakref
+
+if TYPE_CHECKING:
+    from toolkit.stable_diffusion_model import StableDiffusion
+
+
+# loosely based on # ref https://github.com/tencent-ailab/IP-Adapter/blob/main/tutorial_train.py
+class IPAdapter(torch.nn.Module):
+    """IP-Adapter"""
+
+    def __init__(self, sd: 'StableDiffusion', adapter_config: 'AdapterConfig'):
+        super().__init__()
+        self.config = adapter_config
+        self.sd_ref: weakref.ref = weakref.ref(sd)
+        self.clip_image_processor = CLIPImageProcessor()
+        self.device = self.sd_ref().unet.device
+        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(adapter_config.image_encoder_path)
+        if adapter_config.type == 'ip':
+            # ip-adapter
+            image_proj_model = ImageProjModel(
+                cross_attention_dim=sd.unet.config['cross_attention_dim'],
+                clip_embeddings_dim=self.image_encoder.config.projection_dim,
+                clip_extra_context_tokens=4,
+            )
+        elif adapter_config.type == 'ip+':
+            # ip-adapter-plus
+            num_tokens = 16
+            image_proj_model = Resampler(
+                dim=sd.unet.config['cross_attention_dim'],
+                depth=4,
+                dim_head=64,
+                heads=12,
+                num_queries=num_tokens,
+                embedding_dim=self.image_encoder.config.hidden_size,
+                output_dim=sd.unet.config['cross_attention_dim'],
+                ff_mult=4
+            )
+        else:
+            raise ValueError(f"unknown adapter type: {adapter_config.type}")
+
+        # init adapter modules
+        attn_procs = {}
+        unet_sd = sd.unet.state_dict()
+        for name in sd.unet.attn_processors.keys():
+            cross_attention_dim = None if name.endswith("attn1.processor") else sd.unet.config['cross_attention_dim']
+            if name.startswith("mid_block"):
+                hidden_size = sd.unet.config['block_out_channels'][-1]
+            elif name.startswith("up_blocks"):
+                block_id = int(name[len("up_blocks.")])
+                hidden_size = list(reversed(sd.unet.config['block_out_channels']))[block_id]
+            elif name.startswith("down_blocks"):
+                block_id = int(name[len("down_blocks.")])
+                hidden_size = sd.unet.config['block_out_channels'][block_id]
+            else:
+                # they didnt have this, but would lead to undefined below
+                raise ValueError(f"unknown attn processor name: {name}")
+            if cross_attention_dim is None:
+                attn_procs[name] = AttnProcessor()
+            else:
+                layer_name = name.split(".processor")[0]
+                weights = {
+                    "to_k_ip.weight": unet_sd[layer_name + ".to_k.weight"],
+                    "to_v_ip.weight": unet_sd[layer_name + ".to_v.weight"],
+                }
+                attn_procs[name] = IPAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim)
+                attn_procs[name].load_state_dict(weights)
+        sd.unet.set_attn_processor(attn_procs)
+        adapter_modules = torch.nn.ModuleList(sd.unet.attn_processors.values())
+
+        sd.adapter = self
+        self.unet_ref: weakref.ref = weakref.ref(sd.unet)
+        self.image_proj_model = image_proj_model
+        self.adapter_modules = adapter_modules
+
+    def to(self, *args, **kwargs):
+        super().to(*args, **kwargs)
+        self.image_encoder.to(*args, **kwargs)
+        self.image_proj_model.to(*args, **kwargs)
+        self.adapter_modules.to(*args, **kwargs)
+        return self
+
+    def load_ip_adapter(self, state_dict: Union[OrderedDict, dict]):
+        """Load "image_proj" and "ip_adapter" weights from a combined IP-Adapter state dict."""
+        self.image_proj_model.load_state_dict(state_dict["image_proj"])
+        self.adapter_modules.load_state_dict(state_dict["ip_adapter"])  # was self.pipe.unet..., never defined
+
+    def state_dict(self) -> OrderedDict:
+        state_dict = OrderedDict()
+        state_dict["image_proj"] = self.image_proj_model.state_dict()
+        state_dict["ip_adapter"] = self.adapter_modules.state_dict()
+        return state_dict
+
+    def set_scale(self, scale):
+        for attn_processor in self.adapter_modules:  # processors installed on the unet in __init__ (self.pipe is never defined)
+            if isinstance(attn_processor, IPAttnProcessor):
+                attn_processor.scale = scale
+
+    @torch.no_grad()
+    def get_clip_image_embeds_from_pil(self, pil_image: Union[Image.Image, List[Image.Image]], drop=False) -> torch.Tensor:
+        # todo: add support for sdxl
+        if isinstance(pil_image, Image.Image):
+            pil_image = [pil_image]
+        clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
+        clip_image = clip_image.to(self.device, dtype=torch.float16)
+        if drop:
+            clip_image = clip_image * 0
+        clip_image_embeds = self.image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
+        return clip_image_embeds
+
+    @torch.no_grad()
+    def get_clip_image_embeds_from_tensors(self, tensors_0_1: torch.Tensor, drop=False) -> torch.Tensor:
+        # tensors should be 0-1
+        # todo: add support for sdxl
+        if tensors_0_1.ndim == 3:
+            tensors_0_1 = tensors_0_1.unsqueeze(0)
+        tensors_0_1 = tensors_0_1.to(self.device, dtype=torch.float16)
+        clip_image = self.clip_image_processor(images=tensors_0_1, return_tensors="pt", do_resize=False).pixel_values
+        clip_image = clip_image.to(self.device, dtype=torch.float16)
+        if drop:
+            clip_image = clip_image * 0
+        clip_image_embeds = self.image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
+        return clip_image_embeds
+
+    # use drop for prompt dropout, or negatives
+    def forward(self, embeddings: PromptEmbeds, clip_image_embeds: torch.Tensor) -> PromptEmbeds:
+        clip_image_embeds = clip_image_embeds.detach()
+        clip_image_embeds = clip_image_embeds.to(self.device, dtype=get_torch_dtype(self.sd_ref().dtype))
+        image_prompt_embeds = self.image_proj_model(clip_image_embeds.detach())
+        embeddings.text_embeds = torch.cat([embeddings.text_embeds, image_prompt_embeds], dim=1)
+        return embeddings
+
+    def parameters(self, recurse: bool = True) -> Iterator[Parameter]:
+        for attn_processor in self.adapter_modules:
+            yield from attn_processor.parameters(recurse)
+        yield from self.image_proj_model.parameters(recurse)
+
+    def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
+        self.image_proj_model.load_state_dict(state_dict["image_proj"], strict=strict)
+        self.adapter_modules.load_state_dict(state_dict["ip_adapter"], strict=strict)
diff --git a/toolkit/keymaps/stable_diffusion_locon_sdxl.json b/toolkit/keymaps/stable_diffusion_locon_sdxl.json
new file mode 100644
index 00000000..8fe83f0b
--- /dev/null
+++ b/toolkit/keymaps/stable_diffusion_locon_sdxl.json
@@ -0,0 +1,3154 @@
+{
+    "ldm_diffusers_keymap": {
+        "lora_unet_input_blocks_0_0.alpha": "lora_unet_conv_in.alpha",
+        "lora_unet_input_blocks_0_0.lora_down.weight": "lora_unet_conv_in.lora_down.weight",
+        "lora_unet_input_blocks_0_0.lora_up.weight": "lora_unet_conv_in.lora_up.weight",
+        "lora_unet_input_blocks_1_0_emb_layers_1.alpha": "lora_unet_down_blocks_0_resnets_0_time_emb_proj.alpha",
+        "lora_unet_input_blocks_1_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_0_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_1_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_0_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_1_0_in_layers_0.alpha": "lora_unet_down_blocks_0_resnets_0_norm1.alpha",
+        "lora_unet_input_blocks_1_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_0_resnets_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_1_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_0_resnets_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_1_0_in_layers_2.alpha": "lora_unet_down_blocks_0_resnets_0_conv1.alpha",
+        "lora_unet_input_blocks_1_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_0_resnets_0_conv1.lora_down.weight",
+        "lora_unet_input_blocks_1_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_0_resnets_0_conv1.lora_up.weight",
+        "lora_unet_input_blocks_1_0_out_layers_0.alpha": "lora_unet_down_blocks_0_resnets_0_norm2.alpha",
+        "lora_unet_input_blocks_1_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_0_resnets_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_1_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_0_resnets_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_1_0_out_layers_3.alpha": "lora_unet_down_blocks_0_resnets_0_conv2.alpha",
+        "lora_unet_input_blocks_1_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_0_resnets_0_conv2.lora_down.weight",
+        "lora_unet_input_blocks_1_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_0_resnets_0_conv2.lora_up.weight",
+        "lora_unet_input_blocks_2_0_emb_layers_1.alpha": "lora_unet_down_blocks_0_resnets_1_time_emb_proj.alpha",
+        "lora_unet_input_blocks_2_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_0_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_2_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_0_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_2_0_in_layers_0.alpha": "lora_unet_down_blocks_0_resnets_1_norm1.alpha",
+        "lora_unet_input_blocks_2_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_0_resnets_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_2_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_0_resnets_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_2_0_in_layers_2.alpha": "lora_unet_down_blocks_0_resnets_1_conv1.alpha",
+        "lora_unet_input_blocks_2_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_0_resnets_1_conv1.lora_down.weight",
+        "lora_unet_input_blocks_2_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_0_resnets_1_conv1.lora_up.weight",
+        "lora_unet_input_blocks_2_0_out_layers_0.alpha": "lora_unet_down_blocks_0_resnets_1_norm2.alpha",
+        "lora_unet_input_blocks_2_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_0_resnets_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_2_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_0_resnets_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_2_0_out_layers_3.alpha": "lora_unet_down_blocks_0_resnets_1_conv2.alpha",
+        "lora_unet_input_blocks_2_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_0_resnets_1_conv2.lora_down.weight",
+        "lora_unet_input_blocks_2_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_0_resnets_1_conv2.lora_up.weight",
+        "lora_unet_input_blocks_3_0_op.alpha": "lora_unet_down_blocks_0_downsamplers_0_conv.alpha",
+        "lora_unet_input_blocks_3_0_op.lora_down.weight": "lora_unet_down_blocks_0_downsamplers_0_conv.lora_down.weight",
+        "lora_unet_input_blocks_3_0_op.lora_up.weight": "lora_unet_down_blocks_0_downsamplers_0_conv.lora_up.weight",
+        "lora_unet_input_blocks_4_0_emb_layers_1.alpha": "lora_unet_down_blocks_1_resnets_0_time_emb_proj.alpha",
+        "lora_unet_input_blocks_4_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_4_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_4_0_in_layers_0.alpha": "lora_unet_down_blocks_1_resnets_0_norm1.alpha",
+        "lora_unet_input_blocks_4_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_4_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_4_0_in_layers_2.alpha": "lora_unet_down_blocks_1_resnets_0_conv1.alpha",
+        "lora_unet_input_blocks_4_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_conv1.lora_down.weight",
+        "lora_unet_input_blocks_4_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_conv1.lora_up.weight",
+        "lora_unet_input_blocks_4_0_out_layers_0.alpha": "lora_unet_down_blocks_1_resnets_0_norm2.alpha",
+        "lora_unet_input_blocks_4_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_4_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_4_0_out_layers_3.alpha": "lora_unet_down_blocks_1_resnets_0_conv2.alpha",
+        "lora_unet_input_blocks_4_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_conv2.lora_down.weight",
+        "lora_unet_input_blocks_4_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_conv2.lora_up.weight",
+        "lora_unet_input_blocks_4_0_skip_connection.alpha": "lora_unet_down_blocks_1_resnets_0_conv_shortcut.alpha",
+        "lora_unet_input_blocks_4_0_skip_connection.lora_down.weight": "lora_unet_down_blocks_1_resnets_0_conv_shortcut.lora_down.weight",
+        "lora_unet_input_blocks_4_0_skip_connection.lora_up.weight": "lora_unet_down_blocks_1_resnets_0_conv_shortcut.lora_up.weight",
+        "lora_unet_input_blocks_4_1_norm.alpha": "lora_unet_down_blocks_1_attentions_0_norm.alpha",
+        "lora_unet_input_blocks_4_1_norm.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_norm.lora_down.weight",
+        "lora_unet_input_blocks_4_1_norm.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_norm.lora_up.weight",
+        "lora_unet_input_blocks_4_1_proj_in.alpha": "lora_unet_down_blocks_1_attentions_0_proj_in.alpha",
+        "lora_unet_input_blocks_4_1_proj_in.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_proj_in.lora_down.weight",
+        "lora_unet_input_blocks_4_1_proj_in.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_proj_in.lora_up.weight",
+        "lora_unet_input_blocks_4_1_proj_out.alpha": "lora_unet_down_blocks_1_attentions_0_proj_out.alpha",
+        "lora_unet_input_blocks_4_1_proj_out.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_proj_out.lora_down.weight",
+        "lora_unet_input_blocks_4_1_proj_out.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_proj_out.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm1.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm1.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm2.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm2.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm3.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm3.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm1.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm1.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm2.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm2.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm3.alpha": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm3.alpha",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_input_blocks_4_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_down_blocks_1_attentions_0_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_input_blocks_5_0_emb_layers_1.alpha": "lora_unet_down_blocks_1_resnets_1_time_emb_proj.alpha",
+        "lora_unet_input_blocks_5_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_1_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_5_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_1_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_5_0_in_layers_0.alpha": "lora_unet_down_blocks_1_resnets_1_norm1.alpha",
+        "lora_unet_input_blocks_5_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_1_resnets_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_5_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_1_resnets_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_5_0_in_layers_2.alpha": "lora_unet_down_blocks_1_resnets_1_conv1.alpha",
+        "lora_unet_input_blocks_5_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_1_resnets_1_conv1.lora_down.weight",
+        "lora_unet_input_blocks_5_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_1_resnets_1_conv1.lora_up.weight",
+        "lora_unet_input_blocks_5_0_out_layers_0.alpha": "lora_unet_down_blocks_1_resnets_1_norm2.alpha",
+        "lora_unet_input_blocks_5_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_1_resnets_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_5_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_1_resnets_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_5_0_out_layers_3.alpha": "lora_unet_down_blocks_1_resnets_1_conv2.alpha",
+        "lora_unet_input_blocks_5_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_1_resnets_1_conv2.lora_down.weight",
+        "lora_unet_input_blocks_5_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_1_resnets_1_conv2.lora_up.weight",
+        "lora_unet_input_blocks_5_1_norm.alpha": "lora_unet_down_blocks_1_attentions_1_norm.alpha",
+        "lora_unet_input_blocks_5_1_norm.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_norm.lora_down.weight",
+        "lora_unet_input_blocks_5_1_norm.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_norm.lora_up.weight",
+        "lora_unet_input_blocks_5_1_proj_in.alpha": "lora_unet_down_blocks_1_attentions_1_proj_in.alpha",
+        "lora_unet_input_blocks_5_1_proj_in.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_proj_in.lora_down.weight",
+        "lora_unet_input_blocks_5_1_proj_in.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_proj_in.lora_up.weight",
+        "lora_unet_input_blocks_5_1_proj_out.alpha": "lora_unet_down_blocks_1_attentions_1_proj_out.alpha",
+        "lora_unet_input_blocks_5_1_proj_out.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_proj_out.lora_down.weight",
+        "lora_unet_input_blocks_5_1_proj_out.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_proj_out.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm1.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm1.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm2.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm2.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm3.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm3.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm1.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm1.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm2.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm2.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm3.alpha": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm3.alpha",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_input_blocks_5_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_down_blocks_1_attentions_1_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_input_blocks_6_0_op.alpha": "lora_unet_down_blocks_1_downsamplers_0_conv.alpha",
+        "lora_unet_input_blocks_6_0_op.lora_down.weight": "lora_unet_down_blocks_1_downsamplers_0_conv.lora_down.weight",
+        "lora_unet_input_blocks_6_0_op.lora_up.weight": "lora_unet_down_blocks_1_downsamplers_0_conv.lora_up.weight",
+        "lora_unet_input_blocks_7_0_emb_layers_1.alpha": "lora_unet_down_blocks_2_resnets_0_time_emb_proj.alpha",
+        "lora_unet_input_blocks_7_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_0_in_layers_0.alpha": "lora_unet_down_blocks_2_resnets_0_norm1.alpha",
+        "lora_unet_input_blocks_7_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_0_in_layers_2.alpha": "lora_unet_down_blocks_2_resnets_0_conv1.alpha",
+        "lora_unet_input_blocks_7_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_conv1.lora_down.weight",
+        "lora_unet_input_blocks_7_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_conv1.lora_up.weight",
+        "lora_unet_input_blocks_7_0_out_layers_0.alpha": "lora_unet_down_blocks_2_resnets_0_norm2.alpha",
+        "lora_unet_input_blocks_7_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_0_out_layers_3.alpha": "lora_unet_down_blocks_2_resnets_0_conv2.alpha",
+        "lora_unet_input_blocks_7_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_conv2.lora_down.weight",
+        "lora_unet_input_blocks_7_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_conv2.lora_up.weight",
+        "lora_unet_input_blocks_7_0_skip_connection.alpha": "lora_unet_down_blocks_2_resnets_0_conv_shortcut.alpha",
+        "lora_unet_input_blocks_7_0_skip_connection.lora_down.weight": "lora_unet_down_blocks_2_resnets_0_conv_shortcut.lora_down.weight",
+        "lora_unet_input_blocks_7_0_skip_connection.lora_up.weight": "lora_unet_down_blocks_2_resnets_0_conv_shortcut.lora_up.weight",
+        "lora_unet_input_blocks_7_1_norm.alpha": "lora_unet_down_blocks_2_attentions_0_norm.alpha",
+        "lora_unet_input_blocks_7_1_norm.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_norm.lora_down.weight",
+        "lora_unet_input_blocks_7_1_norm.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_norm.lora_up.weight",
+        "lora_unet_input_blocks_7_1_proj_in.alpha": "lora_unet_down_blocks_2_attentions_0_proj_in.alpha",
+        "lora_unet_input_blocks_7_1_proj_in.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_proj_in.lora_down.weight",
+        "lora_unet_input_blocks_7_1_proj_in.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_proj_in.lora_up.weight",
+        "lora_unet_input_blocks_7_1_proj_out.alpha": "lora_unet_down_blocks_2_attentions_0_proj_out.alpha",
+        "lora_unet_input_blocks_7_1_proj_out.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_proj_out.lora_down.weight",
+        "lora_unet_input_blocks_7_1_proj_out.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_proj_out.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm1.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm1.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm2.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm2.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm3.alpha": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm3.alpha",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_input_blocks_7_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_0_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_0_emb_layers_1.alpha": "lora_unet_down_blocks_2_resnets_1_time_emb_proj.alpha",
+        "lora_unet_input_blocks_8_0_emb_layers_1.lora_down.weight": "lora_unet_down_blocks_2_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_0_emb_layers_1.lora_up.weight": "lora_unet_down_blocks_2_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_0_in_layers_0.alpha": "lora_unet_down_blocks_2_resnets_1_norm1.alpha",
+        "lora_unet_input_blocks_8_0_in_layers_0.lora_down.weight": "lora_unet_down_blocks_2_resnets_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_0_in_layers_0.lora_up.weight": "lora_unet_down_blocks_2_resnets_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_0_in_layers_2.alpha": "lora_unet_down_blocks_2_resnets_1_conv1.alpha",
+        "lora_unet_input_blocks_8_0_in_layers_2.lora_down.weight": "lora_unet_down_blocks_2_resnets_1_conv1.lora_down.weight",
+        "lora_unet_input_blocks_8_0_in_layers_2.lora_up.weight": "lora_unet_down_blocks_2_resnets_1_conv1.lora_up.weight",
+        "lora_unet_input_blocks_8_0_out_layers_0.alpha": "lora_unet_down_blocks_2_resnets_1_norm2.alpha",
+        "lora_unet_input_blocks_8_0_out_layers_0.lora_down.weight": "lora_unet_down_blocks_2_resnets_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_0_out_layers_0.lora_up.weight": "lora_unet_down_blocks_2_resnets_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_0_out_layers_3.alpha": "lora_unet_down_blocks_2_resnets_1_conv2.alpha",
+        "lora_unet_input_blocks_8_0_out_layers_3.lora_down.weight": "lora_unet_down_blocks_2_resnets_1_conv2.lora_down.weight",
+        "lora_unet_input_blocks_8_0_out_layers_3.lora_up.weight": "lora_unet_down_blocks_2_resnets_1_conv2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_norm.alpha": "lora_unet_down_blocks_2_attentions_1_norm.alpha",
+        "lora_unet_input_blocks_8_1_norm.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_norm.lora_down.weight",
+        "lora_unet_input_blocks_8_1_norm.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_norm.lora_up.weight",
+        "lora_unet_input_blocks_8_1_proj_in.alpha": "lora_unet_down_blocks_2_attentions_1_proj_in.alpha",
+        "lora_unet_input_blocks_8_1_proj_in.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_proj_in.lora_down.weight",
+        "lora_unet_input_blocks_8_1_proj_in.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_proj_in.lora_up.weight",
+        "lora_unet_input_blocks_8_1_proj_out.alpha": "lora_unet_down_blocks_2_attentions_1_proj_out.alpha",
+        "lora_unet_input_blocks_8_1_proj_out.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_proj_out.lora_down.weight",
+        "lora_unet_input_blocks_8_1_proj_out.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_proj_out.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm1.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm1.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm2.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm2.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm3.alpha": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm3.alpha",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_input_blocks_8_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_down_blocks_2_attentions_1_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_label_emb_0_0.alpha": "lora_unet_add_embedding_linear_1.alpha",
+        "lora_unet_label_emb_0_0.lora_down.weight": "lora_unet_add_embedding_linear_1.lora_down.weight",
+        "lora_unet_label_emb_0_0.lora_up.weight": "lora_unet_add_embedding_linear_1.lora_up.weight",
+        "lora_unet_label_emb_0_2.alpha": "lora_unet_add_embedding_linear_2.alpha",
+        "lora_unet_label_emb_0_2.lora_down.weight": "lora_unet_add_embedding_linear_2.lora_down.weight",
+        "lora_unet_label_emb_0_2.lora_up.weight": "lora_unet_add_embedding_linear_2.lora_up.weight",
+        "lora_unet_middle_block_0_emb_layers_1.alpha": "lora_unet_mid_block_resnets_0_time_emb_proj.alpha",
+        "lora_unet_middle_block_0_emb_layers_1.lora_down.weight": "lora_unet_mid_block_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_middle_block_0_emb_layers_1.lora_up.weight": "lora_unet_mid_block_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_middle_block_0_in_layers_0.alpha": "lora_unet_mid_block_resnets_0_norm1.alpha",
+        "lora_unet_middle_block_0_in_layers_0.lora_down.weight": "lora_unet_mid_block_resnets_0_norm1.lora_down.weight",
+        "lora_unet_middle_block_0_in_layers_0.lora_up.weight": "lora_unet_mid_block_resnets_0_norm1.lora_up.weight",
+        "lora_unet_middle_block_0_in_layers_2.alpha": "lora_unet_mid_block_resnets_0_conv1.alpha",
+        "lora_unet_middle_block_0_in_layers_2.lora_down.weight": "lora_unet_mid_block_resnets_0_conv1.lora_down.weight",
+        "lora_unet_middle_block_0_in_layers_2.lora_up.weight": "lora_unet_mid_block_resnets_0_conv1.lora_up.weight",
+        "lora_unet_middle_block_0_out_layers_0.alpha": "lora_unet_mid_block_resnets_0_norm2.alpha",
+        "lora_unet_middle_block_0_out_layers_0.lora_down.weight": "lora_unet_mid_block_resnets_0_norm2.lora_down.weight",
+        "lora_unet_middle_block_0_out_layers_0.lora_up.weight": "lora_unet_mid_block_resnets_0_norm2.lora_up.weight",
+        "lora_unet_middle_block_0_out_layers_3.alpha": "lora_unet_mid_block_resnets_0_conv2.alpha",
+        "lora_unet_middle_block_0_out_layers_3.lora_down.weight": "lora_unet_mid_block_resnets_0_conv2.lora_down.weight",
+        "lora_unet_middle_block_0_out_layers_3.lora_up.weight": "lora_unet_mid_block_resnets_0_conv2.lora_up.weight",
+        "lora_unet_middle_block_1_norm.alpha": "lora_unet_mid_block_attentions_0_norm.alpha",
+        "lora_unet_middle_block_1_norm.lora_down.weight": "lora_unet_mid_block_attentions_0_norm.lora_down.weight",
+        "lora_unet_middle_block_1_norm.lora_up.weight": "lora_unet_mid_block_attentions_0_norm.lora_up.weight",
+        "lora_unet_middle_block_1_proj_in.alpha": "lora_unet_mid_block_attentions_0_proj_in.alpha",
+        "lora_unet_middle_block_1_proj_in.lora_down.weight": "lora_unet_mid_block_attentions_0_proj_in.lora_down.weight",
+        "lora_unet_middle_block_1_proj_in.lora_up.weight": "lora_unet_mid_block_attentions_0_proj_in.lora_up.weight",
+        "lora_unet_middle_block_1_proj_out.alpha": "lora_unet_mid_block_attentions_0_proj_out.alpha",
+        "lora_unet_middle_block_1_proj_out.lora_down.weight": "lora_unet_mid_block_attentions_0_proj_out.lora_down.weight",
+        "lora_unet_middle_block_1_proj_out.lora_up.weight": "lora_unet_mid_block_attentions_0_proj_out.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm1.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm1.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm2.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm2.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm3.alpha": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm3.alpha",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_middle_block_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_mid_block_attentions_0_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_middle_block_2_emb_layers_1.alpha": "lora_unet_mid_block_resnets_1_time_emb_proj.alpha",
+        "lora_unet_middle_block_2_emb_layers_1.lora_down.weight": "lora_unet_mid_block_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_middle_block_2_emb_layers_1.lora_up.weight": "lora_unet_mid_block_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_middle_block_2_in_layers_0.alpha": "lora_unet_mid_block_resnets_1_norm1.alpha",
+        "lora_unet_middle_block_2_in_layers_0.lora_down.weight": "lora_unet_mid_block_resnets_1_norm1.lora_down.weight",
+        "lora_unet_middle_block_2_in_layers_0.lora_up.weight": "lora_unet_mid_block_resnets_1_norm1.lora_up.weight",
+        "lora_unet_middle_block_2_in_layers_2.alpha": "lora_unet_mid_block_resnets_1_conv1.alpha",
+        "lora_unet_middle_block_2_in_layers_2.lora_down.weight": "lora_unet_mid_block_resnets_1_conv1.lora_down.weight",
+        "lora_unet_middle_block_2_in_layers_2.lora_up.weight": "lora_unet_mid_block_resnets_1_conv1.lora_up.weight",
+        "lora_unet_middle_block_2_out_layers_0.alpha": "lora_unet_mid_block_resnets_1_norm2.alpha",
+        "lora_unet_middle_block_2_out_layers_0.lora_down.weight": "lora_unet_mid_block_resnets_1_norm2.lora_down.weight",
+        "lora_unet_middle_block_2_out_layers_0.lora_up.weight": "lora_unet_mid_block_resnets_1_norm2.lora_up.weight",
+        "lora_unet_middle_block_2_out_layers_3.alpha": "lora_unet_mid_block_resnets_1_conv2.alpha",
+        "lora_unet_middle_block_2_out_layers_3.lora_down.weight": "lora_unet_mid_block_resnets_1_conv2.lora_down.weight",
+        "lora_unet_middle_block_2_out_layers_3.lora_up.weight": "lora_unet_mid_block_resnets_1_conv2.lora_up.weight",
+        "lora_unet_out_0.alpha": "lora_unet_conv_norm_out.alpha",
+        "lora_unet_out_0.lora_down.weight": "lora_unet_conv_norm_out.lora_down.weight",
+        "lora_unet_out_0.lora_up.weight": "lora_unet_conv_norm_out.lora_up.weight",
+        "lora_unet_out_2.alpha": "lora_unet_conv_out.alpha",
+        "lora_unet_out_2.lora_down.weight": "lora_unet_conv_out.lora_down.weight",
+        "lora_unet_out_2.lora_up.weight": "lora_unet_conv_out.lora_up.weight",
+        "lora_unet_output_blocks_0_0_emb_layers_1.alpha": "lora_unet_up_blocks_0_resnets_0_time_emb_proj.alpha",
+        "lora_unet_output_blocks_0_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_0_in_layers_0.alpha": "lora_unet_up_blocks_0_resnets_0_norm1.alpha",
+        "lora_unet_output_blocks_0_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_0_in_layers_2.alpha": "lora_unet_up_blocks_0_resnets_0_conv1.alpha",
+        "lora_unet_output_blocks_0_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_conv1.lora_down.weight",
+        "lora_unet_output_blocks_0_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_conv1.lora_up.weight",
+        "lora_unet_output_blocks_0_0_out_layers_0.alpha": "lora_unet_up_blocks_0_resnets_0_norm2.alpha",
+        "lora_unet_output_blocks_0_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_0_out_layers_3.alpha": "lora_unet_up_blocks_0_resnets_0_conv2.alpha",
+        "lora_unet_output_blocks_0_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_conv2.lora_down.weight",
+        "lora_unet_output_blocks_0_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_conv2.lora_up.weight",
+        "lora_unet_output_blocks_0_0_skip_connection.alpha": "lora_unet_up_blocks_0_resnets_0_conv_shortcut.alpha",
+        "lora_unet_output_blocks_0_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_0_resnets_0_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_0_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_0_resnets_0_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_0_1_norm.alpha": "lora_unet_up_blocks_0_attentions_0_norm.alpha",
+        "lora_unet_output_blocks_0_1_norm.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_norm.lora_down.weight",
+        "lora_unet_output_blocks_0_1_norm.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_norm.lora_up.weight",
+        "lora_unet_output_blocks_0_1_proj_in.alpha": "lora_unet_up_blocks_0_attentions_0_proj_in.alpha",
+        "lora_unet_output_blocks_0_1_proj_in.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_0_1_proj_in.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_0_1_proj_out.alpha": "lora_unet_up_blocks_0_attentions_0_proj_out.alpha",
+        "lora_unet_output_blocks_0_1_proj_out.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_0_1_proj_out.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm1.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm1.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm2.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm2.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm3.alpha": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm3.alpha",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_output_blocks_0_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_0_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_0_emb_layers_1.alpha": "lora_unet_up_blocks_0_resnets_1_time_emb_proj.alpha",
+        "lora_unet_output_blocks_1_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_0_in_layers_0.alpha": "lora_unet_up_blocks_0_resnets_1_norm1.alpha",
+        "lora_unet_output_blocks_1_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_0_in_layers_2.alpha": "lora_unet_up_blocks_0_resnets_1_conv1.alpha",
+        "lora_unet_output_blocks_1_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_conv1.lora_down.weight",
+        "lora_unet_output_blocks_1_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_conv1.lora_up.weight",
+        "lora_unet_output_blocks_1_0_out_layers_0.alpha": "lora_unet_up_blocks_0_resnets_1_norm2.alpha",
+        "lora_unet_output_blocks_1_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_0_out_layers_3.alpha": "lora_unet_up_blocks_0_resnets_1_conv2.alpha",
+        "lora_unet_output_blocks_1_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_conv2.lora_down.weight",
+        "lora_unet_output_blocks_1_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_conv2.lora_up.weight",
+        "lora_unet_output_blocks_1_0_skip_connection.alpha": "lora_unet_up_blocks_0_resnets_1_conv_shortcut.alpha",
+        "lora_unet_output_blocks_1_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_0_resnets_1_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_1_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_0_resnets_1_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_1_1_norm.alpha": "lora_unet_up_blocks_0_attentions_1_norm.alpha",
+        "lora_unet_output_blocks_1_1_norm.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_norm.lora_down.weight",
+        "lora_unet_output_blocks_1_1_norm.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_norm.lora_up.weight",
+        "lora_unet_output_blocks_1_1_proj_in.alpha": "lora_unet_up_blocks_0_attentions_1_proj_in.alpha",
+        "lora_unet_output_blocks_1_1_proj_in.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_1_1_proj_in.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_1_1_proj_out.alpha": "lora_unet_up_blocks_0_attentions_1_proj_out.alpha",
+        "lora_unet_output_blocks_1_1_proj_out.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_1_1_proj_out.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm1.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm1.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm2.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm2.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm3.alpha": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm3.alpha",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_output_blocks_1_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_1_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_0_emb_layers_1.alpha": "lora_unet_up_blocks_0_resnets_2_time_emb_proj.alpha",
+        "lora_unet_output_blocks_2_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_0_in_layers_0.alpha": "lora_unet_up_blocks_0_resnets_2_norm1.alpha",
+        "lora_unet_output_blocks_2_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_0_in_layers_2.alpha": "lora_unet_up_blocks_0_resnets_2_conv1.alpha",
+        "lora_unet_output_blocks_2_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_conv1.lora_down.weight",
+        "lora_unet_output_blocks_2_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_conv1.lora_up.weight",
+        "lora_unet_output_blocks_2_0_out_layers_0.alpha": "lora_unet_up_blocks_0_resnets_2_norm2.alpha",
+        "lora_unet_output_blocks_2_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_0_out_layers_3.alpha": "lora_unet_up_blocks_0_resnets_2_conv2.alpha",
+        "lora_unet_output_blocks_2_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_conv2.lora_down.weight",
+        "lora_unet_output_blocks_2_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_conv2.lora_up.weight",
+        "lora_unet_output_blocks_2_0_skip_connection.alpha": "lora_unet_up_blocks_0_resnets_2_conv_shortcut.alpha",
+        "lora_unet_output_blocks_2_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_0_resnets_2_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_2_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_0_resnets_2_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_2_1_norm.alpha": "lora_unet_up_blocks_0_attentions_2_norm.alpha",
+        "lora_unet_output_blocks_2_1_norm.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_norm.lora_down.weight",
+        "lora_unet_output_blocks_2_1_norm.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_norm.lora_up.weight",
+        "lora_unet_output_blocks_2_1_proj_in.alpha": "lora_unet_up_blocks_0_attentions_2_proj_in.alpha",
+        "lora_unet_output_blocks_2_1_proj_in.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_2_1_proj_in.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_2_1_proj_out.alpha": "lora_unet_up_blocks_0_attentions_2_proj_out.alpha",
+        "lora_unet_output_blocks_2_1_proj_out.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_2_1_proj_out.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_2_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_2_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_3_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_3_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_4_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_4_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_5_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_5_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_6_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_6_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_7_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_7_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_8_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_8_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_k.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_k.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_out_0.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_q.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_q.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_v.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_v.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_0_proj.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_ff_net_2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm1.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm1.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm1.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm1.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm1.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm1.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm2.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm2.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm2.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm2.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm2.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm2.lora_up.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm3.alpha": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm3.alpha",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm3.lora_down.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm3.lora_down.weight",
+        "lora_unet_output_blocks_2_1_transformer_blocks_9_norm3.lora_up.weight": "lora_unet_up_blocks_0_attentions_2_transformer_blocks_9_norm3.lora_up.weight",
+        "lora_unet_output_blocks_2_2_conv.alpha": "lora_unet_up_blocks_0_upsamplers_0_conv.alpha",
+        "lora_unet_output_blocks_2_2_conv.lora_down.weight": "lora_unet_up_blocks_0_upsamplers_0_conv.lora_down.weight",
+        "lora_unet_output_blocks_2_2_conv.lora_up.weight": "lora_unet_up_blocks_0_upsamplers_0_conv.lora_up.weight",
+        "lora_unet_output_blocks_3_0_emb_layers_1.alpha": "lora_unet_up_blocks_1_resnets_0_time_emb_proj.alpha",
+        "lora_unet_output_blocks_3_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_3_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_3_0_in_layers_0.alpha": "lora_unet_up_blocks_1_resnets_0_norm1.alpha",
+        "lora_unet_output_blocks_3_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_3_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_3_0_in_layers_2.alpha": "lora_unet_up_blocks_1_resnets_0_conv1.alpha",
+        "lora_unet_output_blocks_3_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_conv1.lora_down.weight",
+        "lora_unet_output_blocks_3_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_conv1.lora_up.weight",
+        "lora_unet_output_blocks_3_0_out_layers_0.alpha": "lora_unet_up_blocks_1_resnets_0_norm2.alpha",
+        "lora_unet_output_blocks_3_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_3_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_3_0_out_layers_3.alpha": "lora_unet_up_blocks_1_resnets_0_conv2.alpha",
+        "lora_unet_output_blocks_3_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_conv2.lora_down.weight",
+        "lora_unet_output_blocks_3_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_conv2.lora_up.weight",
+        "lora_unet_output_blocks_3_0_skip_connection.alpha": "lora_unet_up_blocks_1_resnets_0_conv_shortcut.alpha",
+        "lora_unet_output_blocks_3_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_1_resnets_0_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_3_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_1_resnets_0_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_3_1_norm.alpha": "lora_unet_up_blocks_1_attentions_0_norm.alpha",
+        "lora_unet_output_blocks_3_1_norm.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_norm.lora_down.weight",
+        "lora_unet_output_blocks_3_1_norm.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_norm.lora_up.weight",
+        "lora_unet_output_blocks_3_1_proj_in.alpha": "lora_unet_up_blocks_1_attentions_0_proj_in.alpha",
+        "lora_unet_output_blocks_3_1_proj_in.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_3_1_proj_in.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_3_1_proj_out.alpha": "lora_unet_up_blocks_1_attentions_0_proj_out.alpha",
+        "lora_unet_output_blocks_3_1_proj_out.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_3_1_proj_out.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_3_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_0_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_4_0_emb_layers_1.alpha": "lora_unet_up_blocks_1_resnets_1_time_emb_proj.alpha",
+        "lora_unet_output_blocks_4_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_4_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_4_0_in_layers_0.alpha": "lora_unet_up_blocks_1_resnets_1_norm1.alpha",
+        "lora_unet_output_blocks_4_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_4_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_4_0_in_layers_2.alpha": "lora_unet_up_blocks_1_resnets_1_conv1.alpha",
+        "lora_unet_output_blocks_4_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_conv1.lora_down.weight",
+        "lora_unet_output_blocks_4_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_conv1.lora_up.weight",
+        "lora_unet_output_blocks_4_0_out_layers_0.alpha": "lora_unet_up_blocks_1_resnets_1_norm2.alpha",
+        "lora_unet_output_blocks_4_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_4_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_4_0_out_layers_3.alpha": "lora_unet_up_blocks_1_resnets_1_conv2.alpha",
+        "lora_unet_output_blocks_4_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_conv2.lora_down.weight",
+        "lora_unet_output_blocks_4_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_conv2.lora_up.weight",
+        "lora_unet_output_blocks_4_0_skip_connection.alpha": "lora_unet_up_blocks_1_resnets_1_conv_shortcut.alpha",
+        "lora_unet_output_blocks_4_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_1_resnets_1_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_4_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_1_resnets_1_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_4_1_norm.alpha": "lora_unet_up_blocks_1_attentions_1_norm.alpha",
+        "lora_unet_output_blocks_4_1_norm.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_norm.lora_down.weight",
+        "lora_unet_output_blocks_4_1_norm.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_norm.lora_up.weight",
+        "lora_unet_output_blocks_4_1_proj_in.alpha": "lora_unet_up_blocks_1_attentions_1_proj_in.alpha",
+        "lora_unet_output_blocks_4_1_proj_in.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_4_1_proj_in.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_4_1_proj_out.alpha": "lora_unet_up_blocks_1_attentions_1_proj_out.alpha",
+        "lora_unet_output_blocks_4_1_proj_out.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_4_1_proj_out.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_4_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_1_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_5_0_emb_layers_1.alpha": "lora_unet_up_blocks_1_resnets_2_time_emb_proj.alpha",
+        "lora_unet_output_blocks_5_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_5_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_5_0_in_layers_0.alpha": "lora_unet_up_blocks_1_resnets_2_norm1.alpha",
+        "lora_unet_output_blocks_5_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_5_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_5_0_in_layers_2.alpha": "lora_unet_up_blocks_1_resnets_2_conv1.alpha",
+        "lora_unet_output_blocks_5_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_conv1.lora_down.weight",
+        "lora_unet_output_blocks_5_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_conv1.lora_up.weight",
+        "lora_unet_output_blocks_5_0_out_layers_0.alpha": "lora_unet_up_blocks_1_resnets_2_norm2.alpha",
+        "lora_unet_output_blocks_5_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_5_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_5_0_out_layers_3.alpha": "lora_unet_up_blocks_1_resnets_2_conv2.alpha",
+        "lora_unet_output_blocks_5_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_conv2.lora_down.weight",
+        "lora_unet_output_blocks_5_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_conv2.lora_up.weight",
+        "lora_unet_output_blocks_5_0_skip_connection.alpha": "lora_unet_up_blocks_1_resnets_2_conv_shortcut.alpha",
+        "lora_unet_output_blocks_5_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_1_resnets_2_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_5_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_1_resnets_2_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_5_1_norm.alpha": "lora_unet_up_blocks_1_attentions_2_norm.alpha",
+        "lora_unet_output_blocks_5_1_norm.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_norm.lora_down.weight",
+        "lora_unet_output_blocks_5_1_norm.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_norm.lora_up.weight",
+        "lora_unet_output_blocks_5_1_proj_in.alpha": "lora_unet_up_blocks_1_attentions_2_proj_in.alpha",
+        "lora_unet_output_blocks_5_1_proj_in.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_proj_in.lora_down.weight",
+        "lora_unet_output_blocks_5_1_proj_in.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_proj_in.lora_up.weight",
+        "lora_unet_output_blocks_5_1_proj_out.alpha": "lora_unet_up_blocks_1_attentions_2_proj_out.alpha",
+        "lora_unet_output_blocks_5_1_proj_out.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_proj_out.lora_down.weight",
+        "lora_unet_output_blocks_5_1_proj_out.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_proj_out.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm1.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm1.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm2.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm2.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm3.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm3.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm3.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_0_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_0_norm3.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_k.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_k.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_k.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_k.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_out_0.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_q.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_q.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_q.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_q.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_v.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_v.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_v.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn1_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn1_to_v.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_k.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_k.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_k.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_k.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_k.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_k.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_out_0.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_out_0.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_out_0.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_out_0.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_out_0.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_out_0.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_q.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_q.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_q.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_q.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_q.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_q.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_v.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_v.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_v.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_v.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_attn2_to_v.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_attn2_to_v.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_0_proj.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_0_proj.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_0_proj.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_0_proj.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_0_proj.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_0_proj.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_2.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_2.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_2.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_2.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_ff_net_2.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_2.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm1.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm1.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm1.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm1.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm2.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm2.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm2.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm2.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm3.alpha": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm3.alpha",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm3.lora_down.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm3.lora_down.weight",
+        "lora_unet_output_blocks_5_1_transformer_blocks_1_norm3.lora_up.weight": "lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_norm3.lora_up.weight",
+        "lora_unet_output_blocks_5_2_conv.alpha": "lora_unet_up_blocks_1_upsamplers_0_conv.alpha",
+        "lora_unet_output_blocks_5_2_conv.lora_down.weight": "lora_unet_up_blocks_1_upsamplers_0_conv.lora_down.weight",
+        "lora_unet_output_blocks_5_2_conv.lora_up.weight": "lora_unet_up_blocks_1_upsamplers_0_conv.lora_up.weight",
+        "lora_unet_output_blocks_6_0_emb_layers_1.alpha": "lora_unet_up_blocks_2_resnets_0_time_emb_proj.alpha",
+        "lora_unet_output_blocks_6_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_6_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_6_0_in_layers_0.alpha": "lora_unet_up_blocks_2_resnets_0_norm1.alpha",
+        "lora_unet_output_blocks_6_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_norm1.lora_down.weight",
+        "lora_unet_output_blocks_6_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_norm1.lora_up.weight",
+        "lora_unet_output_blocks_6_0_in_layers_2.alpha": "lora_unet_up_blocks_2_resnets_0_conv1.alpha",
+        "lora_unet_output_blocks_6_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_conv1.lora_down.weight",
+        "lora_unet_output_blocks_6_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_conv1.lora_up.weight",
+        "lora_unet_output_blocks_6_0_out_layers_0.alpha": "lora_unet_up_blocks_2_resnets_0_norm2.alpha",
+        "lora_unet_output_blocks_6_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_norm2.lora_down.weight",
+        "lora_unet_output_blocks_6_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_norm2.lora_up.weight",
+        "lora_unet_output_blocks_6_0_out_layers_3.alpha": "lora_unet_up_blocks_2_resnets_0_conv2.alpha",
+        "lora_unet_output_blocks_6_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_conv2.lora_down.weight",
+        "lora_unet_output_blocks_6_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_conv2.lora_up.weight",
+        "lora_unet_output_blocks_6_0_skip_connection.alpha": "lora_unet_up_blocks_2_resnets_0_conv_shortcut.alpha",
+        "lora_unet_output_blocks_6_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_2_resnets_0_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_6_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_2_resnets_0_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_7_0_emb_layers_1.alpha": "lora_unet_up_blocks_2_resnets_1_time_emb_proj.alpha",
+        "lora_unet_output_blocks_7_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_7_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_7_0_in_layers_0.alpha": "lora_unet_up_blocks_2_resnets_1_norm1.alpha",
+        "lora_unet_output_blocks_7_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_norm1.lora_down.weight",
+        "lora_unet_output_blocks_7_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_norm1.lora_up.weight",
+        "lora_unet_output_blocks_7_0_in_layers_2.alpha": "lora_unet_up_blocks_2_resnets_1_conv1.alpha",
+        "lora_unet_output_blocks_7_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_conv1.lora_down.weight",
+        "lora_unet_output_blocks_7_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_conv1.lora_up.weight",
+        "lora_unet_output_blocks_7_0_out_layers_0.alpha": "lora_unet_up_blocks_2_resnets_1_norm2.alpha",
+        "lora_unet_output_blocks_7_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_norm2.lora_down.weight",
+        "lora_unet_output_blocks_7_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_norm2.lora_up.weight",
+        "lora_unet_output_blocks_7_0_out_layers_3.alpha": "lora_unet_up_blocks_2_resnets_1_conv2.alpha",
+        "lora_unet_output_blocks_7_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_conv2.lora_down.weight",
+        "lora_unet_output_blocks_7_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_conv2.lora_up.weight",
+        "lora_unet_output_blocks_7_0_skip_connection.alpha": "lora_unet_up_blocks_2_resnets_1_conv_shortcut.alpha",
+        "lora_unet_output_blocks_7_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_2_resnets_1_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_7_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_2_resnets_1_conv_shortcut.lora_up.weight",
+        "lora_unet_output_blocks_8_0_emb_layers_1.alpha": "lora_unet_up_blocks_2_resnets_2_time_emb_proj.alpha",
+        "lora_unet_output_blocks_8_0_emb_layers_1.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_time_emb_proj.lora_down.weight",
+        "lora_unet_output_blocks_8_0_emb_layers_1.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_time_emb_proj.lora_up.weight",
+        "lora_unet_output_blocks_8_0_in_layers_0.alpha": "lora_unet_up_blocks_2_resnets_2_norm1.alpha",
+        "lora_unet_output_blocks_8_0_in_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_norm1.lora_down.weight",
+        "lora_unet_output_blocks_8_0_in_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_norm1.lora_up.weight",
+        "lora_unet_output_blocks_8_0_in_layers_2.alpha": "lora_unet_up_blocks_2_resnets_2_conv1.alpha",
+        "lora_unet_output_blocks_8_0_in_layers_2.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_conv1.lora_down.weight",
+        "lora_unet_output_blocks_8_0_in_layers_2.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_conv1.lora_up.weight",
+        "lora_unet_output_blocks_8_0_out_layers_0.alpha": "lora_unet_up_blocks_2_resnets_2_norm2.alpha",
+        "lora_unet_output_blocks_8_0_out_layers_0.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_norm2.lora_down.weight",
+        "lora_unet_output_blocks_8_0_out_layers_0.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_norm2.lora_up.weight",
+        "lora_unet_output_blocks_8_0_out_layers_3.alpha": "lora_unet_up_blocks_2_resnets_2_conv2.alpha",
+        "lora_unet_output_blocks_8_0_out_layers_3.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_conv2.lora_down.weight",
+        "lora_unet_output_blocks_8_0_out_layers_3.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_conv2.lora_up.weight",
+        "lora_unet_output_blocks_8_0_skip_connection.alpha": "lora_unet_up_blocks_2_resnets_2_conv_shortcut.alpha",
+        "lora_unet_output_blocks_8_0_skip_connection.lora_down.weight": "lora_unet_up_blocks_2_resnets_2_conv_shortcut.lora_down.weight",
+        "lora_unet_output_blocks_8_0_skip_connection.lora_up.weight": "lora_unet_up_blocks_2_resnets_2_conv_shortcut.lora_up.weight",
+        "lora_unet_time_embed_0.alpha": "lora_unet_time_embedding_linear_1.alpha",
+        "lora_unet_time_embed_0.lora_down.weight": "lora_unet_time_embedding_linear_1.lora_down.weight",
+        "lora_unet_time_embed_0.lora_up.weight": "lora_unet_time_embedding_linear_1.lora_up.weight",
+        "lora_unet_time_embed_2.alpha": "lora_unet_time_embedding_linear_2.alpha",
+        "lora_unet_time_embed_2.lora_down.weight": "lora_unet_time_embedding_linear_2.lora_down.weight",
+        "lora_unet_time_embed_2.lora_up.weight": "lora_unet_time_embedding_linear_2.lora_up.weight"
+    }
+}
\ No newline at end of file
diff --git a/toolkit/keymaps/stable_diffusion_refiner.json b/toolkit/keymaps/stable_diffusion_refiner.json
new file mode 100644
index 00000000..4c7525d8
--- /dev/null
+++ b/toolkit/keymaps/stable_diffusion_refiner.json
@@ -0,0 +1,3498 @@
+{
+    "ldm_diffusers_keymap": {
+        "conditioner.embedders.0.model.ln_final.bias": "te1_text_model.final_layer_norm.bias",
+        "conditioner.embedders.0.model.ln_final.weight": "te1_text_model.final_layer_norm.weight",
+        "conditioner.embedders.0.model.positional_embedding": "te1_text_model.embeddings.position_embedding.weight",
+        "conditioner.embedders.0.model.token_embedding.weight": "te1_text_model.embeddings.token_embedding.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.0.attn.out_proj.bias": "te1_text_model.encoder.layers.0.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.0.attn.out_proj.weight": "te1_text_model.encoder.layers.0.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.0.ln_1.bias": "te1_text_model.encoder.layers.0.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.0.ln_1.weight": "te1_text_model.encoder.layers.0.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.0.ln_2.bias": "te1_text_model.encoder.layers.0.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.0.ln_2.weight": "te1_text_model.encoder.layers.0.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.0.mlp.c_fc.bias": "te1_text_model.encoder.layers.0.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.0.mlp.c_fc.weight": "te1_text_model.encoder.layers.0.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.0.mlp.c_proj.bias": "te1_text_model.encoder.layers.0.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.0.mlp.c_proj.weight": "te1_text_model.encoder.layers.0.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.1.attn.out_proj.bias": "te1_text_model.encoder.layers.1.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.1.attn.out_proj.weight": "te1_text_model.encoder.layers.1.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.1.ln_1.bias": "te1_text_model.encoder.layers.1.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.1.ln_1.weight": "te1_text_model.encoder.layers.1.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.1.ln_2.bias": "te1_text_model.encoder.layers.1.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.1.ln_2.weight": "te1_text_model.encoder.layers.1.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.1.mlp.c_fc.bias": "te1_text_model.encoder.layers.1.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.1.mlp.c_fc.weight": "te1_text_model.encoder.layers.1.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.1.mlp.c_proj.bias": "te1_text_model.encoder.layers.1.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.1.mlp.c_proj.weight": "te1_text_model.encoder.layers.1.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.10.attn.out_proj.bias": "te1_text_model.encoder.layers.10.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.10.ln_1.bias": "te1_text_model.encoder.layers.10.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.10.ln_1.weight": "te1_text_model.encoder.layers.10.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.10.ln_2.bias": "te1_text_model.encoder.layers.10.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.10.ln_2.weight": "te1_text_model.encoder.layers.10.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.10.mlp.c_fc.bias": "te1_text_model.encoder.layers.10.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.10.mlp.c_fc.weight": "te1_text_model.encoder.layers.10.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.10.mlp.c_proj.bias": "te1_text_model.encoder.layers.10.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.10.mlp.c_proj.weight": "te1_text_model.encoder.layers.10.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.11.attn.out_proj.bias": "te1_text_model.encoder.layers.11.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.11.ln_1.bias": "te1_text_model.encoder.layers.11.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.11.ln_1.weight": "te1_text_model.encoder.layers.11.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.11.ln_2.bias": "te1_text_model.encoder.layers.11.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.11.ln_2.weight": "te1_text_model.encoder.layers.11.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.11.mlp.c_fc.bias": "te1_text_model.encoder.layers.11.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.11.mlp.c_fc.weight": "te1_text_model.encoder.layers.11.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.11.mlp.c_proj.bias": "te1_text_model.encoder.layers.11.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.11.mlp.c_proj.weight": "te1_text_model.encoder.layers.11.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.12.attn.out_proj.bias": "te1_text_model.encoder.layers.12.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.12.ln_1.bias": "te1_text_model.encoder.layers.12.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.12.ln_1.weight": "te1_text_model.encoder.layers.12.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.12.ln_2.bias": "te1_text_model.encoder.layers.12.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.12.ln_2.weight": "te1_text_model.encoder.layers.12.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.12.mlp.c_fc.bias": "te1_text_model.encoder.layers.12.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.12.mlp.c_fc.weight": "te1_text_model.encoder.layers.12.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.12.mlp.c_proj.bias": "te1_text_model.encoder.layers.12.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.12.mlp.c_proj.weight": "te1_text_model.encoder.layers.12.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.13.attn.out_proj.bias": "te1_text_model.encoder.layers.13.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.13.ln_1.bias": "te1_text_model.encoder.layers.13.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.13.ln_1.weight": "te1_text_model.encoder.layers.13.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.13.ln_2.bias": "te1_text_model.encoder.layers.13.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.13.ln_2.weight": "te1_text_model.encoder.layers.13.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.13.mlp.c_fc.bias": "te1_text_model.encoder.layers.13.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.13.mlp.c_fc.weight": "te1_text_model.encoder.layers.13.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.13.mlp.c_proj.bias": "te1_text_model.encoder.layers.13.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.13.mlp.c_proj.weight": "te1_text_model.encoder.layers.13.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.14.attn.out_proj.bias": "te1_text_model.encoder.layers.14.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.14.attn.out_proj.weight": "te1_text_model.encoder.layers.14.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.14.ln_1.bias": "te1_text_model.encoder.layers.14.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.14.ln_1.weight": "te1_text_model.encoder.layers.14.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.14.ln_2.bias": "te1_text_model.encoder.layers.14.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.14.ln_2.weight": "te1_text_model.encoder.layers.14.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.14.mlp.c_fc.bias": "te1_text_model.encoder.layers.14.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.14.mlp.c_fc.weight": "te1_text_model.encoder.layers.14.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.14.mlp.c_proj.bias": "te1_text_model.encoder.layers.14.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.14.mlp.c_proj.weight": "te1_text_model.encoder.layers.14.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.15.attn.out_proj.bias": "te1_text_model.encoder.layers.15.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.15.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.15.ln_1.bias": "te1_text_model.encoder.layers.15.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.15.ln_1.weight": "te1_text_model.encoder.layers.15.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.15.ln_2.bias": "te1_text_model.encoder.layers.15.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.15.ln_2.weight": "te1_text_model.encoder.layers.15.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.15.mlp.c_fc.bias": "te1_text_model.encoder.layers.15.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.15.mlp.c_fc.weight": "te1_text_model.encoder.layers.15.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.15.mlp.c_proj.bias": "te1_text_model.encoder.layers.15.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.15.mlp.c_proj.weight": "te1_text_model.encoder.layers.15.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.16.attn.out_proj.bias": "te1_text_model.encoder.layers.16.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.16.attn.out_proj.weight": "te1_text_model.encoder.layers.16.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.16.ln_1.bias": "te1_text_model.encoder.layers.16.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.16.ln_1.weight": "te1_text_model.encoder.layers.16.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.16.ln_2.bias": "te1_text_model.encoder.layers.16.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.16.ln_2.weight": "te1_text_model.encoder.layers.16.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.16.mlp.c_fc.bias": "te1_text_model.encoder.layers.16.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.16.mlp.c_fc.weight": "te1_text_model.encoder.layers.16.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.16.mlp.c_proj.bias": "te1_text_model.encoder.layers.16.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.16.mlp.c_proj.weight": "te1_text_model.encoder.layers.16.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.17.attn.out_proj.bias": "te1_text_model.encoder.layers.17.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.17.attn.out_proj.weight": "te1_text_model.encoder.layers.17.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.17.ln_1.bias": "te1_text_model.encoder.layers.17.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.17.ln_1.weight": "te1_text_model.encoder.layers.17.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.17.ln_2.bias": "te1_text_model.encoder.layers.17.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.17.ln_2.weight": "te1_text_model.encoder.layers.17.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.17.mlp.c_fc.bias": "te1_text_model.encoder.layers.17.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.17.mlp.c_fc.weight": "te1_text_model.encoder.layers.17.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.17.mlp.c_proj.bias": "te1_text_model.encoder.layers.17.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.17.mlp.c_proj.weight": "te1_text_model.encoder.layers.17.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.18.attn.out_proj.bias": "te1_text_model.encoder.layers.18.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.18.attn.out_proj.weight": "te1_text_model.encoder.layers.18.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.18.ln_1.bias": "te1_text_model.encoder.layers.18.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.18.ln_1.weight": "te1_text_model.encoder.layers.18.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.18.ln_2.bias": "te1_text_model.encoder.layers.18.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.18.ln_2.weight": "te1_text_model.encoder.layers.18.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.18.mlp.c_fc.bias": "te1_text_model.encoder.layers.18.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.18.mlp.c_fc.weight": "te1_text_model.encoder.layers.18.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.18.mlp.c_proj.bias": "te1_text_model.encoder.layers.18.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.18.mlp.c_proj.weight": "te1_text_model.encoder.layers.18.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.19.attn.out_proj.bias": "te1_text_model.encoder.layers.19.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.19.attn.out_proj.weight": "te1_text_model.encoder.layers.19.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.19.ln_1.bias": "te1_text_model.encoder.layers.19.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.19.ln_1.weight": "te1_text_model.encoder.layers.19.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.19.ln_2.bias": "te1_text_model.encoder.layers.19.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.19.ln_2.weight": "te1_text_model.encoder.layers.19.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.19.mlp.c_fc.bias": "te1_text_model.encoder.layers.19.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.19.mlp.c_fc.weight": "te1_text_model.encoder.layers.19.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.19.mlp.c_proj.bias": "te1_text_model.encoder.layers.19.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.19.mlp.c_proj.weight": "te1_text_model.encoder.layers.19.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.2.attn.out_proj.bias": "te1_text_model.encoder.layers.2.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.2.ln_1.bias": "te1_text_model.encoder.layers.2.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.2.ln_1.weight": "te1_text_model.encoder.layers.2.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.2.ln_2.bias": "te1_text_model.encoder.layers.2.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.2.ln_2.weight": "te1_text_model.encoder.layers.2.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.2.mlp.c_fc.bias": "te1_text_model.encoder.layers.2.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.2.mlp.c_fc.weight": "te1_text_model.encoder.layers.2.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.2.mlp.c_proj.bias": "te1_text_model.encoder.layers.2.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.2.mlp.c_proj.weight": "te1_text_model.encoder.layers.2.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.20.attn.out_proj.bias": "te1_text_model.encoder.layers.20.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.20.attn.out_proj.weight": "te1_text_model.encoder.layers.20.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.20.ln_1.bias": "te1_text_model.encoder.layers.20.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.20.ln_1.weight": "te1_text_model.encoder.layers.20.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.20.ln_2.bias": "te1_text_model.encoder.layers.20.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.20.ln_2.weight": "te1_text_model.encoder.layers.20.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.20.mlp.c_fc.bias": "te1_text_model.encoder.layers.20.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.20.mlp.c_fc.weight": "te1_text_model.encoder.layers.20.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.20.mlp.c_proj.bias": "te1_text_model.encoder.layers.20.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.20.mlp.c_proj.weight": "te1_text_model.encoder.layers.20.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.21.attn.out_proj.bias": "te1_text_model.encoder.layers.21.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.21.attn.out_proj.weight": "te1_text_model.encoder.layers.21.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.21.ln_1.bias": "te1_text_model.encoder.layers.21.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.21.ln_1.weight": "te1_text_model.encoder.layers.21.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.21.ln_2.bias": "te1_text_model.encoder.layers.21.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.21.ln_2.weight": "te1_text_model.encoder.layers.21.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.21.mlp.c_fc.bias": "te1_text_model.encoder.layers.21.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.21.mlp.c_fc.weight": "te1_text_model.encoder.layers.21.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.21.mlp.c_proj.bias": "te1_text_model.encoder.layers.21.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.21.mlp.c_proj.weight": "te1_text_model.encoder.layers.21.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.22.attn.out_proj.bias": "te1_text_model.encoder.layers.22.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.22.attn.out_proj.weight": "te1_text_model.encoder.layers.22.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.22.ln_1.bias": "te1_text_model.encoder.layers.22.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.22.ln_1.weight": "te1_text_model.encoder.layers.22.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.22.ln_2.bias": "te1_text_model.encoder.layers.22.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.22.ln_2.weight": "te1_text_model.encoder.layers.22.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.22.mlp.c_fc.bias": "te1_text_model.encoder.layers.22.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.22.mlp.c_fc.weight": "te1_text_model.encoder.layers.22.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.22.mlp.c_proj.bias": "te1_text_model.encoder.layers.22.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.22.mlp.c_proj.weight": "te1_text_model.encoder.layers.22.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.23.attn.out_proj.bias": "te1_text_model.encoder.layers.23.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.23.attn.out_proj.weight": "te1_text_model.encoder.layers.23.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.23.ln_1.bias": "te1_text_model.encoder.layers.23.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.23.ln_1.weight": "te1_text_model.encoder.layers.23.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.23.ln_2.bias": "te1_text_model.encoder.layers.23.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.23.ln_2.weight": "te1_text_model.encoder.layers.23.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.23.mlp.c_fc.bias": "te1_text_model.encoder.layers.23.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.23.mlp.c_fc.weight": "te1_text_model.encoder.layers.23.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.23.mlp.c_proj.bias": "te1_text_model.encoder.layers.23.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.23.mlp.c_proj.weight": "te1_text_model.encoder.layers.23.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.24.attn.out_proj.bias": "te1_text_model.encoder.layers.24.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.24.attn.out_proj.weight": "te1_text_model.encoder.layers.24.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.24.ln_1.bias": "te1_text_model.encoder.layers.24.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.24.ln_1.weight": "te1_text_model.encoder.layers.24.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.24.ln_2.bias": "te1_text_model.encoder.layers.24.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.24.ln_2.weight": "te1_text_model.encoder.layers.24.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.24.mlp.c_fc.bias": "te1_text_model.encoder.layers.24.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.24.mlp.c_fc.weight": "te1_text_model.encoder.layers.24.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.24.mlp.c_proj.bias": "te1_text_model.encoder.layers.24.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.24.mlp.c_proj.weight": "te1_text_model.encoder.layers.24.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.25.attn.out_proj.bias": "te1_text_model.encoder.layers.25.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.25.attn.out_proj.weight": "te1_text_model.encoder.layers.25.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.25.ln_1.bias": "te1_text_model.encoder.layers.25.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.25.ln_1.weight": "te1_text_model.encoder.layers.25.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.25.ln_2.bias": "te1_text_model.encoder.layers.25.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.25.ln_2.weight": "te1_text_model.encoder.layers.25.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.25.mlp.c_fc.bias": "te1_text_model.encoder.layers.25.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.25.mlp.c_fc.weight": "te1_text_model.encoder.layers.25.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.25.mlp.c_proj.bias": "te1_text_model.encoder.layers.25.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.25.mlp.c_proj.weight": "te1_text_model.encoder.layers.25.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.26.attn.out_proj.bias": "te1_text_model.encoder.layers.26.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.26.attn.out_proj.weight": "te1_text_model.encoder.layers.26.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.26.ln_1.bias": "te1_text_model.encoder.layers.26.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.26.ln_1.weight": "te1_text_model.encoder.layers.26.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.26.ln_2.bias": "te1_text_model.encoder.layers.26.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.26.ln_2.weight": "te1_text_model.encoder.layers.26.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.26.mlp.c_fc.bias": "te1_text_model.encoder.layers.26.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.26.mlp.c_fc.weight": "te1_text_model.encoder.layers.26.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.26.mlp.c_proj.bias": "te1_text_model.encoder.layers.26.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.26.mlp.c_proj.weight": "te1_text_model.encoder.layers.26.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.27.attn.out_proj.bias": "te1_text_model.encoder.layers.27.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.27.attn.out_proj.weight": "te1_text_model.encoder.layers.27.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.27.ln_1.bias": "te1_text_model.encoder.layers.27.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.27.ln_1.weight": "te1_text_model.encoder.layers.27.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.27.ln_2.bias": "te1_text_model.encoder.layers.27.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.27.ln_2.weight": "te1_text_model.encoder.layers.27.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.27.mlp.c_fc.bias": "te1_text_model.encoder.layers.27.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.27.mlp.c_fc.weight": "te1_text_model.encoder.layers.27.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.27.mlp.c_proj.bias": "te1_text_model.encoder.layers.27.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.27.mlp.c_proj.weight": "te1_text_model.encoder.layers.27.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.28.attn.out_proj.bias": "te1_text_model.encoder.layers.28.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.28.attn.out_proj.weight": "te1_text_model.encoder.layers.28.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.28.ln_1.bias": "te1_text_model.encoder.layers.28.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.28.ln_1.weight": "te1_text_model.encoder.layers.28.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.28.ln_2.bias": "te1_text_model.encoder.layers.28.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.28.ln_2.weight": "te1_text_model.encoder.layers.28.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.28.mlp.c_fc.bias": "te1_text_model.encoder.layers.28.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.28.mlp.c_fc.weight": "te1_text_model.encoder.layers.28.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.28.mlp.c_proj.bias": "te1_text_model.encoder.layers.28.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.28.mlp.c_proj.weight": "te1_text_model.encoder.layers.28.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.29.attn.out_proj.bias": "te1_text_model.encoder.layers.29.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.29.attn.out_proj.weight": "te1_text_model.encoder.layers.29.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.29.ln_1.bias": "te1_text_model.encoder.layers.29.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.29.ln_1.weight": "te1_text_model.encoder.layers.29.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.29.ln_2.bias": "te1_text_model.encoder.layers.29.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.29.ln_2.weight": "te1_text_model.encoder.layers.29.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.29.mlp.c_fc.bias": "te1_text_model.encoder.layers.29.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.29.mlp.c_fc.weight": "te1_text_model.encoder.layers.29.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.29.mlp.c_proj.bias": "te1_text_model.encoder.layers.29.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.29.mlp.c_proj.weight": "te1_text_model.encoder.layers.29.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.3.attn.out_proj.bias": "te1_text_model.encoder.layers.3.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.3.ln_1.bias": "te1_text_model.encoder.layers.3.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.3.ln_1.weight": "te1_text_model.encoder.layers.3.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.3.ln_2.bias": "te1_text_model.encoder.layers.3.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.3.ln_2.weight": "te1_text_model.encoder.layers.3.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.3.mlp.c_fc.bias": "te1_text_model.encoder.layers.3.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.3.mlp.c_fc.weight": "te1_text_model.encoder.layers.3.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.3.mlp.c_proj.bias": "te1_text_model.encoder.layers.3.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.3.mlp.c_proj.weight": "te1_text_model.encoder.layers.3.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.30.attn.out_proj.bias": "te1_text_model.encoder.layers.30.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.30.attn.out_proj.weight": "te1_text_model.encoder.layers.30.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.30.ln_1.bias": "te1_text_model.encoder.layers.30.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.30.ln_1.weight": "te1_text_model.encoder.layers.30.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.30.ln_2.bias": "te1_text_model.encoder.layers.30.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.30.ln_2.weight": "te1_text_model.encoder.layers.30.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.30.mlp.c_fc.bias": "te1_text_model.encoder.layers.30.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.30.mlp.c_fc.weight": "te1_text_model.encoder.layers.30.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.30.mlp.c_proj.bias": "te1_text_model.encoder.layers.30.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.30.mlp.c_proj.weight": "te1_text_model.encoder.layers.30.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.31.attn.out_proj.bias": "te1_text_model.encoder.layers.31.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.31.attn.out_proj.weight": "te1_text_model.encoder.layers.31.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.31.ln_1.bias": "te1_text_model.encoder.layers.31.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.31.ln_1.weight": "te1_text_model.encoder.layers.31.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.31.ln_2.bias": "te1_text_model.encoder.layers.31.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.31.ln_2.weight": "te1_text_model.encoder.layers.31.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.31.mlp.c_fc.bias": "te1_text_model.encoder.layers.31.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.31.mlp.c_fc.weight": "te1_text_model.encoder.layers.31.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.31.mlp.c_proj.bias": "te1_text_model.encoder.layers.31.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.31.mlp.c_proj.weight": "te1_text_model.encoder.layers.31.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.4.attn.out_proj.bias": "te1_text_model.encoder.layers.4.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.4.ln_1.bias": "te1_text_model.encoder.layers.4.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.4.ln_1.weight": "te1_text_model.encoder.layers.4.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.4.ln_2.bias": "te1_text_model.encoder.layers.4.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.4.ln_2.weight": "te1_text_model.encoder.layers.4.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.4.mlp.c_fc.bias": "te1_text_model.encoder.layers.4.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.4.mlp.c_fc.weight": "te1_text_model.encoder.layers.4.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.4.mlp.c_proj.bias": "te1_text_model.encoder.layers.4.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.4.mlp.c_proj.weight": "te1_text_model.encoder.layers.4.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.5.attn.out_proj.bias": "te1_text_model.encoder.layers.5.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.5.ln_1.bias": "te1_text_model.encoder.layers.5.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.5.ln_1.weight": "te1_text_model.encoder.layers.5.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.5.ln_2.bias": "te1_text_model.encoder.layers.5.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.5.ln_2.weight": "te1_text_model.encoder.layers.5.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.5.mlp.c_fc.bias": "te1_text_model.encoder.layers.5.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.5.mlp.c_fc.weight": "te1_text_model.encoder.layers.5.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.5.mlp.c_proj.bias": "te1_text_model.encoder.layers.5.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.5.mlp.c_proj.weight": "te1_text_model.encoder.layers.5.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.6.attn.out_proj.bias": "te1_text_model.encoder.layers.6.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.6.ln_1.bias": "te1_text_model.encoder.layers.6.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.6.ln_1.weight": "te1_text_model.encoder.layers.6.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.6.ln_2.bias": "te1_text_model.encoder.layers.6.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.6.ln_2.weight": "te1_text_model.encoder.layers.6.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.6.mlp.c_fc.bias": "te1_text_model.encoder.layers.6.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.6.mlp.c_fc.weight": "te1_text_model.encoder.layers.6.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.6.mlp.c_proj.bias": "te1_text_model.encoder.layers.6.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.6.mlp.c_proj.weight": "te1_text_model.encoder.layers.6.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.7.attn.out_proj.bias": "te1_text_model.encoder.layers.7.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.7.ln_1.bias": "te1_text_model.encoder.layers.7.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.7.ln_1.weight": "te1_text_model.encoder.layers.7.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.7.ln_2.bias": "te1_text_model.encoder.layers.7.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.7.ln_2.weight": "te1_text_model.encoder.layers.7.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.7.mlp.c_fc.bias": "te1_text_model.encoder.layers.7.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.7.mlp.c_fc.weight": "te1_text_model.encoder.layers.7.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.7.mlp.c_proj.bias": "te1_text_model.encoder.layers.7.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.7.mlp.c_proj.weight": "te1_text_model.encoder.layers.7.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.8.attn.out_proj.bias": "te1_text_model.encoder.layers.8.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.8.ln_1.bias": "te1_text_model.encoder.layers.8.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.8.ln_1.weight": "te1_text_model.encoder.layers.8.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.8.ln_2.bias": "te1_text_model.encoder.layers.8.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.8.ln_2.weight": "te1_text_model.encoder.layers.8.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.8.mlp.c_fc.bias": "te1_text_model.encoder.layers.8.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.8.mlp.c_fc.weight": "te1_text_model.encoder.layers.8.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.8.mlp.c_proj.bias": "te1_text_model.encoder.layers.8.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.8.mlp.c_proj.weight": "te1_text_model.encoder.layers.8.mlp.fc2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.9.attn.out_proj.bias": "te1_text_model.encoder.layers.9.self_attn.out_proj.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.9.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.9.ln_1.bias": "te1_text_model.encoder.layers.9.layer_norm1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.9.ln_1.weight": "te1_text_model.encoder.layers.9.layer_norm1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.9.ln_2.bias": "te1_text_model.encoder.layers.9.layer_norm2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.9.ln_2.weight": "te1_text_model.encoder.layers.9.layer_norm2.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_fc.bias": "te1_text_model.encoder.layers.9.mlp.fc1.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_fc.weight": "te1_text_model.encoder.layers.9.mlp.fc1.weight",
+        "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_proj.bias": "te1_text_model.encoder.layers.9.mlp.fc2.bias",
+        "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_proj.weight": "te1_text_model.encoder.layers.9.mlp.fc2.weight",
+        "first_stage_model.decoder.conv_in.bias": "vae_decoder.conv_in.bias",
+        "first_stage_model.decoder.conv_in.weight": "vae_decoder.conv_in.weight",
+        "first_stage_model.decoder.conv_out.bias": "vae_decoder.conv_out.bias",
+        "first_stage_model.decoder.conv_out.weight": "vae_decoder.conv_out.weight",
+        "first_stage_model.decoder.mid.attn_1.k.bias": "vae_decoder.mid_block.attentions.0.to_k.bias",
+        "first_stage_model.decoder.mid.attn_1.k.weight": "vae_decoder.mid_block.attentions.0.to_k.weight",
+        "first_stage_model.decoder.mid.attn_1.norm.bias": "vae_decoder.mid_block.attentions.0.group_norm.bias",
+        "first_stage_model.decoder.mid.attn_1.norm.weight": "vae_decoder.mid_block.attentions.0.group_norm.weight",
+        "first_stage_model.decoder.mid.attn_1.proj_out.bias": "vae_decoder.mid_block.attentions.0.to_out.0.bias",
+        "first_stage_model.decoder.mid.attn_1.proj_out.weight": "vae_decoder.mid_block.attentions.0.to_out.0.weight",
+        "first_stage_model.decoder.mid.attn_1.q.bias": "vae_decoder.mid_block.attentions.0.to_q.bias",
+        "first_stage_model.decoder.mid.attn_1.q.weight": "vae_decoder.mid_block.attentions.0.to_q.weight",
+        "first_stage_model.decoder.mid.attn_1.v.bias": "vae_decoder.mid_block.attentions.0.to_v.bias",
+        "first_stage_model.decoder.mid.attn_1.v.weight": "vae_decoder.mid_block.attentions.0.to_v.weight",
+        "first_stage_model.decoder.mid.block_1.conv1.bias": "vae_decoder.mid_block.resnets.0.conv1.bias",
+        "first_stage_model.decoder.mid.block_1.conv1.weight": "vae_decoder.mid_block.resnets.0.conv1.weight",
+        "first_stage_model.decoder.mid.block_1.conv2.bias": "vae_decoder.mid_block.resnets.0.conv2.bias",
+        "first_stage_model.decoder.mid.block_1.conv2.weight": "vae_decoder.mid_block.resnets.0.conv2.weight",
+        "first_stage_model.decoder.mid.block_1.norm1.bias": "vae_decoder.mid_block.resnets.0.norm1.bias",
+        "first_stage_model.decoder.mid.block_1.norm1.weight": "vae_decoder.mid_block.resnets.0.norm1.weight",
+        "first_stage_model.decoder.mid.block_1.norm2.bias": "vae_decoder.mid_block.resnets.0.norm2.bias",
+        "first_stage_model.decoder.mid.block_1.norm2.weight": "vae_decoder.mid_block.resnets.0.norm2.weight",
+        "first_stage_model.decoder.mid.block_2.conv1.bias": "vae_decoder.mid_block.resnets.1.conv1.bias",
+        "first_stage_model.decoder.mid.block_2.conv1.weight": "vae_decoder.mid_block.resnets.1.conv1.weight",
+        "first_stage_model.decoder.mid.block_2.conv2.bias": "vae_decoder.mid_block.resnets.1.conv2.bias",
+        "first_stage_model.decoder.mid.block_2.conv2.weight": "vae_decoder.mid_block.resnets.1.conv2.weight",
+        "first_stage_model.decoder.mid.block_2.norm1.bias": "vae_decoder.mid_block.resnets.1.norm1.bias",
+        "first_stage_model.decoder.mid.block_2.norm1.weight": "vae_decoder.mid_block.resnets.1.norm1.weight",
+        "first_stage_model.decoder.mid.block_2.norm2.bias": "vae_decoder.mid_block.resnets.1.norm2.bias",
+        "first_stage_model.decoder.mid.block_2.norm2.weight": "vae_decoder.mid_block.resnets.1.norm2.weight",
+        "first_stage_model.decoder.norm_out.bias": "vae_decoder.conv_norm_out.bias",
+        "first_stage_model.decoder.norm_out.weight": "vae_decoder.conv_norm_out.weight",
+        "first_stage_model.decoder.up.0.block.0.conv1.bias": "vae_decoder.up_blocks.3.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.0.block.0.conv1.weight": "vae_decoder.up_blocks.3.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.0.block.0.conv2.bias": "vae_decoder.up_blocks.3.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.0.block.0.conv2.weight": "vae_decoder.up_blocks.3.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.bias",
+        "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.weight",
+        "first_stage_model.decoder.up.0.block.0.norm1.bias": "vae_decoder.up_blocks.3.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.0.block.0.norm1.weight": "vae_decoder.up_blocks.3.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.0.block.0.norm2.bias": "vae_decoder.up_blocks.3.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.0.block.0.norm2.weight": "vae_decoder.up_blocks.3.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.0.block.1.conv1.bias": "vae_decoder.up_blocks.3.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.0.block.1.conv1.weight": "vae_decoder.up_blocks.3.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.0.block.1.conv2.bias": "vae_decoder.up_blocks.3.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.0.block.1.conv2.weight": "vae_decoder.up_blocks.3.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.0.block.1.norm1.bias": "vae_decoder.up_blocks.3.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.0.block.1.norm1.weight": "vae_decoder.up_blocks.3.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.0.block.1.norm2.bias": "vae_decoder.up_blocks.3.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.0.block.1.norm2.weight": "vae_decoder.up_blocks.3.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.0.block.2.conv1.bias": "vae_decoder.up_blocks.3.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.0.block.2.conv1.weight": "vae_decoder.up_blocks.3.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.0.block.2.conv2.bias": "vae_decoder.up_blocks.3.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.0.block.2.conv2.weight": "vae_decoder.up_blocks.3.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.0.block.2.norm1.bias": "vae_decoder.up_blocks.3.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.0.block.2.norm1.weight": "vae_decoder.up_blocks.3.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.0.block.2.norm2.bias": "vae_decoder.up_blocks.3.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.0.block.2.norm2.weight": "vae_decoder.up_blocks.3.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.1.block.0.conv1.bias": "vae_decoder.up_blocks.2.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.1.block.0.conv1.weight": "vae_decoder.up_blocks.2.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.1.block.0.conv2.bias": "vae_decoder.up_blocks.2.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.1.block.0.conv2.weight": "vae_decoder.up_blocks.2.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.bias",
+        "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.weight",
+        "first_stage_model.decoder.up.1.block.0.norm1.bias": "vae_decoder.up_blocks.2.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.1.block.0.norm1.weight": "vae_decoder.up_blocks.2.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.1.block.0.norm2.bias": "vae_decoder.up_blocks.2.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.1.block.0.norm2.weight": "vae_decoder.up_blocks.2.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.1.block.1.conv1.bias": "vae_decoder.up_blocks.2.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.1.block.1.conv1.weight": "vae_decoder.up_blocks.2.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.1.block.1.conv2.bias": "vae_decoder.up_blocks.2.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.1.block.1.conv2.weight": "vae_decoder.up_blocks.2.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.1.block.1.norm1.bias": "vae_decoder.up_blocks.2.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.1.block.1.norm1.weight": "vae_decoder.up_blocks.2.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.1.block.1.norm2.bias": "vae_decoder.up_blocks.2.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.1.block.1.norm2.weight": "vae_decoder.up_blocks.2.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.1.block.2.conv1.bias": "vae_decoder.up_blocks.2.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.1.block.2.conv1.weight": "vae_decoder.up_blocks.2.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.1.block.2.conv2.bias": "vae_decoder.up_blocks.2.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.1.block.2.conv2.weight": "vae_decoder.up_blocks.2.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.1.block.2.norm1.bias": "vae_decoder.up_blocks.2.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.1.block.2.norm1.weight": "vae_decoder.up_blocks.2.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.1.block.2.norm2.bias": "vae_decoder.up_blocks.2.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.1.block.2.norm2.weight": "vae_decoder.up_blocks.2.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.1.upsample.conv.bias": "vae_decoder.up_blocks.2.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.1.upsample.conv.weight": "vae_decoder.up_blocks.2.upsamplers.0.conv.weight",
+        "first_stage_model.decoder.up.2.block.0.conv1.bias": "vae_decoder.up_blocks.1.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.2.block.0.conv1.weight": "vae_decoder.up_blocks.1.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.2.block.0.conv2.bias": "vae_decoder.up_blocks.1.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.2.block.0.conv2.weight": "vae_decoder.up_blocks.1.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.2.block.0.norm1.bias": "vae_decoder.up_blocks.1.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.2.block.0.norm1.weight": "vae_decoder.up_blocks.1.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.2.block.0.norm2.bias": "vae_decoder.up_blocks.1.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.2.block.0.norm2.weight": "vae_decoder.up_blocks.1.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.2.block.1.conv1.bias": "vae_decoder.up_blocks.1.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.2.block.1.conv1.weight": "vae_decoder.up_blocks.1.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.2.block.1.conv2.bias": "vae_decoder.up_blocks.1.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.2.block.1.conv2.weight": "vae_decoder.up_blocks.1.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.2.block.1.norm1.bias": "vae_decoder.up_blocks.1.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.2.block.1.norm1.weight": "vae_decoder.up_blocks.1.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.2.block.1.norm2.bias": "vae_decoder.up_blocks.1.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.2.block.1.norm2.weight": "vae_decoder.up_blocks.1.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.2.block.2.conv1.bias": "vae_decoder.up_blocks.1.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.2.block.2.conv1.weight": "vae_decoder.up_blocks.1.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.2.block.2.conv2.bias": "vae_decoder.up_blocks.1.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.2.block.2.conv2.weight": "vae_decoder.up_blocks.1.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.2.block.2.norm1.bias": "vae_decoder.up_blocks.1.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.2.block.2.norm1.weight": "vae_decoder.up_blocks.1.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.2.block.2.norm2.bias": "vae_decoder.up_blocks.1.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.2.block.2.norm2.weight": "vae_decoder.up_blocks.1.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.2.upsample.conv.bias": "vae_decoder.up_blocks.1.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.2.upsample.conv.weight": "vae_decoder.up_blocks.1.upsamplers.0.conv.weight",
+        "first_stage_model.decoder.up.3.block.0.conv1.bias": "vae_decoder.up_blocks.0.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.3.block.0.conv1.weight": "vae_decoder.up_blocks.0.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.3.block.0.conv2.bias": "vae_decoder.up_blocks.0.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.3.block.0.conv2.weight": "vae_decoder.up_blocks.0.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.3.block.0.norm1.bias": "vae_decoder.up_blocks.0.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.3.block.0.norm1.weight": "vae_decoder.up_blocks.0.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.3.block.0.norm2.bias": "vae_decoder.up_blocks.0.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.3.block.0.norm2.weight": "vae_decoder.up_blocks.0.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.3.block.1.conv1.bias": "vae_decoder.up_blocks.0.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.3.block.1.conv1.weight": "vae_decoder.up_blocks.0.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.3.block.1.conv2.bias": "vae_decoder.up_blocks.0.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.3.block.1.conv2.weight": "vae_decoder.up_blocks.0.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.3.block.1.norm1.bias": "vae_decoder.up_blocks.0.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.3.block.1.norm1.weight": "vae_decoder.up_blocks.0.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.3.block.1.norm2.bias": "vae_decoder.up_blocks.0.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.3.block.1.norm2.weight": "vae_decoder.up_blocks.0.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.3.block.2.conv1.bias": "vae_decoder.up_blocks.0.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.3.block.2.conv1.weight": "vae_decoder.up_blocks.0.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.3.block.2.conv2.bias": "vae_decoder.up_blocks.0.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.3.block.2.conv2.weight": "vae_decoder.up_blocks.0.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.3.block.2.norm1.bias": "vae_decoder.up_blocks.0.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.3.block.2.norm1.weight": "vae_decoder.up_blocks.0.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.3.block.2.norm2.bias": "vae_decoder.up_blocks.0.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.3.block.2.norm2.weight": "vae_decoder.up_blocks.0.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.3.upsample.conv.bias": "vae_decoder.up_blocks.0.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.3.upsample.conv.weight": "vae_decoder.up_blocks.0.upsamplers.0.conv.weight",
+        "first_stage_model.encoder.conv_in.bias": "vae_encoder.conv_in.bias",
+        "first_stage_model.encoder.conv_in.weight": "vae_encoder.conv_in.weight",
+        "first_stage_model.encoder.conv_out.bias": "vae_encoder.conv_out.bias",
+        "first_stage_model.encoder.conv_out.weight": "vae_encoder.conv_out.weight",
+        "first_stage_model.encoder.down.0.block.0.conv1.bias": "vae_encoder.down_blocks.0.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.0.block.0.conv1.weight": "vae_encoder.down_blocks.0.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.0.block.0.conv2.bias": "vae_encoder.down_blocks.0.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.0.block.0.conv2.weight": "vae_encoder.down_blocks.0.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.0.block.0.norm1.bias": "vae_encoder.down_blocks.0.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.0.block.0.norm1.weight": "vae_encoder.down_blocks.0.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.0.block.0.norm2.bias": "vae_encoder.down_blocks.0.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.0.block.0.norm2.weight": "vae_encoder.down_blocks.0.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.0.block.1.conv1.bias": "vae_encoder.down_blocks.0.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.0.block.1.conv1.weight": "vae_encoder.down_blocks.0.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.0.block.1.conv2.bias": "vae_encoder.down_blocks.0.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.0.block.1.conv2.weight": "vae_encoder.down_blocks.0.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.0.block.1.norm1.bias": "vae_encoder.down_blocks.0.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.0.block.1.norm1.weight": "vae_encoder.down_blocks.0.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.0.block.1.norm2.bias": "vae_encoder.down_blocks.0.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.0.block.1.norm2.weight": "vae_encoder.down_blocks.0.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.0.downsample.conv.bias": "vae_encoder.down_blocks.0.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.0.downsample.conv.weight": "vae_encoder.down_blocks.0.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.1.block.0.conv1.bias": "vae_encoder.down_blocks.1.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.1.block.0.conv1.weight": "vae_encoder.down_blocks.1.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.1.block.0.conv2.bias": "vae_encoder.down_blocks.1.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.1.block.0.conv2.weight": "vae_encoder.down_blocks.1.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.bias",
+        "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.weight",
+        "first_stage_model.encoder.down.1.block.0.norm1.bias": "vae_encoder.down_blocks.1.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.1.block.0.norm1.weight": "vae_encoder.down_blocks.1.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.1.block.0.norm2.bias": "vae_encoder.down_blocks.1.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.1.block.0.norm2.weight": "vae_encoder.down_blocks.1.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.1.block.1.conv1.bias": "vae_encoder.down_blocks.1.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.1.block.1.conv1.weight": "vae_encoder.down_blocks.1.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.1.block.1.conv2.bias": "vae_encoder.down_blocks.1.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.1.block.1.conv2.weight": "vae_encoder.down_blocks.1.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.1.block.1.norm1.bias": "vae_encoder.down_blocks.1.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.1.block.1.norm1.weight": "vae_encoder.down_blocks.1.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.1.block.1.norm2.bias": "vae_encoder.down_blocks.1.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.1.block.1.norm2.weight": "vae_encoder.down_blocks.1.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.1.downsample.conv.bias": "vae_encoder.down_blocks.1.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.1.downsample.conv.weight": "vae_encoder.down_blocks.1.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.2.block.0.conv1.bias": "vae_encoder.down_blocks.2.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.2.block.0.conv1.weight": "vae_encoder.down_blocks.2.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.2.block.0.conv2.bias": "vae_encoder.down_blocks.2.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.2.block.0.conv2.weight": "vae_encoder.down_blocks.2.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.bias",
+        "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.weight",
+        "first_stage_model.encoder.down.2.block.0.norm1.bias": "vae_encoder.down_blocks.2.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.2.block.0.norm1.weight": "vae_encoder.down_blocks.2.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.2.block.0.norm2.bias": "vae_encoder.down_blocks.2.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.2.block.0.norm2.weight": "vae_encoder.down_blocks.2.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.2.block.1.conv1.bias": "vae_encoder.down_blocks.2.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.2.block.1.conv1.weight": "vae_encoder.down_blocks.2.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.2.block.1.conv2.bias": "vae_encoder.down_blocks.2.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.2.block.1.conv2.weight": "vae_encoder.down_blocks.2.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.2.block.1.norm1.bias": "vae_encoder.down_blocks.2.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.2.block.1.norm1.weight": "vae_encoder.down_blocks.2.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.2.block.1.norm2.bias": "vae_encoder.down_blocks.2.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.2.block.1.norm2.weight": "vae_encoder.down_blocks.2.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.2.downsample.conv.bias": "vae_encoder.down_blocks.2.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.2.downsample.conv.weight": "vae_encoder.down_blocks.2.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.3.block.0.conv1.bias": "vae_encoder.down_blocks.3.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.3.block.0.conv1.weight": "vae_encoder.down_blocks.3.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.3.block.0.conv2.bias": "vae_encoder.down_blocks.3.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.3.block.0.conv2.weight": "vae_encoder.down_blocks.3.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.3.block.0.norm1.bias": "vae_encoder.down_blocks.3.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.3.block.0.norm1.weight": "vae_encoder.down_blocks.3.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.3.block.0.norm2.bias": "vae_encoder.down_blocks.3.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.3.block.0.norm2.weight": "vae_encoder.down_blocks.3.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.3.block.1.conv1.bias": "vae_encoder.down_blocks.3.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.3.block.1.conv1.weight": "vae_encoder.down_blocks.3.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.3.block.1.conv2.bias": "vae_encoder.down_blocks.3.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.3.block.1.conv2.weight": "vae_encoder.down_blocks.3.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.3.block.1.norm1.bias": "vae_encoder.down_blocks.3.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.3.block.1.norm1.weight": "vae_encoder.down_blocks.3.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.3.block.1.norm2.bias": "vae_encoder.down_blocks.3.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.3.block.1.norm2.weight": "vae_encoder.down_blocks.3.resnets.1.norm2.weight",
+        "first_stage_model.encoder.mid.attn_1.k.bias": "vae_encoder.mid_block.attentions.0.to_k.bias",
+        "first_stage_model.encoder.mid.attn_1.k.weight": "vae_encoder.mid_block.attentions.0.to_k.weight",
+        "first_stage_model.encoder.mid.attn_1.norm.bias": "vae_encoder.mid_block.attentions.0.group_norm.bias",
+        "first_stage_model.encoder.mid.attn_1.norm.weight": "vae_encoder.mid_block.attentions.0.group_norm.weight",
+        "first_stage_model.encoder.mid.attn_1.proj_out.bias": "vae_encoder.mid_block.attentions.0.to_out.0.bias",
+        "first_stage_model.encoder.mid.attn_1.proj_out.weight": "vae_encoder.mid_block.attentions.0.to_out.0.weight",
+        "first_stage_model.encoder.mid.attn_1.q.bias": "vae_encoder.mid_block.attentions.0.to_q.bias",
+        "first_stage_model.encoder.mid.attn_1.q.weight": "vae_encoder.mid_block.attentions.0.to_q.weight",
+        "first_stage_model.encoder.mid.attn_1.v.bias": "vae_encoder.mid_block.attentions.0.to_v.bias",
+        "first_stage_model.encoder.mid.attn_1.v.weight": "vae_encoder.mid_block.attentions.0.to_v.weight",
+        "first_stage_model.encoder.mid.block_1.conv1.bias": "vae_encoder.mid_block.resnets.0.conv1.bias",
+        "first_stage_model.encoder.mid.block_1.conv1.weight": "vae_encoder.mid_block.resnets.0.conv1.weight",
+        "first_stage_model.encoder.mid.block_1.conv2.bias": "vae_encoder.mid_block.resnets.0.conv2.bias",
+        "first_stage_model.encoder.mid.block_1.conv2.weight": "vae_encoder.mid_block.resnets.0.conv2.weight",
+        "first_stage_model.encoder.mid.block_1.norm1.bias": "vae_encoder.mid_block.resnets.0.norm1.bias",
+        "first_stage_model.encoder.mid.block_1.norm1.weight": "vae_encoder.mid_block.resnets.0.norm1.weight",
+        "first_stage_model.encoder.mid.block_1.norm2.bias": "vae_encoder.mid_block.resnets.0.norm2.bias",
+        "first_stage_model.encoder.mid.block_1.norm2.weight": "vae_encoder.mid_block.resnets.0.norm2.weight",
+        "first_stage_model.encoder.mid.block_2.conv1.bias": "vae_encoder.mid_block.resnets.1.conv1.bias",
+        "first_stage_model.encoder.mid.block_2.conv1.weight": "vae_encoder.mid_block.resnets.1.conv1.weight",
+        "first_stage_model.encoder.mid.block_2.conv2.bias": "vae_encoder.mid_block.resnets.1.conv2.bias",
+        "first_stage_model.encoder.mid.block_2.conv2.weight": "vae_encoder.mid_block.resnets.1.conv2.weight",
+        "first_stage_model.encoder.mid.block_2.norm1.bias": "vae_encoder.mid_block.resnets.1.norm1.bias",
+        "first_stage_model.encoder.mid.block_2.norm1.weight": "vae_encoder.mid_block.resnets.1.norm1.weight",
+        "first_stage_model.encoder.mid.block_2.norm2.bias": "vae_encoder.mid_block.resnets.1.norm2.bias",
+        "first_stage_model.encoder.mid.block_2.norm2.weight": "vae_encoder.mid_block.resnets.1.norm2.weight",
+        "first_stage_model.encoder.norm_out.bias": "vae_encoder.conv_norm_out.bias",
+        "first_stage_model.encoder.norm_out.weight": "vae_encoder.conv_norm_out.weight",
+        "first_stage_model.post_quant_conv.bias": "vae_post_quant_conv.bias",
+        "first_stage_model.post_quant_conv.weight": "vae_post_quant_conv.weight",
+        "first_stage_model.quant_conv.bias": "vae_quant_conv.bias",
+        "first_stage_model.quant_conv.weight": "vae_quant_conv.weight",
+        "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias",
+        "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "unet_down_blocks.0.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "unet_down_blocks.0.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "unet_down_blocks.3.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "unet_down_blocks.3.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "unet_down_blocks.3.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "unet_down_blocks.3.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "unet_down_blocks.3.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "unet_down_blocks.3.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "unet_down_blocks.3.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "unet_down_blocks.3.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "unet_down_blocks.3.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "unet_down_blocks.3.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "unet_down_blocks.3.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "unet_down_blocks.3.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "unet_down_blocks.3.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "unet_down_blocks.3.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "unet_down_blocks.0.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "unet_down_blocks.0.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias",
+        "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "unet_down_blocks.1.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "unet_down_blocks.1.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "unet_down_blocks.1.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "unet_down_blocks.1.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.input_blocks.4.1.norm.bias": "unet_down_blocks.1.attentions.0.norm.bias",
+        "model.diffusion_model.input_blocks.4.1.norm.weight": "unet_down_blocks.1.attentions.0.norm.weight",
+        "model.diffusion_model.input_blocks.4.1.proj_in.bias": "unet_down_blocks.1.attentions.0.proj_in.bias",
+        "model.diffusion_model.input_blocks.4.1.proj_in.weight": "unet_down_blocks.1.attentions.0.proj_in.weight",
+        "model.diffusion_model.input_blocks.4.1.proj_out.bias": "unet_down_blocks.1.attentions.0.proj_out.bias",
+        "model.diffusion_model.input_blocks.4.1.proj_out.weight": "unet_down_blocks.1.attentions.0.proj_out.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "unet_down_blocks.1.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "unet_down_blocks.1.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.5.1.norm.bias": "unet_down_blocks.1.attentions.1.norm.bias",
+        "model.diffusion_model.input_blocks.5.1.norm.weight": "unet_down_blocks.1.attentions.1.norm.weight",
+        "model.diffusion_model.input_blocks.5.1.proj_in.bias": "unet_down_blocks.1.attentions.1.proj_in.bias",
+        "model.diffusion_model.input_blocks.5.1.proj_in.weight": "unet_down_blocks.1.attentions.1.proj_in.weight",
+        "model.diffusion_model.input_blocks.5.1.proj_out.bias": "unet_down_blocks.1.attentions.1.proj_out.bias",
+        "model.diffusion_model.input_blocks.5.1.proj_out.weight": "unet_down_blocks.1.attentions.1.proj_out.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias",
+        "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "unet_down_blocks.2.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "unet_down_blocks.2.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "unet_down_blocks.2.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "unet_down_blocks.2.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.input_blocks.7.1.norm.bias": "unet_down_blocks.2.attentions.0.norm.bias",
+        "model.diffusion_model.input_blocks.7.1.norm.weight": "unet_down_blocks.2.attentions.0.norm.weight",
+        "model.diffusion_model.input_blocks.7.1.proj_in.bias": "unet_down_blocks.2.attentions.0.proj_in.bias",
+        "model.diffusion_model.input_blocks.7.1.proj_in.weight": "unet_down_blocks.2.attentions.0.proj_in.weight",
+        "model.diffusion_model.input_blocks.7.1.proj_out.bias": "unet_down_blocks.2.attentions.0.proj_out.bias",
+        "model.diffusion_model.input_blocks.7.1.proj_out.weight": "unet_down_blocks.2.attentions.0.proj_out.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "unet_down_blocks.2.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "unet_down_blocks.2.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.8.1.norm.bias": "unet_down_blocks.2.attentions.1.norm.bias",
+        "model.diffusion_model.input_blocks.8.1.norm.weight": "unet_down_blocks.2.attentions.1.norm.weight",
+        "model.diffusion_model.input_blocks.8.1.proj_in.bias": "unet_down_blocks.2.attentions.1.proj_in.bias",
+        "model.diffusion_model.input_blocks.8.1.proj_in.weight": "unet_down_blocks.2.attentions.1.proj_in.weight",
+        "model.diffusion_model.input_blocks.8.1.proj_out.bias": "unet_down_blocks.2.attentions.1.proj_out.bias",
+        "model.diffusion_model.input_blocks.8.1.proj_out.weight": "unet_down_blocks.2.attentions.1.proj_out.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.input_blocks.9.0.op.bias": "unet_down_blocks.2.downsamplers.0.conv.bias",
+        "model.diffusion_model.input_blocks.9.0.op.weight": "unet_down_blocks.2.downsamplers.0.conv.weight",
+        "model.diffusion_model.label_emb.0.0.bias": "unet_add_embedding.linear_1.bias",
+        "model.diffusion_model.label_emb.0.0.weight": "unet_add_embedding.linear_1.weight",
+        "model.diffusion_model.label_emb.0.2.bias": "unet_add_embedding.linear_2.bias",
+        "model.diffusion_model.label_emb.0.2.weight": "unet_add_embedding.linear_2.weight",
+        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias",
+        "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight",
+        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.conv1.bias",
+        "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight",
+        "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias",
+        "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight",
+        "model.diffusion_model.middle_block.0.out_layers.3.bias": "unet_mid_block.resnets.0.conv2.bias",
+        "model.diffusion_model.middle_block.0.out_layers.3.weight": "unet_mid_block.resnets.0.conv2.weight",
+        "model.diffusion_model.middle_block.1.norm.bias": "unet_mid_block.attentions.0.norm.bias",
+        "model.diffusion_model.middle_block.1.norm.weight": "unet_mid_block.attentions.0.norm.weight",
+        "model.diffusion_model.middle_block.1.proj_in.bias": "unet_mid_block.attentions.0.proj_in.bias",
+        "model.diffusion_model.middle_block.1.proj_in.weight": "unet_mid_block.attentions.0.proj_in.weight",
+        "model.diffusion_model.middle_block.1.proj_out.bias": "unet_mid_block.attentions.0.proj_out.bias",
+        "model.diffusion_model.middle_block.1.proj_out.weight": "unet_mid_block.attentions.0.proj_out.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.2.bias": "unet_mid_block.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.1.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.2.bias": "unet_mid_block.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.2.bias": "unet_mid_block.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_mid_block.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.2.in_layers.0.bias": "unet_mid_block.resnets.1.norm1.bias",
+        "model.diffusion_model.middle_block.2.in_layers.0.weight": "unet_mid_block.resnets.1.norm1.weight",
+        "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.conv1.bias",
+        "model.diffusion_model.middle_block.2.in_layers.2.weight": "unet_mid_block.resnets.1.conv1.weight",
+        "model.diffusion_model.middle_block.2.out_layers.0.bias": "unet_mid_block.resnets.1.norm2.bias",
+        "model.diffusion_model.middle_block.2.out_layers.0.weight": "unet_mid_block.resnets.1.norm2.weight",
+        "model.diffusion_model.middle_block.2.out_layers.3.bias": "unet_mid_block.resnets.1.conv2.bias",
+        "model.diffusion_model.middle_block.2.out_layers.3.weight": "unet_mid_block.resnets.1.conv2.weight",
+        "model.diffusion_model.out.0.bias": "unet_conv_norm_out.bias",
+        "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight",
+        "model.diffusion_model.out.2.bias": "unet_conv_out.bias",
+        "model.diffusion_model.out.2.weight": "unet_conv_out.weight",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "unet_up_blocks.0.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "unet_up_blocks.0.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "unet_up_blocks.3.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "unet_up_blocks.3.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "unet_up_blocks.3.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "unet_up_blocks.3.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "unet_up_blocks.3.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": "unet_up_blocks.3.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": "unet_up_blocks.3.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.10.0.skip_connection.bias": "unet_up_blocks.3.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.10.0.skip_connection.weight": "unet_up_blocks.3.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "unet_up_blocks.3.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "unet_up_blocks.3.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": "unet_up_blocks.3.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "unet_up_blocks.3.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "unet_up_blocks.3.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": "unet_up_blocks.3.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": "unet_up_blocks.3.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.11.0.skip_connection.bias": "unet_up_blocks.3.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.11.0.skip_connection.weight": "unet_up_blocks.3.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "unet_up_blocks.0.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "unet_up_blocks.0.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "unet_up_blocks.0.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.2.1.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias",
+        "model.diffusion_model.output_blocks.2.1.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "unet_up_blocks.1.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "unet_up_blocks.1.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.3.0.skip_connection.bias": "unet_up_blocks.1.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "unet_up_blocks.1.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.3.1.norm.bias": "unet_up_blocks.1.attentions.0.norm.bias",
+        "model.diffusion_model.output_blocks.3.1.norm.weight": "unet_up_blocks.1.attentions.0.norm.weight",
+        "model.diffusion_model.output_blocks.3.1.proj_in.bias": "unet_up_blocks.1.attentions.0.proj_in.bias",
+        "model.diffusion_model.output_blocks.3.1.proj_in.weight": "unet_up_blocks.1.attentions.0.proj_in.weight",
+        "model.diffusion_model.output_blocks.3.1.proj_out.bias": "unet_up_blocks.1.attentions.0.proj_out.bias",
+        "model.diffusion_model.output_blocks.3.1.proj_out.weight": "unet_up_blocks.1.attentions.0.proj_out.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "unet_up_blocks.1.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "unet_up_blocks.1.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "unet_up_blocks.1.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "unet_up_blocks.1.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.4.1.norm.bias": "unet_up_blocks.1.attentions.1.norm.bias",
+        "model.diffusion_model.output_blocks.4.1.norm.weight": "unet_up_blocks.1.attentions.1.norm.weight",
+        "model.diffusion_model.output_blocks.4.1.proj_in.bias": "unet_up_blocks.1.attentions.1.proj_in.bias",
+        "model.diffusion_model.output_blocks.4.1.proj_in.weight": "unet_up_blocks.1.attentions.1.proj_in.weight",
+        "model.diffusion_model.output_blocks.4.1.proj_out.bias": "unet_up_blocks.1.attentions.1.proj_out.bias",
+        "model.diffusion_model.output_blocks.4.1.proj_out.weight": "unet_up_blocks.1.attentions.1.proj_out.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "unet_up_blocks.1.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": "unet_up_blocks.1.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "unet_up_blocks.1.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "unet_up_blocks.1.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.5.1.norm.bias": "unet_up_blocks.1.attentions.2.norm.bias",
+        "model.diffusion_model.output_blocks.5.1.norm.weight": "unet_up_blocks.1.attentions.2.norm.weight",
+        "model.diffusion_model.output_blocks.5.1.proj_in.bias": "unet_up_blocks.1.attentions.2.proj_in.bias",
+        "model.diffusion_model.output_blocks.5.1.proj_in.weight": "unet_up_blocks.1.attentions.2.proj_in.weight",
+        "model.diffusion_model.output_blocks.5.1.proj_out.bias": "unet_up_blocks.1.attentions.2.proj_out.bias",
+        "model.diffusion_model.output_blocks.5.1.proj_out.weight": "unet_up_blocks.1.attentions.2.proj_out.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias",
+        "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "unet_up_blocks.2.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "unet_up_blocks.2.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "unet_up_blocks.2.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "unet_up_blocks.2.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.6.1.norm.bias": "unet_up_blocks.2.attentions.0.norm.bias",
+        "model.diffusion_model.output_blocks.6.1.norm.weight": "unet_up_blocks.2.attentions.0.norm.weight",
+        "model.diffusion_model.output_blocks.6.1.proj_in.bias": "unet_up_blocks.2.attentions.0.proj_in.bias",
+        "model.diffusion_model.output_blocks.6.1.proj_in.weight": "unet_up_blocks.2.attentions.0.proj_in.weight",
+        "model.diffusion_model.output_blocks.6.1.proj_out.bias": "unet_up_blocks.2.attentions.0.proj_out.bias",
+        "model.diffusion_model.output_blocks.6.1.proj_out.weight": "unet_up_blocks.2.attentions.0.proj_out.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.6.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "unet_up_blocks.2.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "unet_up_blocks.2.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "unet_up_blocks.2.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "unet_up_blocks.2.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.7.1.norm.bias": "unet_up_blocks.2.attentions.1.norm.bias",
+        "model.diffusion_model.output_blocks.7.1.norm.weight": "unet_up_blocks.2.attentions.1.norm.weight",
+        "model.diffusion_model.output_blocks.7.1.proj_in.bias": "unet_up_blocks.2.attentions.1.proj_in.bias",
+        "model.diffusion_model.output_blocks.7.1.proj_in.weight": "unet_up_blocks.2.attentions.1.proj_in.weight",
+        "model.diffusion_model.output_blocks.7.1.proj_out.bias": "unet_up_blocks.2.attentions.1.proj_out.bias",
+        "model.diffusion_model.output_blocks.7.1.proj_out.weight": "unet_up_blocks.2.attentions.1.proj_out.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.7.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "unet_up_blocks.2.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "unet_up_blocks.2.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "unet_up_blocks.2.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "unet_up_blocks.2.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.8.1.norm.bias": "unet_up_blocks.2.attentions.2.norm.bias",
+        "model.diffusion_model.output_blocks.8.1.norm.weight": "unet_up_blocks.2.attentions.2.norm.weight",
+        "model.diffusion_model.output_blocks.8.1.proj_in.bias": "unet_up_blocks.2.attentions.2.proj_in.bias",
+        "model.diffusion_model.output_blocks.8.1.proj_in.weight": "unet_up_blocks.2.attentions.2.proj_in.weight",
+        "model.diffusion_model.output_blocks.8.1.proj_out.bias": "unet_up_blocks.2.attentions.2.proj_out.bias",
+        "model.diffusion_model.output_blocks.8.1.proj_out.weight": "unet_up_blocks.2.attentions.2.proj_out.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.8.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.8.2.conv.bias": "unet_up_blocks.2.upsamplers.0.conv.bias",
+        "model.diffusion_model.output_blocks.8.2.conv.weight": "unet_up_blocks.2.upsamplers.0.conv.weight",
+        "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "unet_up_blocks.3.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "unet_up_blocks.3.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "unet_up_blocks.3.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "unet_up_blocks.3.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "unet_up_blocks.3.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": "unet_up_blocks.3.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": "unet_up_blocks.3.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.9.0.skip_connection.bias": "unet_up_blocks.3.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.9.0.skip_connection.weight": "unet_up_blocks.3.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias",
+        "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight",
+        "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias",
+        "model.diffusion_model.time_embed.2.weight": "unet_time_embedding.linear_2.weight"
+    },
+    "ldm_diffusers_shape_map": {
+        "first_stage_model.decoder.mid.attn_1.k.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.proj_out.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.q.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.v.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.k.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.proj_out.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.q.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.v.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ]
+    },
+    "ldm_diffusers_operator_map": {
+        "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.0.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.0.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.0.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.0.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.0.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.0.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.1.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.1.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.1.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.1.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.1.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.1.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.10.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.10.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.10.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.10.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.10.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.10.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.11.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.11.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.11.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.11.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.11.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.11.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.12.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.12.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.12.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.12.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.12.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.12.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.13.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.13.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.13.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.13.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.13.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.13.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.14.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.14.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.14.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.14.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.14.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.14.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.15.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.15.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.15.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.15.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.15.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.15.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.16.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.16.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.16.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.16.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.16.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.16.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.17.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.17.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.17.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.17.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.17.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.17.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.18.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.18.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.18.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.18.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.18.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.18.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.19.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.19.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.19.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.19.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.19.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.19.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.2.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.2.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.2.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.2.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.2.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.2.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.20.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.20.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.20.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.20.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.20.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.20.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.21.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.21.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.21.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.21.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.21.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.21.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.22.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.22.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.22.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.22.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.22.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.22.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.23.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.23.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.23.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.23.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.23.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.23.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.24.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.24.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.24.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.24.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.24.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.24.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.25.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.25.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.25.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.25.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.25.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.25.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.26.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.26.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.26.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.26.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.26.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.26.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.27.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.27.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.27.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.27.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.27.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.27.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.28.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.28.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.28.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.28.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.28.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.28.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.29.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.29.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.29.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.29.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.29.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.29.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.3.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.3.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.3.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.3.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.3.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.3.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.30.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.30.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.30.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.30.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.30.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.30.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.31.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.31.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.31.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.31.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.31.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.31.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.4.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.4.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.4.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.4.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.4.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.4.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.5.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.5.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.5.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.5.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.5.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.5.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.6.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.6.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.6.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.6.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.6.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.6.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.7.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.7.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.7.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.7.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.7.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.7.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.8.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.8.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.8.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.8.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.8.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.8.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.9.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.9.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.9.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.9.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.9.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.9.self_attn.v_proj.weight"
+            ]
+        }
+    },
+    "diffusers_ldm_operator_map": {
+        "te1_text_model.encoder.layers.0.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.0.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.1.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.10.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.11.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.12.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.13.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.14.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.15.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.16.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.17.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.18.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.19.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.2.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.20.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.21.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.22.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.23.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.24.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.25.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.26.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.27.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.28.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.29.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.3.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.30.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.31.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.4.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.5.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.6.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.7.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.8.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_weight",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_weight",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.0.model.transformer.resblocks.9.attn.in_proj_weight",
+                "2560:, :"
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git a/toolkit/keymaps/stable_diffusion_refiner_ldm_base.safetensors b/toolkit/keymaps/stable_diffusion_refiner_ldm_base.safetensors
new file mode 100644
index 00000000..1614e499
Binary files /dev/null and b/toolkit/keymaps/stable_diffusion_refiner_ldm_base.safetensors differ
diff --git a/toolkit/keymaps/stable_diffusion_refiner_unmatched.json b/toolkit/keymaps/stable_diffusion_refiner_unmatched.json
new file mode 100644
index 00000000..cb5aba0a
--- /dev/null
+++ b/toolkit/keymaps/stable_diffusion_refiner_unmatched.json
@@ -0,0 +1,27 @@
+{
+    "ldm": {
+        "conditioner.embedders.0.model.logit_scale": {
+            "shape": [],
+            "min": 4.60546875,
+            "max": 4.60546875
+        },
+        "conditioner.embedders.0.model.text_projection": {
+            "shape": [
+                1280,
+                1280
+            ],
+            "min": -0.15966796875,
+            "max": 0.230712890625
+        }
+    },
+    "diffusers": {
+        "te1_text_projection.weight": {
+            "shape": [
+                1280,
+                1280
+            ],
+            "min": -0.15966796875,
+            "max": 0.230712890625
+        }
+    }
+}
\ No newline at end of file
diff --git a/toolkit/keymaps/stable_diffusion_sd1.json b/toolkit/keymaps/stable_diffusion_sd1.json
index bc00f08d..8f04f753 100644
--- a/toolkit/keymaps/stable_diffusion_sd1.json
+++ b/toolkit/keymaps/stable_diffusion_sd1.json
@@ -58,7 +58,7 @@
         "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight": "te_text_model.encoder.layers.11.mlp.fc1.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias": "te_text_model.encoder.layers.11.mlp.fc2.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight": "te_text_model.encoder.layers.11.mlp.fc2.weight",
-        "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te_text_model.encoder.layers.2.self_attn.k_proj.bias",
+        "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te_text_model.encoder.layers.11.self_attn.k_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": "te_text_model.encoder.layers.11.self_attn.k_proj.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": "te_text_model.encoder.layers.11.self_attn.out_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": "te_text_model.encoder.layers.11.self_attn.out_proj.weight",
@@ -74,7 +74,7 @@
         "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight": "te_text_model.encoder.layers.2.mlp.fc1.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias": "te_text_model.encoder.layers.2.mlp.fc2.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight": "te_text_model.encoder.layers.2.mlp.fc2.weight",
-        "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te_text_model.encoder.layers.3.self_attn.k_proj.bias",
+        "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te_text_model.encoder.layers.2.self_attn.k_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": "te_text_model.encoder.layers.2.self_attn.k_proj.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": "te_text_model.encoder.layers.2.self_attn.out_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": "te_text_model.encoder.layers.2.self_attn.out_proj.weight",
@@ -90,7 +90,7 @@
         "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight": "te_text_model.encoder.layers.3.mlp.fc1.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias": "te_text_model.encoder.layers.3.mlp.fc2.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight": "te_text_model.encoder.layers.3.mlp.fc2.weight",
-        "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te_text_model.encoder.layers.11.self_attn.k_proj.bias",
+        "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te_text_model.encoder.layers.3.self_attn.k_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": "te_text_model.encoder.layers.3.self_attn.k_proj.weight",
         "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": "te_text_model.encoder.layers.3.self_attn.out_proj.bias",
         "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": "te_text_model.encoder.layers.3.self_attn.out_proj.weight",
@@ -446,11 +446,11 @@
         "first_stage_model.quant_conv.weight": "vae_quant_conv.weight",
         "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias",
         "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight",
-        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight",
@@ -482,31 +482,31 @@
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.conv1.bias",
-        "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_time_embedding.linear_2.weight",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "unet_down_blocks.3.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "unet_down_blocks.3.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "unet_down_blocks.3.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "unet_down_blocks.3.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "unet_down_blocks.3.resnets.0.norm2.weight",
         "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "unet_down_blocks.3.resnets.0.conv2.bias",
         "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "unet_down_blocks.3.resnets.0.conv2.weight",
-        "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.conv1.bias",
-        "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "unet_down_blocks.3.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "unet_down_blocks.3.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "unet_down_blocks.3.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "unet_down_blocks.3.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "unet_down_blocks.3.resnets.1.norm2.weight",
         "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "unet_down_blocks.3.resnets.1.conv2.bias",
         "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "unet_down_blocks.3.resnets.1.conv2.weight",
-        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight",
@@ -540,11 +540,11 @@
         "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight",
-        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight",
@@ -578,11 +578,11 @@
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight",
@@ -616,11 +616,11 @@
         "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight",
-        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight",
@@ -654,11 +654,11 @@
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight",
@@ -692,11 +692,11 @@
         "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.9.0.op.bias": "unet_down_blocks.2.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.9.0.op.weight": "unet_down_blocks.2.downsamplers.0.conv.weight",
-        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.conv1.bias",
-        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias",
         "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight",
-        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.conv1.bias",
         "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight",
         "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias",
         "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight",
@@ -728,11 +728,11 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.conv1.bias",
-        "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_mid_block.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.middle_block.2.in_layers.0.bias": "unet_mid_block.resnets.1.norm1.bias",
         "model.diffusion_model.middle_block.2.in_layers.0.weight": "unet_mid_block.resnets.1.norm1.weight",
-        "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.conv1.bias",
         "model.diffusion_model.middle_block.2.in_layers.2.weight": "unet_mid_block.resnets.1.conv1.weight",
         "model.diffusion_model.middle_block.2.out_layers.0.bias": "unet_mid_block.resnets.1.norm2.bias",
         "model.diffusion_model.middle_block.2.out_layers.0.weight": "unet_mid_block.resnets.1.norm2.weight",
@@ -742,11 +742,11 @@
         "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight",
         "model.diffusion_model.out.2.bias": "unet_conv_out.bias",
         "model.diffusion_model.out.2.weight": "unet_conv_out.weight",
-        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
-        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight",
@@ -754,11 +754,11 @@
         "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight",
         "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias",
         "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight",
-        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
-        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight",
@@ -766,11 +766,11 @@
         "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight",
         "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias",
         "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight",
-        "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.conv1.bias",
-        "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "unet_up_blocks.3.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "unet_up_blocks.3.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "unet_up_blocks.3.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "unet_up_blocks.3.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "unet_up_blocks.3.resnets.1.norm2.weight",
@@ -804,11 +804,11 @@
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.conv1.bias",
-        "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "unet_up_blocks.3.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "unet_up_blocks.3.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": "unet_up_blocks.3.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "unet_up_blocks.3.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "unet_up_blocks.3.resnets.2.norm2.weight",
@@ -842,11 +842,11 @@
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
-        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight",
@@ -856,11 +856,11 @@
         "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight",
         "model.diffusion_model.output_blocks.2.1.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.2.1.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight",
@@ -894,11 +894,11 @@
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight",
@@ -932,11 +932,11 @@
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight",
@@ -972,11 +972,11 @@
         "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight",
@@ -1010,11 +1010,11 @@
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight",
@@ -1048,11 +1048,11 @@
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight",
@@ -1088,11 +1088,11 @@
         "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.output_blocks.8.2.conv.bias": "unet_up_blocks.2.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.8.2.conv.weight": "unet_up_blocks.2.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.conv1.bias",
-        "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "unet_up_blocks.3.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "unet_up_blocks.3.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "unet_up_blocks.3.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "unet_up_blocks.3.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "unet_up_blocks.3.resnets.0.norm2.weight",
@@ -1129,7 +1129,7 @@
         "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias",
         "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight",
         "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias",
-        "model.diffusion_model.time_embed.2.weight": "unet_mid_block.resnets.1.time_emb_proj.weight"
+        "model.diffusion_model.time_embed.2.weight": "unet_time_embedding.linear_2.weight"
     },
     "ldm_diffusers_shape_map": {
         "first_stage_model.decoder.mid.attn_1.k.weight": [
@@ -1180,62 +1180,6 @@
                 512
             ]
         ],
-        "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [
-            [
-                128,
-                256,
-                1,
-                1
-            ],
-            [
-                128,
-                256,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                512,
-                1,
-                1
-            ],
-            [
-                256,
-                512,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                128,
-                1,
-                1
-            ],
-            [
-                256,
-                128,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [
-            [
-                512,
-                256,
-                1,
-                1
-            ],
-            [
-                512,
-                256,
-                1,
-                1
-            ]
-        ],
         "first_stage_model.encoder.mid.attn_1.k.weight": [
             [
                 512,
@@ -1283,678 +1227,6 @@
                 512,
                 512
             ]
-        ],
-        "first_stage_model.post_quant_conv.weight": [
-            [
-                4,
-                4,
-                1,
-                1
-            ],
-            [
-                4,
-                4,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.quant_conv.weight": [
-            [
-                8,
-                8,
-                1,
-                1
-            ],
-            [
-                8,
-                8,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.1.1.proj_in.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.1.1.proj_out.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.2.1.proj_in.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.2.1.proj_out.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [
-            [
-                640,
-                320,
-                1,
-                1
-            ],
-            [
-                640,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.4.1.proj_in.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.4.1.proj_out.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.5.1.proj_in.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.5.1.proj_out.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [
-            [
-                1280,
-                640,
-                1,
-                1
-            ],
-            [
-                1280,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.7.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.7.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.8.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.8.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.middle_block.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.middle_block.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.10.1.proj_in.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.10.1.proj_out.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.11.1.proj_in.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.11.1.proj_out.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.3.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.3.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.4.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.4.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [
-            [
-                1280,
-                1920,
-                1,
-                1
-            ],
-            [
-                1280,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.5.1.proj_in.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.5.1.proj_out.weight": [
-            [
-                1280,
-                1280,
-                1,
-                1
-            ],
-            [
-                1280,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [
-            [
-                640,
-                1920,
-                1,
-                1
-            ],
-            [
-                640,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.6.1.proj_in.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.6.1.proj_out.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [
-            [
-                640,
-                1280,
-                1,
-                1
-            ],
-            [
-                640,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.7.1.proj_in.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.7.1.proj_out.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [
-            [
-                640,
-                960,
-                1,
-                1
-            ],
-            [
-                640,
-                960,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.8.1.proj_in.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.8.1.proj_out.weight": [
-            [
-                640,
-                640,
-                1,
-                1
-            ],
-            [
-                640,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [
-            [
-                320,
-                960,
-                1,
-                1
-            ],
-            [
-                320,
-                960,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.9.1.proj_in.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.9.1.proj_out.weight": [
-            [
-                320,
-                320,
-                1,
-                1
-            ],
-            [
-                320,
-                320,
-                1,
-                1
-            ]
         ]
     },
     "ldm_diffusers_operator_map": {},
diff --git a/toolkit/keymaps/stable_diffusion_sd2.json b/toolkit/keymaps/stable_diffusion_sd2.json
index 59bc1511..868facaf 100644
--- a/toolkit/keymaps/stable_diffusion_sd2.json
+++ b/toolkit/keymaps/stable_diffusion_sd2.json
@@ -4,8 +4,6 @@
         "cond_stage_model.model.ln_final.weight": "te_text_model.final_layer_norm.weight",
         "cond_stage_model.model.positional_embedding": "te_text_model.embeddings.position_embedding.weight",
         "cond_stage_model.model.token_embedding.weight": "te_text_model.embeddings.token_embedding.weight",
-        "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": "te_text_model.encoder.layers.0.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": "te_text_model.encoder.layers.0.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": "te_text_model.encoder.layers.0.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": "te_text_model.encoder.layers.0.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": "te_text_model.encoder.layers.0.layer_norm1.bias",
@@ -16,8 +14,6 @@
         "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": "te_text_model.encoder.layers.0.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": "te_text_model.encoder.layers.0.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": "te_text_model.encoder.layers.0.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": "te_text_model.encoder.layers.1.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": "te_text_model.encoder.layers.1.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": "te_text_model.encoder.layers.1.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": "te_text_model.encoder.layers.1.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": "te_text_model.encoder.layers.1.layer_norm1.bias",
@@ -28,8 +24,6 @@
         "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": "te_text_model.encoder.layers.1.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": "te_text_model.encoder.layers.1.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": "te_text_model.encoder.layers.1.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": "te_text_model.encoder.layers.10.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": "te_text_model.encoder.layers.10.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": "te_text_model.encoder.layers.10.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": "te_text_model.encoder.layers.10.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": "te_text_model.encoder.layers.10.layer_norm1.bias",
@@ -40,8 +34,6 @@
         "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": "te_text_model.encoder.layers.10.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": "te_text_model.encoder.layers.10.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": "te_text_model.encoder.layers.10.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": "te_text_model.encoder.layers.11.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": "te_text_model.encoder.layers.11.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": "te_text_model.encoder.layers.11.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": "te_text_model.encoder.layers.11.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": "te_text_model.encoder.layers.11.layer_norm1.bias",
@@ -52,8 +44,6 @@
         "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": "te_text_model.encoder.layers.11.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": "te_text_model.encoder.layers.11.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": "te_text_model.encoder.layers.11.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": "te_text_model.encoder.layers.12.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": "te_text_model.encoder.layers.12.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": "te_text_model.encoder.layers.12.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": "te_text_model.encoder.layers.12.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": "te_text_model.encoder.layers.12.layer_norm1.bias",
@@ -64,8 +54,6 @@
         "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": "te_text_model.encoder.layers.12.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": "te_text_model.encoder.layers.12.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": "te_text_model.encoder.layers.12.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": "te_text_model.encoder.layers.13.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": "te_text_model.encoder.layers.13.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": "te_text_model.encoder.layers.13.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": "te_text_model.encoder.layers.13.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": "te_text_model.encoder.layers.13.layer_norm1.bias",
@@ -76,8 +64,6 @@
         "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": "te_text_model.encoder.layers.13.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": "te_text_model.encoder.layers.13.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": "te_text_model.encoder.layers.13.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": "te_text_model.encoder.layers.14.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": "te_text_model.encoder.layers.14.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": "te_text_model.encoder.layers.14.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": "te_text_model.encoder.layers.14.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": "te_text_model.encoder.layers.14.layer_norm1.bias",
@@ -88,8 +74,6 @@
         "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": "te_text_model.encoder.layers.14.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": "te_text_model.encoder.layers.14.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": "te_text_model.encoder.layers.14.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": "te_text_model.encoder.layers.15.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": "te_text_model.encoder.layers.15.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": "te_text_model.encoder.layers.15.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": "te_text_model.encoder.layers.15.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.15.ln_1.bias": "te_text_model.encoder.layers.15.layer_norm1.bias",
@@ -100,8 +84,6 @@
         "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": "te_text_model.encoder.layers.15.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": "te_text_model.encoder.layers.15.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": "te_text_model.encoder.layers.15.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": "te_text_model.encoder.layers.16.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": "te_text_model.encoder.layers.16.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": "te_text_model.encoder.layers.16.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": "te_text_model.encoder.layers.16.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": "te_text_model.encoder.layers.16.layer_norm1.bias",
@@ -112,8 +94,6 @@
         "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": "te_text_model.encoder.layers.16.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": "te_text_model.encoder.layers.16.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": "te_text_model.encoder.layers.16.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": "te_text_model.encoder.layers.17.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": "te_text_model.encoder.layers.17.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": "te_text_model.encoder.layers.17.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": "te_text_model.encoder.layers.17.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": "te_text_model.encoder.layers.17.layer_norm1.bias",
@@ -124,8 +104,6 @@
         "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": "te_text_model.encoder.layers.17.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": "te_text_model.encoder.layers.17.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": "te_text_model.encoder.layers.17.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": "te_text_model.encoder.layers.18.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": "te_text_model.encoder.layers.18.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": "te_text_model.encoder.layers.18.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": "te_text_model.encoder.layers.18.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": "te_text_model.encoder.layers.18.layer_norm1.bias",
@@ -136,8 +114,6 @@
         "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": "te_text_model.encoder.layers.18.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": "te_text_model.encoder.layers.18.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": "te_text_model.encoder.layers.18.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": "te_text_model.encoder.layers.19.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": "te_text_model.encoder.layers.19.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": "te_text_model.encoder.layers.19.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": "te_text_model.encoder.layers.19.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.19.ln_1.bias": "te_text_model.encoder.layers.19.layer_norm1.bias",
@@ -148,8 +124,6 @@
         "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": "te_text_model.encoder.layers.19.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": "te_text_model.encoder.layers.19.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": "te_text_model.encoder.layers.19.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": "te_text_model.encoder.layers.2.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": "te_text_model.encoder.layers.2.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": "te_text_model.encoder.layers.2.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": "te_text_model.encoder.layers.2.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": "te_text_model.encoder.layers.2.layer_norm1.bias",
@@ -160,8 +134,6 @@
         "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": "te_text_model.encoder.layers.2.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": "te_text_model.encoder.layers.2.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": "te_text_model.encoder.layers.2.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": "te_text_model.encoder.layers.20.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": "te_text_model.encoder.layers.20.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": "te_text_model.encoder.layers.20.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": "te_text_model.encoder.layers.20.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": "te_text_model.encoder.layers.20.layer_norm1.bias",
@@ -172,8 +144,6 @@
         "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": "te_text_model.encoder.layers.20.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": "te_text_model.encoder.layers.20.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": "te_text_model.encoder.layers.20.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": "te_text_model.encoder.layers.21.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": "te_text_model.encoder.layers.21.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": "te_text_model.encoder.layers.21.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": "te_text_model.encoder.layers.21.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.21.ln_1.bias": "te_text_model.encoder.layers.21.layer_norm1.bias",
@@ -184,8 +154,6 @@
         "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": "te_text_model.encoder.layers.21.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": "te_text_model.encoder.layers.21.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": "te_text_model.encoder.layers.21.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": "te_text_model.encoder.layers.22.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": "te_text_model.encoder.layers.22.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": "te_text_model.encoder.layers.22.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": "te_text_model.encoder.layers.22.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": "te_text_model.encoder.layers.22.layer_norm1.bias",
@@ -196,8 +164,6 @@
         "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": "te_text_model.encoder.layers.22.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": "te_text_model.encoder.layers.22.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": "te_text_model.encoder.layers.22.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": "te_text_model.encoder.layers.3.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": "te_text_model.encoder.layers.3.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": "te_text_model.encoder.layers.3.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": "te_text_model.encoder.layers.3.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": "te_text_model.encoder.layers.3.layer_norm1.bias",
@@ -208,8 +174,6 @@
         "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": "te_text_model.encoder.layers.3.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": "te_text_model.encoder.layers.3.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": "te_text_model.encoder.layers.3.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": "te_text_model.encoder.layers.4.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": "te_text_model.encoder.layers.4.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": "te_text_model.encoder.layers.4.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": "te_text_model.encoder.layers.4.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": "te_text_model.encoder.layers.4.layer_norm1.bias",
@@ -220,8 +184,6 @@
         "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": "te_text_model.encoder.layers.4.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": "te_text_model.encoder.layers.4.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": "te_text_model.encoder.layers.4.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": "te_text_model.encoder.layers.5.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": "te_text_model.encoder.layers.5.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": "te_text_model.encoder.layers.5.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": "te_text_model.encoder.layers.5.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": "te_text_model.encoder.layers.5.layer_norm1.bias",
@@ -232,8 +194,6 @@
         "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": "te_text_model.encoder.layers.5.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": "te_text_model.encoder.layers.5.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": "te_text_model.encoder.layers.5.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": "te_text_model.encoder.layers.6.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": "te_text_model.encoder.layers.6.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": "te_text_model.encoder.layers.6.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": "te_text_model.encoder.layers.6.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": "te_text_model.encoder.layers.6.layer_norm1.bias",
@@ -244,8 +204,6 @@
         "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": "te_text_model.encoder.layers.6.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": "te_text_model.encoder.layers.6.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": "te_text_model.encoder.layers.6.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": "te_text_model.encoder.layers.7.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": "te_text_model.encoder.layers.7.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": "te_text_model.encoder.layers.7.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": "te_text_model.encoder.layers.7.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": "te_text_model.encoder.layers.7.layer_norm1.bias",
@@ -256,8 +214,6 @@
         "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": "te_text_model.encoder.layers.7.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": "te_text_model.encoder.layers.7.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": "te_text_model.encoder.layers.7.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": "te_text_model.encoder.layers.8.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": "te_text_model.encoder.layers.8.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": "te_text_model.encoder.layers.8.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": "te_text_model.encoder.layers.8.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": "te_text_model.encoder.layers.8.layer_norm1.bias",
@@ -268,8 +224,6 @@
         "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": "te_text_model.encoder.layers.8.mlp.fc1.weight",
         "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": "te_text_model.encoder.layers.8.mlp.fc2.bias",
         "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": "te_text_model.encoder.layers.8.mlp.fc2.weight",
-        "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": "te_text_model.encoder.layers.9.self_attn.MERGED.bias",
-        "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": "te_text_model.encoder.layers.9.self_attn.MERGED.weight",
         "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": "te_text_model.encoder.layers.9.self_attn.out_proj.bias",
         "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": "te_text_model.encoder.layers.9.self_attn.out_proj.weight",
         "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": "te_text_model.encoder.layers.9.layer_norm1.bias",
@@ -530,11 +484,11 @@
         "first_stage_model.quant_conv.weight": "vae_quant_conv.weight",
         "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias",
         "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight",
-        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight",
@@ -566,31 +520,31 @@
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.conv1.bias",
-        "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_time_embedding.linear_2.weight",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "unet_down_blocks.3.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "unet_down_blocks.3.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "unet_down_blocks.3.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "unet_down_blocks.3.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "unet_down_blocks.3.resnets.0.norm2.weight",
         "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "unet_down_blocks.3.resnets.0.conv2.bias",
         "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "unet_down_blocks.3.resnets.0.conv2.weight",
-        "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.conv1.bias",
-        "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "unet_down_blocks.3.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "unet_down_blocks.3.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "unet_down_blocks.3.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "unet_down_blocks.3.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "unet_down_blocks.3.resnets.1.norm2.weight",
         "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "unet_down_blocks.3.resnets.1.conv2.bias",
         "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "unet_down_blocks.3.resnets.1.conv2.weight",
-        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight",
@@ -624,11 +578,11 @@
         "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight",
-        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight",
@@ -662,11 +616,11 @@
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight",
@@ -700,11 +654,11 @@
         "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight",
-        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias",
         "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight",
-        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
         "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight",
         "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias",
         "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight",
@@ -738,11 +692,11 @@
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias",
         "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight",
-        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
         "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight",
         "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias",
         "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight",
@@ -776,11 +730,11 @@
         "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.input_blocks.9.0.op.bias": "unet_down_blocks.2.downsamplers.0.conv.bias",
         "model.diffusion_model.input_blocks.9.0.op.weight": "unet_down_blocks.2.downsamplers.0.conv.weight",
-        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.conv1.bias",
-        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias",
         "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight",
-        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.conv1.bias",
         "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight",
         "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias",
         "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight",
@@ -812,11 +766,11 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.conv1.bias",
-        "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_mid_block.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.middle_block.2.in_layers.0.bias": "unet_mid_block.resnets.1.norm1.bias",
         "model.diffusion_model.middle_block.2.in_layers.0.weight": "unet_mid_block.resnets.1.norm1.weight",
-        "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.conv1.bias",
         "model.diffusion_model.middle_block.2.in_layers.2.weight": "unet_mid_block.resnets.1.conv1.weight",
         "model.diffusion_model.middle_block.2.out_layers.0.bias": "unet_mid_block.resnets.1.norm2.bias",
         "model.diffusion_model.middle_block.2.out_layers.0.weight": "unet_mid_block.resnets.1.norm2.weight",
@@ -826,11 +780,11 @@
         "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight",
         "model.diffusion_model.out.2.bias": "unet_conv_out.bias",
         "model.diffusion_model.out.2.weight": "unet_conv_out.weight",
-        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
-        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight",
@@ -838,11 +792,11 @@
         "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight",
         "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias",
         "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight",
-        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
-        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight",
@@ -850,11 +804,11 @@
         "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight",
         "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias",
         "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight",
-        "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "unet_up_blocks.3.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "unet_up_blocks.3.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "unet_up_blocks.3.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "unet_up_blocks.3.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "unet_up_blocks.3.resnets.1.norm2.weight",
@@ -888,11 +842,11 @@
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "unet_up_blocks.3.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "unet_up_blocks.3.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": "unet_up_blocks.3.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "unet_up_blocks.3.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "unet_up_blocks.3.resnets.2.norm2.weight",
@@ -926,11 +880,11 @@
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
-        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight",
@@ -940,11 +894,11 @@
         "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight",
         "model.diffusion_model.output_blocks.2.1.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.2.1.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight",
@@ -978,11 +932,11 @@
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight",
@@ -1016,11 +970,11 @@
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight",
@@ -1056,11 +1010,11 @@
         "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight",
@@ -1094,11 +1048,11 @@
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias",
         "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight",
-        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
         "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight",
         "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias",
         "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight",
@@ -1132,11 +1086,11 @@
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight",
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias",
         "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
-        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias",
         "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight",
-        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
         "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight",
         "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias",
         "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight",
@@ -1172,11 +1126,11 @@
         "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight",
         "model.diffusion_model.output_blocks.8.2.conv.bias": "unet_up_blocks.2.upsamplers.0.conv.bias",
         "model.diffusion_model.output_blocks.8.2.conv.weight": "unet_up_blocks.2.upsamplers.0.conv.weight",
-        "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias",
         "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight",
         "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "unet_up_blocks.3.resnets.0.norm1.bias",
         "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "unet_up_blocks.3.resnets.0.norm1.weight",
-        "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.conv1.bias",
         "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "unet_up_blocks.3.resnets.0.conv1.weight",
         "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "unet_up_blocks.3.resnets.0.norm2.bias",
         "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "unet_up_blocks.3.resnets.0.norm2.weight",
@@ -1213,7 +1167,7 @@
         "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias",
         "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight",
         "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias",
-        "model.diffusion_model.time_embed.2.weight": "unet_mid_block.resnets.1.time_emb_proj.weight"
+        "model.diffusion_model.time_embed.2.weight": "unet_time_embedding.linear_2.weight"
     },
     "ldm_diffusers_shape_map": {
         "first_stage_model.decoder.mid.attn_1.k.weight": [
@@ -1264,62 +1218,6 @@
                 512
             ]
         ],
-        "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [
-            [
-                128,
-                256,
-                1,
-                1
-            ],
-            [
-                128,
-                256,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                512,
-                1,
-                1
-            ],
-            [
-                256,
-                512,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                128,
-                1,
-                1
-            ],
-            [
-                256,
-                128,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [
-            [
-                512,
-                256,
-                1,
-                1
-            ],
-            [
-                512,
-                256,
-                1,
-                1
-            ]
-        ],
         "first_stage_model.encoder.mid.attn_1.k.weight": [
             [
                 512,
@@ -1367,230 +1265,6 @@
                 512,
                 512
             ]
-        ],
-        "first_stage_model.post_quant_conv.weight": [
-            [
-                4,
-                4,
-                1,
-                1
-            ],
-            [
-                4,
-                4,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.quant_conv.weight": [
-            [
-                8,
-                8,
-                1,
-                1
-            ],
-            [
-                8,
-                8,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [
-            [
-                640,
-                320,
-                1,
-                1
-            ],
-            [
-                640,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [
-            [
-                1280,
-                640,
-                1,
-                1
-            ],
-            [
-                1280,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [
-            [
-                1280,
-                1920,
-                1,
-                1
-            ],
-            [
-                1280,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [
-            [
-                640,
-                1920,
-                1,
-                1
-            ],
-            [
-                640,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [
-            [
-                640,
-                1280,
-                1,
-                1
-            ],
-            [
-                640,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [
-            [
-                640,
-                960,
-                1,
-                1
-            ],
-            [
-                640,
-                960,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [
-            [
-                320,
-                960,
-                1,
-                1
-            ],
-            [
-                320,
-                960,
-                1,
-                1
-            ]
         ]
     },
     "ldm_diffusers_operator_map": {
@@ -1606,8 +1280,7 @@
                 "te_text_model.encoder.layers.0.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.0.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.0.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.0.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": {
             "cat": [
@@ -1621,8 +1294,7 @@
                 "te_text_model.encoder.layers.1.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.1.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.1.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.1.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": {
             "cat": [
@@ -1636,8 +1308,7 @@
                 "te_text_model.encoder.layers.10.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.10.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.10.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.10.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": {
             "cat": [
@@ -1651,8 +1322,7 @@
                 "te_text_model.encoder.layers.11.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.11.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.11.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.11.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": {
             "cat": [
@@ -1666,8 +1336,7 @@
                 "te_text_model.encoder.layers.12.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.12.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.12.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.12.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": {
             "cat": [
@@ -1681,8 +1350,7 @@
                 "te_text_model.encoder.layers.13.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.13.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.13.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.13.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": {
             "cat": [
@@ -1696,8 +1364,7 @@
                 "te_text_model.encoder.layers.14.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.14.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.14.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.14.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": {
             "cat": [
@@ -1711,8 +1378,7 @@
                 "te_text_model.encoder.layers.15.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.15.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.15.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.15.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": {
             "cat": [
@@ -1726,8 +1392,7 @@
                 "te_text_model.encoder.layers.16.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.16.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.16.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.16.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": {
             "cat": [
@@ -1741,8 +1406,7 @@
                 "te_text_model.encoder.layers.17.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.17.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.17.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.17.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": {
             "cat": [
@@ -1756,8 +1420,7 @@
                 "te_text_model.encoder.layers.18.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.18.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.18.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.18.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": {
             "cat": [
@@ -1771,8 +1434,7 @@
                 "te_text_model.encoder.layers.19.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.19.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.19.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.19.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": {
             "cat": [
@@ -1786,8 +1448,7 @@
                 "te_text_model.encoder.layers.2.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.2.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.2.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.2.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": {
             "cat": [
@@ -1801,8 +1462,7 @@
                 "te_text_model.encoder.layers.20.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.20.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.20.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.20.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": {
             "cat": [
@@ -1816,8 +1476,7 @@
                 "te_text_model.encoder.layers.21.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.21.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.21.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.21.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": {
             "cat": [
@@ -1831,8 +1490,7 @@
                 "te_text_model.encoder.layers.22.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.22.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.22.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.22.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": {
             "cat": [
@@ -1846,8 +1504,7 @@
                 "te_text_model.encoder.layers.3.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.3.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.3.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.3.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": {
             "cat": [
@@ -1861,8 +1518,7 @@
                 "te_text_model.encoder.layers.4.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.4.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.4.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.4.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": {
             "cat": [
@@ -1876,8 +1532,7 @@
                 "te_text_model.encoder.layers.5.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.5.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.5.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.5.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": {
             "cat": [
@@ -1891,8 +1546,7 @@
                 "te_text_model.encoder.layers.6.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.6.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.6.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.6.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": {
             "cat": [
@@ -1906,8 +1560,7 @@
                 "te_text_model.encoder.layers.7.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.7.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.7.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.7.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": {
             "cat": [
@@ -1921,8 +1574,7 @@
                 "te_text_model.encoder.layers.8.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.8.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.8.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.8.self_attn.MERGED.weight"
+            ]
         },
         "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": {
             "cat": [
@@ -1936,8 +1588,7 @@
                 "te_text_model.encoder.layers.9.self_attn.q_proj.weight",
                 "te_text_model.encoder.layers.9.self_attn.k_proj.weight",
                 "te_text_model.encoder.layers.9.self_attn.v_proj.weight"
-            ],
-            "target": "te_text_model.encoder.layers.9.self_attn.MERGED.weight"
+            ]
         }
     },
     "diffusers_ldm_operator_map": {
diff --git a/toolkit/keymaps/stable_diffusion_sdxl.json b/toolkit/keymaps/stable_diffusion_sdxl.json
index baab7ae2..dd3c2447 100644
--- a/toolkit/keymaps/stable_diffusion_sdxl.json
+++ b/toolkit/keymaps/stable_diffusion_sdxl.json
@@ -58,7 +58,7 @@
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.weight": "te0_text_model.encoder.layers.11.mlp.fc1.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.bias": "te0_text_model.encoder.layers.11.mlp.fc2.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.weight": "te0_text_model.encoder.layers.11.mlp.fc2.weight",
-        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te0_text_model.encoder.layers.2.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te0_text_model.encoder.layers.11.self_attn.k_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": "te0_text_model.encoder.layers.11.self_attn.k_proj.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": "te0_text_model.encoder.layers.11.self_attn.out_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": "te0_text_model.encoder.layers.11.self_attn.out_proj.weight",
@@ -74,7 +74,7 @@
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.weight": "te0_text_model.encoder.layers.2.mlp.fc1.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.bias": "te0_text_model.encoder.layers.2.mlp.fc2.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.weight": "te0_text_model.encoder.layers.2.mlp.fc2.weight",
-        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te0_text_model.encoder.layers.3.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te0_text_model.encoder.layers.2.self_attn.k_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": "te0_text_model.encoder.layers.2.self_attn.k_proj.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": "te0_text_model.encoder.layers.2.self_attn.out_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": "te0_text_model.encoder.layers.2.self_attn.out_proj.weight",
@@ -90,7 +90,7 @@
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.weight": "te0_text_model.encoder.layers.3.mlp.fc1.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.bias": "te0_text_model.encoder.layers.3.mlp.fc2.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.weight": "te0_text_model.encoder.layers.3.mlp.fc2.weight",
-        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te0_text_model.encoder.layers.11.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te0_text_model.encoder.layers.3.self_attn.k_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": "te0_text_model.encoder.layers.3.self_attn.k_proj.weight",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": "te0_text_model.encoder.layers.3.self_attn.out_proj.bias",
         "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": "te0_text_model.encoder.layers.3.self_attn.out_proj.weight",
@@ -200,8 +200,6 @@
         "conditioner.embedders.1.model.ln_final.weight": "te1_text_model.final_layer_norm.weight",
         "conditioner.embedders.1.model.positional_embedding": "te1_text_model.embeddings.position_embedding.weight",
         "conditioner.embedders.1.model.token_embedding.weight": "te1_text_model.embeddings.token_embedding.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias": "te1_text_model.encoder.layers.0.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": "te1_text_model.encoder.layers.0.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.bias": "te1_text_model.encoder.layers.0.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.weight": "te1_text_model.encoder.layers.0.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.bias": "te1_text_model.encoder.layers.0.layer_norm1.bias",
@@ -212,8 +210,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.weight": "te1_text_model.encoder.layers.0.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.bias": "te1_text_model.encoder.layers.0.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.weight": "te1_text_model.encoder.layers.0.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": "te1_text_model.encoder.layers.1.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": "te1_text_model.encoder.layers.1.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.bias": "te1_text_model.encoder.layers.1.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.weight": "te1_text_model.encoder.layers.1.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.bias": "te1_text_model.encoder.layers.1.layer_norm1.bias",
@@ -224,10 +220,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.weight": "te1_text_model.encoder.layers.1.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.bias": "te1_text_model.encoder.layers.1.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.weight": "te1_text_model.encoder.layers.1.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": "te1_text_model.encoder.layers.10.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": "te1_text_model.encoder.layers.10.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.bias": "te1_text_model.encoder.layers.10.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.bias": "te1_text_model.encoder.layers.10.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.weight": "te1_text_model.encoder.layers.10.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.bias": "te1_text_model.encoder.layers.10.layer_norm2.bias",
@@ -236,10 +230,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.weight": "te1_text_model.encoder.layers.10.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.bias": "te1_text_model.encoder.layers.10.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.weight": "te1_text_model.encoder.layers.10.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": "te1_text_model.encoder.layers.11.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": "te1_text_model.encoder.layers.11.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.bias": "te1_text_model.encoder.layers.11.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.bias": "te1_text_model.encoder.layers.11.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.weight": "te1_text_model.encoder.layers.11.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.bias": "te1_text_model.encoder.layers.11.layer_norm2.bias",
@@ -248,10 +240,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.weight": "te1_text_model.encoder.layers.11.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.bias": "te1_text_model.encoder.layers.11.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.weight": "te1_text_model.encoder.layers.11.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": "te1_text_model.encoder.layers.12.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": "te1_text_model.encoder.layers.12.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.bias": "te1_text_model.encoder.layers.12.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.bias": "te1_text_model.encoder.layers.12.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.weight": "te1_text_model.encoder.layers.12.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.bias": "te1_text_model.encoder.layers.12.layer_norm2.bias",
@@ -260,10 +250,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.weight": "te1_text_model.encoder.layers.12.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.bias": "te1_text_model.encoder.layers.12.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.weight": "te1_text_model.encoder.layers.12.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": "te1_text_model.encoder.layers.13.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": "te1_text_model.encoder.layers.13.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.bias": "te1_text_model.encoder.layers.13.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.bias": "te1_text_model.encoder.layers.13.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.weight": "te1_text_model.encoder.layers.13.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.bias": "te1_text_model.encoder.layers.13.layer_norm2.bias",
@@ -272,8 +260,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.weight": "te1_text_model.encoder.layers.13.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.bias": "te1_text_model.encoder.layers.13.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.weight": "te1_text_model.encoder.layers.13.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": "te1_text_model.encoder.layers.14.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": "te1_text_model.encoder.layers.14.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.bias": "te1_text_model.encoder.layers.14.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.weight": "te1_text_model.encoder.layers.14.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.bias": "te1_text_model.encoder.layers.14.layer_norm1.bias",
@@ -284,10 +270,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.weight": "te1_text_model.encoder.layers.14.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.bias": "te1_text_model.encoder.layers.14.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.weight": "te1_text_model.encoder.layers.14.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": "te1_text_model.encoder.layers.15.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": "te1_text_model.encoder.layers.15.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.bias": "te1_text_model.encoder.layers.15.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.15.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.bias": "te1_text_model.encoder.layers.15.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.weight": "te1_text_model.encoder.layers.15.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.bias": "te1_text_model.encoder.layers.15.layer_norm2.bias",
@@ -296,8 +280,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.weight": "te1_text_model.encoder.layers.15.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.bias": "te1_text_model.encoder.layers.15.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.weight": "te1_text_model.encoder.layers.15.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": "te1_text_model.encoder.layers.16.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": "te1_text_model.encoder.layers.16.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.bias": "te1_text_model.encoder.layers.16.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.weight": "te1_text_model.encoder.layers.16.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.bias": "te1_text_model.encoder.layers.16.layer_norm1.bias",
@@ -308,8 +290,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.weight": "te1_text_model.encoder.layers.16.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.bias": "te1_text_model.encoder.layers.16.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.weight": "te1_text_model.encoder.layers.16.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": "te1_text_model.encoder.layers.17.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": "te1_text_model.encoder.layers.17.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.bias": "te1_text_model.encoder.layers.17.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.weight": "te1_text_model.encoder.layers.17.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.bias": "te1_text_model.encoder.layers.17.layer_norm1.bias",
@@ -320,8 +300,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.weight": "te1_text_model.encoder.layers.17.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.bias": "te1_text_model.encoder.layers.17.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.weight": "te1_text_model.encoder.layers.17.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": "te1_text_model.encoder.layers.18.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": "te1_text_model.encoder.layers.18.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.bias": "te1_text_model.encoder.layers.18.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.weight": "te1_text_model.encoder.layers.18.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.bias": "te1_text_model.encoder.layers.18.layer_norm1.bias",
@@ -332,8 +310,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.weight": "te1_text_model.encoder.layers.18.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.bias": "te1_text_model.encoder.layers.18.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.weight": "te1_text_model.encoder.layers.18.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": "te1_text_model.encoder.layers.19.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": "te1_text_model.encoder.layers.19.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.bias": "te1_text_model.encoder.layers.19.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.weight": "te1_text_model.encoder.layers.19.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.bias": "te1_text_model.encoder.layers.19.layer_norm1.bias",
@@ -344,10 +320,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.weight": "te1_text_model.encoder.layers.19.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.bias": "te1_text_model.encoder.layers.19.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.weight": "te1_text_model.encoder.layers.19.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": "te1_text_model.encoder.layers.2.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": "te1_text_model.encoder.layers.2.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.bias": "te1_text_model.encoder.layers.2.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.bias": "te1_text_model.encoder.layers.2.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.weight": "te1_text_model.encoder.layers.2.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.bias": "te1_text_model.encoder.layers.2.layer_norm2.bias",
@@ -356,8 +330,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.weight": "te1_text_model.encoder.layers.2.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.bias": "te1_text_model.encoder.layers.2.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.weight": "te1_text_model.encoder.layers.2.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": "te1_text_model.encoder.layers.20.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": "te1_text_model.encoder.layers.20.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.bias": "te1_text_model.encoder.layers.20.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.weight": "te1_text_model.encoder.layers.20.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.bias": "te1_text_model.encoder.layers.20.layer_norm1.bias",
@@ -368,8 +340,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.weight": "te1_text_model.encoder.layers.20.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.bias": "te1_text_model.encoder.layers.20.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.weight": "te1_text_model.encoder.layers.20.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": "te1_text_model.encoder.layers.21.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": "te1_text_model.encoder.layers.21.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.bias": "te1_text_model.encoder.layers.21.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.weight": "te1_text_model.encoder.layers.21.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.bias": "te1_text_model.encoder.layers.21.layer_norm1.bias",
@@ -380,8 +350,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.weight": "te1_text_model.encoder.layers.21.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.bias": "te1_text_model.encoder.layers.21.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.weight": "te1_text_model.encoder.layers.21.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": "te1_text_model.encoder.layers.22.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": "te1_text_model.encoder.layers.22.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.bias": "te1_text_model.encoder.layers.22.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.weight": "te1_text_model.encoder.layers.22.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.bias": "te1_text_model.encoder.layers.22.layer_norm1.bias",
@@ -392,8 +360,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.weight": "te1_text_model.encoder.layers.22.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.bias": "te1_text_model.encoder.layers.22.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.weight": "te1_text_model.encoder.layers.22.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": "te1_text_model.encoder.layers.23.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": "te1_text_model.encoder.layers.23.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.bias": "te1_text_model.encoder.layers.23.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.weight": "te1_text_model.encoder.layers.23.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.bias": "te1_text_model.encoder.layers.23.layer_norm1.bias",
@@ -404,8 +370,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.weight": "te1_text_model.encoder.layers.23.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.bias": "te1_text_model.encoder.layers.23.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.weight": "te1_text_model.encoder.layers.23.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": "te1_text_model.encoder.layers.24.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": "te1_text_model.encoder.layers.24.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.bias": "te1_text_model.encoder.layers.24.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.weight": "te1_text_model.encoder.layers.24.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.bias": "te1_text_model.encoder.layers.24.layer_norm1.bias",
@@ -416,8 +380,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.weight": "te1_text_model.encoder.layers.24.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.bias": "te1_text_model.encoder.layers.24.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.weight": "te1_text_model.encoder.layers.24.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": "te1_text_model.encoder.layers.25.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": "te1_text_model.encoder.layers.25.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.bias": "te1_text_model.encoder.layers.25.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.weight": "te1_text_model.encoder.layers.25.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.bias": "te1_text_model.encoder.layers.25.layer_norm1.bias",
@@ -428,8 +390,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.weight": "te1_text_model.encoder.layers.25.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.bias": "te1_text_model.encoder.layers.25.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.weight": "te1_text_model.encoder.layers.25.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": "te1_text_model.encoder.layers.26.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": "te1_text_model.encoder.layers.26.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.bias": "te1_text_model.encoder.layers.26.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.weight": "te1_text_model.encoder.layers.26.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.bias": "te1_text_model.encoder.layers.26.layer_norm1.bias",
@@ -440,8 +400,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.weight": "te1_text_model.encoder.layers.26.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.bias": "te1_text_model.encoder.layers.26.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.weight": "te1_text_model.encoder.layers.26.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": "te1_text_model.encoder.layers.27.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": "te1_text_model.encoder.layers.27.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.bias": "te1_text_model.encoder.layers.27.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.weight": "te1_text_model.encoder.layers.27.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.bias": "te1_text_model.encoder.layers.27.layer_norm1.bias",
@@ -452,8 +410,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.weight": "te1_text_model.encoder.layers.27.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.bias": "te1_text_model.encoder.layers.27.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.weight": "te1_text_model.encoder.layers.27.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": "te1_text_model.encoder.layers.28.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": "te1_text_model.encoder.layers.28.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.bias": "te1_text_model.encoder.layers.28.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.weight": "te1_text_model.encoder.layers.28.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.bias": "te1_text_model.encoder.layers.28.layer_norm1.bias",
@@ -464,8 +420,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.weight": "te1_text_model.encoder.layers.28.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.bias": "te1_text_model.encoder.layers.28.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.weight": "te1_text_model.encoder.layers.28.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": "te1_text_model.encoder.layers.29.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": "te1_text_model.encoder.layers.29.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.bias": "te1_text_model.encoder.layers.29.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.weight": "te1_text_model.encoder.layers.29.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.bias": "te1_text_model.encoder.layers.29.layer_norm1.bias",
@@ -476,10 +430,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.weight": "te1_text_model.encoder.layers.29.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.bias": "te1_text_model.encoder.layers.29.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.weight": "te1_text_model.encoder.layers.29.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": "te1_text_model.encoder.layers.3.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": "te1_text_model.encoder.layers.3.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.bias": "te1_text_model.encoder.layers.3.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.bias": "te1_text_model.encoder.layers.3.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.weight": "te1_text_model.encoder.layers.3.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.bias": "te1_text_model.encoder.layers.3.layer_norm2.bias",
@@ -488,8 +440,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.weight": "te1_text_model.encoder.layers.3.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.bias": "te1_text_model.encoder.layers.3.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.weight": "te1_text_model.encoder.layers.3.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": "te1_text_model.encoder.layers.30.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": "te1_text_model.encoder.layers.30.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.bias": "te1_text_model.encoder.layers.30.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.weight": "te1_text_model.encoder.layers.30.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.bias": "te1_text_model.encoder.layers.30.layer_norm1.bias",
@@ -500,8 +450,6 @@
         "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.weight": "te1_text_model.encoder.layers.30.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.bias": "te1_text_model.encoder.layers.30.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.weight": "te1_text_model.encoder.layers.30.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": "te1_text_model.encoder.layers.31.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": "te1_text_model.encoder.layers.31.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.bias": "te1_text_model.encoder.layers.31.self_attn.out_proj.bias",
         "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.weight": "te1_text_model.encoder.layers.31.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.bias": "te1_text_model.encoder.layers.31.layer_norm1.bias",
@@ -512,10 +460,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.weight": "te1_text_model.encoder.layers.31.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.bias": "te1_text_model.encoder.layers.31.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.weight": "te1_text_model.encoder.layers.31.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": "te1_text_model.encoder.layers.4.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": "te1_text_model.encoder.layers.4.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.bias": "te1_text_model.encoder.layers.4.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.bias": "te1_text_model.encoder.layers.4.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.weight": "te1_text_model.encoder.layers.4.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.bias": "te1_text_model.encoder.layers.4.layer_norm2.bias",
@@ -524,10 +470,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.weight": "te1_text_model.encoder.layers.4.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.bias": "te1_text_model.encoder.layers.4.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.weight": "te1_text_model.encoder.layers.4.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": "te1_text_model.encoder.layers.5.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": "te1_text_model.encoder.layers.5.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.bias": "te1_text_model.encoder.layers.5.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.bias": "te1_text_model.encoder.layers.5.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.weight": "te1_text_model.encoder.layers.5.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.bias": "te1_text_model.encoder.layers.5.layer_norm2.bias",
@@ -536,10 +480,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.weight": "te1_text_model.encoder.layers.5.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.bias": "te1_text_model.encoder.layers.5.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.weight": "te1_text_model.encoder.layers.5.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": "te1_text_model.encoder.layers.6.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": "te1_text_model.encoder.layers.6.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.bias": "te1_text_model.encoder.layers.6.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.bias": "te1_text_model.encoder.layers.6.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.weight": "te1_text_model.encoder.layers.6.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.bias": "te1_text_model.encoder.layers.6.layer_norm2.bias",
@@ -548,10 +490,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.weight": "te1_text_model.encoder.layers.6.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.bias": "te1_text_model.encoder.layers.6.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.weight": "te1_text_model.encoder.layers.6.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": "te1_text_model.encoder.layers.7.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": "te1_text_model.encoder.layers.7.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.bias": "te1_text_model.encoder.layers.7.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.bias": "te1_text_model.encoder.layers.7.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.weight": "te1_text_model.encoder.layers.7.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.bias": "te1_text_model.encoder.layers.7.layer_norm2.bias",
@@ -560,10 +500,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.weight": "te1_text_model.encoder.layers.7.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.bias": "te1_text_model.encoder.layers.7.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.weight": "te1_text_model.encoder.layers.7.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": "te1_text_model.encoder.layers.8.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": "te1_text_model.encoder.layers.8.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.bias": "te1_text_model.encoder.layers.8.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.bias": "te1_text_model.encoder.layers.8.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.weight": "te1_text_model.encoder.layers.8.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.bias": "te1_text_model.encoder.layers.8.layer_norm2.bias",
@@ -572,10 +510,8 @@
         "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.weight": "te1_text_model.encoder.layers.8.mlp.fc1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.bias": "te1_text_model.encoder.layers.8.mlp.fc2.bias",
         "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.weight": "te1_text_model.encoder.layers.8.mlp.fc2.weight",
-        "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": "te1_text_model.encoder.layers.9.self_attn.MERGED.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": "te1_text_model.encoder.layers.9.self_attn.MERGED.weight",
         "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.bias": "te1_text_model.encoder.layers.9.self_attn.out_proj.bias",
-        "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight",
         "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.bias": "te1_text_model.encoder.layers.9.layer_norm1.bias",
         "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.weight": "te1_text_model.encoder.layers.9.layer_norm1.weight",
         "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.bias": "te1_text_model.encoder.layers.9.layer_norm2.bias",
@@ -1077,7 +1013,7 @@
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.bias",
-        "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.bias",
@@ -1097,7 +1033,7 @@
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.bias",
-        "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.bias",
@@ -1117,7 +1053,7 @@
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.bias",
-        "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_out.0.weight": "te1_text_model.encoder.layers.15.self_attn.out_proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.weight",
         "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.bias",
@@ -1433,7 +1369,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
@@ -1473,7 +1409,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
@@ -1493,7 +1429,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
@@ -1513,7 +1449,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.4.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.bias",
@@ -1533,7 +1469,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.5.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.bias",
@@ -1553,7 +1489,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.6.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.bias",
@@ -1573,7 +1509,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.7.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.bias",
@@ -1593,7 +1529,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.8.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.bias",
@@ -1613,7 +1549,7 @@
         "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn1.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_k.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.bias",
-        "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_q.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_v.weight",
         "model.diffusion_model.middle_block.1.transformer_blocks.9.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.bias",
@@ -2101,7 +2037,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.bias",
@@ -2181,7 +2117,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.bias",
@@ -2201,7 +2137,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.bias",
@@ -2221,7 +2157,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.bias",
@@ -2241,7 +2177,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.bias",
@@ -2261,7 +2197,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.bias",
@@ -2281,7 +2217,7 @@
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.bias",
-        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight",
         "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.bias",
@@ -2562,62 +2498,6 @@
                 512
             ]
         ],
-        "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [
-            [
-                128,
-                256,
-                1,
-                1
-            ],
-            [
-                128,
-                256,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                512,
-                1,
-                1
-            ],
-            [
-                256,
-                512,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [
-            [
-                256,
-                128,
-                1,
-                1
-            ],
-            [
-                256,
-                128,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [
-            [
-                512,
-                256,
-                1,
-                1
-            ],
-            [
-                512,
-                256,
-                1,
-                1
-            ]
-        ],
         "first_stage_model.encoder.mid.attn_1.k.weight": [
             [
                 512,
@@ -2665,188 +2545,6 @@
                 512,
                 512
             ]
-        ],
-        "first_stage_model.post_quant_conv.weight": [
-            [
-                4,
-                4,
-                1,
-                1
-            ],
-            [
-                4,
-                4,
-                1,
-                1
-            ]
-        ],
-        "first_stage_model.quant_conv.weight": [
-            [
-                8,
-                8,
-                1,
-                1
-            ],
-            [
-                8,
-                8,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [
-            [
-                640,
-                320,
-                1,
-                1
-            ],
-            [
-                640,
-                320,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [
-            [
-                1280,
-                640,
-                1,
-                1
-            ],
-            [
-                1280,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [
-            [
-                1280,
-                2560,
-                1,
-                1
-            ],
-            [
-                1280,
-                2560,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [
-            [
-                1280,
-                1920,
-                1,
-                1
-            ],
-            [
-                1280,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [
-            [
-                640,
-                1920,
-                1,
-                1
-            ],
-            [
-                640,
-                1920,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [
-            [
-                640,
-                1280,
-                1,
-                1
-            ],
-            [
-                640,
-                1280,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [
-            [
-                640,
-                960,
-                1,
-                1
-            ],
-            [
-                640,
-                960,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [
-            [
-                320,
-                960,
-                1,
-                1
-            ],
-            [
-                320,
-                960,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
-        ],
-        "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [
-            [
-                320,
-                640,
-                1,
-                1
-            ],
-            [
-                320,
-                640,
-                1,
-                1
-            ]
         ]
     },
     "ldm_diffusers_operator_map": {
@@ -2855,515 +2553,469 @@
                 "te1_text_model.encoder.layers.0.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.0.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.0.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.0.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.0.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.0.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.0.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.0.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.1.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.1.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.1.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.1.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.1.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.1.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.1.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.1.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.10.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.10.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.10.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.10.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.10.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.10.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.10.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.10.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.11.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.11.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.11.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.11.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.11.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.11.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.11.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.11.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.12.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.12.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.12.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.12.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.12.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.12.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.12.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.12.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.13.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.13.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.13.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.13.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.13.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.13.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.13.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.13.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.14.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.14.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.14.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.14.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.14.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.14.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.14.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.14.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.15.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.15.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.15.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.15.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.15.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.15.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.15.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.15.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.16.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.16.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.16.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.16.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.16.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.16.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.16.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.16.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.17.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.17.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.17.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.17.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.17.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.17.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.17.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.17.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.18.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.18.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.18.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.18.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.18.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.18.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.18.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.18.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.19.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.19.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.19.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.19.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.19.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.19.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.19.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.19.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.2.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.2.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.2.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.2.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.2.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.2.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.2.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.2.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.20.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.20.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.20.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.20.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.20.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.20.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.20.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.20.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.21.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.21.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.21.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.21.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.21.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.21.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.21.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.21.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.22.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.22.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.22.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.22.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.22.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.22.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.22.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.22.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.23.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.23.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.23.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.23.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.23.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.23.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.23.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.23.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.24.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.24.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.24.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.24.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.24.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.24.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.24.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.24.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.25.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.25.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.25.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.25.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.25.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.25.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.25.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.25.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.26.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.26.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.26.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.26.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.26.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.26.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.26.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.26.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.27.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.27.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.27.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.27.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.27.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.27.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.27.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.27.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.28.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.28.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.28.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.28.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.28.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.28.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.28.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.28.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.29.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.29.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.29.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.29.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.29.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.29.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.29.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.29.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.3.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.3.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.3.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.3.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.3.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.3.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.3.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.3.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.30.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.30.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.30.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.30.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.30.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.30.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.30.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.30.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.31.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.31.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.31.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.31.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.31.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.31.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.31.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.31.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.4.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.4.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.4.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.4.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.4.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.4.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.4.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.4.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.5.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.5.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.5.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.5.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.5.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.5.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.5.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.5.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.6.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.6.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.6.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.6.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.6.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.6.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.6.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.6.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.7.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.7.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.7.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.7.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.7.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.7.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.7.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.7.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.8.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.8.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.8.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.8.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.8.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.8.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.8.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.8.self_attn.MERGED.weight"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": {
             "cat": [
                 "te1_text_model.encoder.layers.9.self_attn.q_proj.bias",
                 "te1_text_model.encoder.layers.9.self_attn.k_proj.bias",
                 "te1_text_model.encoder.layers.9.self_attn.v_proj.bias"
-            ],
-            "target": "te1_text_model.encoder.layers.9.self_attn.MERGED.bias"
+            ]
         },
         "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": {
             "cat": [
                 "te1_text_model.encoder.layers.9.self_attn.q_proj.weight",
                 "te1_text_model.encoder.layers.9.self_attn.k_proj.weight",
                 "te1_text_model.encoder.layers.9.self_attn.v_proj.weight"
-            ],
-            "target": "te1_text_model.encoder.layers.9.self_attn.MERGED.weight"
+            ]
         }
     },
     "diffusers_ldm_operator_map": {
+        "te1_text_model.encoder.layers.0.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight",
@@ -3382,6 +3034,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.1.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.1.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight",
@@ -3400,6 +3070,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.10.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.10.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight",
@@ -3418,6 +3106,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.11.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.11.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight",
@@ -3436,6 +3142,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.12.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.12.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight",
@@ -3454,6 +3178,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.13.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.13.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight",
@@ -3472,6 +3214,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.14.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.14.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight",
@@ -3490,6 +3250,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.15.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.15.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight",
@@ -3508,6 +3286,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.16.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.16.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight",
@@ -3526,6 +3322,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.17.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.17.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight",
@@ -3544,6 +3358,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.18.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.18.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight",
@@ -3562,6 +3394,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.19.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.19.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight",
@@ -3580,6 +3430,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.2.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.2.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight",
@@ -3598,6 +3466,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.20.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.20.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight",
@@ -3616,6 +3502,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.21.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.21.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight",
@@ -3634,6 +3538,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.22.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.22.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight",
@@ -3652,6 +3574,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.23.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.23.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight",
@@ -3670,6 +3610,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.24.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.24.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight",
@@ -3688,6 +3646,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.25.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.25.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight",
@@ -3706,6 +3682,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.26.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.26.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight",
@@ -3724,6 +3718,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.27.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.27.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight",
@@ -3742,6 +3754,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.28.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.28.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight",
@@ -3760,6 +3790,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.29.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.29.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight",
@@ -3778,6 +3826,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.3.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.3.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight",
@@ -3796,6 +3862,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.30.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.30.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight",
@@ -3814,6 +3898,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.31.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.31.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight",
@@ -3832,6 +3934,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.4.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.4.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight",
@@ -3850,6 +3970,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.5.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.5.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight",
@@ -3868,6 +4006,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.6.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.6.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight",
@@ -3886,6 +4042,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.7.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.7.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight",
@@ -3904,6 +4078,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.8.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.8.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight",
@@ -3922,6 +4114,24 @@
                 "2560:, :"
             ]
         },
+        "te1_text_model.encoder.layers.9.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "0:1280, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "1280:2560, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "2560:, :"
+            ]
+        },
         "te1_text_model.encoder.layers.9.self_attn.q_proj.weight": {
             "slice": [
                 "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight",
diff --git a/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json b/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json
index 5af96059..d0b2554a 100644
--- a/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json
+++ b/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json
@@ -6,16 +6,12 @@
                 77
             ],
             "min": 0.0,
-            "max": 76.0,
-            "mean": 38.0,
-            "std": 22.375
+            "max": 76.0
         },
         "conditioner.embedders.1.model.logit_scale": {
             "shape": [],
             "min": 4.60546875,
-            "max": 4.60546875,
-            "mean": 4.60546875,
-            "std": NaN
+            "max": 4.60546875
         },
         "conditioner.embedders.1.model.text_projection": {
             "shape": [
@@ -23,9 +19,7 @@
                 1280
             ],
             "min": -0.15966796875,
-            "max": 0.230712890625,
-            "mean": 0.0,
-            "std": 0.0181732177734375
+            "max": 0.230712890625
         }
     },
     "diffusers": {
@@ -35,9 +29,7 @@
                 1280
             ],
             "min": -0.15966796875,
-            "max": 0.230712890625,
-            "mean": 2.128152846125886e-05,
-            "std": 0.018169498071074486
+            "max": 0.230712890625
         }
     }
 }
\ No newline at end of file
diff --git a/toolkit/keymaps/stable_diffusion_ssd.json b/toolkit/keymaps/stable_diffusion_ssd.json
new file mode 100644
index 00000000..9ad06407
--- /dev/null
+++ b/toolkit/keymaps/stable_diffusion_ssd.json
@@ -0,0 +1,3419 @@
+{
+    "ldm_diffusers_keymap": {
+        "conditioner.embedders.0.transformer.text_model.embeddings.position_embedding.weight": "te0_text_model.embeddings.position_embedding.weight",
+        "conditioner.embedders.0.transformer.text_model.embeddings.token_embedding.weight": "te0_text_model.embeddings.token_embedding.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.bias": "te0_text_model.encoder.layers.0.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.weight": "te0_text_model.encoder.layers.0.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.bias": "te0_text_model.encoder.layers.0.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.weight": "te0_text_model.encoder.layers.0.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.bias": "te0_text_model.encoder.layers.0.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.weight": "te0_text_model.encoder.layers.0.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.bias": "te0_text_model.encoder.layers.0.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.weight": "te0_text_model.encoder.layers.0.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": "te0_text_model.encoder.layers.0.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": "te0_text_model.encoder.layers.0.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": "te0_text_model.encoder.layers.0.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": "te0_text_model.encoder.layers.0.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": "te0_text_model.encoder.layers.0.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": "te0_text_model.encoder.layers.0.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": "te0_text_model.encoder.layers.0.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": "te0_text_model.encoder.layers.0.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.bias": "te0_text_model.encoder.layers.1.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.weight": "te0_text_model.encoder.layers.1.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.bias": "te0_text_model.encoder.layers.1.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.weight": "te0_text_model.encoder.layers.1.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.bias": "te0_text_model.encoder.layers.1.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.weight": "te0_text_model.encoder.layers.1.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.bias": "te0_text_model.encoder.layers.1.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.weight": "te0_text_model.encoder.layers.1.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": "te0_text_model.encoder.layers.1.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": "te0_text_model.encoder.layers.1.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": "te0_text_model.encoder.layers.1.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": "te0_text_model.encoder.layers.1.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": "te0_text_model.encoder.layers.1.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": "te0_text_model.encoder.layers.1.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": "te0_text_model.encoder.layers.1.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": "te0_text_model.encoder.layers.1.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.bias": "te0_text_model.encoder.layers.10.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.weight": "te0_text_model.encoder.layers.10.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.bias": "te0_text_model.encoder.layers.10.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.weight": "te0_text_model.encoder.layers.10.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.bias": "te0_text_model.encoder.layers.10.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.weight": "te0_text_model.encoder.layers.10.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.bias": "te0_text_model.encoder.layers.10.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.weight": "te0_text_model.encoder.layers.10.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": "te0_text_model.encoder.layers.10.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": "te0_text_model.encoder.layers.10.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": "te0_text_model.encoder.layers.10.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": "te0_text_model.encoder.layers.10.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": "te0_text_model.encoder.layers.10.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": "te0_text_model.encoder.layers.10.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": "te0_text_model.encoder.layers.10.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": "te0_text_model.encoder.layers.10.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.bias": "te0_text_model.encoder.layers.11.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.weight": "te0_text_model.encoder.layers.11.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.bias": "te0_text_model.encoder.layers.11.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.weight": "te0_text_model.encoder.layers.11.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.bias": "te0_text_model.encoder.layers.11.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.weight": "te0_text_model.encoder.layers.11.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.bias": "te0_text_model.encoder.layers.11.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.weight": "te0_text_model.encoder.layers.11.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te0_text_model.encoder.layers.11.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": "te0_text_model.encoder.layers.11.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": "te0_text_model.encoder.layers.11.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": "te0_text_model.encoder.layers.11.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": "te0_text_model.encoder.layers.11.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": "te0_text_model.encoder.layers.11.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": "te0_text_model.encoder.layers.11.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": "te0_text_model.encoder.layers.11.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.bias": "te0_text_model.encoder.layers.2.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.weight": "te0_text_model.encoder.layers.2.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.bias": "te0_text_model.encoder.layers.2.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.weight": "te0_text_model.encoder.layers.2.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.bias": "te0_text_model.encoder.layers.2.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.weight": "te0_text_model.encoder.layers.2.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.bias": "te0_text_model.encoder.layers.2.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.weight": "te0_text_model.encoder.layers.2.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te0_text_model.encoder.layers.2.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": "te0_text_model.encoder.layers.2.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": "te0_text_model.encoder.layers.2.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": "te0_text_model.encoder.layers.2.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": "te0_text_model.encoder.layers.2.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": "te0_text_model.encoder.layers.2.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": "te0_text_model.encoder.layers.2.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": "te0_text_model.encoder.layers.2.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.bias": "te0_text_model.encoder.layers.3.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.weight": "te0_text_model.encoder.layers.3.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.bias": "te0_text_model.encoder.layers.3.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.weight": "te0_text_model.encoder.layers.3.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.bias": "te0_text_model.encoder.layers.3.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.weight": "te0_text_model.encoder.layers.3.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.bias": "te0_text_model.encoder.layers.3.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.weight": "te0_text_model.encoder.layers.3.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te0_text_model.encoder.layers.3.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": "te0_text_model.encoder.layers.3.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": "te0_text_model.encoder.layers.3.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": "te0_text_model.encoder.layers.3.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": "te0_text_model.encoder.layers.3.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": "te0_text_model.encoder.layers.3.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": "te0_text_model.encoder.layers.3.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": "te0_text_model.encoder.layers.3.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.bias": "te0_text_model.encoder.layers.4.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.weight": "te0_text_model.encoder.layers.4.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.bias": "te0_text_model.encoder.layers.4.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.weight": "te0_text_model.encoder.layers.4.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.bias": "te0_text_model.encoder.layers.4.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.weight": "te0_text_model.encoder.layers.4.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.bias": "te0_text_model.encoder.layers.4.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.weight": "te0_text_model.encoder.layers.4.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": "te0_text_model.encoder.layers.4.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": "te0_text_model.encoder.layers.4.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": "te0_text_model.encoder.layers.4.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": "te0_text_model.encoder.layers.4.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": "te0_text_model.encoder.layers.4.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": "te0_text_model.encoder.layers.4.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": "te0_text_model.encoder.layers.4.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": "te0_text_model.encoder.layers.4.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.bias": "te0_text_model.encoder.layers.5.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.weight": "te0_text_model.encoder.layers.5.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.bias": "te0_text_model.encoder.layers.5.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.weight": "te0_text_model.encoder.layers.5.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.bias": "te0_text_model.encoder.layers.5.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.weight": "te0_text_model.encoder.layers.5.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.bias": "te0_text_model.encoder.layers.5.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.weight": "te0_text_model.encoder.layers.5.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": "te0_text_model.encoder.layers.5.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": "te0_text_model.encoder.layers.5.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": "te0_text_model.encoder.layers.5.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": "te0_text_model.encoder.layers.5.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": "te0_text_model.encoder.layers.5.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": "te0_text_model.encoder.layers.5.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": "te0_text_model.encoder.layers.5.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": "te0_text_model.encoder.layers.5.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.bias": "te0_text_model.encoder.layers.6.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.weight": "te0_text_model.encoder.layers.6.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.bias": "te0_text_model.encoder.layers.6.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.weight": "te0_text_model.encoder.layers.6.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.bias": "te0_text_model.encoder.layers.6.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.weight": "te0_text_model.encoder.layers.6.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.bias": "te0_text_model.encoder.layers.6.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.weight": "te0_text_model.encoder.layers.6.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": "te0_text_model.encoder.layers.6.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": "te0_text_model.encoder.layers.6.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": "te0_text_model.encoder.layers.6.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": "te0_text_model.encoder.layers.6.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": "te0_text_model.encoder.layers.6.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": "te0_text_model.encoder.layers.6.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": "te0_text_model.encoder.layers.6.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": "te0_text_model.encoder.layers.6.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.bias": "te0_text_model.encoder.layers.7.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.weight": "te0_text_model.encoder.layers.7.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.bias": "te0_text_model.encoder.layers.7.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.weight": "te0_text_model.encoder.layers.7.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.bias": "te0_text_model.encoder.layers.7.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.weight": "te0_text_model.encoder.layers.7.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.bias": "te0_text_model.encoder.layers.7.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.weight": "te0_text_model.encoder.layers.7.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": "te0_text_model.encoder.layers.7.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": "te0_text_model.encoder.layers.7.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": "te0_text_model.encoder.layers.7.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": "te0_text_model.encoder.layers.7.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": "te0_text_model.encoder.layers.7.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": "te0_text_model.encoder.layers.7.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": "te0_text_model.encoder.layers.7.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": "te0_text_model.encoder.layers.7.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.bias": "te0_text_model.encoder.layers.8.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.weight": "te0_text_model.encoder.layers.8.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.bias": "te0_text_model.encoder.layers.8.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.weight": "te0_text_model.encoder.layers.8.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.bias": "te0_text_model.encoder.layers.8.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.weight": "te0_text_model.encoder.layers.8.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.bias": "te0_text_model.encoder.layers.8.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.weight": "te0_text_model.encoder.layers.8.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": "te0_text_model.encoder.layers.8.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": "te0_text_model.encoder.layers.8.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": "te0_text_model.encoder.layers.8.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": "te0_text_model.encoder.layers.8.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": "te0_text_model.encoder.layers.8.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": "te0_text_model.encoder.layers.8.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": "te0_text_model.encoder.layers.8.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": "te0_text_model.encoder.layers.8.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.bias": "te0_text_model.encoder.layers.9.layer_norm1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.weight": "te0_text_model.encoder.layers.9.layer_norm1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.bias": "te0_text_model.encoder.layers.9.layer_norm2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.weight": "te0_text_model.encoder.layers.9.layer_norm2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.bias": "te0_text_model.encoder.layers.9.mlp.fc1.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.weight": "te0_text_model.encoder.layers.9.mlp.fc1.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.bias": "te0_text_model.encoder.layers.9.mlp.fc2.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.weight": "te0_text_model.encoder.layers.9.mlp.fc2.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": "te0_text_model.encoder.layers.9.self_attn.k_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": "te0_text_model.encoder.layers.9.self_attn.k_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": "te0_text_model.encoder.layers.9.self_attn.out_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": "te0_text_model.encoder.layers.9.self_attn.out_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": "te0_text_model.encoder.layers.9.self_attn.q_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": "te0_text_model.encoder.layers.9.self_attn.q_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": "te0_text_model.encoder.layers.9.self_attn.v_proj.bias",
+        "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": "te0_text_model.encoder.layers.9.self_attn.v_proj.weight",
+        "conditioner.embedders.0.transformer.text_model.final_layer_norm.bias": "te0_text_model.final_layer_norm.bias",
+        "conditioner.embedders.0.transformer.text_model.final_layer_norm.weight": "te0_text_model.final_layer_norm.weight",
+        "conditioner.embedders.1.model.ln_final.bias": "te1_text_model.final_layer_norm.bias",
+        "conditioner.embedders.1.model.ln_final.weight": "te1_text_model.final_layer_norm.weight",
+        "conditioner.embedders.1.model.positional_embedding": "te1_text_model.embeddings.position_embedding.weight",
+        "conditioner.embedders.1.model.text_projection.weight": "te1_text_projection.weight",
+        "conditioner.embedders.1.model.token_embedding.weight": "te1_text_model.embeddings.token_embedding.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.bias": "te1_text_model.encoder.layers.0.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.weight": "te1_text_model.encoder.layers.0.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.bias": "te1_text_model.encoder.layers.0.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.weight": "te1_text_model.encoder.layers.0.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.bias": "te1_text_model.encoder.layers.0.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.weight": "te1_text_model.encoder.layers.0.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.bias": "te1_text_model.encoder.layers.0.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.weight": "te1_text_model.encoder.layers.0.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.bias": "te1_text_model.encoder.layers.0.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.weight": "te1_text_model.encoder.layers.0.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.bias": "te1_text_model.encoder.layers.1.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.weight": "te1_text_model.encoder.layers.1.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.bias": "te1_text_model.encoder.layers.1.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.weight": "te1_text_model.encoder.layers.1.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.1.ln_2.bias": "te1_text_model.encoder.layers.1.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.1.ln_2.weight": "te1_text_model.encoder.layers.1.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.bias": "te1_text_model.encoder.layers.1.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.weight": "te1_text_model.encoder.layers.1.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.bias": "te1_text_model.encoder.layers.1.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.weight": "te1_text_model.encoder.layers.1.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.bias": "te1_text_model.encoder.layers.10.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.bias": "te1_text_model.encoder.layers.10.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.weight": "te1_text_model.encoder.layers.10.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.bias": "te1_text_model.encoder.layers.10.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.weight": "te1_text_model.encoder.layers.10.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.bias": "te1_text_model.encoder.layers.10.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.weight": "te1_text_model.encoder.layers.10.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.bias": "te1_text_model.encoder.layers.10.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.weight": "te1_text_model.encoder.layers.10.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.bias": "te1_text_model.encoder.layers.11.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.bias": "te1_text_model.encoder.layers.11.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.weight": "te1_text_model.encoder.layers.11.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.bias": "te1_text_model.encoder.layers.11.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.weight": "te1_text_model.encoder.layers.11.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.bias": "te1_text_model.encoder.layers.11.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.weight": "te1_text_model.encoder.layers.11.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.bias": "te1_text_model.encoder.layers.11.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.weight": "te1_text_model.encoder.layers.11.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.bias": "te1_text_model.encoder.layers.12.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.bias": "te1_text_model.encoder.layers.12.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.weight": "te1_text_model.encoder.layers.12.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.bias": "te1_text_model.encoder.layers.12.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.weight": "te1_text_model.encoder.layers.12.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.bias": "te1_text_model.encoder.layers.12.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.weight": "te1_text_model.encoder.layers.12.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.bias": "te1_text_model.encoder.layers.12.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.weight": "te1_text_model.encoder.layers.12.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.bias": "te1_text_model.encoder.layers.13.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.bias": "te1_text_model.encoder.layers.13.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.weight": "te1_text_model.encoder.layers.13.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.bias": "te1_text_model.encoder.layers.13.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.weight": "te1_text_model.encoder.layers.13.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.bias": "te1_text_model.encoder.layers.13.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.weight": "te1_text_model.encoder.layers.13.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.bias": "te1_text_model.encoder.layers.13.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.weight": "te1_text_model.encoder.layers.13.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.bias": "te1_text_model.encoder.layers.14.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.weight": "te1_text_model.encoder.layers.14.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.bias": "te1_text_model.encoder.layers.14.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.weight": "te1_text_model.encoder.layers.14.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.bias": "te1_text_model.encoder.layers.14.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.weight": "te1_text_model.encoder.layers.14.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.bias": "te1_text_model.encoder.layers.14.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.weight": "te1_text_model.encoder.layers.14.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.bias": "te1_text_model.encoder.layers.14.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.weight": "te1_text_model.encoder.layers.14.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.bias": "te1_text_model.encoder.layers.15.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.15.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.bias": "te1_text_model.encoder.layers.15.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.weight": "te1_text_model.encoder.layers.15.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.bias": "te1_text_model.encoder.layers.15.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.weight": "te1_text_model.encoder.layers.15.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.bias": "te1_text_model.encoder.layers.15.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.weight": "te1_text_model.encoder.layers.15.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.bias": "te1_text_model.encoder.layers.15.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.weight": "te1_text_model.encoder.layers.15.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.bias": "te1_text_model.encoder.layers.16.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.weight": "te1_text_model.encoder.layers.16.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.bias": "te1_text_model.encoder.layers.16.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.weight": "te1_text_model.encoder.layers.16.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.bias": "te1_text_model.encoder.layers.16.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.weight": "te1_text_model.encoder.layers.16.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.bias": "te1_text_model.encoder.layers.16.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.weight": "te1_text_model.encoder.layers.16.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.bias": "te1_text_model.encoder.layers.16.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.weight": "te1_text_model.encoder.layers.16.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.bias": "te1_text_model.encoder.layers.17.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.weight": "te1_text_model.encoder.layers.17.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.bias": "te1_text_model.encoder.layers.17.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.weight": "te1_text_model.encoder.layers.17.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.bias": "te1_text_model.encoder.layers.17.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.weight": "te1_text_model.encoder.layers.17.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.bias": "te1_text_model.encoder.layers.17.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.weight": "te1_text_model.encoder.layers.17.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.bias": "te1_text_model.encoder.layers.17.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.weight": "te1_text_model.encoder.layers.17.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.bias": "te1_text_model.encoder.layers.18.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.weight": "te1_text_model.encoder.layers.18.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.bias": "te1_text_model.encoder.layers.18.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.weight": "te1_text_model.encoder.layers.18.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.bias": "te1_text_model.encoder.layers.18.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.weight": "te1_text_model.encoder.layers.18.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.bias": "te1_text_model.encoder.layers.18.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.weight": "te1_text_model.encoder.layers.18.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.bias": "te1_text_model.encoder.layers.18.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.weight": "te1_text_model.encoder.layers.18.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.bias": "te1_text_model.encoder.layers.19.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.weight": "te1_text_model.encoder.layers.19.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.bias": "te1_text_model.encoder.layers.19.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.weight": "te1_text_model.encoder.layers.19.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.bias": "te1_text_model.encoder.layers.19.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.weight": "te1_text_model.encoder.layers.19.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.bias": "te1_text_model.encoder.layers.19.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.weight": "te1_text_model.encoder.layers.19.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.bias": "te1_text_model.encoder.layers.19.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.weight": "te1_text_model.encoder.layers.19.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.bias": "te1_text_model.encoder.layers.2.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.bias": "te1_text_model.encoder.layers.2.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.weight": "te1_text_model.encoder.layers.2.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.bias": "te1_text_model.encoder.layers.2.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.weight": "te1_text_model.encoder.layers.2.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.bias": "te1_text_model.encoder.layers.2.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.weight": "te1_text_model.encoder.layers.2.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.bias": "te1_text_model.encoder.layers.2.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.weight": "te1_text_model.encoder.layers.2.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.bias": "te1_text_model.encoder.layers.20.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.weight": "te1_text_model.encoder.layers.20.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.bias": "te1_text_model.encoder.layers.20.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.weight": "te1_text_model.encoder.layers.20.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.bias": "te1_text_model.encoder.layers.20.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.weight": "te1_text_model.encoder.layers.20.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.bias": "te1_text_model.encoder.layers.20.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.weight": "te1_text_model.encoder.layers.20.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.bias": "te1_text_model.encoder.layers.20.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.weight": "te1_text_model.encoder.layers.20.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.bias": "te1_text_model.encoder.layers.21.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.weight": "te1_text_model.encoder.layers.21.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.bias": "te1_text_model.encoder.layers.21.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.weight": "te1_text_model.encoder.layers.21.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.bias": "te1_text_model.encoder.layers.21.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.weight": "te1_text_model.encoder.layers.21.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.bias": "te1_text_model.encoder.layers.21.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.weight": "te1_text_model.encoder.layers.21.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.bias": "te1_text_model.encoder.layers.21.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.weight": "te1_text_model.encoder.layers.21.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.bias": "te1_text_model.encoder.layers.22.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.weight": "te1_text_model.encoder.layers.22.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.bias": "te1_text_model.encoder.layers.22.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.weight": "te1_text_model.encoder.layers.22.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.bias": "te1_text_model.encoder.layers.22.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.weight": "te1_text_model.encoder.layers.22.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.bias": "te1_text_model.encoder.layers.22.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.weight": "te1_text_model.encoder.layers.22.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.bias": "te1_text_model.encoder.layers.22.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.weight": "te1_text_model.encoder.layers.22.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.bias": "te1_text_model.encoder.layers.23.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.weight": "te1_text_model.encoder.layers.23.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.bias": "te1_text_model.encoder.layers.23.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.weight": "te1_text_model.encoder.layers.23.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.bias": "te1_text_model.encoder.layers.23.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.weight": "te1_text_model.encoder.layers.23.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.bias": "te1_text_model.encoder.layers.23.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.weight": "te1_text_model.encoder.layers.23.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.bias": "te1_text_model.encoder.layers.23.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.weight": "te1_text_model.encoder.layers.23.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.bias": "te1_text_model.encoder.layers.24.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.weight": "te1_text_model.encoder.layers.24.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.bias": "te1_text_model.encoder.layers.24.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.weight": "te1_text_model.encoder.layers.24.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.bias": "te1_text_model.encoder.layers.24.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.weight": "te1_text_model.encoder.layers.24.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.bias": "te1_text_model.encoder.layers.24.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.weight": "te1_text_model.encoder.layers.24.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.bias": "te1_text_model.encoder.layers.24.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.weight": "te1_text_model.encoder.layers.24.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.bias": "te1_text_model.encoder.layers.25.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.weight": "te1_text_model.encoder.layers.25.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.bias": "te1_text_model.encoder.layers.25.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.weight": "te1_text_model.encoder.layers.25.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.bias": "te1_text_model.encoder.layers.25.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.weight": "te1_text_model.encoder.layers.25.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.bias": "te1_text_model.encoder.layers.25.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.weight": "te1_text_model.encoder.layers.25.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.bias": "te1_text_model.encoder.layers.25.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.weight": "te1_text_model.encoder.layers.25.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.bias": "te1_text_model.encoder.layers.26.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.weight": "te1_text_model.encoder.layers.26.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.bias": "te1_text_model.encoder.layers.26.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.weight": "te1_text_model.encoder.layers.26.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.bias": "te1_text_model.encoder.layers.26.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.weight": "te1_text_model.encoder.layers.26.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.bias": "te1_text_model.encoder.layers.26.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.weight": "te1_text_model.encoder.layers.26.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.bias": "te1_text_model.encoder.layers.26.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.weight": "te1_text_model.encoder.layers.26.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.bias": "te1_text_model.encoder.layers.27.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.weight": "te1_text_model.encoder.layers.27.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.bias": "te1_text_model.encoder.layers.27.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.weight": "te1_text_model.encoder.layers.27.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.bias": "te1_text_model.encoder.layers.27.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.weight": "te1_text_model.encoder.layers.27.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.bias": "te1_text_model.encoder.layers.27.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.weight": "te1_text_model.encoder.layers.27.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.bias": "te1_text_model.encoder.layers.27.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.weight": "te1_text_model.encoder.layers.27.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.bias": "te1_text_model.encoder.layers.28.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.weight": "te1_text_model.encoder.layers.28.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.bias": "te1_text_model.encoder.layers.28.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.weight": "te1_text_model.encoder.layers.28.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.bias": "te1_text_model.encoder.layers.28.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.weight": "te1_text_model.encoder.layers.28.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.bias": "te1_text_model.encoder.layers.28.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.weight": "te1_text_model.encoder.layers.28.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.bias": "te1_text_model.encoder.layers.28.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.weight": "te1_text_model.encoder.layers.28.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.bias": "te1_text_model.encoder.layers.29.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.weight": "te1_text_model.encoder.layers.29.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.bias": "te1_text_model.encoder.layers.29.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.weight": "te1_text_model.encoder.layers.29.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.bias": "te1_text_model.encoder.layers.29.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.weight": "te1_text_model.encoder.layers.29.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.bias": "te1_text_model.encoder.layers.29.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.weight": "te1_text_model.encoder.layers.29.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.bias": "te1_text_model.encoder.layers.29.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.weight": "te1_text_model.encoder.layers.29.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.bias": "te1_text_model.encoder.layers.3.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.bias": "te1_text_model.encoder.layers.3.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.weight": "te1_text_model.encoder.layers.3.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.bias": "te1_text_model.encoder.layers.3.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.weight": "te1_text_model.encoder.layers.3.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.bias": "te1_text_model.encoder.layers.3.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.weight": "te1_text_model.encoder.layers.3.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.bias": "te1_text_model.encoder.layers.3.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.weight": "te1_text_model.encoder.layers.3.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.bias": "te1_text_model.encoder.layers.30.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.weight": "te1_text_model.encoder.layers.30.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.bias": "te1_text_model.encoder.layers.30.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.weight": "te1_text_model.encoder.layers.30.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.bias": "te1_text_model.encoder.layers.30.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.weight": "te1_text_model.encoder.layers.30.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.bias": "te1_text_model.encoder.layers.30.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.weight": "te1_text_model.encoder.layers.30.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.bias": "te1_text_model.encoder.layers.30.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.weight": "te1_text_model.encoder.layers.30.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.bias": "te1_text_model.encoder.layers.31.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.weight": "te1_text_model.encoder.layers.31.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.bias": "te1_text_model.encoder.layers.31.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.weight": "te1_text_model.encoder.layers.31.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.bias": "te1_text_model.encoder.layers.31.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.weight": "te1_text_model.encoder.layers.31.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.bias": "te1_text_model.encoder.layers.31.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.weight": "te1_text_model.encoder.layers.31.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.bias": "te1_text_model.encoder.layers.31.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.weight": "te1_text_model.encoder.layers.31.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.bias": "te1_text_model.encoder.layers.4.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.bias": "te1_text_model.encoder.layers.4.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.weight": "te1_text_model.encoder.layers.4.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.bias": "te1_text_model.encoder.layers.4.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.weight": "te1_text_model.encoder.layers.4.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.bias": "te1_text_model.encoder.layers.4.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.weight": "te1_text_model.encoder.layers.4.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.bias": "te1_text_model.encoder.layers.4.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.weight": "te1_text_model.encoder.layers.4.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.bias": "te1_text_model.encoder.layers.5.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.bias": "te1_text_model.encoder.layers.5.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.weight": "te1_text_model.encoder.layers.5.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.bias": "te1_text_model.encoder.layers.5.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.weight": "te1_text_model.encoder.layers.5.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.bias": "te1_text_model.encoder.layers.5.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.weight": "te1_text_model.encoder.layers.5.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.bias": "te1_text_model.encoder.layers.5.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.weight": "te1_text_model.encoder.layers.5.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.bias": "te1_text_model.encoder.layers.6.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.bias": "te1_text_model.encoder.layers.6.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.weight": "te1_text_model.encoder.layers.6.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.bias": "te1_text_model.encoder.layers.6.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.weight": "te1_text_model.encoder.layers.6.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.bias": "te1_text_model.encoder.layers.6.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.weight": "te1_text_model.encoder.layers.6.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.bias": "te1_text_model.encoder.layers.6.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.weight": "te1_text_model.encoder.layers.6.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.bias": "te1_text_model.encoder.layers.7.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.bias": "te1_text_model.encoder.layers.7.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.weight": "te1_text_model.encoder.layers.7.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.bias": "te1_text_model.encoder.layers.7.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.weight": "te1_text_model.encoder.layers.7.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.bias": "te1_text_model.encoder.layers.7.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.weight": "te1_text_model.encoder.layers.7.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.bias": "te1_text_model.encoder.layers.7.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.weight": "te1_text_model.encoder.layers.7.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.bias": "te1_text_model.encoder.layers.8.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.bias": "te1_text_model.encoder.layers.8.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.weight": "te1_text_model.encoder.layers.8.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.bias": "te1_text_model.encoder.layers.8.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.weight": "te1_text_model.encoder.layers.8.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.bias": "te1_text_model.encoder.layers.8.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.weight": "te1_text_model.encoder.layers.8.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.bias": "te1_text_model.encoder.layers.8.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.weight": "te1_text_model.encoder.layers.8.mlp.fc2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.bias": "te1_text_model.encoder.layers.9.self_attn.out_proj.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.bias": "te1_text_model.encoder.layers.9.layer_norm1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.weight": "te1_text_model.encoder.layers.9.layer_norm1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.bias": "te1_text_model.encoder.layers.9.layer_norm2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.weight": "te1_text_model.encoder.layers.9.layer_norm2.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.bias": "te1_text_model.encoder.layers.9.mlp.fc1.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.weight": "te1_text_model.encoder.layers.9.mlp.fc1.weight",
+        "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias": "te1_text_model.encoder.layers.9.mlp.fc2.bias",
+        "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.weight": "te1_text_model.encoder.layers.9.mlp.fc2.weight",
+        "first_stage_model.decoder.conv_in.bias": "vae_decoder.conv_in.bias",
+        "first_stage_model.decoder.conv_in.weight": "vae_decoder.conv_in.weight",
+        "first_stage_model.decoder.conv_out.bias": "vae_decoder.conv_out.bias",
+        "first_stage_model.decoder.conv_out.weight": "vae_decoder.conv_out.weight",
+        "first_stage_model.decoder.mid.attn_1.k.bias": "vae_decoder.mid_block.attentions.0.to_k.bias",
+        "first_stage_model.decoder.mid.attn_1.k.weight": "vae_decoder.mid_block.attentions.0.to_k.weight",
+        "first_stage_model.decoder.mid.attn_1.norm.bias": "vae_decoder.mid_block.attentions.0.group_norm.bias",
+        "first_stage_model.decoder.mid.attn_1.norm.weight": "vae_decoder.mid_block.attentions.0.group_norm.weight",
+        "first_stage_model.decoder.mid.attn_1.proj_out.bias": "vae_decoder.mid_block.attentions.0.to_out.0.bias",
+        "first_stage_model.decoder.mid.attn_1.proj_out.weight": "vae_decoder.mid_block.attentions.0.to_out.0.weight",
+        "first_stage_model.decoder.mid.attn_1.q.bias": "vae_decoder.mid_block.attentions.0.to_q.bias",
+        "first_stage_model.decoder.mid.attn_1.q.weight": "vae_decoder.mid_block.attentions.0.to_q.weight",
+        "first_stage_model.decoder.mid.attn_1.v.bias": "vae_decoder.mid_block.attentions.0.to_v.bias",
+        "first_stage_model.decoder.mid.attn_1.v.weight": "vae_decoder.mid_block.attentions.0.to_v.weight",
+        "first_stage_model.decoder.mid.block_1.conv1.bias": "vae_decoder.mid_block.resnets.0.conv1.bias",
+        "first_stage_model.decoder.mid.block_1.conv1.weight": "vae_decoder.mid_block.resnets.0.conv1.weight",
+        "first_stage_model.decoder.mid.block_1.conv2.bias": "vae_decoder.mid_block.resnets.0.conv2.bias",
+        "first_stage_model.decoder.mid.block_1.conv2.weight": "vae_decoder.mid_block.resnets.0.conv2.weight",
+        "first_stage_model.decoder.mid.block_1.norm1.bias": "vae_decoder.mid_block.resnets.0.norm1.bias",
+        "first_stage_model.decoder.mid.block_1.norm1.weight": "vae_decoder.mid_block.resnets.0.norm1.weight",
+        "first_stage_model.decoder.mid.block_1.norm2.bias": "vae_decoder.mid_block.resnets.0.norm2.bias",
+        "first_stage_model.decoder.mid.block_1.norm2.weight": "vae_decoder.mid_block.resnets.0.norm2.weight",
+        "first_stage_model.decoder.mid.block_2.conv1.bias": "vae_decoder.mid_block.resnets.1.conv1.bias",
+        "first_stage_model.decoder.mid.block_2.conv1.weight": "vae_decoder.mid_block.resnets.1.conv1.weight",
+        "first_stage_model.decoder.mid.block_2.conv2.bias": "vae_decoder.mid_block.resnets.1.conv2.bias",
+        "first_stage_model.decoder.mid.block_2.conv2.weight": "vae_decoder.mid_block.resnets.1.conv2.weight",
+        "first_stage_model.decoder.mid.block_2.norm1.bias": "vae_decoder.mid_block.resnets.1.norm1.bias",
+        "first_stage_model.decoder.mid.block_2.norm1.weight": "vae_decoder.mid_block.resnets.1.norm1.weight",
+        "first_stage_model.decoder.mid.block_2.norm2.bias": "vae_decoder.mid_block.resnets.1.norm2.bias",
+        "first_stage_model.decoder.mid.block_2.norm2.weight": "vae_decoder.mid_block.resnets.1.norm2.weight",
+        "first_stage_model.decoder.norm_out.bias": "vae_decoder.conv_norm_out.bias",
+        "first_stage_model.decoder.norm_out.weight": "vae_decoder.conv_norm_out.weight",
+        "first_stage_model.decoder.up.0.block.0.conv1.bias": "vae_decoder.up_blocks.3.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.0.block.0.conv1.weight": "vae_decoder.up_blocks.3.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.0.block.0.conv2.bias": "vae_decoder.up_blocks.3.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.0.block.0.conv2.weight": "vae_decoder.up_blocks.3.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.bias",
+        "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.weight",
+        "first_stage_model.decoder.up.0.block.0.norm1.bias": "vae_decoder.up_blocks.3.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.0.block.0.norm1.weight": "vae_decoder.up_blocks.3.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.0.block.0.norm2.bias": "vae_decoder.up_blocks.3.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.0.block.0.norm2.weight": "vae_decoder.up_blocks.3.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.0.block.1.conv1.bias": "vae_decoder.up_blocks.3.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.0.block.1.conv1.weight": "vae_decoder.up_blocks.3.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.0.block.1.conv2.bias": "vae_decoder.up_blocks.3.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.0.block.1.conv2.weight": "vae_decoder.up_blocks.3.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.0.block.1.norm1.bias": "vae_decoder.up_blocks.3.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.0.block.1.norm1.weight": "vae_decoder.up_blocks.3.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.0.block.1.norm2.bias": "vae_decoder.up_blocks.3.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.0.block.1.norm2.weight": "vae_decoder.up_blocks.3.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.0.block.2.conv1.bias": "vae_decoder.up_blocks.3.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.0.block.2.conv1.weight": "vae_decoder.up_blocks.3.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.0.block.2.conv2.bias": "vae_decoder.up_blocks.3.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.0.block.2.conv2.weight": "vae_decoder.up_blocks.3.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.0.block.2.norm1.bias": "vae_decoder.up_blocks.3.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.0.block.2.norm1.weight": "vae_decoder.up_blocks.3.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.0.block.2.norm2.bias": "vae_decoder.up_blocks.3.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.0.block.2.norm2.weight": "vae_decoder.up_blocks.3.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.1.block.0.conv1.bias": "vae_decoder.up_blocks.2.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.1.block.0.conv1.weight": "vae_decoder.up_blocks.2.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.1.block.0.conv2.bias": "vae_decoder.up_blocks.2.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.1.block.0.conv2.weight": "vae_decoder.up_blocks.2.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.bias",
+        "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.weight",
+        "first_stage_model.decoder.up.1.block.0.norm1.bias": "vae_decoder.up_blocks.2.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.1.block.0.norm1.weight": "vae_decoder.up_blocks.2.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.1.block.0.norm2.bias": "vae_decoder.up_blocks.2.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.1.block.0.norm2.weight": "vae_decoder.up_blocks.2.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.1.block.1.conv1.bias": "vae_decoder.up_blocks.2.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.1.block.1.conv1.weight": "vae_decoder.up_blocks.2.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.1.block.1.conv2.bias": "vae_decoder.up_blocks.2.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.1.block.1.conv2.weight": "vae_decoder.up_blocks.2.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.1.block.1.norm1.bias": "vae_decoder.up_blocks.2.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.1.block.1.norm1.weight": "vae_decoder.up_blocks.2.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.1.block.1.norm2.bias": "vae_decoder.up_blocks.2.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.1.block.1.norm2.weight": "vae_decoder.up_blocks.2.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.1.block.2.conv1.bias": "vae_decoder.up_blocks.2.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.1.block.2.conv1.weight": "vae_decoder.up_blocks.2.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.1.block.2.conv2.bias": "vae_decoder.up_blocks.2.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.1.block.2.conv2.weight": "vae_decoder.up_blocks.2.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.1.block.2.norm1.bias": "vae_decoder.up_blocks.2.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.1.block.2.norm1.weight": "vae_decoder.up_blocks.2.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.1.block.2.norm2.bias": "vae_decoder.up_blocks.2.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.1.block.2.norm2.weight": "vae_decoder.up_blocks.2.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.1.upsample.conv.bias": "vae_decoder.up_blocks.2.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.1.upsample.conv.weight": "vae_decoder.up_blocks.2.upsamplers.0.conv.weight",
+        "first_stage_model.decoder.up.2.block.0.conv1.bias": "vae_decoder.up_blocks.1.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.2.block.0.conv1.weight": "vae_decoder.up_blocks.1.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.2.block.0.conv2.bias": "vae_decoder.up_blocks.1.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.2.block.0.conv2.weight": "vae_decoder.up_blocks.1.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.2.block.0.norm1.bias": "vae_decoder.up_blocks.1.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.2.block.0.norm1.weight": "vae_decoder.up_blocks.1.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.2.block.0.norm2.bias": "vae_decoder.up_blocks.1.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.2.block.0.norm2.weight": "vae_decoder.up_blocks.1.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.2.block.1.conv1.bias": "vae_decoder.up_blocks.1.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.2.block.1.conv1.weight": "vae_decoder.up_blocks.1.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.2.block.1.conv2.bias": "vae_decoder.up_blocks.1.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.2.block.1.conv2.weight": "vae_decoder.up_blocks.1.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.2.block.1.norm1.bias": "vae_decoder.up_blocks.1.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.2.block.1.norm1.weight": "vae_decoder.up_blocks.1.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.2.block.1.norm2.bias": "vae_decoder.up_blocks.1.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.2.block.1.norm2.weight": "vae_decoder.up_blocks.1.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.2.block.2.conv1.bias": "vae_decoder.up_blocks.1.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.2.block.2.conv1.weight": "vae_decoder.up_blocks.1.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.2.block.2.conv2.bias": "vae_decoder.up_blocks.1.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.2.block.2.conv2.weight": "vae_decoder.up_blocks.1.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.2.block.2.norm1.bias": "vae_decoder.up_blocks.1.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.2.block.2.norm1.weight": "vae_decoder.up_blocks.1.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.2.block.2.norm2.bias": "vae_decoder.up_blocks.1.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.2.block.2.norm2.weight": "vae_decoder.up_blocks.1.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.2.upsample.conv.bias": "vae_decoder.up_blocks.1.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.2.upsample.conv.weight": "vae_decoder.up_blocks.1.upsamplers.0.conv.weight",
+        "first_stage_model.decoder.up.3.block.0.conv1.bias": "vae_decoder.up_blocks.0.resnets.0.conv1.bias",
+        "first_stage_model.decoder.up.3.block.0.conv1.weight": "vae_decoder.up_blocks.0.resnets.0.conv1.weight",
+        "first_stage_model.decoder.up.3.block.0.conv2.bias": "vae_decoder.up_blocks.0.resnets.0.conv2.bias",
+        "first_stage_model.decoder.up.3.block.0.conv2.weight": "vae_decoder.up_blocks.0.resnets.0.conv2.weight",
+        "first_stage_model.decoder.up.3.block.0.norm1.bias": "vae_decoder.up_blocks.0.resnets.0.norm1.bias",
+        "first_stage_model.decoder.up.3.block.0.norm1.weight": "vae_decoder.up_blocks.0.resnets.0.norm1.weight",
+        "first_stage_model.decoder.up.3.block.0.norm2.bias": "vae_decoder.up_blocks.0.resnets.0.norm2.bias",
+        "first_stage_model.decoder.up.3.block.0.norm2.weight": "vae_decoder.up_blocks.0.resnets.0.norm2.weight",
+        "first_stage_model.decoder.up.3.block.1.conv1.bias": "vae_decoder.up_blocks.0.resnets.1.conv1.bias",
+        "first_stage_model.decoder.up.3.block.1.conv1.weight": "vae_decoder.up_blocks.0.resnets.1.conv1.weight",
+        "first_stage_model.decoder.up.3.block.1.conv2.bias": "vae_decoder.up_blocks.0.resnets.1.conv2.bias",
+        "first_stage_model.decoder.up.3.block.1.conv2.weight": "vae_decoder.up_blocks.0.resnets.1.conv2.weight",
+        "first_stage_model.decoder.up.3.block.1.norm1.bias": "vae_decoder.up_blocks.0.resnets.1.norm1.bias",
+        "first_stage_model.decoder.up.3.block.1.norm1.weight": "vae_decoder.up_blocks.0.resnets.1.norm1.weight",
+        "first_stage_model.decoder.up.3.block.1.norm2.bias": "vae_decoder.up_blocks.0.resnets.1.norm2.bias",
+        "first_stage_model.decoder.up.3.block.1.norm2.weight": "vae_decoder.up_blocks.0.resnets.1.norm2.weight",
+        "first_stage_model.decoder.up.3.block.2.conv1.bias": "vae_decoder.up_blocks.0.resnets.2.conv1.bias",
+        "first_stage_model.decoder.up.3.block.2.conv1.weight": "vae_decoder.up_blocks.0.resnets.2.conv1.weight",
+        "first_stage_model.decoder.up.3.block.2.conv2.bias": "vae_decoder.up_blocks.0.resnets.2.conv2.bias",
+        "first_stage_model.decoder.up.3.block.2.conv2.weight": "vae_decoder.up_blocks.0.resnets.2.conv2.weight",
+        "first_stage_model.decoder.up.3.block.2.norm1.bias": "vae_decoder.up_blocks.0.resnets.2.norm1.bias",
+        "first_stage_model.decoder.up.3.block.2.norm1.weight": "vae_decoder.up_blocks.0.resnets.2.norm1.weight",
+        "first_stage_model.decoder.up.3.block.2.norm2.bias": "vae_decoder.up_blocks.0.resnets.2.norm2.bias",
+        "first_stage_model.decoder.up.3.block.2.norm2.weight": "vae_decoder.up_blocks.0.resnets.2.norm2.weight",
+        "first_stage_model.decoder.up.3.upsample.conv.bias": "vae_decoder.up_blocks.0.upsamplers.0.conv.bias",
+        "first_stage_model.decoder.up.3.upsample.conv.weight": "vae_decoder.up_blocks.0.upsamplers.0.conv.weight",
+        "first_stage_model.encoder.conv_in.bias": "vae_encoder.conv_in.bias",
+        "first_stage_model.encoder.conv_in.weight": "vae_encoder.conv_in.weight",
+        "first_stage_model.encoder.conv_out.bias": "vae_encoder.conv_out.bias",
+        "first_stage_model.encoder.conv_out.weight": "vae_encoder.conv_out.weight",
+        "first_stage_model.encoder.down.0.block.0.conv1.bias": "vae_encoder.down_blocks.0.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.0.block.0.conv1.weight": "vae_encoder.down_blocks.0.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.0.block.0.conv2.bias": "vae_encoder.down_blocks.0.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.0.block.0.conv2.weight": "vae_encoder.down_blocks.0.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.0.block.0.norm1.bias": "vae_encoder.down_blocks.0.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.0.block.0.norm1.weight": "vae_encoder.down_blocks.0.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.0.block.0.norm2.bias": "vae_encoder.down_blocks.0.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.0.block.0.norm2.weight": "vae_encoder.down_blocks.0.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.0.block.1.conv1.bias": "vae_encoder.down_blocks.0.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.0.block.1.conv1.weight": "vae_encoder.down_blocks.0.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.0.block.1.conv2.bias": "vae_encoder.down_blocks.0.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.0.block.1.conv2.weight": "vae_encoder.down_blocks.0.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.0.block.1.norm1.bias": "vae_encoder.down_blocks.0.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.0.block.1.norm1.weight": "vae_encoder.down_blocks.0.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.0.block.1.norm2.bias": "vae_encoder.down_blocks.0.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.0.block.1.norm2.weight": "vae_encoder.down_blocks.0.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.0.downsample.conv.bias": "vae_encoder.down_blocks.0.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.0.downsample.conv.weight": "vae_encoder.down_blocks.0.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.1.block.0.conv1.bias": "vae_encoder.down_blocks.1.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.1.block.0.conv1.weight": "vae_encoder.down_blocks.1.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.1.block.0.conv2.bias": "vae_encoder.down_blocks.1.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.1.block.0.conv2.weight": "vae_encoder.down_blocks.1.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.bias",
+        "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.weight",
+        "first_stage_model.encoder.down.1.block.0.norm1.bias": "vae_encoder.down_blocks.1.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.1.block.0.norm1.weight": "vae_encoder.down_blocks.1.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.1.block.0.norm2.bias": "vae_encoder.down_blocks.1.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.1.block.0.norm2.weight": "vae_encoder.down_blocks.1.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.1.block.1.conv1.bias": "vae_encoder.down_blocks.1.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.1.block.1.conv1.weight": "vae_encoder.down_blocks.1.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.1.block.1.conv2.bias": "vae_encoder.down_blocks.1.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.1.block.1.conv2.weight": "vae_encoder.down_blocks.1.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.1.block.1.norm1.bias": "vae_encoder.down_blocks.1.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.1.block.1.norm1.weight": "vae_encoder.down_blocks.1.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.1.block.1.norm2.bias": "vae_encoder.down_blocks.1.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.1.block.1.norm2.weight": "vae_encoder.down_blocks.1.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.1.downsample.conv.bias": "vae_encoder.down_blocks.1.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.1.downsample.conv.weight": "vae_encoder.down_blocks.1.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.2.block.0.conv1.bias": "vae_encoder.down_blocks.2.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.2.block.0.conv1.weight": "vae_encoder.down_blocks.2.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.2.block.0.conv2.bias": "vae_encoder.down_blocks.2.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.2.block.0.conv2.weight": "vae_encoder.down_blocks.2.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.bias",
+        "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.weight",
+        "first_stage_model.encoder.down.2.block.0.norm1.bias": "vae_encoder.down_blocks.2.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.2.block.0.norm1.weight": "vae_encoder.down_blocks.2.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.2.block.0.norm2.bias": "vae_encoder.down_blocks.2.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.2.block.0.norm2.weight": "vae_encoder.down_blocks.2.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.2.block.1.conv1.bias": "vae_encoder.down_blocks.2.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.2.block.1.conv1.weight": "vae_encoder.down_blocks.2.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.2.block.1.conv2.bias": "vae_encoder.down_blocks.2.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.2.block.1.conv2.weight": "vae_encoder.down_blocks.2.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.2.block.1.norm1.bias": "vae_encoder.down_blocks.2.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.2.block.1.norm1.weight": "vae_encoder.down_blocks.2.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.2.block.1.norm2.bias": "vae_encoder.down_blocks.2.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.2.block.1.norm2.weight": "vae_encoder.down_blocks.2.resnets.1.norm2.weight",
+        "first_stage_model.encoder.down.2.downsample.conv.bias": "vae_encoder.down_blocks.2.downsamplers.0.conv.bias",
+        "first_stage_model.encoder.down.2.downsample.conv.weight": "vae_encoder.down_blocks.2.downsamplers.0.conv.weight",
+        "first_stage_model.encoder.down.3.block.0.conv1.bias": "vae_encoder.down_blocks.3.resnets.0.conv1.bias",
+        "first_stage_model.encoder.down.3.block.0.conv1.weight": "vae_encoder.down_blocks.3.resnets.0.conv1.weight",
+        "first_stage_model.encoder.down.3.block.0.conv2.bias": "vae_encoder.down_blocks.3.resnets.0.conv2.bias",
+        "first_stage_model.encoder.down.3.block.0.conv2.weight": "vae_encoder.down_blocks.3.resnets.0.conv2.weight",
+        "first_stage_model.encoder.down.3.block.0.norm1.bias": "vae_encoder.down_blocks.3.resnets.0.norm1.bias",
+        "first_stage_model.encoder.down.3.block.0.norm1.weight": "vae_encoder.down_blocks.3.resnets.0.norm1.weight",
+        "first_stage_model.encoder.down.3.block.0.norm2.bias": "vae_encoder.down_blocks.3.resnets.0.norm2.bias",
+        "first_stage_model.encoder.down.3.block.0.norm2.weight": "vae_encoder.down_blocks.3.resnets.0.norm2.weight",
+        "first_stage_model.encoder.down.3.block.1.conv1.bias": "vae_encoder.down_blocks.3.resnets.1.conv1.bias",
+        "first_stage_model.encoder.down.3.block.1.conv1.weight": "vae_encoder.down_blocks.3.resnets.1.conv1.weight",
+        "first_stage_model.encoder.down.3.block.1.conv2.bias": "vae_encoder.down_blocks.3.resnets.1.conv2.bias",
+        "first_stage_model.encoder.down.3.block.1.conv2.weight": "vae_encoder.down_blocks.3.resnets.1.conv2.weight",
+        "first_stage_model.encoder.down.3.block.1.norm1.bias": "vae_encoder.down_blocks.3.resnets.1.norm1.bias",
+        "first_stage_model.encoder.down.3.block.1.norm1.weight": "vae_encoder.down_blocks.3.resnets.1.norm1.weight",
+        "first_stage_model.encoder.down.3.block.1.norm2.bias": "vae_encoder.down_blocks.3.resnets.1.norm2.bias",
+        "first_stage_model.encoder.down.3.block.1.norm2.weight": "vae_encoder.down_blocks.3.resnets.1.norm2.weight",
+        "first_stage_model.encoder.mid.attn_1.k.bias": "vae_encoder.mid_block.attentions.0.to_k.bias",
+        "first_stage_model.encoder.mid.attn_1.k.weight": "vae_encoder.mid_block.attentions.0.to_k.weight",
+        "first_stage_model.encoder.mid.attn_1.norm.bias": "vae_encoder.mid_block.attentions.0.group_norm.bias",
+        "first_stage_model.encoder.mid.attn_1.norm.weight": "vae_encoder.mid_block.attentions.0.group_norm.weight",
+        "first_stage_model.encoder.mid.attn_1.proj_out.bias": "vae_encoder.mid_block.attentions.0.to_out.0.bias",
+        "first_stage_model.encoder.mid.attn_1.proj_out.weight": "vae_encoder.mid_block.attentions.0.to_out.0.weight",
+        "first_stage_model.encoder.mid.attn_1.q.bias": "vae_encoder.mid_block.attentions.0.to_q.bias",
+        "first_stage_model.encoder.mid.attn_1.q.weight": "vae_encoder.mid_block.attentions.0.to_q.weight",
+        "first_stage_model.encoder.mid.attn_1.v.bias": "vae_encoder.mid_block.attentions.0.to_v.bias",
+        "first_stage_model.encoder.mid.attn_1.v.weight": "vae_encoder.mid_block.attentions.0.to_v.weight",
+        "first_stage_model.encoder.mid.block_1.conv1.bias": "vae_encoder.mid_block.resnets.0.conv1.bias",
+        "first_stage_model.encoder.mid.block_1.conv1.weight": "vae_encoder.mid_block.resnets.0.conv1.weight",
+        "first_stage_model.encoder.mid.block_1.conv2.bias": "vae_encoder.mid_block.resnets.0.conv2.bias",
+        "first_stage_model.encoder.mid.block_1.conv2.weight": "vae_encoder.mid_block.resnets.0.conv2.weight",
+        "first_stage_model.encoder.mid.block_1.norm1.bias": "vae_encoder.mid_block.resnets.0.norm1.bias",
+        "first_stage_model.encoder.mid.block_1.norm1.weight": "vae_encoder.mid_block.resnets.0.norm1.weight",
+        "first_stage_model.encoder.mid.block_1.norm2.bias": "vae_encoder.mid_block.resnets.0.norm2.bias",
+        "first_stage_model.encoder.mid.block_1.norm2.weight": "vae_encoder.mid_block.resnets.0.norm2.weight",
+        "first_stage_model.encoder.mid.block_2.conv1.bias": "vae_encoder.mid_block.resnets.1.conv1.bias",
+        "first_stage_model.encoder.mid.block_2.conv1.weight": "vae_encoder.mid_block.resnets.1.conv1.weight",
+        "first_stage_model.encoder.mid.block_2.conv2.bias": "vae_encoder.mid_block.resnets.1.conv2.bias",
+        "first_stage_model.encoder.mid.block_2.conv2.weight": "vae_encoder.mid_block.resnets.1.conv2.weight",
+        "first_stage_model.encoder.mid.block_2.norm1.bias": "vae_encoder.mid_block.resnets.1.norm1.bias",
+        "first_stage_model.encoder.mid.block_2.norm1.weight": "vae_encoder.mid_block.resnets.1.norm1.weight",
+        "first_stage_model.encoder.mid.block_2.norm2.bias": "vae_encoder.mid_block.resnets.1.norm2.bias",
+        "first_stage_model.encoder.mid.block_2.norm2.weight": "vae_encoder.mid_block.resnets.1.norm2.weight",
+        "first_stage_model.encoder.norm_out.bias": "vae_encoder.conv_norm_out.bias",
+        "first_stage_model.encoder.norm_out.weight": "vae_encoder.conv_norm_out.weight",
+        "first_stage_model.post_quant_conv.bias": "vae_post_quant_conv.bias",
+        "first_stage_model.post_quant_conv.weight": "vae_post_quant_conv.weight",
+        "first_stage_model.quant_conv.bias": "vae_quant_conv.bias",
+        "first_stage_model.quant_conv.weight": "vae_quant_conv.weight",
+        "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias",
+        "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "unet_down_blocks.0.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "unet_down_blocks.0.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "unet_down_blocks.0.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "unet_down_blocks.0.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias",
+        "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "unet_down_blocks.1.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "unet_down_blocks.1.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "unet_down_blocks.1.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "unet_down_blocks.1.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.input_blocks.4.1.norm.bias": "unet_down_blocks.1.attentions.0.norm.bias",
+        "model.diffusion_model.input_blocks.4.1.norm.weight": "unet_down_blocks.1.attentions.0.norm.weight",
+        "model.diffusion_model.input_blocks.4.1.proj_in.bias": "unet_down_blocks.1.attentions.0.proj_in.bias",
+        "model.diffusion_model.input_blocks.4.1.proj_in.weight": "unet_down_blocks.1.attentions.0.proj_in.weight",
+        "model.diffusion_model.input_blocks.4.1.proj_out.bias": "unet_down_blocks.1.attentions.0.proj_out.bias",
+        "model.diffusion_model.input_blocks.4.1.proj_out.weight": "unet_down_blocks.1.attentions.0.proj_out.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "unet_down_blocks.1.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "unet_down_blocks.1.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.5.1.norm.bias": "unet_down_blocks.1.attentions.1.norm.bias",
+        "model.diffusion_model.input_blocks.5.1.norm.weight": "unet_down_blocks.1.attentions.1.norm.weight",
+        "model.diffusion_model.input_blocks.5.1.proj_in.bias": "unet_down_blocks.1.attentions.1.proj_in.bias",
+        "model.diffusion_model.input_blocks.5.1.proj_in.weight": "unet_down_blocks.1.attentions.1.proj_in.weight",
+        "model.diffusion_model.input_blocks.5.1.proj_out.bias": "unet_down_blocks.1.attentions.1.proj_out.bias",
+        "model.diffusion_model.input_blocks.5.1.proj_out.weight": "unet_down_blocks.1.attentions.1.proj_out.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias",
+        "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight",
+        "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias",
+        "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight",
+        "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "unet_down_blocks.2.resnets.0.conv2.bias",
+        "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "unet_down_blocks.2.resnets.0.conv2.weight",
+        "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "unet_down_blocks.2.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "unet_down_blocks.2.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.input_blocks.7.1.norm.bias": "unet_down_blocks.2.attentions.0.norm.bias",
+        "model.diffusion_model.input_blocks.7.1.norm.weight": "unet_down_blocks.2.attentions.0.norm.weight",
+        "model.diffusion_model.input_blocks.7.1.proj_in.bias": "unet_down_blocks.2.attentions.0.proj_in.bias",
+        "model.diffusion_model.input_blocks.7.1.proj_in.weight": "unet_down_blocks.2.attentions.0.proj_in.weight",
+        "model.diffusion_model.input_blocks.7.1.proj_out.bias": "unet_down_blocks.2.attentions.0.proj_out.bias",
+        "model.diffusion_model.input_blocks.7.1.proj_out.weight": "unet_down_blocks.2.attentions.0.proj_out.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight",
+        "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias",
+        "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight",
+        "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "unet_down_blocks.2.resnets.1.conv2.bias",
+        "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "unet_down_blocks.2.resnets.1.conv2.weight",
+        "model.diffusion_model.input_blocks.8.1.norm.bias": "unet_down_blocks.2.attentions.1.norm.bias",
+        "model.diffusion_model.input_blocks.8.1.norm.weight": "unet_down_blocks.2.attentions.1.norm.weight",
+        "model.diffusion_model.input_blocks.8.1.proj_in.bias": "unet_down_blocks.2.attentions.1.proj_in.bias",
+        "model.diffusion_model.input_blocks.8.1.proj_in.weight": "unet_down_blocks.2.attentions.1.proj_in.weight",
+        "model.diffusion_model.input_blocks.8.1.proj_out.bias": "unet_down_blocks.2.attentions.1.proj_out.bias",
+        "model.diffusion_model.input_blocks.8.1.proj_out.weight": "unet_down_blocks.2.attentions.1.proj_out.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.label_emb.0.0.bias": "unet_add_embedding.linear_1.bias",
+        "model.diffusion_model.label_emb.0.0.weight": "unet_add_embedding.linear_1.weight",
+        "model.diffusion_model.label_emb.0.2.bias": "unet_add_embedding.linear_2.bias",
+        "model.diffusion_model.label_emb.0.2.weight": "unet_add_embedding.linear_2.weight",
+        "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias",
+        "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight",
+        "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.conv1.bias",
+        "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight",
+        "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias",
+        "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight",
+        "model.diffusion_model.middle_block.0.out_layers.3.bias": "unet_mid_block.resnets.0.conv2.bias",
+        "model.diffusion_model.middle_block.0.out_layers.3.weight": "unet_mid_block.resnets.0.conv2.weight",
+        "model.diffusion_model.out.0.bias": "unet_conv_norm_out.bias",
+        "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight",
+        "model.diffusion_model.out.2.bias": "unet_conv_out.bias",
+        "model.diffusion_model.out.2.weight": "unet_conv_out.weight",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "unet_up_blocks.0.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.0.1.norm.bias": "unet_up_blocks.0.attentions.0.norm.bias",
+        "model.diffusion_model.output_blocks.0.1.norm.weight": "unet_up_blocks.0.attentions.0.norm.weight",
+        "model.diffusion_model.output_blocks.0.1.proj_in.bias": "unet_up_blocks.0.attentions.0.proj_in.bias",
+        "model.diffusion_model.output_blocks.0.1.proj_in.weight": "unet_up_blocks.0.attentions.0.proj_in.weight",
+        "model.diffusion_model.output_blocks.0.1.proj_out.bias": "unet_up_blocks.0.attentions.0.proj_out.bias",
+        "model.diffusion_model.output_blocks.0.1.proj_out.weight": "unet_up_blocks.0.attentions.0.proj_out.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "unet_up_blocks.0.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.1.1.norm.bias": "unet_up_blocks.0.attentions.1.norm.bias",
+        "model.diffusion_model.output_blocks.1.1.norm.weight": "unet_up_blocks.0.attentions.1.norm.weight",
+        "model.diffusion_model.output_blocks.1.1.proj_in.bias": "unet_up_blocks.0.attentions.1.proj_in.bias",
+        "model.diffusion_model.output_blocks.1.1.proj_in.weight": "unet_up_blocks.0.attentions.1.proj_in.weight",
+        "model.diffusion_model.output_blocks.1.1.proj_out.bias": "unet_up_blocks.0.attentions.1.proj_out.bias",
+        "model.diffusion_model.output_blocks.1.1.proj_out.weight": "unet_up_blocks.0.attentions.1.proj_out.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "unet_up_blocks.0.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "unet_up_blocks.0.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "unet_up_blocks.0.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.2.1.norm.bias": "unet_up_blocks.0.attentions.2.norm.bias",
+        "model.diffusion_model.output_blocks.2.1.norm.weight": "unet_up_blocks.0.attentions.2.norm.weight",
+        "model.diffusion_model.output_blocks.2.1.proj_in.bias": "unet_up_blocks.0.attentions.2.proj_in.bias",
+        "model.diffusion_model.output_blocks.2.1.proj_in.weight": "unet_up_blocks.0.attentions.2.proj_in.weight",
+        "model.diffusion_model.output_blocks.2.1.proj_out.bias": "unet_up_blocks.0.attentions.2.proj_out.bias",
+        "model.diffusion_model.output_blocks.2.1.proj_out.weight": "unet_up_blocks.0.attentions.2.proj_out.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.2.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.3.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.4.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.5.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.6.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.7.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.8.norm3.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm1.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm1.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm2.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm2.weight",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm3.bias",
+        "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.9.norm3.weight",
+        "model.diffusion_model.output_blocks.2.2.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias",
+        "model.diffusion_model.output_blocks.2.2.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "unet_up_blocks.1.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "unet_up_blocks.1.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.3.0.skip_connection.bias": "unet_up_blocks.1.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "unet_up_blocks.1.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.3.1.norm.bias": "unet_up_blocks.1.attentions.0.norm.bias",
+        "model.diffusion_model.output_blocks.3.1.norm.weight": "unet_up_blocks.1.attentions.0.norm.weight",
+        "model.diffusion_model.output_blocks.3.1.proj_in.bias": "unet_up_blocks.1.attentions.0.proj_in.bias",
+        "model.diffusion_model.output_blocks.3.1.proj_in.weight": "unet_up_blocks.1.attentions.0.proj_in.weight",
+        "model.diffusion_model.output_blocks.3.1.proj_out.bias": "unet_up_blocks.1.attentions.0.proj_out.bias",
+        "model.diffusion_model.output_blocks.3.1.proj_out.weight": "unet_up_blocks.1.attentions.0.proj_out.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm1.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm1.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm2.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm2.weight",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm3.bias",
+        "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.1.norm3.weight",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "unet_up_blocks.1.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "unet_up_blocks.1.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "unet_up_blocks.1.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "unet_up_blocks.1.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.4.1.norm.bias": "unet_up_blocks.1.attentions.1.norm.bias",
+        "model.diffusion_model.output_blocks.4.1.norm.weight": "unet_up_blocks.1.attentions.1.norm.weight",
+        "model.diffusion_model.output_blocks.4.1.proj_in.bias": "unet_up_blocks.1.attentions.1.proj_in.bias",
+        "model.diffusion_model.output_blocks.4.1.proj_in.weight": "unet_up_blocks.1.attentions.1.proj_in.weight",
+        "model.diffusion_model.output_blocks.4.1.proj_out.bias": "unet_up_blocks.1.attentions.1.proj_out.bias",
+        "model.diffusion_model.output_blocks.4.1.proj_out.weight": "unet_up_blocks.1.attentions.1.proj_out.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "unet_up_blocks.1.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": "unet_up_blocks.1.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "unet_up_blocks.1.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "unet_up_blocks.1.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.5.1.norm.bias": "unet_up_blocks.1.attentions.2.norm.bias",
+        "model.diffusion_model.output_blocks.5.1.norm.weight": "unet_up_blocks.1.attentions.2.norm.weight",
+        "model.diffusion_model.output_blocks.5.1.proj_in.bias": "unet_up_blocks.1.attentions.2.proj_in.bias",
+        "model.diffusion_model.output_blocks.5.1.proj_in.weight": "unet_up_blocks.1.attentions.2.proj_in.weight",
+        "model.diffusion_model.output_blocks.5.1.proj_out.bias": "unet_up_blocks.1.attentions.2.proj_out.bias",
+        "model.diffusion_model.output_blocks.5.1.proj_out.weight": "unet_up_blocks.1.attentions.2.proj_out.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias",
+        "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight",
+        "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias",
+        "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.conv1.bias",
+        "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight",
+        "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias",
+        "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight",
+        "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "unet_up_blocks.2.resnets.0.conv2.bias",
+        "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "unet_up_blocks.2.resnets.0.conv2.weight",
+        "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "unet_up_blocks.2.resnets.0.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "unet_up_blocks.2.resnets.0.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.conv1.bias",
+        "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight",
+        "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias",
+        "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight",
+        "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "unet_up_blocks.2.resnets.1.conv2.bias",
+        "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "unet_up_blocks.2.resnets.1.conv2.weight",
+        "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "unet_up_blocks.2.resnets.1.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "unet_up_blocks.2.resnets.1.conv_shortcut.weight",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias",
+        "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight",
+        "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.conv1.bias",
+        "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight",
+        "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias",
+        "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight",
+        "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "unet_up_blocks.2.resnets.2.conv2.bias",
+        "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "unet_up_blocks.2.resnets.2.conv2.weight",
+        "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "unet_up_blocks.2.resnets.2.conv_shortcut.bias",
+        "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "unet_up_blocks.2.resnets.2.conv_shortcut.weight",
+        "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias",
+        "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight",
+        "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias",
+        "model.diffusion_model.time_embed.2.weight": "unet_time_embedding.linear_2.weight"
+    },
+    "ldm_diffusers_shape_map": {
+        "first_stage_model.decoder.mid.attn_1.k.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.proj_out.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.q.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.decoder.mid.attn_1.v.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.k.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.proj_out.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.q.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ],
+        "first_stage_model.encoder.mid.attn_1.v.weight": [
+            [
+                512,
+                512,
+                1,
+                1
+            ],
+            [
+                512,
+                512
+            ]
+        ]
+    },
+    "ldm_diffusers_operator_map": {
+        "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.0.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.0.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.0.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.0.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.0.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.0.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.1.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.1.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.1.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.1.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.1.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.1.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.10.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.10.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.10.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.10.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.10.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.10.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.11.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.11.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.11.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.11.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.11.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.11.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.12.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.12.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.12.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.12.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.12.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.12.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.13.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.13.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.13.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.13.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.13.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.13.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.14.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.14.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.14.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.14.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.14.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.14.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.15.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.15.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.15.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.15.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.15.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.15.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.16.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.16.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.16.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.16.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.16.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.16.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.17.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.17.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.17.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.17.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.17.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.17.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.18.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.18.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.18.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.18.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.18.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.18.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.19.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.19.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.19.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.19.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.19.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.19.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.2.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.2.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.2.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.2.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.2.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.2.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.20.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.20.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.20.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.20.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.20.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.20.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.21.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.21.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.21.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.21.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.21.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.21.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.22.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.22.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.22.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.22.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.22.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.22.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.23.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.23.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.23.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.23.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.23.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.23.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.24.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.24.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.24.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.24.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.24.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.24.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.25.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.25.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.25.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.25.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.25.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.25.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.26.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.26.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.26.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.26.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.26.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.26.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.27.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.27.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.27.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.27.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.27.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.27.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.28.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.28.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.28.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.28.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.28.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.28.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.29.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.29.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.29.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.29.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.29.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.29.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.3.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.3.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.3.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.3.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.3.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.3.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.30.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.30.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.30.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.30.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.30.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.30.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.31.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.31.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.31.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.31.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.31.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.31.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.4.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.4.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.4.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.4.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.4.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.4.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.5.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.5.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.5.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.5.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.5.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.5.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.6.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.6.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.6.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.6.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.6.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.6.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.7.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.7.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.7.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.7.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.7.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.7.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.8.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.8.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.8.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.8.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.8.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.8.self_attn.v_proj.weight"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": {
+            "cat": [
+                "te1_text_model.encoder.layers.9.self_attn.q_proj.bias",
+                "te1_text_model.encoder.layers.9.self_attn.k_proj.bias",
+                "te1_text_model.encoder.layers.9.self_attn.v_proj.bias"
+            ]
+        },
+        "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": {
+            "cat": [
+                "te1_text_model.encoder.layers.9.self_attn.q_proj.weight",
+                "te1_text_model.encoder.layers.9.self_attn.k_proj.weight",
+                "te1_text_model.encoder.layers.9.self_attn.v_proj.weight"
+            ]
+        }
+    },
+    "diffusers_ldm_operator_map": {
+        "te1_text_model.encoder.layers.0.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.0.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.1.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.10.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.11.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.12.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.13.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.14.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.15.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.16.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.17.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.18.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.19.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.2.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.20.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.21.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.22.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.23.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.24.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.25.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.26.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.27.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.28.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.29.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.3.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.30.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.31.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.4.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.5.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.6.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.7.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.8.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.q_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.k_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.v_proj.bias": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias",
+                "2048:, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.q_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight",
+                "0:1024, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.k_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight",
+                "1024:2048, :"
+            ]
+        },
+        "te1_text_model.encoder.layers.9.self_attn.v_proj.weight": {
+            "slice": [
+                "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight",
+                "2048:, :"
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git a/toolkit/keymaps/stable_diffusion_ssd_ldm_base.safetensors b/toolkit/keymaps/stable_diffusion_ssd_ldm_base.safetensors
new file mode 100644
index 00000000..36ce7400
Binary files /dev/null and b/toolkit/keymaps/stable_diffusion_ssd_ldm_base.safetensors differ
diff --git a/toolkit/keymaps/stable_diffusion_ssd_unmatched.json b/toolkit/keymaps/stable_diffusion_ssd_unmatched.json
new file mode 100644
index 00000000..6871c9eb
--- /dev/null
+++ b/toolkit/keymaps/stable_diffusion_ssd_unmatched.json
@@ -0,0 +1,21 @@
+{
+    "ldm": {
+        "conditioner.embedders.0.transformer.text_model.embeddings.position_ids": {
+            "shape": [
+                1,
+                77
+            ],
+            "min": 0.0,
+            "max": 76.0
+        },
+        "conditioner.embedders.1.model.text_model.embeddings.position_ids": {
+            "shape": [
+                1,
+                77
+            ],
+            "min": 0.0,
+            "max": 76.0
+        }
+    },
+    "diffusers": {}
+}
\ No newline at end of file
diff --git a/toolkit/llvae.py b/toolkit/llvae.py
index aedbcb61..9d559bfe 100644
--- a/toolkit/llvae.py
+++ b/toolkit/llvae.py
@@ -5,14 +5,18 @@ import itertools
 
 
 class LosslessLatentDecoder(nn.Module):
-    def __init__(self, in_channels, latent_depth, dtype=torch.float32):
+    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
         super(LosslessLatentDecoder, self).__init__()
-        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.latent_depth = latent_depth
         self.in_channels = in_channels
         self.out_channels = int(in_channels // (latent_depth * latent_depth))
         numpy_kernel = self.build_kernel(in_channels, latent_depth)
-        self.kernel = torch.from_numpy(numpy_kernel).to(device=device, dtype=dtype)
+        numpy_kernel = torch.from_numpy(numpy_kernel).to(device=device, dtype=dtype)
+        if trainable:
+            self.kernel = nn.Parameter(numpy_kernel)
+        else:
+            self.kernel = numpy_kernel
 
     def build_kernel(self, in_channels, latent_depth):
         # my old code from tensorflow.
@@ -35,19 +39,27 @@ class LosslessLatentDecoder(nn.Module):
         return kernel
 
     def forward(self, x):
+        # Keep the kernel dtype in sync with the input. Cast through .data so a trainable
+        # nn.Parameter kernel stays registered (re-assigning a plain tensor raises TypeError).
+        if self.kernel.dtype != x.dtype:
+            self.kernel.data = self.kernel.data.to(dtype=x.dtype)
         # Deconvolve input tensor with the kernel
         return nn.functional.conv_transpose2d(x, self.kernel, stride=self.latent_depth, padding=0, groups=1)
 
 
 class LosslessLatentEncoder(nn.Module):
-    def __init__(self, in_channels, latent_depth, dtype=torch.float32):
+    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
         super(LosslessLatentEncoder, self).__init__()
-        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.latent_depth = latent_depth
         self.in_channels = in_channels
         self.out_channels = int(in_channels * (latent_depth * latent_depth))
         numpy_kernel = self.build_kernel(in_channels, latent_depth)
-        self.kernel = torch.from_numpy(numpy_kernel).to(device=device, dtype=dtype)
+        numpy_kernel = torch.from_numpy(numpy_kernel).to(device=device, dtype=dtype)
+        if trainable:
+            self.kernel = nn.Parameter(numpy_kernel)
+        else:
+            self.kernel = numpy_kernel
 
 
     def build_kernel(self, in_channels, latent_depth):
@@ -70,18 +82,21 @@ class LosslessLatentEncoder(nn.Module):
         return kernel
 
     def forward(self, x):
+        # Cast via .data: re-assigning a trainable nn.Parameter kernel with a plain tensor raises TypeError
+        if self.kernel.dtype != x.dtype:
+            self.kernel.data = self.kernel.data.to(dtype=x.dtype)
         # Convolve input tensor with the kernel
         return nn.functional.conv2d(x, self.kernel, stride=self.latent_depth, padding=0, groups=1)
 
 
 class LosslessLatentVAE(nn.Module):
-    def __init__(self, in_channels, latent_depth, dtype=torch.float32):
+    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
         super(LosslessLatentVAE, self).__init__()
         self.latent_depth = latent_depth
         self.in_channels = in_channels
-        self.encoder = LosslessLatentEncoder(in_channels, latent_depth, dtype=dtype)
+        self.encoder = LosslessLatentEncoder(in_channels, latent_depth, dtype=dtype, trainable=trainable)
         encoder_out_channels = self.encoder.out_channels
-        self.decoder = LosslessLatentDecoder(encoder_out_channels, latent_depth, dtype=dtype)
+        self.decoder = LosslessLatentDecoder(encoder_out_channels, latent_depth, dtype=dtype, trainable=trainable)
 
     def forward(self, x):
         latent = self.latent_encoder(x)
@@ -101,7 +116,7 @@ if __name__ == '__main__':
     from PIL import Image
     import torchvision.transforms as transforms
     user_path = os.path.expanduser('~')
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     dtype = torch.float32
 
     input_path = os.path.join(user_path, "Pictures/sample_2_512.png")
diff --git a/toolkit/lora.py b/toolkit/lora.py
deleted file mode 100644
index 0780cbc3..00000000
--- a/toolkit/lora.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# ref:
-# - https://github.com/cloneofsimo/lora/blob/master/lora_diffusion/lora.py
-# - https://github.com/kohya-ss/sd-scripts/blob/main/networks/lora.py
-# - https://github.com/p1atdev/LECO/blob/main/lora.py
-
-import os
-import math
-from typing import Optional, List, Type, Set, Literal
-from collections import OrderedDict
-
-import torch
-import torch.nn as nn
-from diffusers import UNet2DConditionModel
-from safetensors.torch import save_file
-
-from toolkit.metadata import add_model_hash_to_meta
-
-UNET_TARGET_REPLACE_MODULE_TRANSFORMER = [
-    "Transformer2DModel",  # どうやらこっちの方らしい？ # attn1, 2
-]
-UNET_TARGET_REPLACE_MODULE_CONV = [
-    "ResnetBlock2D",
-    "Downsample2D",
-    "Upsample2D",
-]  # locon, 3clier
-
-LORA_PREFIX_UNET = "lora_unet"
-
-DEFAULT_TARGET_REPLACE = UNET_TARGET_REPLACE_MODULE_TRANSFORMER
-
-TRAINING_METHODS = Literal[
-    "noxattn",  # train all layers except x-attns and time_embed layers
-    "innoxattn",  # train all layers except self attention layers
-    "selfattn",  # ESD-u, train only self attention layers
-    "xattn",  # ESD-x, train only x attention layers
-    "full",  # train all layers
-    # "notime",
-    # "xlayer",
-    # "outxattn",
-    # "outsattn",
-    # "inxattn",
-    # "inmidsattn",
-    # "selflayer",
-]
-
-
-class LoRAModule(nn.Module):
-    """
-    replaces forward method of the original Linear, instead of replacing the original Linear module.
-    """
-
-    def __init__(
-            self,
-            lora_name,
-            org_module: nn.Module,
-            multiplier=1.0,
-            lora_dim=4,
-            alpha=1,
-    ):
-        """if alpha == 0 or None, alpha is rank (no scaling)."""
-        super().__init__()
-        self.lora_name = lora_name
-        self.lora_dim = lora_dim
-
-        if org_module.__class__.__name__ == "Linear":
-            in_dim = org_module.in_features
-            out_dim = org_module.out_features
-            self.lora_down = nn.Linear(in_dim, lora_dim, bias=False)
-            self.lora_up = nn.Linear(lora_dim, out_dim, bias=False)
-
-        elif org_module.__class__.__name__ == "Conv2d":  # 一応
-            in_dim = org_module.in_channels
-            out_dim = org_module.out_channels
-
-            self.lora_dim = min(self.lora_dim, in_dim, out_dim)
-            if self.lora_dim != lora_dim:
-                print(f"{lora_name} dim (rank) is changed to: {self.lora_dim}")
-
-            kernel_size = org_module.kernel_size
-            stride = org_module.stride
-            padding = org_module.padding
-            self.lora_down = nn.Conv2d(
-                in_dim, self.lora_dim, kernel_size, stride, padding, bias=False
-            )
-            self.lora_up = nn.Conv2d(self.lora_dim, out_dim, (1, 1), (1, 1), bias=False)
-
-        if type(alpha) == torch.Tensor:
-            alpha = alpha.detach().numpy()
-        alpha = lora_dim if alpha is None or alpha == 0 else alpha
-        self.scale = alpha / self.lora_dim
-        self.register_buffer("alpha", torch.tensor(alpha))  # 定数として扱える
-
-        # same as microsoft's
-        nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
-        nn.init.zeros_(self.lora_up.weight)
-
-        self.multiplier = multiplier
-        self.org_module = org_module  # remove in applying
-
-    def apply_to(self):
-        self.org_forward = self.org_module.forward
-        self.org_module.forward = self.forward
-        del self.org_module
-
-    def forward(self, x):
-        return (
-                self.org_forward(x)
-                + self.lora_up(self.lora_down(x)) * self.multiplier * self.scale
-        )
-
-
-class LoRANetwork(nn.Module):
-    def __init__(
-            self,
-            unet: UNet2DConditionModel,
-            rank: int = 4,
-            multiplier: float = 1.0,
-            alpha: float = 1.0,
-            train_method: TRAINING_METHODS = "full",
-    ) -> None:
-        super().__init__()
-
-        self.multiplier = multiplier
-        self.lora_dim = rank
-        self.alpha = alpha
-
-        # LoRAのみ
-        self.module = LoRAModule
-
-        # unetのloraを作る
-        self.unet_loras = self.create_modules(
-            LORA_PREFIX_UNET,
-            unet,
-            DEFAULT_TARGET_REPLACE,
-            self.lora_dim,
-            self.multiplier,
-            train_method=train_method,
-        )
-        print(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
-
-        # assertion 名前の被りがないか確認しているようだ
-        lora_names = set()
-        for lora in self.unet_loras:
-            assert (
-                    lora.lora_name not in lora_names
-            ), f"duplicated lora name: {lora.lora_name}. {lora_names}"
-            lora_names.add(lora.lora_name)
-
-        # 適用する
-        for lora in self.unet_loras:
-            lora.apply_to()
-            self.add_module(
-                lora.lora_name,
-                lora,
-            )
-
-        del unet
-
-        torch.cuda.empty_cache()
-
-    def create_modules(
-            self,
-            prefix: str,
-            root_module: nn.Module,
-            target_replace_modules: List[str],
-            rank: int,
-            multiplier: float,
-            train_method: TRAINING_METHODS,
-    ) -> list:
-        loras = []
-
-        for name, module in root_module.named_modules():
-            if train_method == "noxattn":  # Cross Attention と Time Embed 以外学習
-                if "attn2" in name or "time_embed" in name:
-                    continue
-            elif train_method == "innoxattn":  # Cross Attention 以外学習
-                if "attn2" in name:
-                    continue
-            elif train_method == "selfattn":  # Self Attention のみ学習
-                if "attn1" not in name:
-                    continue
-            elif train_method == "xattn":  # Cross Attention のみ学習
-                if "attn2" not in name:
-                    continue
-            elif train_method == "full":  # 全部学習
-                pass
-            else:
-                raise NotImplementedError(
-                    f"train_method: {train_method} is not implemented."
-                )
-            if module.__class__.__name__ in target_replace_modules:
-                for child_name, child_module in module.named_modules():
-                    if child_module.__class__.__name__ in ["Linear", "Conv2d"]:
-                        lora_name = prefix + "." + name + "." + child_name
-                        lora_name = lora_name.replace(".", "_")
-                        print(f"{lora_name}")
-                        lora = self.module(
-                            lora_name, child_module, multiplier, rank, self.alpha
-                        )
-                        loras.append(lora)
-
-        return loras
-
-    def prepare_optimizer_params(self):
-        all_params = []
-
-        if self.unet_loras:  # 実質これしかない
-            params = []
-            [params.extend(lora.parameters()) for lora in self.unet_loras]
-            param_data = {"params": params}
-            all_params.append(param_data)
-
-        return all_params
-
-    def save_weights(self, file, dtype=None, metadata: Optional[dict] = None):
-        state_dict = self.state_dict()
-        if metadata is None:
-            metadata = OrderedDict()
-
-        if dtype is not None:
-            for key in list(state_dict.keys()):
-                v = state_dict[key]
-                v = v.detach().clone().to("cpu").to(dtype)
-                state_dict[key] = v
-
-        for key in list(state_dict.keys()):
-            if not key.startswith("lora"):
-                # remove any not lora
-                del state_dict[key]
-
-        metadata = add_model_hash_to_meta(state_dict, metadata)
-        if os.path.splitext(file)[1] == ".safetensors":
-            save_file(state_dict, file, metadata)
-        else:
-            torch.save(state_dict, file)
-
-    def __enter__(self):
-        for lora in self.unet_loras:
-            lora.multiplier = 1.0
-
-    def __exit__(self, exc_type, exc_value, tb):
-        for lora in self.unet_loras:
-            lora.multiplier = 0
diff --git a/toolkit/lora_special.py b/toolkit/lora_special.py
index 0a477087..8bf1e063 100644
--- a/toolkit/lora_special.py
+++ b/toolkit/lora_special.py
@@ -1,14 +1,16 @@
+import json
 import math
 import os
 import re
 import sys
 from typing import List, Optional, Dict, Type, Union
-
 import torch
 from transformers import CLIPTextModel
 
+from .config_modules import NetworkConfig
+from .lorm import count_parameters
+from .network_mixins import ToolkitNetworkMixin, ToolkitModuleMixin, ExtractableModuleMixin
 from .paths import SD_SCRIPTS_ROOT
-from .train_tools import get_torch_dtype
 
 sys.path.append(SD_SCRIPTS_ROOT)
 
@@ -19,7 +21,18 @@ from torch.utils.checkpoint import checkpoint
 RE_UPDOWN = re.compile(r"(up|down)_blocks_(\d+)_(resnets|upsamplers|downsamplers|attentions)_(\d+)_")
 
 
-class LoRAModule(torch.nn.Module):
+# diffusers specific stuff
+LINEAR_MODULES = [
+    'Linear',
+    'LoRACompatibleLinear'
+    # 'GroupNorm',
+]
+CONV_MODULES = [
+    'Conv2d',
+    'LoRACompatibleConv'
+]
+
+class LoRAModule(ToolkitModuleMixin, ExtractableModuleMixin, torch.nn.Module):
     """
     replaces forward method of the original Linear, instead of replacing the original Linear module.
     """
@@ -34,12 +47,20 @@ class LoRAModule(torch.nn.Module):
             dropout=None,
             rank_dropout=None,
             module_dropout=None,
+            network: 'LoRASpecialNetwork' = None,
+            use_bias: bool = False,
+            **kwargs
     ):
         """if alpha == 0 or None, alpha is rank (no scaling)."""
-        super().__init__()
+        ToolkitModuleMixin.__init__(self, network=network)
+        torch.nn.Module.__init__(self)
         self.lora_name = lora_name
+        self.scalar = torch.tensor(1.0)
+        # check if parent has bias. if not force use_bias to False
+        if org_module.bias is None:
+            use_bias = False
 
-        if org_module.__class__.__name__ == "Conv2d":
+        if org_module.__class__.__name__ in CONV_MODULES:
             in_dim = org_module.in_channels
             out_dim = org_module.out_channels
         else:
@@ -53,15 +74,15 @@ class LoRAModule(torch.nn.Module):
         # else:
         self.lora_dim = lora_dim
 
-        if org_module.__class__.__name__ == "Conv2d":
+        if org_module.__class__.__name__ in CONV_MODULES:
             kernel_size = org_module.kernel_size
             stride = org_module.stride
             padding = org_module.padding
             self.lora_down = torch.nn.Conv2d(in_dim, self.lora_dim, kernel_size, stride, padding, bias=False)
-            self.lora_up = torch.nn.Conv2d(self.lora_dim, out_dim, (1, 1), (1, 1), bias=False)
+            self.lora_up = torch.nn.Conv2d(self.lora_dim, out_dim, (1, 1), (1, 1), bias=use_bias)
         else:
             self.lora_down = torch.nn.Linear(in_dim, self.lora_dim, bias=False)
-            self.lora_up = torch.nn.Linear(self.lora_dim, out_dim, bias=False)
+            self.lora_up = torch.nn.Linear(self.lora_dim, out_dim, bias=use_bias)
 
         if type(alpha) == torch.Tensor:
             alpha = alpha.detach().float().numpy()  # without casting, bf16 causes error
@@ -74,157 +95,20 @@ class LoRAModule(torch.nn.Module):
         torch.nn.init.zeros_(self.lora_up.weight)
 
         self.multiplier: Union[float, List[float]] = multiplier
-        self.org_module = org_module  # remove in applying
+        # wrap the original module so it doesn't get weights updated
+        self.org_module = [org_module]
         self.dropout = dropout
         self.rank_dropout = rank_dropout
         self.module_dropout = module_dropout
         self.is_checkpointing = False
-        self.is_normalizing = False
-        self.normalize_scaler = 1.0
 
     def apply_to(self):
-        self.org_forward = self.org_module.forward
-        self.org_module.forward = self.forward
-        del self.org_module
-
-    # this allows us to set different multipliers on a per item in a batch basis
-    # allowing us to run positive and negative weights in the same batch
-    # really only useful for slider training for now
-    def get_multiplier(self, lora_up):
-        with torch.no_grad():
-            batch_size = lora_up.size(0)
-            # batch will have all negative prompts first and positive prompts second
-            # our multiplier list is for a prompt pair. So we need to repeat it for positive and negative prompts
-            # if there is more than our multiplier, it is likely a batch size increase, so we need to
-            # interleave the multipliers
-            if isinstance(self.multiplier, list):
-                if len(self.multiplier) == 0:
-                    # single item, just return it
-                    return self.multiplier[0]
-                elif len(self.multiplier) == batch_size:
-                    # not doing CFG
-                    multiplier_tensor = torch.tensor(self.multiplier).to(lora_up.device, dtype=lora_up.dtype)
-                else:
-
-                    # we have a list of multipliers, so we need to get the multiplier for this batch
-                    multiplier_tensor = torch.tensor(self.multiplier * 2).to(lora_up.device, dtype=lora_up.dtype)
-                    # should be 1 for if total batch size was 1
-                    num_interleaves = (batch_size // 2) // len(self.multiplier)
-                    multiplier_tensor = multiplier_tensor.repeat_interleave(num_interleaves)
-
-                # match lora_up rank
-                if len(lora_up.size()) == 2:
-                    multiplier_tensor = multiplier_tensor.view(-1, 1)
-                elif len(lora_up.size()) == 3:
-                    multiplier_tensor = multiplier_tensor.view(-1, 1, 1)
-                elif len(lora_up.size()) == 4:
-                    multiplier_tensor = multiplier_tensor.view(-1, 1, 1, 1)
-                return multiplier_tensor.detach()
-
-            else:
-                return self.multiplier
-
-    def _call_forward(self, x):
-        # module dropout
-        if self.module_dropout is not None and self.training:
-            if torch.rand(1) < self.module_dropout:
-                return 0.0  # added to original forward
-
-        lx = self.lora_down(x)
-
-        # normal dropout
-        if self.dropout is not None and self.training:
-            lx = torch.nn.functional.dropout(lx, p=self.dropout)
-
-        # rank dropout
-        if self.rank_dropout is not None and self.training:
-            mask = torch.rand((lx.size(0), self.lora_dim), device=lx.device) > self.rank_dropout
-            if len(lx.size()) == 3:
-                mask = mask.unsqueeze(1)  # for Text Encoder
-            elif len(lx.size()) == 4:
-                mask = mask.unsqueeze(-1).unsqueeze(-1)  # for Conv2d
-            lx = lx * mask
-
-            # scaling for rank dropout: treat as if the rank is changed
-            # maskから計算することも考えられるが、augmentation的な効果を期待してrank_dropoutを用いる
-            scale = self.scale * (1.0 / (1.0 - self.rank_dropout))  # redundant for readability
-        else:
-            scale = self.scale
-
-        lx = self.lora_up(lx)
-
-        return lx * scale
-
-    def forward(self, x):
-        org_forwarded = self.org_forward(x)
-        lora_output = self._call_forward(x)
-
-        if self.is_normalizing:
-            with torch.no_grad():
-                # do this calculation without multiplier
-                # get a dim array from orig forward that had index of all dimensions except the batch and channel
-
-                # Calculate the target magnitude for the combined output
-                orig_max = torch.max(torch.abs(org_forwarded))
-
-                # Calculate the additional increase in magnitude that lora_output would introduce
-                potential_max_increase = torch.max(torch.abs(org_forwarded + lora_output) - torch.abs(org_forwarded))
-
-                epsilon = 1e-6  # Small constant to avoid division by zero
-
-                # Calculate the scaling factor for the lora_output
-                # to ensure that the potential increase in magnitude doesn't change the original max
-                normalize_scaler = orig_max / (orig_max + potential_max_increase + epsilon)
-                normalize_scaler = normalize_scaler.detach()
-
-                # save the scaler so it can be applied later
-                self.normalize_scaler = normalize_scaler.clone().detach()
-
-            lora_output *= normalize_scaler
-
-        multiplier = self.get_multiplier(lora_output)
-
-        return org_forwarded + (lora_output * multiplier)
-
-    def enable_gradient_checkpointing(self):
-        self.is_checkpointing = True
-
-    def disable_gradient_checkpointing(self):
-        self.is_checkpointing = False
-
-    @torch.no_grad()
-    def apply_stored_normalizer(self, target_normalize_scaler: float = 1.0):
-        """
-        Applied the previous normalization calculation to the module.
-        This must be called before saving or normalization will be lost.
-        It is probably best to call after each batch as well.
-        We just scale the up down weights to match this vector
-        :return:
-        """
-        # get state dict
-        state_dict = self.state_dict()
-        dtype = state_dict['lora_up.weight'].dtype
-        device = state_dict['lora_up.weight'].device
-
-        # todo should we do this at fp32?
-
-        total_module_scale = torch.tensor(self.normalize_scaler / target_normalize_scaler) \
-            .to(device, dtype=dtype)
-        num_modules_layers = 2  # up and down
-        up_down_scale = torch.pow(total_module_scale, 1.0 / num_modules_layers) \
-            .to(device, dtype=dtype)
-
-        # apply the scaler to the up and down weights
-        for key in state_dict.keys():
-            if key.endswith('.lora_up.weight') or key.endswith('.lora_down.weight'):
-                # do it inplace do params are updated
-                state_dict[key] *= up_down_scale
-
-        # reset the normalization scaler
-        self.normalize_scaler = target_normalize_scaler
+        self.org_forward = self.org_module[0].forward
+        self.org_module[0].forward = self.forward
+        # del self.org_module
 
 
-class LoRASpecialNetwork(LoRANetwork):
+class LoRASpecialNetwork(ToolkitNetworkMixin, LoRANetwork):
     NUM_OF_BLOCKS = 12  # フルモデル相当でのup,downの層の数
 
     UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
@@ -258,7 +142,18 @@ class LoRASpecialNetwork(LoRANetwork):
             module_class: Type[object] = LoRAModule,
             varbose: Optional[bool] = False,
             train_text_encoder: Optional[bool] = True,
+            use_text_encoder_1: bool = True,
+            use_text_encoder_2: bool = True,
             train_unet: Optional[bool] = True,
+            is_sdxl=False,
+            is_v2=False,
+            use_bias: bool = False,
+            is_lorm: bool = False,
+            ignore_if_contains = None,
+            parameter_threshold: float = 0.0,
+            target_lin_modules=LoRANetwork.UNET_TARGET_REPLACE_MODULE,
+            target_conv_modules=LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3,
+            **kwargs
     ) -> None:
         """
         LoRA network: すごく引数が多いが、パターンは以下の通り
@@ -269,8 +164,19 @@ class LoRASpecialNetwork(LoRANetwork):
         5. modules_dimとmodules_alphaを指定 (推論用)
         """
         # call the parent of the parent we are replacing (LoRANetwork) init
-        super(LoRANetwork, self).__init__()
-
+        torch.nn.Module.__init__(self)
+        ToolkitNetworkMixin.__init__(
+            self,
+            train_text_encoder=train_text_encoder,
+            train_unet=train_unet,
+            is_sdxl=is_sdxl,
+            is_v2=is_v2,
+            is_lorm=is_lorm,
+            **kwargs
+        )
+        if ignore_if_contains is None:
+            ignore_if_contains = []
+        self.ignore_if_contains = ignore_if_contains
         self.lora_dim = lora_dim
         self.alpha = alpha
         self.conv_lora_dim = conv_lora_dim
@@ -281,9 +187,11 @@ class LoRASpecialNetwork(LoRANetwork):
         self.is_checkpointing = False
         self._multiplier: float = 1.0
         self.is_active: bool = False
-        self._is_normalizing: bool = False
+        self.torch_multiplier = None
         # triggers the state updates
         self.multiplier = multiplier
+        self.is_sdxl = is_sdxl
+        self.is_v2 = is_v2
 
         if modules_dim is not None:
             print(f"create LoRA network from weights")
@@ -325,11 +233,19 @@ class LoRASpecialNetwork(LoRANetwork):
             for name, module in root_module.named_modules():
                 if module.__class__.__name__ in target_replace_modules:
                     for child_name, child_module in module.named_modules():
-                        is_linear = child_module.__class__.__name__ == "Linear"
-                        is_conv2d = child_module.__class__.__name__ == "Conv2d"
+                        is_linear = child_module.__class__.__name__ in LINEAR_MODULES
+                        is_conv2d = child_module.__class__.__name__ in CONV_MODULES
                         is_conv2d_1x1 = is_conv2d and child_module.kernel_size == (1, 1)
 
-                        if is_linear or is_conv2d:
+                        skip = False
+                        if any([word in child_name for word in self.ignore_if_contains]):
+                            skip = True
+
+                        # see if it is over threshold
+                        if count_parameters(child_module) < parameter_threshold:
+                            skip = True
+
+                        if (is_linear or is_conv2d) and not skip:
                             lora_name = prefix + "." + name + "." + child_name
                             lora_name = lora_name.replace(".", "_")
 
@@ -375,6 +291,9 @@ class LoRASpecialNetwork(LoRANetwork):
                                 dropout=dropout,
                                 rank_dropout=rank_dropout,
                                 module_dropout=module_dropout,
+                                network=self,
+                                parent=module,
+                                use_bias=use_bias,
                             )
                             loras.append(lora)
             return loras, skipped
@@ -387,6 +306,10 @@ class LoRASpecialNetwork(LoRANetwork):
         skipped_te = []
         if train_text_encoder:
             for i, text_encoder in enumerate(text_encoders):
+                if not use_text_encoder_1 and i == 0:
+                    continue
+                if not use_text_encoder_2 and i == 1:
+                    continue
                 if len(text_encoders) > 1:
                     index = i + 1
                     print(f"create LoRA for Text Encoder {index}:")
@@ -401,9 +324,9 @@ class LoRASpecialNetwork(LoRANetwork):
         print(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
 
         # extend U-Net target modules if conv2d 3x3 is enabled, or load from weights
-        target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE
+        target_modules = list(target_lin_modules)  # copy: the `+=` below must not mutate the default / class-level list
         if modules_dim is not None or self.conv_lora_dim is not None or conv_block_dims is not None:
-            target_modules += LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
+            target_modules += target_conv_modules
 
         if train_unet:
             self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
@@ -430,106 +353,3 @@ class LoRASpecialNetwork(LoRANetwork):
         for lora in self.text_encoder_loras + self.unet_loras:
             assert lora.lora_name not in names, f"duplicated lora name: {lora.lora_name}"
             names.add(lora.lora_name)
-
-    def save_weights(self, file, dtype, metadata):
-        if metadata is not None and len(metadata) == 0:
-            metadata = None
-
-        state_dict = self.state_dict()
-
-        if dtype is not None:
-            for key in list(state_dict.keys()):
-                v = state_dict[key]
-                v = v.detach().clone().to("cpu").to(dtype)
-                state_dict[key] = v
-
-        if os.path.splitext(file)[1] == ".safetensors":
-            from safetensors.torch import save_file
-            save_file(state_dict, file, metadata)
-        else:
-            torch.save(state_dict, file)
-
-    @property
-    def multiplier(self) -> Union[float, List[float]]:
-        return self._multiplier
-
-    @multiplier.setter
-    def multiplier(self, value: Union[float, List[float]]):
-        self._multiplier = value
-        self._update_lora_multiplier()
-
-    def _update_lora_multiplier(self):
-
-        if self.is_active:
-            if hasattr(self, 'unet_loras'):
-                for lora in self.unet_loras:
-                    lora.multiplier = self._multiplier
-            if hasattr(self, 'text_encoder_loras'):
-                for lora in self.text_encoder_loras:
-                    lora.multiplier = self._multiplier
-        else:
-            if hasattr(self, 'unet_loras'):
-                for lora in self.unet_loras:
-                    lora.multiplier = 0
-            if hasattr(self, 'text_encoder_loras'):
-                for lora in self.text_encoder_loras:
-                    lora.multiplier = 0
-
-    # called when the context manager is entered
-    # ie: with network:
-    def __enter__(self):
-        self.is_active = True
-        self._update_lora_multiplier()
-
-    def __exit__(self, exc_type, exc_value, tb):
-        self.is_active = False
-        self._update_lora_multiplier()
-
-    def force_to(self, device, dtype):
-        self.to(device, dtype)
-        loras = []
-        if hasattr(self, 'unet_loras'):
-            loras += self.unet_loras
-        if hasattr(self, 'text_encoder_loras'):
-            loras += self.text_encoder_loras
-        for lora in loras:
-            lora.to(device, dtype)
-
-    def get_all_modules(self):
-        loras = []
-        if hasattr(self, 'unet_loras'):
-            loras += self.unet_loras
-        if hasattr(self, 'text_encoder_loras'):
-            loras += self.text_encoder_loras
-        return loras
-
-    def _update_checkpointing(self):
-        for module in self.get_all_modules():
-            if self.is_checkpointing:
-                module.enable_gradient_checkpointing()
-            else:
-                module.disable_gradient_checkpointing()
-
-    def enable_gradient_checkpointing(self):
-        # not supported
-        self.is_checkpointing = True
-        self._update_checkpointing()
-
-    def disable_gradient_checkpointing(self):
-        # not supported
-        self.is_checkpointing = False
-        self._update_checkpointing()
-
-    @property
-    def is_normalizing(self) -> bool:
-        return self._is_normalizing
-
-    @is_normalizing.setter
-    def is_normalizing(self, value: bool):
-        self._is_normalizing = value
-        for module in self.get_all_modules():
-            module.is_normalizing = self._is_normalizing
-
-    def apply_stored_normalizer(self, target_normalize_scaler: float = 1.0):
-        for module in self.get_all_modules():
-            module.apply_stored_normalizer(target_normalize_scaler)
diff --git a/toolkit/lorm.py b/toolkit/lorm.py
new file mode 100644
index 00000000..0a432838
--- /dev/null
+++ b/toolkit/lorm.py
@@ -0,0 +1,460 @@
+from typing import Union, Tuple, Literal, Optional
+
+import torch
+import torch.nn as nn
+from diffusers import UNet2DConditionModel
+from torch import Tensor
+from tqdm import tqdm
+
+from toolkit.config_modules import LoRMConfig
+
+conv = nn.Conv2d
+lin = nn.Linear
+_size_2_t = Union[int, Tuple[int, int]]
+
+ExtractMode = Literal[  # rank-selection strategies handled by extract_linear / extract_conv
+    'fixed',
+    'threshold',
+    'ratio',
+    'quantile',
+    'percentage'
+]
+
+LINEAR_MODULES = [
+    'Linear',
+    'LoRACompatibleLinear'
+]
+CONV_MODULES = [
+    # 'Conv2d',
+    # 'LoRACompatibleConv'
+]
+
+UNET_TARGET_REPLACE_MODULE = [
+    "Transformer2DModel",
+    # "ResnetBlock2D",
+    "Downsample2D",
+    "Upsample2D",
+]
+
+LORM_TARGET_REPLACE_MODULE = UNET_TARGET_REPLACE_MODULE
+
+UNET_TARGET_REPLACE_NAME = [
+    "conv_in",
+    "conv_out",
+    "time_embedding.linear_1",
+    "time_embedding.linear_2",
+]
+
+UNET_MODULES_TO_AVOID = [
+]
+
+
+# Low Rank Convolution
+class LoRMCon2d(nn.Module):
+    def __init__(
+            self,
+            in_channels: int,
+            lorm_channels: int,
+            out_channels: int,
+            kernel_size: _size_2_t,
+            stride: _size_2_t = 1,
+            padding: Union[str, _size_2_t] = 'same',
+            dilation: _size_2_t = 1,
+            groups: int = 1,
+            bias: bool = True,
+            padding_mode: str = 'zeros',
+            device=None,
+            dtype=None
+    ) -> None:
+        super().__init__()
+        self.in_channels = in_channels
+        self.lorm_channels = lorm_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.groups = groups
+        self.padding_mode = padding_mode
+
+        self.down = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=lorm_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=False,
+            padding_mode=padding_mode,
+            device=device,
+            dtype=dtype
+        )
+
+        # Kernel size on the up is always 1x1.
+        # I don't think you could calculate a dual 3x3, or I can't at least
+
+        self.up = nn.Conv2d(
+            in_channels=lorm_channels,
+            out_channels=out_channels,
+            kernel_size=(1, 1),
+            stride=1,
+            padding='same',
+            dilation=1,
+            groups=1,
+            bias=bias,
+            padding_mode='zeros',
+            device=device,
+            dtype=dtype
+        )
+
+    def forward(self, input: Tensor, *args, **kwargs) -> Tensor:
+        x = input
+        x = self.down(x)
+        x = self.up(x)
+        return x
+
+
+class LoRMLinear(nn.Module):
+    def __init__(
+            self,
+            in_features: int,
+            lorm_features: int,
+            out_features: int,
+            bias: bool = True,
+            device=None,
+            dtype=None
+    ) -> None:
+        super().__init__()
+        self.in_features = in_features
+        self.lorm_features = lorm_features
+        self.out_features = out_features
+
+        self.down = nn.Linear(
+            in_features=in_features,
+            out_features=lorm_features,
+            bias=False,
+            device=device,
+            dtype=dtype
+
+        )
+        self.up = nn.Linear(
+            in_features=lorm_features,
+            out_features=out_features,
+            bias=bias,
+            # bias=True,
+            device=device,
+            dtype=dtype
+        )
+
+    def forward(self, input: Tensor, *args, **kwargs) -> Tensor:
+        x = input
+        x = self.down(x)
+        x = self.up(x)
+        return x
+
+
+def extract_conv(
+        weight: Union[torch.Tensor, nn.Parameter],
+        mode='fixed',
+        mode_param=0,
+        device='cpu'
+) -> Tuple[Tensor, Tensor, int, Tensor]:
+    """Factor a conv weight (out_ch, in_ch, k_h, k_w) into a low-rank pair via truncated SVD.
+
+    Returns (down [rank, in_ch, k_h, k_w], up [out_ch, rank, 1, 1], rank, residual weight)."""
+    weight = weight.to(device)
+    out_ch, in_ch, kernel_h, kernel_w = weight.shape  # keep both kernel dims so non-square kernels work
+
+    U, S, Vh = torch.linalg.svd(weight.reshape(out_ch, -1), full_matrices=False)  # rank <= min dim, reduced SVD suffices
+    if mode == 'percentage':
+        assert 0 <= mode_param <= 1  # Ensure it's a valid percentage.
+        original_params = out_ch * in_ch * kernel_h * kernel_w
+        desired_params = mode_param * original_params
+        # Solve for lora_rank from the equation
+        lora_rank = int(desired_params / (in_ch * kernel_h * kernel_w + out_ch))
+    elif mode == 'fixed':
+        lora_rank = mode_param
+    elif mode == 'threshold':
+        assert mode_param >= 0
+        lora_rank = torch.sum(S > mode_param).item()
+    elif mode == 'ratio':
+        assert 1 >= mode_param >= 0
+        min_s = torch.max(S) * mode_param
+        lora_rank = torch.sum(S > min_s).item()
+    elif mode == 'quantile' or mode == 'percentile':
+        assert 1 >= mode_param >= 0
+        s_cum = torch.cumsum(S, dim=0)
+        min_cum_sum = mode_param * torch.sum(S)
+        lora_rank = torch.sum(s_cum < min_cum_sum).item()
+    else:
+        raise NotImplementedError('Extract mode should be "fixed", "threshold", "ratio", "quantile" or "percentage"')
+    lora_rank = max(1, lora_rank)
+    lora_rank = min(out_ch, in_ch, lora_rank)
+    if lora_rank >= out_ch / 2:
+        lora_rank = max(1, out_ch // 2)  # cap at half of out_ch, but never collapse to rank 0 (out_ch == 1)
+        print("rank is higher than it should be")
+
+    U = U[:, :lora_rank]
+    S = S[:lora_rank]
+    U = U @ torch.diag(S)
+    Vh = Vh[:lora_rank, :]
+
+    diff = (weight - (U @ Vh).reshape(out_ch, in_ch, kernel_h, kernel_w)).detach()
+    extract_weight_A = Vh.reshape(lora_rank, in_ch, kernel_h, kernel_w).detach()
+    extract_weight_B = U.reshape(out_ch, lora_rank, 1, 1).detach()
+    del U, S, Vh, weight
+    return extract_weight_A, extract_weight_B, lora_rank, diff
+
+
+def extract_linear(
+        weight: Union[torch.Tensor, nn.Parameter],
+        mode='fixed',
+        mode_param=0,
+        device='cpu',
+) -> Tuple[Tensor, Tensor, int, Tensor]:
+    """Factor a linear weight (out_ch, in_ch) into a low-rank pair via truncated SVD.
+
+    Returns (down [rank, in_ch], up [out_ch, rank], rank, residual weight)."""
+    weight = weight.to(device)
+    out_ch, in_ch = weight.shape
+
+    U, S, Vh = torch.linalg.svd(weight, full_matrices=False)  # rank <= min(out_ch, in_ch), reduced SVD suffices
+
+    if mode == 'percentage':
+        assert 0 <= mode_param <= 1  # Ensure it's a valid percentage.
+        desired_params = mode_param * out_ch * in_ch
+        # Solve for lora_rank from the equation
+        lora_rank = int(desired_params / (in_ch + out_ch))
+    elif mode == 'fixed':
+        lora_rank = mode_param
+    elif mode == 'threshold':
+        assert mode_param >= 0
+        lora_rank = torch.sum(S > mode_param).item()
+    elif mode == 'ratio':
+        assert 1 >= mode_param >= 0
+        min_s = torch.max(S) * mode_param
+        lora_rank = torch.sum(S > min_s).item()
+    elif mode in ('quantile', 'percentile'):  # 'percentile' alias is also accepted by extract_conv
+        assert 1 >= mode_param >= 0
+        s_cum = torch.cumsum(S, dim=0)
+        min_cum_sum = mode_param * torch.sum(S)
+        lora_rank = torch.sum(s_cum < min_cum_sum).item()
+    else:
+        raise NotImplementedError('Extract mode should be "fixed", "threshold", "ratio", "quantile" or "percentage"')
+    lora_rank = max(1, lora_rank)
+    lora_rank = min(out_ch, in_ch, lora_rank)
+    if lora_rank >= out_ch / 2:
+        # cap at half the output features, but never let the rank collapse to 0 (out_ch == 1)
+        lora_rank = max(1, out_ch // 2)
+
+    U = U[:, :lora_rank]
+    S = S[:lora_rank]
+    U = U @ torch.diag(S)
+    Vh = Vh[:lora_rank, :]
+
+    diff = (weight - U @ Vh).detach()
+    extract_weight_A = Vh.reshape(lora_rank, in_ch).detach()
+    extract_weight_B = U.reshape(out_ch, lora_rank).detach()
+    del U, S, Vh, weight
+    return extract_weight_A, extract_weight_B, lora_rank, diff
+
+
+def replace_module_by_path(network, name, module):
+    """Replace a module in a network by its name."""
+    name_parts = name.split('.')
+    current_module = network
+    for part in name_parts[:-1]:
+        current_module = getattr(current_module, part)
+    try:
+        setattr(current_module, name_parts[-1], module)
+    except Exception as e:
+        print(e)
+
+
+def count_parameters(module):
+    return sum(p.numel() for p in module.parameters())
+
+
+def compute_optimal_bias(original_module, linear_down, linear_up, X):
+    """Return the mean over dim 0 of (original_module(X) - linear_up(linear_down(X)))."""
+    # residual between the reference module and its down -> up approximation on the probe batch X
+    residual = original_module(X) - linear_up(linear_down(X))
+
+    # averaging the residual over dim 0 gives one offset per remaining position
+    optimal_bias = residual.mean(dim=0)
+    return optimal_bias
+
+
+def format_with_commas(n):
+    return f"{n:,}"
+
+
+def print_lorm_extract_details(
+        start_num_params: int,
+        end_num_params: int,
+        num_replaced: int,
+):
+    start_formatted = format_with_commas(start_num_params)
+    end_formatted = format_with_commas(end_num_params)
+    num_replaced_formatted = format_with_commas(num_replaced)
+
+    width = max(len(start_formatted), len(end_formatted), len(num_replaced_formatted))
+
+    print(f"Convert UNet result:")
+    print(f" - converted: {num_replaced:>{width},} modules")
+    print(f" -     start: {start_num_params:>{width},} params")
+    print(f" -       end: {end_num_params:>{width},} params")
+
+
+lorm_ignore_if_contains = [
+    'proj_out', 'proj_in',
+]
+
+lorm_parameter_threshold = 1000000
+
+
+@torch.no_grad()
+def convert_diffusers_unet_to_lorm(
+        unet: UNet2DConditionModel,
+        config: LoRMConfig,
+):
+    """Replace large linear / conv layers of `unet` in place with low-rank LoRM modules.
+
+    Walks every module whose class name is in UNET_TARGET_REPLACE_MODULE and, for each
+    child in LINEAR_MODULES / CONV_MODULES holding more parameters than the per-module
+    `parameter_threshold` from `config`, swaps in a LoRMLinear / LoRMCon2d initialised
+    from a truncated SVD of the original weight (the original bias, if any, is copied).
+    Children whose name contains 'proj_in' or 'proj_out' are left untouched.
+
+    :param unet: model to convert; it is modified in place
+    :param config: supplies extract mode / param / threshold via get_config_for_module
+    :return: list of the newly created LoRM modules
+    """
+    print('Converting UNet to LoRM UNet')
+    start_num_params = count_parameters(unet)
+    named_modules = list(unet.named_modules())
+
+    num_replaced = 0
+
+    pbar = tqdm(total=len(named_modules), desc="UNet -> LoRM UNet")
+    layer_names_replaced = []
+    converted_modules = []
+    ignore_if_contains = [
+        'proj_out', 'proj_in',
+    ]
+
+    for name, module in named_modules:
+        module_name = module.__class__.__name__
+        if module_name in UNET_TARGET_REPLACE_MODULE:
+            for child_name, child_module in module.named_modules():
+                new_module: Union[LoRMCon2d, LoRMLinear, None] = None
+                # full dotted path of the child inside the unet
+                combined_name = f"{name}.{child_name}"
+
+                lorm_config = config.get_config_for_module(combined_name)
+
+                extract_mode = lorm_config.extract_mode
+                extract_mode_param = lorm_config.extract_mode_param
+                parameter_threshold = lorm_config.parameter_threshold
+
+                if any(word in child_name for word in ignore_if_contains):
+                    pass
+
+                elif child_module.__class__.__name__ in LINEAR_MODULES:
+                    if count_parameters(child_module) > parameter_threshold:
+
+                        dtype = child_module.weight.dtype
+                        # extract and convert
+                        down_weight, up_weight, lora_dim, _ = extract_linear(
+                            weight=child_module.weight.clone().detach().float(),
+                            mode=extract_mode,
+                            mode_param=extract_mode_param,
+                            device=child_module.weight.device,
+                        )
+                        if down_weight is None:
+                            continue
+                        down_weight = down_weight.to(dtype=dtype)
+                        up_weight = up_weight.to(dtype=dtype)
+                        bias_weight = None
+                        if child_module.bias is not None:
+                            bias_weight = child_module.bias.data.clone().detach().to(dtype=dtype)
+                        # linear layer weights = (out_features, in_features)
+                        new_module = LoRMLinear(
+                            in_features=down_weight.shape[1],
+                            lorm_features=lora_dim,
+                            out_features=up_weight.shape[0],
+                            bias=bias_weight is not None,
+                            device=down_weight.device,
+                            dtype=down_weight.dtype
+                        )
+
+                        # replace the weights
+                        new_module.down.weight.data = down_weight
+                        new_module.up.weight.data = up_weight
+                        if bias_weight is not None:
+                            new_module.up.bias.data = bias_weight
+
+                elif child_module.__class__.__name__ in CONV_MODULES:
+                    if count_parameters(child_module) > parameter_threshold:
+                        dtype = child_module.weight.dtype
+                        down_weight, up_weight, lora_dim, _ = extract_conv(
+                            weight=child_module.weight.clone().detach().float(),
+                            mode=extract_mode,
+                            mode_param=extract_mode_param,
+                            device=child_module.weight.device,
+                        )
+                        if down_weight is None:
+                            continue
+                        down_weight = down_weight.to(dtype=dtype)
+                        up_weight = up_weight.to(dtype=dtype)
+                        bias_weight = None
+                        if child_module.bias is not None:
+                            bias_weight = child_module.bias.data.clone().detach().to(dtype=dtype)
+
+                        new_module = LoRMCon2d(
+                            in_channels=down_weight.shape[1],
+                            lorm_channels=lora_dim,
+                            out_channels=up_weight.shape[0],
+                            kernel_size=child_module.kernel_size,
+                            dilation=child_module.dilation,
+                            padding=child_module.padding,
+                            padding_mode=child_module.padding_mode,
+                            stride=child_module.stride,
+                            bias=bias_weight is not None,
+                            device=down_weight.device,
+                            dtype=down_weight.dtype
+                        )
+                        # replace the weights
+                        new_module.down.weight.data = down_weight
+                        new_module.up.weight.data = up_weight
+                        if bias_weight is not None:
+                            new_module.up.bias.data = bias_weight
+
+                if new_module:
+                    replace_module_by_path(unet, combined_name, new_module)
+                    converted_modules.append(new_module)
+                    num_replaced += 1
+                    layer_names_replaced.append(
+                        f"{combined_name} - {format_with_commas(count_parameters(child_module))}")
+
+        # one tick per entry of named_modules so the bar can actually reach its total
+        pbar.update(1)
+    pbar.close()
+    end_num_params = count_parameters(unet)
+
+    def sorting_key(s):
+        # Take the parameter count after the final " - ", remove commas, and convert to integer
+        return int(s.rsplit(" - ", 1)[1].strip().replace(",", ""))
+
+    sorted_layer_names_replaced = sorted(layer_names_replaced, key=sorting_key, reverse=True)
+    for layer_name in sorted_layer_names_replaced:
+        print(layer_name)
+
+    print_lorm_extract_details(
+        start_num_params=start_num_params,
+        end_num_params=end_num_params,
+        num_replaced=num_replaced,
+    )
+
+    return converted_modules
diff --git a/toolkit/losses.py b/toolkit/losses.py
index aded0764..eeea3571 100644
--- a/toolkit/losses.py
+++ b/toolkit/losses.py
@@ -27,11 +27,17 @@ class ComparativeTotalVariation(torch.nn.Module):
 # Gradient penalty
 def get_gradient_penalty(critic, real, fake, device):
     with torch.autocast(device_type='cuda'):
-        alpha = torch.rand(real.size(0), 1, 1, 1).to(device)
+        real = real.float()
+        fake = fake.float()
+        alpha = torch.rand(real.size(0), 1, 1, 1).to(device).float()
         interpolates = (alpha * real + ((1 - alpha) * fake)).requires_grad_(True)
+        if torch.isnan(interpolates).any():
+            print('interpolates is nan')
         d_interpolates = critic(interpolates)
         fake = torch.ones(real.size(0), 1, device=device)
-
+
+        if torch.isnan(d_interpolates).any():
+            print('d_interpolates is nan')
         gradients = torch.autograd.grad(
             outputs=d_interpolates,
             inputs=interpolates,
@@ -41,10 +47,14 @@ def get_gradient_penalty(critic, real, fake, device):
             only_inputs=True,
         )[0]
 
+        # see if any are nan
+        if torch.isnan(gradients).any():
+            print('gradients is nan')
+
         gradients = gradients.view(gradients.size(0), -1)
         gradient_norm = gradients.norm(2, dim=1)
         gradient_penalty = ((gradient_norm - 1) ** 2).mean()
-        return gradient_penalty
+        return gradient_penalty.float()
 
 
 class PatternLoss(torch.nn.Module):
diff --git a/toolkit/lycoris_special.py b/toolkit/lycoris_special.py
new file mode 100644
index 00000000..84021b49
--- /dev/null
+++ b/toolkit/lycoris_special.py
@@ -0,0 +1,373 @@
+import math
+import os
+from typing import Optional, Union, List, Type
+
+import torch
+from lycoris.kohya import LycorisNetwork, LoConModule
+from lycoris.modules.glora import GLoRAModule
+from torch import nn
+from transformers import CLIPTextModel
+from torch.nn import functional as F
+from toolkit.network_mixins import ToolkitNetworkMixin, ToolkitModuleMixin, ExtractableModuleMixin
+
+# diffusers specific stuff
+LINEAR_MODULES = [
+    'Linear',
+    'LoRACompatibleLinear'
+]
+CONV_MODULES = [
+    'Conv2d',
+    'LoRACompatibleConv'
+]
+
+class LoConSpecialModule(ToolkitModuleMixin, LoConModule, ExtractableModuleMixin):
+    def __init__(
+            self,
+            lora_name, org_module: nn.Module,
+            multiplier=1.0,
+            lora_dim=4, alpha=1,
+            dropout=0., rank_dropout=0., module_dropout=0.,
+            use_cp=False,
+            network: 'LycorisSpecialNetwork' = None,
+            use_bias=False,
+            **kwargs,
+    ):
+        """ if alpha == 0 or None, alpha is rank (no scaling). """
+        # call super of super
+        ToolkitModuleMixin.__init__(self, network=network)
+        torch.nn.Module.__init__(self)
+        self.lora_name = lora_name
+        self.lora_dim = lora_dim
+        self.cp = False
+
+        # check if parent has bias. if not force use_bias to False
+        if org_module.bias is None:
+            use_bias = False
+
+        self.scalar = nn.Parameter(torch.tensor(0.0))
+        orig_module_name = org_module.__class__.__name__
+        if orig_module_name in CONV_MODULES:
+            self.isconv = True
+            # For general LoCon
+            in_dim = org_module.in_channels
+            k_size = org_module.kernel_size
+            stride = org_module.stride
+            padding = org_module.padding
+            out_dim = org_module.out_channels
+            self.down_op = F.conv2d
+            self.up_op = F.conv2d
+            if use_cp and k_size != (1, 1):
+                self.lora_down = nn.Conv2d(in_dim, lora_dim, (1, 1), bias=False)
+                self.lora_mid = nn.Conv2d(lora_dim, lora_dim, k_size, stride, padding, bias=False)
+                self.cp = True
+            else:
+                self.lora_down = nn.Conv2d(in_dim, lora_dim, k_size, stride, padding, bias=False)
+            self.lora_up = nn.Conv2d(lora_dim, out_dim, (1, 1), bias=use_bias)
+        elif orig_module_name in LINEAR_MODULES:
+            self.isconv = False
+            self.down_op = F.linear
+            self.up_op = F.linear
+            if orig_module_name == 'GroupNorm':
+                # RuntimeError: mat1 and mat2 shapes cannot be multiplied (56320x120 and 320x32)
+                in_dim = org_module.num_channels
+                out_dim = org_module.num_channels
+            else:
+                in_dim = org_module.in_features
+                out_dim = org_module.out_features
+            self.lora_down = nn.Linear(in_dim, lora_dim, bias=False)
+            self.lora_up = nn.Linear(lora_dim, out_dim, bias=use_bias)
+        else:
+            raise NotImplementedError
+        self.shape = org_module.weight.shape
+
+        if dropout:
+            self.dropout = nn.Dropout(dropout)
+        else:
+            self.dropout = nn.Identity()
+        self.rank_dropout = rank_dropout
+        self.module_dropout = module_dropout
+
+        if type(alpha) == torch.Tensor:
+            alpha = alpha.detach().float().numpy()  # without casting, bf16 causes error
+        alpha = lora_dim if alpha is None or alpha == 0 else alpha
+        self.scale = alpha / self.lora_dim
+        self.register_buffer('alpha', torch.tensor(alpha))  # 定数として扱える
+
+        # same as microsoft's
+        torch.nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
+        torch.nn.init.kaiming_uniform_(self.lora_up.weight)
+        if self.cp:
+            torch.nn.init.kaiming_uniform_(self.lora_mid.weight, a=math.sqrt(5))
+
+        self.multiplier = multiplier
+        self.org_module = [org_module]
+        self.register_load_state_dict_post_hook(self.load_weight_hook)
+
+    def load_weight_hook(self, *args, **kwargs):
+        self.scalar = nn.Parameter(torch.ones_like(self.scalar))
+
+
+class LycorisSpecialNetwork(ToolkitNetworkMixin, LycorisNetwork):
+    """LyCORIS network wired into the toolkit's network mixin.
+
+    On construction it walks the given text encoder(s) and U-Net and creates
+    one module (``network_module``, or the class mapped in
+    ``MODULE_ALGO_MAP`` / ``NAME_ALGO_MAP``) for every matching linear or
+    convolution layer. The results are stored in ``text_encoder_loras`` and
+    ``unet_loras``.
+    """
+    # class names of U-Net blocks whose linear/conv children receive a module
+    UNET_TARGET_REPLACE_MODULE = [
+        "Transformer2DModel",
+        "ResnetBlock2D",
+        "Downsample2D",
+        "Upsample2D",
+        # 'UNet2DConditionModel',
+        # 'Conv2d',
+        # 'Timesteps',
+        # 'TimestepEmbedding',
+        # 'Linear',
+        # 'SiLU',
+        # 'ModuleList',
+        # 'DownBlock2D',
+        # 'ResnetBlock2D',  # need
+        # 'GroupNorm',
+        # 'LoRACompatibleConv',
+        # 'LoRACompatibleLinear',
+        # 'Dropout',
+        # 'CrossAttnDownBlock2D', # needed
+        # 'Transformer2DModel',  # maybe not, has duplicates
+        # 'BasicTransformerBlock', # duplicates
+        # 'LayerNorm',
+        # 'Attention',
+        # 'FeedForward',
+        # 'GEGLU',
+        # 'UpBlock2D',
+        # 'UNetMidBlock2DCrossAttn'
+    ]
+    # NOTE(review): __init__ never passes this list to create_modules, so it
+    # currently has no effect on which modules are created - confirm intent.
+    UNET_TARGET_REPLACE_NAME = [
+        "conv_in",
+        "conv_out",
+        "time_embedding.linear_1",
+        "time_embedding.linear_2",
+    ]
+
+    def __init__(
+            self,
+            text_encoder: Union[List[CLIPTextModel], CLIPTextModel],
+            unet,
+            multiplier: float = 1.0,
+            lora_dim: int = 4,
+            alpha: float = 1,
+            dropout: Optional[float] = None,
+            rank_dropout: Optional[float] = None,
+            module_dropout: Optional[float] = None,
+            conv_lora_dim: Optional[int] = None,
+            conv_alpha: Optional[float] = None,
+            use_cp: Optional[bool] = False,
+            network_module: Type[object] = LoConSpecialModule,
+            train_unet: bool = True,
+            train_text_encoder: bool = True,
+            use_text_encoder_1: bool = True,
+            use_text_encoder_2: bool = True,
+            use_bias: bool = False,
+            is_lorm: bool = False,
+            **kwargs,
+    ) -> None:
+        """Create the LyCORIS modules for the text encoder(s) and the U-Net.
+
+        Args:
+            text_encoder: one encoder or a list; with a list, the 1-based
+                index is appended to the text-encoder name prefix.
+            unet: root module searched for ``UNET_TARGET_REPLACE_MODULE``.
+            multiplier: assigned through the ``multiplier`` property.
+            lora_dim, alpha: rank/alpha for linear layers and 1x1 convs.
+            dropout, rank_dropout, module_dropout: ``None`` is treated as 0;
+                the values are handed to every created module.
+            conv_lora_dim, conv_alpha: rank/alpha for the remaining convs.
+                Conv handling is disabled when ``ENABLE_CONV`` is false or
+                ``conv_lora_dim`` is ``None``. A ``None`` ``conv_alpha``
+                falls back to ``alpha``.
+            use_cp, use_bias: forwarded to every created module.
+            network_module: module class used when no algo map entry matches.
+            train_unet, train_text_encoder: skip module creation when false.
+            use_text_encoder_1, use_text_encoder_2: skip encoder 0 / 1.
+            is_lorm: forwarded to ``ToolkitNetworkMixin.__init__``.
+            **kwargs: forwarded both to ``ToolkitNetworkMixin.__init__`` and
+                to every module constructor.
+        """
+        # call ToolkitNetworkMixin super
+        ToolkitNetworkMixin.__init__(
+            self,
+            train_text_encoder=train_text_encoder,
+            train_unet=train_unet,
+            is_lorm=is_lorm,
+            **kwargs
+        )
+        # call the parent of the parent LycorisNetwork
+        torch.nn.Module.__init__(self)
+
+        # LyCORIS unique stuff
+        dropout = 0 if dropout is None else dropout
+        rank_dropout = 0 if rank_dropout is None else rank_dropout
+        module_dropout = 0 if module_dropout is None else module_dropout
+        self.train_unet = train_unet
+        self.train_text_encoder = train_text_encoder
+
+        self.torch_multiplier = None
+        # triggers a tensor update
+        self.multiplier = multiplier
+        self.lora_dim = lora_dim
+
+        if not self.ENABLE_CONV or conv_lora_dim is None:
+            conv_lora_dim = 0
+            conv_alpha = 0
+        elif conv_alpha is None:
+            # float(None) below would raise; reuse the linear alpha instead
+            conv_alpha = alpha
+
+        self.conv_lora_dim = int(conv_lora_dim)
+        if self.conv_lora_dim and self.conv_lora_dim != self.lora_dim:
+            print('Apply different lora dim for conv layer')
+            print(f'Conv Dim: {conv_lora_dim}, Linear Dim: {lora_dim}')
+        elif self.conv_lora_dim == 0:
+            print('Disable conv layer')
+
+        self.alpha = alpha
+        self.conv_alpha = float(conv_alpha)
+        if self.conv_lora_dim and self.alpha != self.conv_alpha:
+            print('Apply different alpha value for conv layer')
+            print(f'Conv alpha: {conv_alpha}, Linear alpha: {alpha}')
+
+        if 1 >= dropout >= 0:
+            print(f'Use Dropout value: {dropout}')
+        self.dropout = dropout
+        self.rank_dropout = rank_dropout
+        self.module_dropout = module_dropout
+
+        def select_rank(target, linear_names, conv_names):
+            # Returns the (dim, alpha) pair to use for ``target`` or None
+            # when no module should be created for it.
+            class_name = target.__class__.__name__
+            if class_name in linear_names:
+                return (self.lora_dim, self.alpha) if lora_dim > 0 else None
+            if class_name in conv_names:
+                k_size, *_ = target.kernel_size
+                if k_size == 1 and lora_dim > 0:
+                    # 1x1 convs share the linear rank/alpha
+                    return self.lora_dim, self.alpha
+                if conv_lora_dim > 0:
+                    return self.conv_lora_dim, self.conv_alpha
+            return None
+
+        def build_module(algo, lora_name, target, parent, dim, module_alpha):
+            # Single construction site so every module gets the same
+            # shared settings; only target, parent and rank/alpha vary.
+            return algo(
+                lora_name, target, self.multiplier,
+                dim, module_alpha,
+                self.dropout, self.rank_dropout, self.module_dropout,
+                use_cp,
+                network=self,
+                parent=parent,
+                use_bias=use_bias,
+                **kwargs
+            )
+
+        # create module instances
+        def create_modules(
+                prefix,
+                root_module: torch.nn.Module,
+                target_replace_modules,
+                target_replace_names=None
+        ) -> List[network_module]:
+            print('Create LyCORIS Module')
+            if target_replace_names is None:
+                target_replace_names = []
+            loras = []
+
+            for name, module in root_module.named_modules():
+                module_name = module.__class__.__name__
+                if module_name in target_replace_modules:
+                    if module_name in self.MODULE_ALGO_MAP:
+                        algo = self.MODULE_ALGO_MAP[module_name]
+                    else:
+                        algo = network_module
+                    # wrap every linear/conv layer found inside the block
+                    for child_name, child_module in module.named_modules():
+                        rank = select_rank(child_module, LINEAR_MODULES, CONV_MODULES)
+                        if rank is None:
+                            continue
+                        lora_name = (prefix + '.' + name + '.' + child_name).replace('.', '_')
+                        loras.append(build_module(algo, lora_name, child_module, module, *rank))
+                elif name in target_replace_names:
+                    if name in self.NAME_ALGO_MAP:
+                        algo = self.NAME_ALGO_MAP[name]
+                    else:
+                        algo = network_module
+                    # name targets only match plain Linear / Conv2d layers
+                    rank = select_rank(module, ('Linear',), ('Conv2d',))
+                    if rank is None:
+                        continue
+                    lora_name = (prefix + '.' + name).replace('.', '_')
+                    loras.append(build_module(algo, lora_name, module, module, *rank))
+            return loras
+
+        if network_module == GLoRAModule:
+            print('GLoRA enabled, only train transformer')
+            # only train transformer (for GLoRA)
+            # NOTE: this rebinds the class attributes, so it also affects
+            # every LycorisSpecialNetwork created afterwards.
+            LycorisSpecialNetwork.UNET_TARGET_REPLACE_MODULE = [
+                "Transformer2DModel",
+                "Attention",
+            ]
+            LycorisSpecialNetwork.UNET_TARGET_REPLACE_NAME = []
+
+        if isinstance(text_encoder, list):
+            text_encoders = text_encoder
+            use_index = True
+        else:
+            text_encoders = [text_encoder]
+            use_index = False
+
+        self.text_encoder_loras = []
+        if self.train_text_encoder:
+            for i, te in enumerate(text_encoders):
+                if not use_text_encoder_1 and i == 0:
+                    continue
+                if not use_text_encoder_2 and i == 1:
+                    continue
+                self.text_encoder_loras.extend(create_modules(
+                    LycorisSpecialNetwork.LORA_PREFIX_TEXT_ENCODER + (f'{i + 1}' if use_index else ''),
+                    te,
+                    LycorisSpecialNetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE
+                ))
+        print(f"create LyCORIS for Text Encoder: {len(self.text_encoder_loras)} modules.")
+        if self.train_unet:
+            self.unet_loras = create_modules(LycorisSpecialNetwork.LORA_PREFIX_UNET, unet,
+                                             LycorisSpecialNetwork.UNET_TARGET_REPLACE_MODULE)
+        else:
+            self.unet_loras = []
+        print(f"create LyCORIS for U-Net: {len(self.unet_loras)} modules.")
+
+        self.weights_sd = None
+
+        # every module name must be unique across text encoder and U-Net
+        names = set()
+        for lora in self.text_encoder_loras + self.unet_loras:
+            assert lora.lora_name not in names, f"duplicated lora name: {lora.lora_name}"
+            names.add(lora.lora_name)
diff --git a/toolkit/metadata.py b/toolkit/metadata.py
index b652dcef..bf969f09 100644
--- a/toolkit/metadata.py
+++ b/toolkit/metadata.py
@@ -77,6 +77,10 @@ def parse_metadata_from_safetensors(meta: OrderedDict) -> OrderedDict:
 
 
 def load_metadata_from_safetensors(file_path: str) -> OrderedDict:
+    """Read the metadata of a safetensors file and parse it.
+
+    Best effort: any exception raised while opening the file or parsing
+    its metadata is printed, and an empty ``OrderedDict`` is returned
+    instead of propagating the error.
+    """
-    with safe_open(file_path, framework="pt") as f:
-        metadata = f.metadata()
-    return parse_metadata_from_safetensors(metadata)
+    try:
+        with safe_open(file_path, framework="pt") as f:
+            metadata = f.metadata()
+        return parse_metadata_from_safetensors(metadata)
+    except Exception as e:
+        print(f"Error loading metadata from {file_path}: {e}")
+        return OrderedDict()
diff --git a/toolkit/network_mixins.py b/toolkit/network_mixins.py
new file mode 100644
index 00000000..0644f494
--- /dev/null
+++ b/toolkit/network_mixins.py
@@ -0,0 +1,566 @@
+import json
+import os
+from collections import OrderedDict
+from typing import Optional, Union, List, Type, TYPE_CHECKING, Dict, Any, Literal
+
+import torch
+from torch import nn
+import weakref
+
+from tqdm import tqdm
+
+from toolkit.config_modules import NetworkConfig
+from toolkit.lorm import extract_conv, extract_linear, count_parameters
+from toolkit.metadata import add_model_hash_to_meta
+from toolkit.paths import KEYMAPS_ROOT
+
+if TYPE_CHECKING:
+    from toolkit.lycoris_special import LycorisSpecialNetwork, LoConSpecialModule
+    from toolkit.lora_special import LoRASpecialNetwork, LoRAModule
+    from toolkit.stable_diffusion_model import StableDiffusion
+
+# Annotation-only aliases. The classes are imported under TYPE_CHECKING only,
+# hence the string forward references.
+Network = Union['LycorisSpecialNetwork', 'LoRASpecialNetwork']
+Module = Union['LoConSpecialModule', 'LoRAModule']
+
+# Class names (matched against ``module.__class__.__name__``) that are
+# treated as linear layers.
+LINEAR_MODULES = [
+    'Linear',
+    'LoRACompatibleLinear',
+    # 'GroupNorm',
+]
+# Class names that are treated as convolution layers.
+CONV_MODULES = [
+    'Conv2d',
+    'LoRACompatibleConv',
+]
+
+# Names of the supported weight-extraction modes. This must be a Literal:
+# a Union of strings creates forward references instead, and the comma that
+# was missing after 'existing' fused it with 'fixed' into 'existingfixed'.
+ExtractMode = Literal[
+    'existing',
+    'fixed',
+    'threshold',
+    'ratio',
+    'quantile',
+    'percentage',
+]
+
+
+def broadcast_and_multiply(tensor, multiplier):
+    """Multiply ``tensor`` by ``multiplier`` after right-padding its shape.
+
+    Trailing singleton dimensions are appended to ``multiplier`` until it
+    has as many dimensions as ``tensor``; the product is then left to
+    regular broadcasting. A multiplier that already has at least as many
+    dimensions as ``tensor`` is used unchanged.
+    """
+    padded = multiplier
+    while padded.dim() < tensor.dim():
+        padded = padded.unsqueeze(-1)
+    return tensor * padded
+
+
+def add_bias(tensor, bias):
+    """Return ``tensor + bias`` with the bias reshaped to line up.
+
+    ``None`` as bias returns ``tensor`` untouched. Otherwise the bias gets a
+    leading batch dimension (repeated ``tensor.size(0)`` times) and trailing
+    singleton dimensions up to the rank of ``tensor``. If dimension 1 still
+    differs, 3-D and 4-D biases are permuted before the addition.
+    A ``RuntimeError`` from the addition is printed together with both
+    shapes and then re-raised.
+    """
+    if bias is None:
+        return tensor
+
+    # add batch dim: one copy of the bias per batch entry
+    batch_size = tensor.size(0)
+    bias = torch.cat([bias.unsqueeze(0)] * batch_size, dim=0)
+
+    # pad with trailing singleton dims until the ranks match
+    while bias.dim() < tensor.dim():
+        bias = bias.unsqueeze(-1)
+
+    # we may need to swap -1 for -2
+    if bias.size(1) != tensor.size(1):
+        rank = len(bias.size())
+        if rank == 3:
+            bias = bias.permute(0, 2, 1)
+        elif rank == 4:
+            bias = bias.permute(0, 3, 1, 2)
+
+    try:
+        return tensor + bias
+    except RuntimeError as e:
+        # surface both shapes to make the mismatch easy to diagnose
+        print(e)
+        print(tensor.size())
+        print(bias.size())
+        raise
+
+
+class ExtractableModuleMixin:
+    """Mixin that fills a module's up/down weights from its original layer.
+
+    The host object must provide ``org_module`` (a list whose first item is
+    the original layer), ``lora_down``, ``lora_up``, ``lora_dim``, ``alpha``
+    and ``scale``; all of them are read or written by ``extract_weight``.
+    """
+
+    def extract_weight(
+            self: Module,
+            extract_mode: ExtractMode = "existing",
+            extract_mode_param: Union[int, float] = None,
+    ):
+        """Extract low-rank weights from the original layer into this module.
+
+        ``extract_mode`` and ``extract_mode_param`` are forwarded to
+        ``extract_conv`` or ``extract_linear``, chosen by the original
+        layer's class name. The mode ``"existing"`` is translated to
+        ``'fixed'`` with the module's current ``lora_dim``.
+
+        Raises:
+            ValueError: the original layer's class name is in neither
+                ``CONV_MODULES`` nor ``LINEAR_MODULES``.
+        """
+        device = self.lora_down.weight.device
+        original = self.org_module[0]
+        weight_to_extract = original.weight
+
+        if extract_mode == "existing":
+            # keep the rank this module already has
+            extract_mode, extract_mode_param = 'fixed', self.lora_dim
+
+        original_class = original.__class__.__name__
+        if original_class in CONV_MODULES:
+            # do conv extraction
+            extractor = extract_conv
+        elif original_class in LINEAR_MODULES:
+            # do linear extraction
+            extractor = extract_linear
+        else:
+            raise ValueError(f"Unknown module type: {self.org_module[0].__class__.__name__}")
+
+        # the fourth return value is not used here
+        down_weight, up_weight, new_dim, _unused = extractor(
+            weight=weight_to_extract.clone().detach().float(),
+            mode=extract_mode,
+            mode_param=extract_mode_param,
+            device=device,
+        )
+
+        self.lora_dim = new_dim
+
+        # inject the extracted weights, cast to each layer's current dtype
+        for layer, extracted in ((self.lora_down, down_weight), (self.lora_up, up_weight)):
+            layer.weight.data = extracted.to(layer.weight.dtype).clone().detach()
+
+        # copy bias if we have one and are using them
+        original_bias = original.bias
+        if original_bias is not None and self.lora_up.bias is not None:
+            self.lora_up.bias.data = original_bias.data.clone().detach()
+
+        # alpha takes the first dimension of the down weight while keeping
+        # its existing tensor type, then scale is recomputed from it
+        self.alpha = (self.alpha * 0) + down_weight.shape[0]
+        self.scale = self.alpha / self.lora_dim
+
+        # handle trainable scaler method locon does: reset it to 1.0
+        if hasattr(self, 'scalar'):
+            self.scalar.data = torch.tensor(1.0).to(self.scalar.device, self.scalar.dtype)
+
+
+class ToolkitModuleMixin:
+    """Forward, merge and LoRM helpers shared by the toolkit's LoRA modules.
+
+    The host module is expected to provide ``lora_down``, ``lora_up``,
+    ``org_forward``, ``org_module``, ``scale``, ``dropout``,
+    ``rank_dropout``, ``module_dropout``, ``lora_dim`` and ``lora_name``;
+    each of them is read by at least one method below.
+    """
+
+    def __init__(
+            self: Module,
+            *args,
+            network: Network,
+            **kwargs
+    ):
+        """Store a weak reference to the owning network.
+
+        Positional and other keyword arguments are accepted and ignored.
+        """
+        # weak reference so the module does not keep its network alive
+        self.network_ref: weakref.ref = weakref.ref(network)
+        self.is_checkpointing = False
+        self._multiplier: Union[float, list, torch.Tensor] = None
+
+    def _call_forward(self: Module, x):
+        """Run the low-rank path on ``x`` and return the scaled result.
+
+        Returns the float ``0.0`` instead of a tensor when module dropout
+        fires during training.
+
+        Raises:
+            RuntimeError: re-raised from ``lora_down`` after the module
+                class name has been printed.
+        """
+        # module dropout
+        if self.module_dropout is not None and self.training:
+            if torch.rand(1) < self.module_dropout:
+                return 0.0  # added to original forward
+
+        if hasattr(self, 'lora_mid') and self.lora_mid is not None:
+            lx = self.lora_mid(self.lora_down(x))
+        else:
+            try:
+                lx = self.lora_down(x)
+            except RuntimeError:
+                print(f"Error in {self.__class__.__name__} lora_down")
+                # swallowing the error would only fail later on an unbound
+                # ``lx``; propagate the real cause instead
+                raise
+
+        if isinstance(self.dropout, (nn.Dropout, nn.Identity)):
+            lx = self.dropout(lx)
+        # normal dropout
+        elif self.dropout is not None and self.training:
+            lx = torch.nn.functional.dropout(lx, p=self.dropout)
+
+        # rank dropout
+        if self.rank_dropout is not None and self.rank_dropout > 0 and self.training:
+            mask = torch.rand((lx.size(0), self.lora_dim), device=lx.device) > self.rank_dropout
+            if len(lx.size()) == 3:
+                mask = mask.unsqueeze(1)  # for Text Encoder
+            elif len(lx.size()) == 4:
+                mask = mask.unsqueeze(-1).unsqueeze(-1)  # for Conv2d
+            lx = lx * mask
+
+            # scaling for rank dropout: treat as if the rank is changed
+            # it could also be computed from the mask, but rank_dropout is
+            # used in the hope of an augmentation-like effect
+            scale = self.scale * (1.0 / (1.0 - self.rank_dropout))  # redundant for readability
+        else:
+            scale = self.scale
+
+        lx = self.lora_up(lx)
+
+        # handle trainable scaler method locon does
+        if hasattr(self, 'scalar'):
+            scale = scale * self.scalar
+
+        return lx * scale
+
+    def lorm_forward(self: Module, x, *args, **kwargs):
+        """Forward pass used when the network runs in LoRM mode.
+
+        Inactive network: the original forward is returned. In ``'local'``
+        train mode the original output is computed without gradients, the
+        up/down pair is trained against it with an MSE loss (appended to
+        ``network.module_losses``) and the original output is returned.
+        Otherwise the up/down pair replaces the original layer.
+        """
+        network: Network = self.network_ref()
+        if not network.is_active:
+            return self.org_forward(x, *args, **kwargs)
+
+        if network.lorm_train_mode == 'local':
+            # we are going to predict input with both and do a loss on them
+            inputs = x.detach()
+            with torch.no_grad():
+                # get the local prediction
+                target_pred = self.org_forward(inputs, *args, **kwargs).detach()
+            with torch.set_grad_enabled(True):
+                # make a prediction with the lorm
+                lorm_pred = self.lora_up(self.lora_down(inputs.requires_grad_(True)))
+
+                local_loss = torch.nn.functional.mse_loss(target_pred.float(), lorm_pred.float())
+                # backprop the local loss right away
+                local_loss.backward()
+
+            network.module_losses.append(local_loss.detach())
+            # return the original as we dont want our trainer to affect ones down the line
+            return target_pred
+
+        else:
+            return self.lora_up(self.lora_down(x))
+
+    def forward(self: Module, x, *args, **kwargs):
+        """Return the original output plus the multiplied low-rank output.
+
+        LoRM networks are delegated to ``lorm_forward``. The original
+        forward alone is returned when the network is inactive, merged in,
+        has a multiplier of 0, or when module dropout fired.
+        """
+        network: Network = self.network_ref()
+        if network.is_lorm:
+            # we are doing lorm
+            return self.lorm_forward(x, *args, **kwargs)
+
+        # skip if not active, merged in, or multiplier is 0
+        skip = False
+        if not network.is_active:
+            skip = True
+        if network.is_merged_in:
+            skip = True
+        if network._multiplier == 0:
+            skip = True
+
+        if skip:
+            # network is not active, avoid doing anything
+            return self.org_forward(x, *args, **kwargs)
+
+        org_forwarded = self.org_forward(x, *args, **kwargs)
+        lora_output = self._call_forward(x)
+        if not isinstance(lora_output, torch.Tensor):
+            # module dropout fired: _call_forward returned the scalar 0.0,
+            # which has no batch dimension to inspect below
+            return org_forwarded
+        multiplier = network.torch_multiplier
+
+        lora_output_batch_size = lora_output.size(0)
+        multiplier_batch_size = multiplier.size(0)
+        if lora_output_batch_size != multiplier_batch_size:
+            num_interleaves = lora_output_batch_size // multiplier_batch_size
+            # todo check if this is correct, do we just concat when doing cfg?
+            multiplier = multiplier.repeat_interleave(num_interleaves)
+
+        return org_forwarded + broadcast_and_multiply(lora_output, multiplier)
+
+    def enable_gradient_checkpointing(self: Module):
+        """Set the ``is_checkpointing`` flag."""
+        self.is_checkpointing = True
+
+    def disable_gradient_checkpointing(self: Module):
+        """Clear the ``is_checkpointing`` flag."""
+        self.is_checkpointing = False
+
+    @torch.no_grad()
+    def merge_out(self: Module, merge_out_weight=1.0):
+        """Undo ``merge_in``; the sign of ``merge_out_weight`` is ignored."""
+        # make sure it is positive
+        merge_out_weight = abs(merge_out_weight)
+        # merging out is just merging in the negative of the weight
+        self.merge_in(merge_weight=-merge_out_weight)
+
+    @torch.no_grad()
+    def merge_in(self: Module, merge_weight=1.0):
+        """Add ``merge_weight * scale * (up @ down)`` to the original weight.
+
+        The sum is computed in float32 and written back to the original
+        module in the weight's original dtype.
+        """
+        # get up/down weight
+        up_weight = self.lora_up.weight.clone().float()
+        down_weight = self.lora_down.weight.clone().float()
+
+        # extract weight from org_module
+        org_sd = self.org_module[0].state_dict()
+        orig_dtype = org_sd["weight"].dtype
+        weight = org_sd["weight"].float()
+
+        multiplier = merge_weight
+        scale = self.scale
+        # handle trainable scaler method locon does
+        if hasattr(self, 'scalar'):
+            scale = scale * self.scalar
+
+        # merge weight
+        if len(weight.size()) == 2:
+            # linear
+            weight = weight + multiplier * (up_weight @ down_weight) * scale
+        elif down_weight.size()[2:4] == (1, 1):
+            # conv2d 1x1
+            weight = (
+                    weight
+                    + multiplier
+                    * (up_weight.squeeze(3).squeeze(2) @ down_weight.squeeze(3).squeeze(2)).unsqueeze(2).unsqueeze(3)
+                    * scale
+            )
+        else:
+            # conv2d 3x3
+            conved = torch.nn.functional.conv2d(down_weight.permute(1, 0, 2, 3), up_weight).permute(1, 0, 2, 3)
+            weight = weight + multiplier * conved * scale
+
+        # set weight to org_module
+        org_sd["weight"] = weight.to(orig_dtype)
+        self.org_module[0].load_state_dict(org_sd)
+
+    def setup_lorm(self: Module, state_dict: Optional[Dict[str, Any]] = None):
+        """Initialise this module for LoRM by extracting the original weight.
+
+        The extraction settings come from the network's LoRM config for
+        ``self.lora_name``. ``state_dict`` is currently unused.
+        """
+        # LoRM (Low Rank Middle) is a method reduce the number of parameters in a module while keeping the inputs and
+        # outputs the same. It is basically a LoRA but with the original module removed
+
+        # if a state dict is passed, use those weights instead of extracting
+        # todo load from state dict
+        network: Network = self.network_ref()
+        lorm_config = network.network_config.lorm_config.get_config_for_module(self.lora_name)
+
+        self.extract_weight(
+            extract_mode=lorm_config.extract_mode,
+            extract_mode_param=lorm_config.extract_mode_param
+        )
+
+
+class ToolkitNetworkMixin:
+    """Shared state and helpers for the toolkit's LoRA / LyCORIS networks.
+
+    Covers saving and loading weights (with optional key remapping), the
+    network-wide multiplier, activation through ``with network:``, merging
+    into the original weights, and weight extraction / LoRM setup.
+    """
+
+    def __init__(
+            self: Network,
+            *args,
+            train_text_encoder: Optional[bool] = True,
+            train_unet: Optional[bool] = True,
+            is_sdxl=False,
+            is_v2=False,
+            network_config: Optional[NetworkConfig] = None,
+            is_lorm=False,
+            **kwargs
+    ):
+        """Initialise the mixin state; extra args and kwargs are ignored."""
+        self.train_text_encoder = train_text_encoder
+        self.train_unet = train_unet
+        self.is_checkpointing = False
+        self._multiplier: float = 1.0
+        # the network only takes effect inside a ``with network:`` block
+        self.is_active: bool = False
+        self.is_sdxl = is_sdxl
+        self.is_v2 = is_v2
+        self.is_merged_in = False
+        self.is_lorm = is_lorm
+        self.network_config: NetworkConfig = network_config
+        self.module_losses: List[torch.Tensor] = []
+        self.lorm_train_mode: Literal['local', None] = None
+        self.can_merge_in = not is_lorm
+
+    def get_keymap(self: Network):
+        """Load the ldm-to-diffusers keymap for this model family.
+
+        Returns the ``'ldm_diffusers_keymap'`` entry of the keymap JSON in
+        ``KEYMAPS_ROOT``, or ``None`` when that file does not exist.
+        """
+        if self.is_sdxl:
+            keymap_tail = 'sdxl'
+        elif self.is_v2:
+            keymap_tail = 'sd2'
+        else:
+            keymap_tail = 'sd1'
+        # load keymap
+        keymap_name = f"stable_diffusion_locon_{keymap_tail}.json"
+        keymap_path = os.path.join(KEYMAPS_ROOT, keymap_name)
+
+        keymap = None
+        # check if file exists
+        if os.path.exists(keymap_path):
+            with open(keymap_path, 'r') as f:
+                keymap = json.load(f)['ldm_diffusers_keymap']
+
+        return keymap
+
+    def save_weights(
+            self: Network,
+            file, dtype=torch.float16,
+            metadata=None,
+            extra_state_dict: Optional[OrderedDict] = None
+    ):
+        """Write the network's state dict to ``file``.
+
+        Keys found in the inverted keymap are renamed. All tensors,
+        including those of ``extra_state_dict``, are copied to the CPU and
+        cast to ``dtype``. A ``.safetensors`` extension selects safetensors
+        (with metadata); any other extension uses ``torch.save``.
+        """
+        keymap = self.get_keymap()
+
+        save_keymap = {}
+        if keymap is not None:
+            for ldm_key, diffusers_key in keymap.items():
+                #  invert them
+                save_keymap[diffusers_key] = ldm_key
+
+        if metadata is not None and len(metadata) == 0:
+            metadata = None
+
+        state_dict = self.state_dict()
+        save_dict = OrderedDict()
+
+        for key in list(state_dict.keys()):
+            v = state_dict[key]
+            v = v.detach().clone().to("cpu").to(dtype)
+            save_key = save_keymap[key] if key in save_keymap else key
+            save_dict[save_key] = v
+
+        if extra_state_dict is not None:
+            # add extra items to state dict
+            for key in list(extra_state_dict.keys()):
+                v = extra_state_dict[key]
+                v = v.detach().clone().to("cpu").to(dtype)
+                save_dict[key] = v
+
+        if metadata is None:
+            metadata = OrderedDict()
+        metadata = add_model_hash_to_meta(state_dict, metadata)
+        if os.path.splitext(file)[1] == ".safetensors":
+            from safetensors.torch import save_file
+            save_file(save_dict, file, metadata)
+        else:
+            torch.save(save_dict, file)
+
+    def load_weights(self: Network, file):
+        """Load weights from ``file`` and return the entries not consumed.
+
+        Keys are remapped through the keymap where possible. Entries whose
+        key is absent from this network's state dict are not loaded; they
+        are returned as an ``OrderedDict``, or ``None`` when there are none.
+        """
+        # allows us to save and load to and from ldm weights
+        keymap = self.get_keymap()
+        keymap = {} if keymap is None else keymap
+
+        if os.path.splitext(file)[1] == ".safetensors":
+            from safetensors.torch import load_file
+
+            weights_sd = load_file(file)
+        else:
+            # NOTE(security): torch.load unpickles the file; only load
+            # checkpoints from trusted sources
+            weights_sd = torch.load(file, map_location="cpu")
+
+        load_sd = OrderedDict()
+        for key, value in weights_sd.items():
+            load_key = keymap[key] if key in keymap else key
+            load_sd[load_key] = value
+
+        # extract extra items from state dict
+        current_state_dict = self.state_dict()
+        extra_dict = OrderedDict()
+        for key in list(load_sd.keys()):
+            if key not in current_state_dict:
+                extra_dict[key] = load_sd.pop(key)
+
+        # non-strict: keys missing from the file keep their current values
+        self.load_state_dict(load_sd, False)
+        if len(extra_dict.keys()) == 0:
+            extra_dict = None
+        return extra_dict
+
+    @torch.no_grad()
+    def _update_torch_multiplier(self: Network):
+        """Rebuild ``self.torch_multiplier`` from ``self._multiplier``.
+
+        The tensor takes the device and dtype of the first module's
+        ``lora_down`` weight. When no module exists yet it is created on
+        the CPU with the default dtype.
+
+        Raises:
+            TypeError: the multiplier is not a number, list or tensor.
+        """
+        # builds a tensor for fast usage in the forward pass of the network modules
+        # without having to set it in every single module every time it changes
+        multiplier = self._multiplier
+        modules = self.get_all_modules()
+        if modules:
+            reference_weight = modules[0].lora_down.weight
+            device, dtype = reference_weight.device, reference_weight.dtype
+        else:
+            # the multiplier can be assigned before any module is created
+            # (e.g. during network construction); do not index an empty list
+            device, dtype = torch.device('cpu'), torch.get_default_dtype()
+
+        if isinstance(multiplier, (int, float)):
+            tensor_multiplier = torch.tensor((multiplier,))
+        elif isinstance(multiplier, list):
+            tensor_multiplier = torch.tensor(multiplier)
+        elif isinstance(multiplier, torch.Tensor):
+            tensor_multiplier = multiplier.clone().detach()
+        else:
+            raise TypeError(f"Unsupported multiplier type: {type(multiplier).__name__}")
+
+        self.torch_multiplier = tensor_multiplier.to(device, dtype=dtype).clone().detach()
+
+    @property
+    def multiplier(self) -> Union[float, List[float], List[List[float]]]:
+        """The raw multiplier value as last assigned."""
+        return self._multiplier
+
+    @multiplier.setter
+    def multiplier(self, value: Union[float, List[float], List[List[float]]]):
+        # it takes time to update all the multipliers, so we only do it if the value has changed
+        if self._multiplier == value:
+            return
+        # TODO: if we are setting a single value but have a list, keep the
+        # list if every item is the same as value (not implemented)
+        self._multiplier = value
+        self._update_torch_multiplier()
+
+    # called when the context manager is entered
+    # ie: with network:
+    def __enter__(self: Network):
+        self.is_active = True
+        # returned so that ``with network as net:`` binds the network
+        return self
+
+    def __exit__(self: Network, exc_type, exc_value, tb):
+        self.is_active = False
+
+    def force_to(self: Network, device, dtype):
+        """Move the network and each of its modules to ``device``/``dtype``."""
+        self.to(device, dtype)
+        for lora in self.get_all_modules():
+            lora.to(device, dtype)
+
+    def get_all_modules(self: Network) -> List[Module]:
+        """Return U-Net modules followed by text-encoder modules.
+
+        Either attribute may be missing, in which case it is skipped. The
+        result is always a new list.
+        """
+        loras = []
+        if hasattr(self, 'unet_loras'):
+            loras += self.unet_loras
+        if hasattr(self, 'text_encoder_loras'):
+            loras += self.text_encoder_loras
+        return loras
+
+    def _update_checkpointing(self: Network):
+        """Propagate ``is_checkpointing`` to every module."""
+        for module in self.get_all_modules():
+            if self.is_checkpointing:
+                module.enable_gradient_checkpointing()
+            else:
+                module.disable_gradient_checkpointing()
+
+    def enable_gradient_checkpointing(self: Network):
+        # not supported
+        self.is_checkpointing = True
+        self._update_checkpointing()
+
+    def disable_gradient_checkpointing(self: Network):
+        # not supported
+        self.is_checkpointing = False
+        self._update_checkpointing()
+
+    def merge_in(self, merge_weight=1.0):
+        """Merge every module into its original weight and mark as merged."""
+        self.is_merged_in = True
+        for module in self.get_all_modules():
+            module.merge_in(merge_weight)
+
+    def merge_out(self: Network, merge_weight=1.0):
+        """Undo ``merge_in``; does nothing when the network is not merged."""
+        if not self.is_merged_in:
+            return
+        self.is_merged_in = False
+        for module in self.get_all_modules():
+            module.merge_out(merge_weight)
+
+    def extract_weight(
+            self: Network,
+            extract_mode: ExtractMode = "existing",
+            extract_mode_param: Union[int, float] = None,
+    ):
+        """Call ``extract_weight`` on every module with the given settings.
+
+        Raises:
+            ValueError: ``extract_mode_param`` is ``None`` for a mode other
+                than ``"existing"``.
+        """
+        # "existing" needs no parameter: ExtractableModuleMixin.extract_weight
+        # replaces it with each module's own lora_dim, so the default
+        # arguments must be usable
+        if extract_mode != "existing" and extract_mode_param is None:
+            raise ValueError("extract_mode_param must be set")
+        for module in tqdm(self.get_all_modules(), desc="Extracting weights"):
+            module.extract_weight(
+                extract_mode=extract_mode,
+                extract_mode_param=extract_mode_param
+            )
+
+    def setup_lorm(self: Network, state_dict: Optional[Dict[str, Any]] = None):
+        """Call ``setup_lorm`` on every module, forwarding ``state_dict``."""
+        for module in tqdm(self.get_all_modules(), desc="Extracting LoRM"):
+            module.setup_lorm(state_dict=state_dict)
+
+    def calculate_lorem_parameter_reduction(self):
+        """Return how many parameters the up/down pairs save in total.
+
+        For each module this is the parameter count of the original layer
+        minus the combined count of ``lora_down`` and ``lora_up``.
+        """
+        params_reduced = 0
+        for module in self.get_all_modules():
+            num_orig_module_params = count_parameters(module.org_module[0])
+            num_lorem_params = count_parameters(module.lora_down) + count_parameters(module.lora_up)
+            params_reduced += (num_orig_module_params - num_lorem_params)
+
+        return params_reduced
+
diff --git a/toolkit/optimizer.py b/toolkit/optimizer.py
index 96aabfca..2332b165 100644
--- a/toolkit/optimizer.py
+++ b/toolkit/optimizer.py
@@ -1,4 +1,5 @@
 import torch
+from transformers import Adafactor
 
 
 def get_optimizer(
@@ -35,6 +36,8 @@ def get_optimizer(
         if use_lr < 0.1:
             # dadaptation uses different lr that is values of 0.1 to 1.0. default to 1.0
             use_lr = 1.0
+
+        print(f"Using lr {use_lr}")
         # let net be the neural network you want to train
         # you can choose weight decay value based on your problem, 0 by default
         optimizer = Prodigy(params, lr=use_lr, **optimizer_params)
@@ -43,6 +46,8 @@ def get_optimizer(
 
         if lower_type == "adam8bit":
             return bitsandbytes.optim.Adam8bit(params, lr=learning_rate, **optimizer_params)
+        elif lower_type == "adamw8bit":
+            return bitsandbytes.optim.AdamW8bit(params, lr=learning_rate, **optimizer_params)
         elif lower_type == "lion8bit":
             return bitsandbytes.optim.Lion8bit(params, lr=learning_rate, **optimizer_params)
         else:
@@ -52,10 +57,15 @@ def get_optimizer(
     elif lower_type == 'adamw':
         optimizer = torch.optim.AdamW(params, lr=float(learning_rate), **optimizer_params)
     elif lower_type == 'lion':
-        from lion_pytorch import Lion
-        return Lion(params, lr=learning_rate, **optimizer_params)
+        try:
+            from lion_pytorch import Lion
+            return Lion(params, lr=learning_rate, **optimizer_params)
+        except ImportError:
+            raise ImportError("Please install lion_pytorch to use Lion optimizer -> pip install lion-pytorch")
     elif lower_type == 'adagrad':
         optimizer = torch.optim.Adagrad(params, lr=float(learning_rate), **optimizer_params)
+    elif lower_type == 'adafactor':
+        optimizer = Adafactor(params, lr=float(learning_rate), **optimizer_params)
     else:
         raise ValueError(f'Unknown optimizer type {optimizer_type}')
     return optimizer
diff --git a/toolkit/orig_configs/sd_xl_refiner.yaml b/toolkit/orig_configs/sd_xl_refiner.yaml
new file mode 100644
index 00000000..cab5fe28
--- /dev/null
+++ b/toolkit/orig_configs/sd_xl_refiner.yaml
@@ -0,0 +1,91 @@
+model:
+  target: sgm.models.diffusion.DiffusionEngine
+  params:
+    scale_factor: 0.13025
+    disable_first_stage_autocast: True
+
+    denoiser_config:
+      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
+      params:
+        num_idx: 1000
+
+        weighting_config:
+          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
+        scaling_config:
+          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
+        discretization_config:
+          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
+
+    network_config:
+      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        adm_in_channels: 2560
+        num_classes: sequential
+        use_checkpoint: True
+        in_channels: 4
+        out_channels: 4
+        model_channels: 384
+        attention_resolutions: [4, 2]
+        num_res_blocks: 2
+        channel_mult: [1, 2, 4, 4]
+        num_head_channels: 64
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 4
+        context_dim: [1280, 1280, 1280, 1280]  # 1280
+        spatial_transformer_attn_type: softmax-xformers
+        legacy: False
+
+    conditioner_config:
+      target: sgm.modules.GeneralConditioner
+      params:
+        emb_models:
+          # crossattn and vector cond
+          - is_trainable: False
+            input_key: txt
+            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
+            params:
+              arch: ViT-bigG-14
+              version: laion2b_s39b_b160k
+              legacy: False
+              freeze: True
+              layer: penultimate
+              always_return_pooled: True
+          # vector cond
+          - is_trainable: False
+            input_key: original_size_as_tuple
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by two
+          # vector cond
+          - is_trainable: False
+            input_key: crop_coords_top_left
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by two
+          # vector cond
+          - is_trainable: False
+            input_key: aesthetic_score
+            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
+            params:
+              outdim: 256  # multiplied by one
+
+    first_stage_config:
+      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          attn_type: vanilla-xformers
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult: [1, 2, 4, 4]
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
diff --git a/toolkit/paths.py b/toolkit/paths.py
index 53851da5..b926c82f 100644
--- a/toolkit/paths.py
+++ b/toolkit/paths.py
@@ -5,6 +5,8 @@ CONFIG_ROOT = os.path.join(TOOLKIT_ROOT, 'config')
 SD_SCRIPTS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories", "sd-scripts")
 REPOS_ROOT = os.path.join(TOOLKIT_ROOT, "repositories")
 KEYMAPS_ROOT = os.path.join(TOOLKIT_ROOT, "toolkit", "keymaps")
+ORIG_CONFIGS_ROOT = os.path.join(TOOLKIT_ROOT, "toolkit", "orig_configs")
+DIFFUSERS_CONFIGS_ROOT = os.path.join(TOOLKIT_ROOT, "toolkit", "diffusers_configs")
 
 # check if ENV variable is set
 if 'MODELS_PATH' in os.environ:
diff --git a/toolkit/pipelines.py b/toolkit/pipelines.py
index f772fa1f..7be81bfc 100644
--- a/toolkit/pipelines.py
+++ b/toolkit/pipelines.py
@@ -1,14 +1,297 @@
+import importlib
+import inspect
 from typing import Union, List, Optional, Dict, Any, Tuple, Callable
 
 import torch
-from diffusers import StableDiffusionXLPipeline, StableDiffusionPipeline
+from diffusers import StableDiffusionXLPipeline, StableDiffusionPipeline, LMSDiscreteScheduler
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
+from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_k_diffusion import ModelWrapper
+from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput
 from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import rescale_noise_cfg
+from diffusers.utils import is_torch_xla_available
+from k_diffusion.external import CompVisVDenoiser, CompVisDenoiser
+from k_diffusion.sampling import get_sigmas_karras, BrownianTreeNoiseSampler
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm  # imported only when diffusers reports torch_xla as available
+
+    XLA_AVAILABLE = True  # module-level flag mirroring the result of the check above
+else:
+    XLA_AVAILABLE = False
+
+class StableDiffusionKDiffusionXLPipeline(StableDiffusionXLPipeline):
+    """SDXL pipeline whose sampling is delegated to a k-diffusion sampler picked with set_scheduler()."""
+    def __init__(
+            self,
+            vae: 'AutoencoderKL',
+            text_encoder: 'CLIPTextModel',
+            text_encoder_2: 'CLIPTextModelWithProjection',
+            tokenizer: 'CLIPTokenizer',
+            tokenizer_2: 'CLIPTokenizer',
+            unet: 'UNet2DConditionModel',
+            scheduler: 'KarrasDiffusionSchedulers',
+            force_zeros_for_empty_prompt: bool = True,  # NOTE(review): accepted but not forwarded to super().__init__()
+            add_watermarker: Optional[bool] = None,  # NOTE(review): accepted but not forwarded to super().__init__()
+    ):
+        super().__init__(
+            vae=vae,
+            text_encoder=text_encoder,
+            text_encoder_2=text_encoder_2,
+            tokenizer=tokenizer,
+            tokenizer_2=tokenizer_2,
+            unet=unet,
+            scheduler=scheduler,
+        )
+        self.sampler = None  # k-diffusion sampling function; stays None until set_scheduler() is called
+        scheduler = LMSDiscreteScheduler.from_config(scheduler.config)  # rebinds the local name only; read below for alphas_cumprod and prediction_type
+        model = ModelWrapper(unet, scheduler.alphas_cumprod)
+        if scheduler.config.prediction_type == "v_prediction":
+            self.k_diffusion_model = CompVisVDenoiser(model)
+        else:
+            self.k_diffusion_model = CompVisDenoiser(model)
+
+    def set_scheduler(self, scheduler_type: str):
+        library = importlib.import_module("k_diffusion")
+        sampling = getattr(library, "sampling")
+        self.sampler = getattr(sampling, scheduler_type)  # scheduler_type must name an attribute of k_diffusion.sampling
+
+    @torch.no_grad()
+    def __call__(
+            self,
+            prompt: Union[str, List[str]] = None,
+            prompt_2: Optional[Union[str, List[str]]] = None,
+            height: Optional[int] = None,
+            width: Optional[int] = None,
+            num_inference_steps: int = 50,
+            denoising_end: Optional[float] = None,
+            guidance_scale: float = 5.0,
+            negative_prompt: Optional[Union[str, List[str]]] = None,
+            negative_prompt_2: Optional[Union[str, List[str]]] = None,
+            num_images_per_prompt: Optional[int] = 1,
+            eta: float = 0.0,
+            generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+            latents: Optional[torch.FloatTensor] = None,
+            prompt_embeds: Optional[torch.FloatTensor] = None,
+            negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+            pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+            negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+            output_type: Optional[str] = "pil",
+            return_dict: bool = True,
+            callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+            callback_steps: int = 1,
+            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+            guidance_rescale: float = 0.0,
+            original_size: Optional[Tuple[int, int]] = None,
+            crops_coords_top_left: Tuple[int, int] = (0, 0),
+            target_size: Optional[Tuple[int, int]] = None,
+            use_karras_sigmas: bool = False,
+    ):
+        """Encode the prompt, sample with self.sampler over the sigmas, decode unless output_type is 'latent'. Call set_scheduler() first: self.sampler starts as None."""
+        # 0. Default height and width to unet
+        height = height or self.default_sample_size * self.vae_scale_factor
+        width = width or self.default_sample_size * self.vae_scale_factor
+
+        original_size = original_size or (height, width)
+        target_size = target_size or (height, width)
+
+        # 1. Check inputs. Raise error if not correct
+        self.check_inputs(
+            prompt,
+            prompt_2,
+            height,
+            width,
+            callback_steps,
+            negative_prompt,
+            negative_prompt_2,
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        )
+
+        # 2. Define call parameters
+        if prompt is not None and isinstance(prompt, str):
+            batch_size = 1
+        elif prompt is not None and isinstance(prompt, list):
+            batch_size = len(prompt)
+        else:
+            batch_size = prompt_embeds.shape[0]
+
+        device = self._execution_device
+
+        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+        # corresponds to doing no classifier free guidance.
+        do_classifier_free_guidance = guidance_scale > 1.0
+
+        # 3. Encode input prompt
+        text_encoder_lora_scale = (
+            cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+        )
+        (
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        ) = self.encode_prompt(
+            prompt=prompt,
+            prompt_2=prompt_2,
+            device=device,
+            num_images_per_prompt=num_images_per_prompt,
+            do_classifier_free_guidance=do_classifier_free_guidance,
+            negative_prompt=negative_prompt,
+            negative_prompt_2=negative_prompt_2,
+            prompt_embeds=prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+            lora_scale=text_encoder_lora_scale,
+        )
+
+        # 4. Prepare timesteps
+        self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+        timesteps = self.scheduler.timesteps
+
+        # 5. Prepare latent variables
+        num_channels_latents = self.unet.config.in_channels
+        latents = self.prepare_latents(
+            batch_size * num_images_per_prompt,
+            num_channels_latents,
+            height,
+            width,
+            prompt_embeds.dtype,
+            device,
+            generator,
+            latents,
+        )
+
+        # 6. Prepare extra step kwargs (NOTE(review): extra_step_kwargs is not read again in this method)
+        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+        # 7. Prepare added time ids & embeddings
+        add_text_embeds = pooled_prompt_embeds
+        add_time_ids = self._get_add_time_ids(
+            original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+        )
+
+        if do_classifier_free_guidance:
+            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+            add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+            add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+        prompt_embeds = prompt_embeds.to(device)
+        add_text_embeds = add_text_embeds.to(device)
+        add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+        # 8. Warmup-step count (NOTE(review): num_warmup_steps is not read again in this method)
+        num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+        # 8.1 Apply denoising_end: keep timesteps >= the cutoff (afterwards only num_inference_steps is read, by the Karras branch)
+        if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
+            discrete_timestep_cutoff = int(
+                round(
+                    self.scheduler.config.num_train_timesteps
+                    - (denoising_end * self.scheduler.config.num_train_timesteps)
+                )
+            )
+            num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+            timesteps = timesteps[:num_inference_steps]
+
+        # 9. Prepare sigmas: a Karras schedule between the k-diffusion model's first and last sigma, or the scheduler's own sigmas
+        if use_karras_sigmas:
+            sigma_min: float = self.k_diffusion_model.sigmas[0].item()
+            sigma_max: float = self.k_diffusion_model.sigmas[-1].item()
+            sigmas = get_sigmas_karras(n=num_inference_steps, sigma_min=sigma_min, sigma_max=sigma_max)
+            sigmas = sigmas.to(device)
+        else:
+            sigmas = self.scheduler.sigmas
+        sigmas = sigmas.to(prompt_embeds.dtype)
+
+        # 10. NOTE(review): second prepare_latents call; it is handed the latents returned by step 5 — confirm this is intended
+        num_channels_latents = self.unet.config.in_channels
+        latents = self.prepare_latents(
+            batch_size * num_images_per_prompt,
+            num_channels_latents,
+            height,
+            width,
+            prompt_embeds.dtype,
+            device,
+            generator,
+            latents,
+        )
+
+        latents = latents * sigmas[0]
+        self.k_diffusion_model.sigmas = self.k_diffusion_model.sigmas.to(latents.device)
+        self.k_diffusion_model.log_sigmas = self.k_diffusion_model.log_sigmas.to(latents.device)
+
+        # 11. Define model function
+        def model_fn(x, t):
+            latent_model_input = torch.cat([x] * 2)
+            t = torch.cat([t] * 2)
+
+            added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+            # The k-diffusion denoiser wrapper built in __init__ is called here rather than
+            # self.unet directly.
+            # NOTE(review): x and t are always duplicated for classifier-free guidance, even
+            # when do_classifier_free_guidance is False (prompt_embeds is then not doubled) —
+            # confirm this path is only used with guidance_scale > 1.
+            # NOTE(review): the result is indexed with [0] and return_dict=False is forwarded;
+            # verify both against what the k-diffusion wrapper and ModelWrapper actually
+            # accept and return.
+
+            noise_pred = self.k_diffusion_model(
+                latent_model_input,
+                t,
+                encoder_hidden_states=prompt_embeds,
+                cross_attention_kwargs=cross_attention_kwargs,
+                added_cond_kwargs=added_cond_kwargs,
+                return_dict=False,)[0]
+
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+            noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+            return noise_pred
+
+
+        # 12. Run k-diffusion solver
+        sampler_kwargs = {}
+        # None: no explicit seed is handed to BrownianTreeNoiseSampler below
+        noise_sampler_seed = None
+
+        # samplers whose signature has a noise_sampler parameter get a BrownianTreeNoiseSampler over the positive sigma range
+        if "noise_sampler" in inspect.signature(self.sampler).parameters:
+            min_sigma, max_sigma = sigmas[sigmas > 0].min(), sigmas.max()
+            noise_sampler = BrownianTreeNoiseSampler(latents, min_sigma, max_sigma, noise_sampler_seed)
+            sampler_kwargs["noise_sampler"] = noise_sampler
+
+        latents = self.sampler(model_fn, latents, sigmas, **sampler_kwargs)
+
+        if not output_type == "latent":
+            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)  # NOTE(review): run_safety_checker is not defined in this class — confirm the SDXL base class provides it
+        else:
+            image = latents
+            has_nsfw_concept = None
+
+        if has_nsfw_concept is None:
+            do_denormalize = [True] * image.shape[0]
+        else:
+            do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+
+        image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+
+        # Offload last model to CPU
+        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+            self.final_offload_hook.offload()
+
+        if not return_dict:
+            return (image,)
+
+        return StableDiffusionXLPipelineOutput(images=image)
 
 
 class CustomStableDiffusionXLPipeline(StableDiffusionXLPipeline):
-    # def __init__(self, *args, **kwargs):
-    #     super().__init__(*args, **kwargs)
 
     def predict_noise(
             self,
@@ -532,3 +815,389 @@ class CustomStableDiffusionPipeline(StableDiffusionPipeline):
             noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
 
         return noise_pred
+
+
+class StableDiffusionXLRefinerPipeline(StableDiffusionXLPipeline):
+
+    @torch.no_grad()
+    def __call__(
+            self,
+            prompt: Union[str, List[str]] = None,
+            prompt_2: Optional[Union[str, List[str]]] = None,
+            height: Optional[int] = None,
+            width: Optional[int] = None,
+            num_inference_steps: int = 50,
+            denoising_end: Optional[float] = None,
+            denoising_start: Optional[float] = None,
+            guidance_scale: float = 5.0,
+            negative_prompt: Optional[Union[str, List[str]]] = None,
+            negative_prompt_2: Optional[Union[str, List[str]]] = None,
+            num_images_per_prompt: Optional[int] = 1,
+            eta: float = 0.0,
+            generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+            latents: Optional[torch.FloatTensor] = None,
+            prompt_embeds: Optional[torch.FloatTensor] = None,
+            negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+            pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+            negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+            output_type: Optional[str] = "pil",
+            return_dict: bool = True,
+            callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+            callback_steps: int = 1,
+            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+            guidance_rescale: float = 0.0,
+            original_size: Optional[Tuple[int, int]] = None,
+            crops_coords_top_left: Tuple[int, int] = (0, 0),
+            target_size: Optional[Tuple[int, int]] = None,
+            negative_original_size: Optional[Tuple[int, int]] = None,
+            negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
+            negative_target_size: Optional[Tuple[int, int]] = None,
+            clip_skip: Optional[int] = None,
+    ):
+        r"""
+        Function invoked when calling the pipeline for generation.
+
+        Args:
+            prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+                instead.
+            prompt_2 (`str` or `List[str]`, *optional*):
+                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+                used in both text-encoders
+            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+                The height in pixels of the generated image. This is set to 1024 by default for the best results.
+                Anything below 512 pixels won't work well for
+                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+                and checkpoints that are not specifically fine-tuned on low resolutions.
+            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+                The width in pixels of the generated image. This is set to 1024 by default for the best results.
+                Anything below 512 pixels won't work well for
+                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+                and checkpoints that are not specifically fine-tuned on low resolutions.
+            num_inference_steps (`int`, *optional*, defaults to 50):
+                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+                expense of slower inference.
+            denoising_end (`float`, *optional*):
+                When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+                completed before it is intentionally prematurely terminated. As a result, the returned sample will
+                still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+                scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+                "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+                Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+            denoising_start (`float`, *optional*):
+                When specified, indicates the fraction (between 0.0 and 1.0) of the total denoising process to be
+                bypassed before it is initiated. Consequently, the initial part of the denoising process is skipped and
+                it is assumed that the passed `image` is a partly denoised image. Note that when this is specified,
+                strength will be ignored. The `denoising_start` parameter is particularly beneficial when this pipeline
+                is integrated into a "Mixture of Denoisers" multi-pipeline setup, as detailed in [**Refine Image
+                Quality**](https://huggingface.co/docs/diffusers/using-diffusers/sdxl#refine-image-quality).
+            guidance_scale (`float`, *optional*, defaults to 5.0):
+                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+                `guidance_scale` is defined as `w` of equation 2. of [Imagen
+                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+                usually at the expense of lower image quality.
+            negative_prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts not to guide the image generation. If not defined, one has to pass
+                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+                less than `1`).
+            negative_prompt_2 (`str` or `List[str]`, *optional*):
+                The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+                `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+            num_images_per_prompt (`int`, *optional*, defaults to 1):
+                The number of images to generate per prompt.
+            eta (`float`, *optional*, defaults to 0.0):
+                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+                [`schedulers.DDIMScheduler`], will be ignored for others.
+            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+                to make generation deterministic.
+            latents (`torch.FloatTensor`, *optional*):
+                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+                tensor will be generated by sampling using the supplied random `generator`.
+            prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+                provided, text embeddings will be generated from `prompt` input argument.
+            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+                argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
+            output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between
+                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+                of a plain tuple.
+            callback (`Callable`, *optional*):
+                A function that will be called every `callback_steps` steps during inference. The function will be
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+            callback_steps (`int`, *optional*, defaults to 1):
+                The frequency at which the `callback` function will be called. If not specified, the callback will be
+                called at every step.
+            cross_attention_kwargs (`dict`, *optional*):
+                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+                `self.processor` in
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+            guidance_rescale (`float`, *optional*, defaults to 0.0):
+                Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+                Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_rescale` is defined as `φ` in equation 16. of
+                [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+                Guidance rescale factor should fix overexposure when using zero terminal SNR.
+            original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+                If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
+                explained in section 2.2 of
+                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+            crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+                `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+                `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+                `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+            target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+                For most cases, `target_size` should be set to the desired height and width of the generated image. If
+                not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
+                section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+            negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+                To negatively condition the generation process based on a specific image resolution. Part of SDXL's
+                micro-conditioning as explained in section 2.2 of
+                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+            negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+                To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
+                micro-conditioning as explained in section 2.2 of
+                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+            negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+                To negatively condition the generation process based on a target image resolution. It should be as same
+                as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
+                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+
+        Examples:
+
+        Returns:
+            [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+            [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+            `tuple`. When returning a tuple, the first element is a list with the generated images.
+        """
+        # 0. Default height and width to unet
+        height = height or self.default_sample_size * self.vae_scale_factor
+        width = width or self.default_sample_size * self.vae_scale_factor
+
+        original_size = original_size or (height, width)
+        target_size = target_size or (height, width)
+
+        # 1. Check inputs. Raise error if not correct
+        self.check_inputs(
+            prompt,
+            prompt_2,
+            height,
+            width,
+            callback_steps,
+            negative_prompt,
+            negative_prompt_2,
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        )
+
+        # 2. Define call parameters
+        if prompt is not None and isinstance(prompt, str):
+            batch_size = 1
+        elif prompt is not None and isinstance(prompt, list):
+            batch_size = len(prompt)
+        else:
+            batch_size = prompt_embeds.shape[0]
+
+        device = self._execution_device
+
+        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+        # corresponds to doing no classifier free guidance.
+        do_classifier_free_guidance = guidance_scale > 1.0
+
+        # 3. Encode input prompt
+        lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+
+        (
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        ) = self.encode_prompt(
+            prompt=prompt,
+            prompt_2=prompt_2,
+            device=device,
+            num_images_per_prompt=num_images_per_prompt,
+            do_classifier_free_guidance=do_classifier_free_guidance,
+            negative_prompt=negative_prompt,
+            negative_prompt_2=negative_prompt_2,
+            prompt_embeds=prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+            lora_scale=lora_scale,
+            clip_skip=clip_skip,
+        )
+
+        # 4. Prepare timesteps
+        self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+        timesteps = self.scheduler.timesteps
+
+        # 5. Prepare latent variables
+        num_channels_latents = self.unet.config.in_channels
+        latents = self.prepare_latents(
+            batch_size * num_images_per_prompt,
+            num_channels_latents,
+            height,
+            width,
+            prompt_embeds.dtype,
+            device,
+            generator,
+            latents,
+        )
+
+        # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+        # 7. Prepare added time ids & embeddings
+        add_text_embeds = pooled_prompt_embeds
+        if self.text_encoder_2 is None:
+            text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+        else:
+            text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
+
+        add_time_ids = self._get_add_time_ids(
+            original_size,
+            crops_coords_top_left,
+            target_size,
+            dtype=prompt_embeds.dtype,
+            text_encoder_projection_dim=text_encoder_projection_dim,
+        )
+        if negative_original_size is not None and negative_target_size is not None:
+            negative_add_time_ids = self._get_add_time_ids(
+                negative_original_size,
+                negative_crops_coords_top_left,
+                negative_target_size,
+                dtype=prompt_embeds.dtype,
+                text_encoder_projection_dim=text_encoder_projection_dim,
+            )
+        else:
+            negative_add_time_ids = add_time_ids
+
+        if do_classifier_free_guidance:
+            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+            add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+            add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
+        prompt_embeds = prompt_embeds.to(device)
+        add_text_embeds = add_text_embeds.to(device)
+        add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+        # 8. Denoising loop
+        num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+        # 8.1 Apply denoising_end
+        if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
+            discrete_timestep_cutoff = int(
+                round(
+                    self.scheduler.config.num_train_timesteps
+                    - (denoising_end * self.scheduler.config.num_train_timesteps)
+                )
+            )
+            num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+            timesteps = timesteps[:num_inference_steps]
+
+        # 8.2 Determine denoising_start
+        denoising_start_index = 0
+        if denoising_start is not None and isinstance(denoising_start, float) and denoising_start > 0 and denoising_start < 1:
+            discrete_timestep_start = int(
+                round(
+                    self.scheduler.config.num_train_timesteps
+                    - (denoising_start * self.scheduler.config.num_train_timesteps)
+                )
+            )
+            denoising_start_index = len(list(filter(lambda ts: ts < discrete_timestep_start, timesteps)))
+
+
+        with self.progress_bar(total=num_inference_steps - denoising_start_index) as progress_bar:
+            for i, t in enumerate(timesteps, start=denoising_start_index):
+                # expand the latents if we are doing classifier free guidance
+                latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+                latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+                # predict the noise residual
+                added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+                noise_pred = self.unet(
+                    latent_model_input,
+                    t,
+                    encoder_hidden_states=prompt_embeds,
+                    cross_attention_kwargs=cross_attention_kwargs,
+                    added_cond_kwargs=added_cond_kwargs,
+                    return_dict=False,
+                )[0]
+
+                # perform guidance
+                if do_classifier_free_guidance:
+                    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+                if do_classifier_free_guidance and guidance_rescale > 0.0:
+                    # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+                    noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+                # compute the previous noisy sample x_t -> x_t-1
+                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+                # call the callback, if provided
+                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                    progress_bar.update()
+                    if callback is not None and i % callback_steps == 0:
+                        step_idx = i // getattr(self.scheduler, "order", 1)
+                        callback(step_idx, t, latents)
+
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        if not output_type == "latent":
+            # make sure the VAE is in float32 mode, as it overflows in float16
+            needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+
+            if needs_upcasting:
+                self.upcast_vae()
+                latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+
+            # cast back to fp16 if needed
+            if needs_upcasting:
+                self.vae.to(dtype=torch.float16)
+        else:
+            image = latents
+
+        if not output_type == "latent":
+            # apply watermark if available
+            if self.watermark is not None:
+                image = self.watermark.apply_watermark(image)
+
+            image = self.image_processor.postprocess(image, output_type=output_type)
+
+        # Offload all models
+        self.maybe_free_model_hooks()
+
+        if not return_dict:
+            return (image,)
+
+        return StableDiffusionXLPipelineOutput(images=image)
+
diff --git a/toolkit/progress_bar.py b/toolkit/progress_bar.py
new file mode 100644
index 00000000..e42f8086
--- /dev/null
+++ b/toolkit/progress_bar.py
@@ -0,0 +1,43 @@
+from tqdm import tqdm
+import time
+
+
+class ToolkitProgressBar(tqdm):
+    """
+    tqdm progress bar that can be paused.
+
+    While paused, calls to `update` are ignored. `unpause` shifts the bar's `start_t` forward
+    by the paused duration, so the pause is not counted in timing derived from `start_t`.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.paused = False
+        # time of the most recent pause(); starts out as the construction time
+        self.last_time = self._now()
+
+    def _now(self):
+        """Return the current time from tqdm's clock, or the wall clock if tqdm did not set one."""
+        # tqdm skips most of its __init__ (including `_time`) when created with disable=True
+        return getattr(self, '_time', time.time)()
+
+    def pause(self):
+        """Ignore updates from now on and remember when the pause began; no-op if already paused."""
+        if not self.paused:
+            self.paused = True
+            self.last_time = self._now()
+
+    def unpause(self):
+        """Resume updates and leave the paused interval out of the bar's timing; no-op if not paused."""
+        if self.paused:
+            self.paused = False
+            cur_t = self._now()
+            if hasattr(self, 'start_t'):
+                # a disabled bar has no timing state to adjust
+                self.start_t += cur_t - self.last_time
+                self.last_print_t = cur_t
+
+    def update(self, *args, **kwargs):
+        """Forward to `tqdm.update` unless the bar is paused."""
+        if not self.paused:
+            super().update(*args, **kwargs)
diff --git a/toolkit/prompt_utils.py b/toolkit/prompt_utils.py
index 813c3bad..6a5032f3 100644
--- a/toolkit/prompt_utils.py
+++ b/toolkit/prompt_utils.py
@@ -1,12 +1,11 @@
 import os
-from typing import Optional, TYPE_CHECKING, List
+from typing import Optional, TYPE_CHECKING, List, Union, Tuple
 
 import torch
 from safetensors.torch import load_file, save_file
 from tqdm import tqdm
 import random
 
-from toolkit.stable_diffusion_model import PromptEmbeds
 from toolkit.train_tools import get_torch_dtype
 import itertools
 
@@ -19,6 +18,39 @@ class ACTION_TYPES_SLIDER:
     ENHANCE_NEGATIVE = 1
 
 
+class PromptEmbeds:
+    """Text embeddings plus, when given as a list/tuple pair, the matching pooled embeddings."""
+    text_embeds: torch.Tensor
+    pooled_embeds: Union[torch.Tensor, None]
+
+    def __init__(self, args: Union[Tuple[torch.Tensor], List[torch.Tensor], torch.Tensor]) -> None:
+        # a list/tuple carries (text, pooled) as used by xl; a bare tensor is sdv1.x / sdv2.x
+        is_pair = isinstance(args, (list, tuple))
+        self.text_embeds = args[0] if is_pair else args
+        self.pooled_embeds = args[1] if is_pair else None
+
+    def to(self, *args, **kwargs):
+        """Apply `.to(*args, **kwargs)` to the stored tensors in place and return self."""
+        self.text_embeds = self.text_embeds.to(*args, **kwargs)
+        if self.pooled_embeds is not None:
+            self.pooled_embeds = self.pooled_embeds.to(*args, **kwargs)
+        return self
+
+    def detach(self):
+        """Replace the stored tensors with their detached versions and return self."""
+        self.text_embeds = self.text_embeds.detach()
+        if self.pooled_embeds is not None:
+            self.pooled_embeds = self.pooled_embeds.detach()
+        return self
+
+    def clone(self):
+        """Return a new PromptEmbeds built from clones of the stored tensors."""
+        text_copy = self.text_embeds.clone()
+        if self.pooled_embeds is None:
+            return PromptEmbeds(text_copy)
+        return PromptEmbeds([text_copy, self.pooled_embeds.clone()])
+
+
 class EncodedPromptPair:
     def __init__(
             self,
@@ -73,6 +105,18 @@ class EncodedPromptPair:
         self.both_targets = self.both_targets.to(*args, **kwargs)
         return self
 
+    def detach(self):
+        """Replace every stored prompt field with its detached version and return self."""
+        for field_name in (
+                'target_class', 'target_class_with_neutral',
+                'positive_target', 'positive_target_with_neutral',
+                'negative_target', 'negative_target_with_neutral',
+                'neutral', 'empty_prompt', 'both_targets',
+        ):
+            # each field is swapped for whatever its own .detach() returns
+            setattr(self, field_name, getattr(self, field_name).detach())
+        return self
+
 
 def concat_prompt_embeds(prompt_embeds: list[PromptEmbeds]):
     text_embeds = torch.cat([p.text_embeds for p in prompt_embeds], dim=0)
@@ -235,15 +279,17 @@ def split_anchors(concatenated: EncodedAnchor, num_anchors: int = 4) -> List[Enc
     return anchors
 
 
-def get_permutations(s):
+def get_permutations(s, max_permutations=8):
     # Split the string by comma
     phrases = [phrase.strip() for phrase in s.split(',')]
 
     # remove empty strings
     phrases = [phrase for phrase in phrases if len(phrase) > 0]
+    # shuffle the list
+    random.shuffle(phrases)
 
     # Get all permutations
-    permutations = list(itertools.permutations(phrases))
+    permutations = list([p for p in itertools.islice(itertools.permutations(phrases), max_permutations)])
 
     # Convert the tuples back to comma separated strings
     return [', '.join(permutation) for permutation in permutations]
@@ -251,8 +297,8 @@ def get_permutations(s):
 
 def get_slider_target_permutations(target: 'SliderTargetConfig', max_permutations=8) -> List['SliderTargetConfig']:
     from toolkit.config_modules import SliderTargetConfig
-    pos_permutations = get_permutations(target.positive)
-    neg_permutations = get_permutations(target.negative)
+    pos_permutations = get_permutations(target.positive, max_permutations=max_permutations)
+    neg_permutations = get_permutations(target.negative, max_permutations=max_permutations)
 
     permutations = []
     for pos, neg in itertools.product(pos_permutations, neg_permutations):
@@ -465,3 +511,39 @@ def build_latent_image_batch_for_prompt_pair(
         latent_list.append(neg_latent)
 
     return torch.cat(latent_list, dim=0)
+
+
+def inject_trigger_into_prompt(prompt, trigger=None, to_replace_list=None, add_if_not_present=True):
+    """
+    Replace trigger placeholders in a prompt with the trigger word.
+
+    Args:
+        prompt: prompt text that may contain placeholder tokens.
+        trigger: text substituted for every placeholder; None is treated as an empty
+            string so that any [trigger] tokens are simply removed.
+        to_replace_list: extra placeholder strings to substitute, in addition to the
+            defaults "[name]" and "[trigger]". The caller's list is never modified.
+        add_if_not_present: when True and the non-blank trigger does not occur in the
+            result, prepend it (followed by a space) to the prompt.
+
+    Returns:
+        The prompt with all placeholders substituted.
+    """
+    if trigger is None:
+        trigger = ''
+    default_replacements = ["[name]", "[trigger]"]
+    if to_replace_list is None:
+        to_replace_list = default_replacements
+    else:
+        # build a new list; `+=` would extend the caller's list on every call
+        to_replace_list = list(to_replace_list) + default_replacements
+
+    output_prompt = prompt
+    # dict.fromkeys removes duplicates while keeping a stable replacement order
+    for to_replace in dict.fromkeys(to_replace_list):
+        output_prompt = output_prompt.replace(to_replace, trigger)
+
+    if trigger.strip() != "" and add_if_not_present and trigger not in output_prompt:
+        # add it to the beginning of the prompt
+        output_prompt = trigger + " " + output_prompt
+    return output_prompt
diff --git a/toolkit/sampler.py b/toolkit/sampler.py
new file mode 100644
index 00000000..b2ba6646
--- /dev/null
+++ b/toolkit/sampler.py
@@ -0,0 +1,116 @@
+import copy
+
+from diffusers import (
+    DDPMScheduler,
+    EulerAncestralDiscreteScheduler,
+    DPMSolverMultistepScheduler,
+    DPMSolverSinglestepScheduler,
+    LMSDiscreteScheduler,
+    PNDMScheduler,
+    DDIMScheduler,
+    EulerDiscreteScheduler,
+    HeunDiscreteScheduler,
+    KDPM2DiscreteScheduler,
+    KDPM2AncestralDiscreteScheduler,
+    LCMScheduler
+)
+
+from k_diffusion.external import CompVisDenoiser
+
+from toolkit.samplers.custom_lcm_scheduler import CustomLCMScheduler
+
+# scheduler: default beta-schedule constants
+SCHEDULER_LINEAR_START = 0.00085
+SCHEDULER_LINEAR_END = 0.0120
+SCHEDULER_TIMESTEPS = 1000
+SCHEDLER_SCHEDULE = "scaled_linear"  # NOTE(review): "SCHEDLER" looks like a typo; name kept unchanged
+
+sdxl_sampler_config = {  # base config; get_sampler() deep-copies it and overlays per-sampler options
+    "_class_name": "EulerDiscreteScheduler",
+    "_diffusers_version": "0.19.0.dev0",
+    "beta_end": 0.012,
+    "beta_schedule": "scaled_linear",
+    "beta_start": 0.00085,
+    "clip_sample": False,
+    "interpolation_type": "linear",
+    "num_train_timesteps": 1000,
+    "prediction_type": "epsilon",
+    "sample_max_value": 1.0,
+    "set_alpha_to_one": False,
+    "skip_prk_steps": True,
+    "steps_offset": 1,
+    "timestep_spacing": "leading",
+    "trained_betas": None,
+    "use_karras_sigmas": False
+}
+
+
+def get_sampler(
+        sampler: str,
+):
+    """Build the diffusers scheduler named by `sampler` from the SDXL base config; raise ValueError if unknown."""
+    sched_init_args = {}
+
+    if sampler.startswith("k_"):
+        sched_init_args["use_karras_sigmas"] = True
+
+    if sampler == "ddim":
+        scheduler_cls = DDIMScheduler
+    elif sampler == "ddpm":  # ddpm is not supported ?
+        scheduler_cls = DDPMScheduler
+    elif sampler == "pndm":
+        scheduler_cls = PNDMScheduler
+    elif sampler == "lms" or sampler == "k_lms":
+        scheduler_cls = LMSDiscreteScheduler
+    elif sampler == "euler" or sampler == "k_euler":
+        scheduler_cls = EulerDiscreteScheduler
+    elif sampler == "euler_a":
+        scheduler_cls = EulerAncestralDiscreteScheduler
+    elif sampler == "dpmsolver" or sampler == "dpmsolver++" or sampler == "k_dpmsolver" or sampler == "k_dpmsolver++":
+        scheduler_cls = DPMSolverMultistepScheduler
+        sched_init_args["algorithm_type"] = sampler.replace("k_", "")
+    elif sampler == "dpmsingle":
+        scheduler_cls = DPMSolverSinglestepScheduler
+    elif sampler == "heun":
+        scheduler_cls = HeunDiscreteScheduler
+    elif sampler == "dpm_2":
+        scheduler_cls = KDPM2DiscreteScheduler
+    elif sampler == "dpm_2_a":
+        scheduler_cls = KDPM2AncestralDiscreteScheduler
+    elif sampler == "lcm":
+        scheduler_cls = LCMScheduler
+    elif sampler == "custom_lcm":
+        scheduler_cls = CustomLCMScheduler
+    else:
+        raise ValueError(f"Unknown sampler: {sampler!r}")
+
+    config = copy.deepcopy(sdxl_sampler_config)
+    config.update(sched_init_args)
+
+    return scheduler_cls.from_config(config)
+
+
+# testing
+if __name__ == "__main__":
+    from diffusers import DiffusionPipeline
+
+    from diffusers import StableDiffusionKDiffusionPipeline
+    import torch
+    import os
+
+    inference_steps = 25
+    seed = 42  # fixed seed for the generator below; the name was used without being defined
+
+    pipe = StableDiffusionKDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base")
+    pipe = pipe.to("cuda")
+
+    # NOTE: the `CompVisDenoiser(model)` call was dropped here: `model` was never defined and the result was unused
+    pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", custom_pipeline="sd_text2img_k_diffusion")
+    pipe = pipe.to("cuda")
+
+    prompt = "an astronaut riding a horse on mars"
+    pipe.set_scheduler("sample_heun")
+    generator = torch.Generator(device="cuda").manual_seed(seed)
+    image = pipe(prompt, generator=generator, num_inference_steps=20).images[0]
+
+    image.save("./astronaut_heun_k_diffusion.png")
diff --git a/toolkit/samplers/custom_lcm_scheduler.py b/toolkit/samplers/custom_lcm_scheduler.py
new file mode 100644
index 00000000..132052af
--- /dev/null
+++ b/toolkit/samplers/custom_lcm_scheduler.py
@@ -0,0 +1,553 @@
+# Copyright 2023 Stanford University Team and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion
+# and https://github.com/hojonathanho/diffusion
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.utils import BaseOutput, logging
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+@dataclass
+class LCMSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+
+    Args:
+        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        denoised (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images, *optional*):
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+            `denoised` can be used to preview progress or for guidance. Defaults to `None`.
+    """
+
+    prev_sample: torch.FloatTensor
+    denoised: Optional[torch.FloatTensor] = None
+
+
+# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
+def betas_for_alpha_bar(
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
+    """
+    Discretize a continuous alpha_bar(t) curve, defined for t in [0, 1], into a beta schedule.
+
+    alpha_bar(t) is the cumulative product of (1 - beta) up to time t, so each beta follows from
+    the ratio of alpha_bar at the end and at the start of its step.
+
+    Args:
+        num_diffusion_timesteps (`int`): the number of betas to produce.
+        max_beta (`float`): the maximum beta to use; use values lower than 1 to
+                     prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+                     Choose from `cosine` or `exp`
+
+    Returns:
+        betas (`torch.Tensor`): float32 tensor with one beta per diffusion timestep
+
+    Raises:
+        ValueError: if `alpha_transform_type` is neither `cosine` nor `exp`.
+    """
+    if alpha_transform_type not in ("cosine", "exp"):
+        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+
+    def alpha_bar_fn(t):
+        # squared-cosine curve for "cosine", exponential decay for "exp"
+        if alpha_transform_type == "cosine":
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+        return math.exp(t * -12.0)
+
+    steps = num_diffusion_timesteps
+    # beta_i = 1 - alpha_bar((i + 1) / N) / alpha_bar(i / N), capped at max_beta
+    capped_betas = [
+        min(1 - alpha_bar_fn((i + 1) / steps) / alpha_bar_fn(i / steps), max_beta)
+        for i in range(steps)
+    ]
+
+    # hand the schedule back as a float32 tensor
+    return torch.tensor(capped_betas, dtype=torch.float32)
+
+
+# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
+def rescale_zero_terminal_snr(betas: torch.FloatTensor) -> torch.FloatTensor:
+    """
+    Rescale a beta schedule so that its terminal SNR is zero, based on Algorithm 1 of
+    https://arxiv.org/pdf/2305.08891.pdf
+
+    The input tensor is not modified.
+
+    Args:
+        betas (`torch.FloatTensor`):
+            the betas that the scheduler is being initialized with.
+
+    Returns:
+        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    """
+    # sqrt of the cumulative alpha product: the quantity that gets shifted and scaled
+    sqrt_alpha_bar = torch.cumprod(1.0 - betas, dim=0).sqrt()
+
+    # remember both endpoints before they are changed
+    first = sqrt_alpha_bar[0].clone()
+    last = sqrt_alpha_bar[-1].clone()
+
+    # shift so the last timestep becomes zero ...
+    sqrt_alpha_bar -= last
+    # ... then scale so the first timestep gets its old value back
+    sqrt_alpha_bar *= first / (first - last)
+
+    # undo the sqrt, then undo the cumprod by dividing neighbouring entries
+    alpha_bar = sqrt_alpha_bar ** 2
+    step_alphas = torch.cat([alpha_bar[0:1], alpha_bar[1:] / alpha_bar[:-1]])
+
+    # betas are the complement of the per-step alphas
+    rescaled = 1 - step_alphas
+
+    return rescaled
+
+
+class CustomLCMScheduler(SchedulerMixin, ConfigMixin):
+    """
+    `LCMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with
+    non-Markovian guidance.
+
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. [`~ConfigMixin`] takes care of storing all config
+    attributes that are passed in the scheduler's `__init__` function, such as `num_train_timesteps`. They can be
+    accessed via `scheduler.config.num_train_timesteps`. [`SchedulerMixin`] provides general loading and saving
+    functionality via the [`SchedulerMixin.save_pretrained`] and [`~SchedulerMixin.from_pretrained`] functions.
+
+    Args:
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.00085):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.012):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"scaled_linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+            `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+        original_inference_steps (`int`, *optional*, defaults to 50):
+            The default number of inference steps used to generate a linearly-spaced timestep schedule, from which we
+            will ultimately take `num_inference_steps` evenly spaced timesteps to form the final timestep schedule.
+        clip_sample (`bool`, defaults to `False`):
+            Clip the predicted sample for numerical stability.
+        clip_sample_range (`float`, defaults to 1.0):
+            The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
+        set_alpha_to_one (`bool`, defaults to `True`):
+            Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
+            there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
+            otherwise it uses the alpha value at step 0.
+        steps_offset (`int`, defaults to 0):
+            An offset added to the inference steps. You can use a combination of `offset=1` and
+            `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
+            Diffusion.
+        prediction_type (`str`, defaults to `epsilon`, *optional*):
+            Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+            `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+            Video](https://imagen.research.google/video/paper.pdf) paper).
+        thresholding (`bool`, defaults to `False`):
+            Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
+            as Stable Diffusion.
+        dynamic_thresholding_ratio (`float`, defaults to 0.995):
+            The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
+        sample_max_value (`float`, defaults to 1.0):
+            The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
+        timestep_spacing (`str`, defaults to `"leading"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        timestep_scaling (`float`, defaults to 10.0):
+            The factor the timesteps will be multiplied by when calculating the consistency model boundary conditions
+            `c_skip` and `c_out`. Increasing this will decrease the approximation error (although the approximation
+            error at the default of `10.0` is already pretty small).
+        rescale_betas_zero_snr (`bool`, defaults to `False`):
+            Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+            dark samples instead of limiting it to samples with medium brightness. Loosely related to
+            [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
+    """
+
+    order = 1
+
+    @register_to_config
+    def __init__(
+        self,
+        num_train_timesteps: int = 1000,
+        beta_start: float = 0.00085,
+        beta_end: float = 0.012,
+        beta_schedule: str = "scaled_linear",
+        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+        original_inference_steps: int = 50,
+        clip_sample: bool = False,
+        clip_sample_range: float = 1.0,
+        set_alpha_to_one: bool = True,
+        steps_offset: int = 0,
+        prediction_type: str = "epsilon",
+        thresholding: bool = False,
+        dynamic_thresholding_ratio: float = 0.995,
+        sample_max_value: float = 1.0,
+        timestep_spacing: str = "leading",
+        timestep_scaling: float = 10.0,
+        rescale_betas_zero_snr: bool = False,
+    ):
+        if trained_betas is not None:
+            self.betas = torch.tensor(trained_betas, dtype=torch.float32)
+        elif beta_schedule == "linear":
+            self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
+        elif beta_schedule == "scaled_linear":
+            # this schedule is very specific to the latent diffusion model.
+            self.betas = (
+                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+            )
+        elif beta_schedule == "squaredcos_cap_v2":
+            # Glide cosine schedule
+            self.betas = betas_for_alpha_bar(num_train_timesteps)
+        else:
+            raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+
+        # Rescale for zero SNR
+        if rescale_betas_zero_snr:
+            self.betas = rescale_zero_terminal_snr(self.betas)
+
+        self.alphas = 1.0 - self.betas
+        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+
+        # At every step in ddim, we are looking into the previous alphas_cumprod
+        # For the final step, there is no previous alphas_cumprod because we are already at 0
+        # `set_alpha_to_one` decides whether we set this parameter simply to one or
+        # whether we use the final alpha of the "non-previous" one.
+        self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
+
+        # standard deviation of the initial noise distribution
+        self.init_noise_sigma = 1.0
+        # taken from the constructor arguments rather than hard-coded to 50 / 1000
+        self.original_inference_steps = original_inference_steps
+        self.train_timesteps = num_train_timesteps
+
+        # setable values
+        self.num_inference_steps = None
+        self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
+
+        self._step_index = None
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
+    def _init_step_index(self, timestep):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.timesteps.device)
+
+        index_candidates = (self.timesteps == timestep).nonzero()
+
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if there is only 1)
+        # This way we can ensure we don't accidentally skip a sigma in
+        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+        if len(index_candidates) > 1:
+            step_index = index_candidates[1]
+        else:
+            step_index = index_candidates[0]
+
+        self._step_index = step_index.item()
+
+    @property
+    def step_index(self):
+        return self._step_index
+
+    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+        """
+        Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+        current timestep.
+
+        Args:
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
+        Returns:
+            `torch.FloatTensor`:
+                A scaled input sample.
+        """
+        return sample
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
+    def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        """
+        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
+        prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
+        s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
+        pixels from saturation at each step. We find that dynamic thresholding results in significantly better
+        photorealism as well as better image-text alignment, especially when using very large guidance weights."
+
+        https://arxiv.org/abs/2205.11487
+        """
+        dtype = sample.dtype
+        batch_size, channels, *remaining_dims = sample.shape
+
+        if dtype not in (torch.float32, torch.float64):
+            sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
+
+        # Flatten sample for doing quantile calculation along each image
+        sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
+
+        abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
+
+        s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
+        s = torch.clamp(
+            s, min=1, max=self.config.sample_max_value
+        )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
+        s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
+        sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
+
+        sample = sample.reshape(batch_size, channels, *remaining_dims)
+        sample = sample.to(dtype)
+
+        return sample
+
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        device: Union[str, torch.device] = None,
+        strength: int = 1.0,
+    ):
+        """
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+
+        Args:
+            num_inference_steps (`int`):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+            strength (`float`, *optional*, defaults to 1.0):
+                Fraction of the original schedule to keep: only the first `int(original_steps * strength)`
+                entries of the linearly-spaced training schedule are used, and `num_inference_steps` timesteps
+                are then picked from them, evenly spaced in terms of indices. The original step count is not an
+                argument here; it is read from the `original_inference_steps` attribute of the scheduler.
+        """
+
+        original_inference_steps = self.original_inference_steps
+
+        if num_inference_steps > self.config.num_train_timesteps:
+            raise ValueError(
+                f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
+                f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+                f" maximal {self.config.num_train_timesteps} timesteps."
+            )
+
+        self.num_inference_steps = num_inference_steps
+        original_steps = (
+            original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps
+        )
+
+        if original_steps > self.config.num_train_timesteps:
+            raise ValueError(
+                f"`original_steps`: {original_steps} cannot be larger than `self.config.train_timesteps`:"
+                f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+                f" maximal {self.config.num_train_timesteps} timesteps."
+            )
+
+        if num_inference_steps > original_steps:
+            raise ValueError(
+                f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:"
+                f" {original_steps} because the final timestep schedule will be a subset of the"
+                f" `original_inference_steps`-sized initial timestep schedule."
+            )
+
+        # LCM Timesteps Setting
+        # The skipping step parameter k from the paper.
+        k = self.config.num_train_timesteps // original_steps
+        # LCM Training/Distillation Steps Schedule
+        # Currently, only a linearly-spaced schedule is supported (same as in the LCM distillation scripts).
+        lcm_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1
+        skipping_step = len(lcm_origin_timesteps) // num_inference_steps
+
+        if skipping_step < 1:
+            raise ValueError(
+                f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}."
+            )
+
+        # LCM Inference Steps Schedule
+        lcm_origin_timesteps = lcm_origin_timesteps[::-1].copy()
+        # Select (approximately) evenly spaced indices from lcm_origin_timesteps.
+        inference_indices = np.linspace(0, len(lcm_origin_timesteps) - 1, num=num_inference_steps)
+        inference_indices = np.floor(inference_indices).astype(np.int64)
+        timesteps = lcm_origin_timesteps[inference_indices]
+
+        self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long)
+
+        self._step_index = None
+
+    def get_scalings_for_boundary_condition_discrete(self, timestep):
+        self.sigma_data = 0.5  # Default: 0.5
+        scaled_timestep = timestep * self.config.timestep_scaling
+
+        c_skip = self.sigma_data**2 / (scaled_timestep**2 + self.sigma_data**2)
+        c_out = scaled_timestep / (scaled_timestep**2 + self.sigma_data**2) ** 0.5
+        return c_skip, c_out
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: int,
+        sample: torch.FloatTensor,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[LCMSchedulerOutput, Tuple]:
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] or `tuple`.
+        Returns:
+            [`~schedulers.scheduling_utils.LCMSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] is returned, otherwise a
+                tuple is returned where the first element is the sample tensor.
+        """
+        if self.num_inference_steps is None:
+            raise ValueError(
+                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
+            )
+
+        if self.step_index is None:
+            self._init_step_index(timestep)
+
+        # 1. get previous step value
+        prev_step_index = self.step_index + 1
+        if prev_step_index < len(self.timesteps):
+            prev_timestep = self.timesteps[prev_step_index]
+        else:
+            prev_timestep = timestep
+
+        # 2. compute alphas, betas
+        alpha_prod_t = self.alphas_cumprod[timestep]
+        alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+
+        beta_prod_t = 1 - alpha_prod_t
+        beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+        # 3. Get scalings for boundary conditions
+        c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep)
+
+        # 4. Compute the predicted original sample x_0 based on the model parameterization
+        if self.config.prediction_type == "epsilon":  # noise-prediction
+            predicted_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()
+        elif self.config.prediction_type == "sample":  # x-prediction
+            predicted_original_sample = model_output
+        elif self.config.prediction_type == "v_prediction":  # v-prediction
+            predicted_original_sample = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output
+        else:
+            raise ValueError(
+                f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
+                " `v_prediction` for `LCMScheduler`."
+            )
+
+        # 5. Clip or threshold "predicted x_0"
+        if self.config.thresholding:
+            predicted_original_sample = self._threshold_sample(predicted_original_sample)
+        elif self.config.clip_sample:
+            predicted_original_sample = predicted_original_sample.clamp(
+                -self.config.clip_sample_range, self.config.clip_sample_range
+            )
+
+        # 6. Denoise model output using boundary conditions
+        denoised = c_out * predicted_original_sample + c_skip * sample
+
+        # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
+        # Noise is not used on the final timestep of the timestep schedule.
+        # This also means that noise is not used for one-step sampling.
+        if self.step_index != self.num_inference_steps - 1:
+            noise = randn_tensor(
+                model_output.shape, generator=generator, device=model_output.device, dtype=denoised.dtype
+            )
+            prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
+        else:
+            prev_sample = denoised
+
+        # upon completion increase step index by one
+        self._step_index += 1
+
+        if not return_dict:
+            return (prev_sample, denoised)
+
+        return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised)
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
+    def add_noise(
+        self,
+        original_samples: torch.FloatTensor,
+        noise: torch.FloatTensor,
+        timesteps: torch.IntTensor,
+    ) -> torch.FloatTensor:
+        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+        alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
+        timesteps = timesteps.to(original_samples.device)
+
+        sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+        sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+        while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+        sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+        while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+        noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+        return noisy_samples
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
+    def get_velocity(
+        self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
+    ) -> torch.FloatTensor:
+        # Make sure alphas_cumprod and timestep have same device and dtype as sample
+        alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
+        timesteps = timesteps.to(sample.device)
+
+        sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+        sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+        while len(sqrt_alpha_prod.shape) < len(sample.shape):
+            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+        sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+        while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape):
+            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+        velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
+        return velocity
+
+    def __len__(self):
+        return self.config.num_train_timesteps
\ No newline at end of file
diff --git a/toolkit/saving.py b/toolkit/saving.py
index aab0d4bc..23862cb4 100644
--- a/toolkit/saving.py
+++ b/toolkit/saving.py
@@ -32,6 +32,10 @@ def convert_state_dict_to_ldm_with_mapping(
     with open(mapping_path, 'r') as f:
         mapping = json.load(f, object_pairs_hook=OrderedDict)
 
+    # keep track of keys not matched
+    ldm_matched_keys = []
+    diffusers_matched_keys = []
+
     ldm_diffusers_keymap = mapping['ldm_diffusers_keymap']
     ldm_diffusers_shape_map = mapping['ldm_diffusers_shape_map']
     ldm_diffusers_operator_map = mapping['ldm_diffusers_operator_map']
@@ -52,11 +56,15 @@ def convert_state_dict_to_ldm_with_mapping(
             for diffusers_key in ldm_diffusers_operator_map[ldm_key]['cat']:
                 cat_list.append(diffusers_state_dict[diffusers_key].detach())
             converted_state_dict[ldm_key] = torch.cat(cat_list, dim=0).to(device, dtype=dtype)
+            diffusers_matched_keys.extend(ldm_diffusers_operator_map[ldm_key]['cat'])
+            ldm_matched_keys.append(ldm_key)
         if 'slice' in ldm_diffusers_operator_map[ldm_key]:
             tensor_to_slice = diffusers_state_dict[ldm_diffusers_operator_map[ldm_key]['slice'][0]]
             slice_text = diffusers_state_dict[ldm_diffusers_operator_map[ldm_key]['slice'][1]]
             converted_state_dict[ldm_key] = tensor_to_slice[get_slices_from_string(slice_text)].detach().to(device,
                                                                                                             dtype=dtype)
+            diffusers_matched_keys.extend(ldm_diffusers_operator_map[ldm_key]['slice'])
+            ldm_matched_keys.append(ldm_key)
 
     # process the rest of the keys
     for ldm_key in ldm_diffusers_keymap:
@@ -67,13 +75,29 @@ def convert_state_dict_to_ldm_with_mapping(
             if ldm_key in ldm_diffusers_shape_map:
                 tensor = tensor.view(ldm_diffusers_shape_map[ldm_key][0])
             converted_state_dict[ldm_key] = tensor
+            diffusers_matched_keys.append(ldm_diffusers_keymap[ldm_key])
+            ldm_matched_keys.append(ldm_key)
+
+    # see if any are missing from the known mapping
+    mapped_diffusers_keys = list(ldm_diffusers_keymap.values())
+    mapped_ldm_keys = list(ldm_diffusers_keymap.keys())
+
+    missing_diffusers_keys = [x for x in mapped_diffusers_keys if x not in diffusers_matched_keys]
+    missing_ldm_keys = [x for x in mapped_ldm_keys if x not in ldm_matched_keys]
+
+    if len(missing_diffusers_keys) > 0:
+        print(f"WARNING!!!! Missing {len(missing_diffusers_keys)} diffusers keys")
+        print(missing_diffusers_keys)
+    if len(missing_ldm_keys) > 0:
+        print(f"WARNING!!!! Missing {len(missing_ldm_keys)} ldm keys")
+        print(missing_ldm_keys)
 
     return converted_state_dict
 
 
 def get_ldm_state_dict_from_diffusers(
         state_dict: 'OrderedDict',
-        sd_version: Literal['1', '2', 'sdxl'] = '2',
+        sd_version: Literal['1', '2', 'sdxl', 'ssd', 'sdxl_refiner'] = '2',
         device='cpu',
         dtype=get_torch_dtype('fp32'),
 ):
@@ -87,6 +111,14 @@ def get_ldm_state_dict_from_diffusers(
         # load our base
         base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl_ldm_base.safetensors')
         mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl.json')
+    elif sd_version == 'ssd':
+        # load our base
+        base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_ssd_ldm_base.safetensors')
+        mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_ssd.json')
+    elif sd_version == 'sdxl_refiner':
+        # load our base
+        base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_refiner_ldm_base.safetensors')
+        mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_refiner.json')
     else:
         raise ValueError(f"Invalid sd_version {sd_version}")
 
@@ -105,7 +137,7 @@ def save_ldm_model_from_diffusers(
         output_file: str,
         meta: 'OrderedDict',
         save_dtype=get_torch_dtype('fp16'),
-        sd_version: Literal['1', '2', 'sdxl'] = '2'
+        sd_version: Literal['1', '2', 'sdxl', 'ssd'] = '2'
 ):
     converted_state_dict = get_ldm_state_dict_from_diffusers(
         sd.state_dict(),
@@ -117,3 +149,95 @@ def save_ldm_model_from_diffusers(
     # make sure parent folder exists
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
     save_file(converted_state_dict, output_file, metadata=meta)
+
+
+def save_lora_from_diffusers(
+        lora_state_dict: 'OrderedDict',
+        output_file: str,
+        meta: 'OrderedDict',
+        save_dtype=get_torch_dtype('fp16'),
+        sd_version: Literal['1', '2', 'sdxl', 'ssd'] = '2'
+):
+    """Save a LoRA state dict to `output_file`; only 'sdxl' and 'ssd' are accepted (else ValueError)."""
+    if sd_version not in ('sdxl', 'ssd'):
+        raise ValueError(f"Invalid sd_version {sd_version}")
+    converted_state_dict = OrderedDict()
+    for key, value in lora_state_dict.items():
+        # todo verify if this works with ssd
+        # Text encoder ('lora_te*') keys are stored as-is. No key conversion exists
+        # yet for the remaining keys, so they are kept under their original names
+        # instead of being silently left out of the saved file.
+        converted_key = key
+        converted_state_dict[converted_key] = value.detach().to('cpu', dtype=save_dtype)
+
+    # make sure parent folder exists
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
+    save_file(converted_state_dict, output_file, metadata=meta)
+
+
+def save_t2i_from_diffusers(
+        t2i_state_dict: 'OrderedDict',
+        output_file: str,
+        meta: 'OrderedDict',
+        dtype=get_torch_dtype('fp16'),
+):
+    """Write every tensor of `t2i_state_dict` to `output_file`, detached and cast to `dtype` on cpu."""
+    # todo: test compatibility with non diffusers
+    converted_state_dict = OrderedDict(
+        (key, value.detach().to('cpu', dtype=dtype)) for key, value in t2i_state_dict.items()
+    )
+    # make sure parent folder exists
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
+    save_file(converted_state_dict, output_file, metadata=meta)
+
+
+def load_t2i_model(
+        path_to_file,
+        device: Union[str] = 'cpu',
+        dtype: torch.dtype = torch.float32
+):
+    """Read a state dict with `load_file` and return its tensors moved to `device` and cast to `dtype`."""
+    # todo see if we need to convert dict
+    return OrderedDict(
+        (name, tensor.detach().to(device, dtype=dtype))
+        for name, tensor in load_file(path_to_file, device).items()
+    )
+
+
+IP_ADAPTER_MODULES = ['image_proj', 'ip_adapter']
+
+def save_ip_adapter_from_diffusers(
+        combined_state_dict: 'OrderedDict',
+        output_file: str,
+        meta: 'OrderedDict',
+        dtype=get_torch_dtype('fp16'),
+):
+    """Flatten `{module: {key: tensor}}` into "module.key" entries and save them to `output_file`."""
+    # todo: test compatibility with non diffusers
+    flat_state_dict = OrderedDict(
+        (f"{module_name}.{key}", value.detach().to('cpu', dtype=dtype))
+        for module_name, module_state in combined_state_dict.items()
+        for key, value in module_state.items()
+    )
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)  # make sure parent folder exists
+    save_file(flat_state_dict, output_file, metadata=meta)
+
+
+def load_ip_adapter_model(
+        path_to_file,
+        device: Union[str] = 'cpu',
+        dtype: torch.dtype = torch.float32
+):
+    # '.safetensors' files are regrouped into {module: {key: tensor}}; any other path goes to torch.load
+    if path_to_file.endswith('.safetensors'):
+        raw_state_dict = load_file(path_to_file, device)
+        combined_state_dict = OrderedDict()
+        for combo_key, value in raw_state_dict.items():
+            key_split = combo_key.split('.')
+            module_name = key_split.pop(0)  # text before the first '.' names the module
+            if module_name not in combined_state_dict:
+                combined_state_dict[module_name] = OrderedDict()
+            combined_state_dict[module_name]['.'.join(key_split)] = value.detach().to(device, dtype=dtype)
+        return combined_state_dict
+    else:
+        return torch.load(path_to_file, map_location=device)  # NOTE(review): unpickles the file and ignores `dtype`; trusted files only
diff --git a/toolkit/scheduler.py b/toolkit/scheduler.py
index ab5558a5..95ae36d8 100644
--- a/toolkit/scheduler.py
+++ b/toolkit/scheduler.py
@@ -1,33 +1,57 @@
 import torch
 from typing import Optional
+from diffusers.optimization import SchedulerType, TYPE_TO_SCHEDULER_FUNCTION, get_constant_schedule_with_warmup
 
 
 def get_lr_scheduler(
         name: Optional[str],
         optimizer: torch.optim.Optimizer,
-        max_iterations: Optional[int],
-        lr_min: Optional[float],
         **kwargs,
 ):
     if name == "cosine":
+        if 'total_iters' in kwargs:
+            kwargs['T_max'] = kwargs.pop('total_iters')
         return torch.optim.lr_scheduler.CosineAnnealingLR(
-            optimizer, T_max=max_iterations, eta_min=lr_min, **kwargs
+            optimizer, **kwargs
         )
     elif name == "cosine_with_restarts":
+        if 'total_iters' in kwargs:
+            kwargs['T_0'] = kwargs.pop('total_iters')
         return torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
-            optimizer, T_0=max_iterations, T_mult=2, eta_min=lr_min, **kwargs
+            optimizer, **kwargs
         )
     elif name == "step":
+
         return torch.optim.lr_scheduler.StepLR(
-            optimizer, step_size=max_iterations // 100, gamma=0.999, **kwargs
+            optimizer, **kwargs
         )
     elif name == "constant":
-        return torch.optim.lr_scheduler.ConstantLR(optimizer, factor=1, **kwargs)
+        if 'factor' not in kwargs:  # only default the factor when the caller did not pass one
+            kwargs['factor'] = 1.0
+
+        return torch.optim.lr_scheduler.ConstantLR(optimizer, **kwargs)
     elif name == "linear":
+
         return torch.optim.lr_scheduler.LinearLR(
-            optimizer, start_factor=0.5, end_factor=0.5, total_iters=max_iterations, **kwargs
+            optimizer, **kwargs
         )
+    elif name == 'constant_with_warmup':
+        # see if num_warmup_steps is in kwargs
+        if 'num_warmup_steps' not in kwargs:
+            print(f"WARNING: num_warmup_steps not in kwargs. Using default value of 1000")
+            kwargs['num_warmup_steps'] = 1000
+        kwargs.pop('total_iters', None)  # not forwarded to the warmup schedule; may be absent
+        return get_constant_schedule_with_warmup(optimizer, **kwargs)
     else:
+        # try to use a diffusers scheduler
+        print(f"Trying to use diffusers scheduler {name}")
+        try:
+            name = SchedulerType(name)
+            schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name]
+            return schedule_func(optimizer, **kwargs)
+        except Exception as e:
+            print(e)
+            pass
         raise ValueError(
             "Scheduler must be cosine, cosine_with_restarts, step, linear or constant"
         )
diff --git a/toolkit/sd_device_states_presets.py b/toolkit/sd_device_states_presets.py
new file mode 100644
index 00000000..fb4663a0
--- /dev/null
+++ b/toolkit/sd_device_states_presets.py
@@ -0,0 +1,88 @@
+from typing import Union
+
+import torch
+import copy
+
+empty_preset = {
+    'vae': {
+        'training': False,
+        'device': 'cpu',
+    },
+    'unet': {
+        'training': False,
+        'requires_grad': False,
+        'device': 'cpu',
+    },
+    'text_encoder': {
+        'training': False,
+        'requires_grad': False,
+        'device': 'cpu',
+    },
+    'adapter': {
+        'training': False,
+        'requires_grad': False,
+        'device': 'cpu',
+    },
+    'refiner_unet': {
+        'training': False,
+        'requires_grad': False,
+        'device': 'cpu',
+    },
+}
+
+
+def get_train_sd_device_state_preset(
+        device: Union[str, torch.device],
+        train_unet: bool = False,
+        train_text_encoder: bool = False,
+        cached_latents: bool = False,
+        train_lora: bool = False,
+        train_adapter: bool = False,
+        train_embedding: bool = False,
+        train_refiner: bool = False,
+):
+    """
+    Build the per-component device/training preset for a training run.
+
+    The result starts as a deep copy of `empty_preset`, so the shared template
+    is never mutated, and the flags below switch individual entries on.
+
+    Args:
+        device: device assigned to every component that has to be resident.
+        train_unet: put the unet in training mode with requires_grad on.
+        train_text_encoder: put the text encoder in training mode with requires_grad on.
+        cached_latents: when True the vae is left on cpu.
+        train_lora: turn requires_grad off on the unet and the refiner unet
+            (applied after the flags above).
+        train_adapter: enable the adapter on `device` and set the unet to training mode.
+        train_embedding: enable the text encoder and set the unet to training mode.
+        train_refiner: enable the refiner unet on `device`; the unet is sent to
+            cpu unless `train_unet` is also set.
+
+    Returns:
+        A new nested dict keyed like `empty_preset`.
+    """
+    state = copy.deepcopy(empty_preset)
+    unet, text_encoder = state['unet'], state['text_encoder']
+    trainable = {'training': True, 'requires_grad': True}
+    if not cached_latents:
+        state['vae']['device'] = device
+    # unet and text encoder are placed on `device` whether or not they are trained
+    unet['device'] = device
+    text_encoder['device'] = device
+    if train_unet:
+        unet.update(trainable)
+    if train_text_encoder or train_embedding:
+        text_encoder.update(trainable)
+    if train_embedding or train_adapter:
+        unet['training'] = True
+    if train_refiner:
+        state['refiner_unet'].update(trainable, device=device)
+    if train_refiner and not train_unet:
+        unet['device'] = 'cpu'
+    if train_lora:
+        unet['requires_grad'] = False
+        state['refiner_unet']['requires_grad'] = False
+    if train_adapter:
+        state['adapter'].update(trainable, device=device)
+    return state
diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py
index ddeff4f3..175fc24e 100644
--- a/toolkit/stable_diffusion_model.py
+++ b/toolkit/stable_diffusion_model.py
@@ -1,40 +1,72 @@
+import copy
 import gc
+import json
+import random
+import shutil
 import typing
-from typing import Union, List, Tuple
+from typing import Union, List, Literal, Iterator
 import sys
 import os
 from collections import OrderedDict
 
+import yaml
+from PIL import Image
 from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import rescale_noise_cfg
-from safetensors.torch import save_file
+from safetensors.torch import save_file, load_file
+from torch.nn import Parameter
+from torch.utils.checkpoint import checkpoint
 from tqdm import tqdm
-from torchvision.transforms import Resize
+from torchvision.transforms import Resize, transforms
 
+from toolkit.ip_adapter import IPAdapter
 from library.model_util import convert_unet_state_dict_to_sd, convert_text_encoder_state_dict_to_sd_v2, \
     convert_vae_state_dict, load_vae
 from toolkit import train_tools
 from toolkit.config_modules import ModelConfig, GenerateImageConfig
 from toolkit.metadata import get_meta_for_safetensors
-from toolkit.paths import REPOS_ROOT
-from toolkit.saving import save_ldm_model_from_diffusers
+from toolkit.paths import REPOS_ROOT, KEYMAPS_ROOT
+from toolkit.prompt_utils import inject_trigger_into_prompt, PromptEmbeds
+from toolkit.sampler import get_sampler
+from toolkit.saving import save_ldm_model_from_diffusers, get_ldm_state_dict_from_diffusers
+from toolkit.sd_device_states_presets import empty_preset
 from toolkit.train_tools import get_torch_dtype, apply_noise_offset
 import torch
-from library import model_util
-from library.sdxl_model_util import convert_text_encoder_2_state_dict_to_sdxl
-from diffusers.schedulers import DDPMScheduler
-from toolkit.pipelines import CustomStableDiffusionXLPipeline, CustomStableDiffusionPipeline
-from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline
+from toolkit.pipelines import CustomStableDiffusionXLPipeline, CustomStableDiffusionPipeline, \
+    StableDiffusionKDiffusionXLPipeline, StableDiffusionXLRefinerPipeline
+from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, T2IAdapter, DDPMScheduler, \
+    StableDiffusionXLAdapterPipeline, StableDiffusionAdapterPipeline, DiffusionPipeline, \
+    StableDiffusionXLImg2ImgPipeline
 import diffusers
+from diffusers import \
+    AutoencoderKL, \
+    UNet2DConditionModel
+
+from toolkit.paths import ORIG_CONFIGS_ROOT, DIFFUSERS_CONFIGS_ROOT
 
 # tell it to shut up
 diffusers.logging.set_verbosity(diffusers.logging.ERROR)
 
-VAE_PREFIX_UNET = "vae"
+SD_PREFIX_VAE = "vae"
 SD_PREFIX_UNET = "unet"
+SD_PREFIX_REFINER_UNET = "refiner_unet"
 SD_PREFIX_TEXT_ENCODER = "te"
 
-SD_PREFIX_TEXT_ENCODER1 = "te1"
-SD_PREFIX_TEXT_ENCODER2 = "te2"
+SD_PREFIX_TEXT_ENCODER1 = "te0"
+SD_PREFIX_TEXT_ENCODER2 = "te1"
+
+# prefixed diffusers keys
+DO_NOT_TRAIN_WEIGHTS = [
+    "unet_time_embedding.linear_1.bias",
+    "unet_time_embedding.linear_1.weight",
+    "unet_time_embedding.linear_2.bias",
+    "unet_time_embedding.linear_2.weight",
+    "refiner_unet_time_embedding.linear_1.bias",
+    "refiner_unet_time_embedding.linear_1.weight",
+    "refiner_unet_time_embedding.linear_2.bias",
+    "refiner_unet_time_embedding.linear_2.weight",
+]
+
+DeviceStatePreset = Literal['cache_latents', 'generate']
 
 
 class BlankNetwork:
@@ -42,10 +74,7 @@ class BlankNetwork:
     def __init__(self):
         self.multiplier = 1.0
         self.is_active = True
-        self.is_normalizing = False
-
-    def apply_stored_normalizer(self, target_normalize_scaler: float = 1.0):
-        pass
+        self.is_merged_in = False
 
     def __enter__(self):
         self.is_active = True
@@ -60,59 +89,23 @@ def flush():
 
 
 UNET_IN_CHANNELS = 4  # Stable Diffusion の in_channels は 4 で固定。XLも同じ。
-VAE_SCALE_FACTOR = 8  # 2 ** (len(vae.config.block_out_channels) - 1) = 8
-
-
-class PromptEmbeds:
-    text_embeds: torch.Tensor
-    pooled_embeds: Union[torch.Tensor, None]
-
-    def __init__(self, args: Union[Tuple[torch.Tensor], List[torch.Tensor], torch.Tensor]) -> None:
-        if isinstance(args, list) or isinstance(args, tuple):
-            # xl
-            self.text_embeds = args[0]
-            self.pooled_embeds = args[1]
-        else:
-            # sdv1.x, sdv2.x
-            self.text_embeds = args
-            self.pooled_embeds = None
-
-    def to(self, *args, **kwargs):
-        self.text_embeds = self.text_embeds.to(*args, **kwargs)
-        if self.pooled_embeds is not None:
-            self.pooled_embeds = self.pooled_embeds.to(*args, **kwargs)
-        return self
-
+# VAE_SCALE_FACTOR = 8  # 2 ** (len(vae.config.block_out_channels) - 1) = 8
 
 # if is type checking
 if typing.TYPE_CHECKING:
-    from diffusers import \
-        StableDiffusionPipeline, \
-        AutoencoderKL, \
-        UNet2DConditionModel
     from diffusers.schedulers import KarrasDiffusionSchedulers
     from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection
 
 
 class StableDiffusion:
-    pipeline: Union[None, 'StableDiffusionPipeline', 'CustomStableDiffusionXLPipeline']
-    vae: Union[None, 'AutoencoderKL']
-    unet: Union[None, 'UNet2DConditionModel']
-    text_encoder: Union[None, 'CLIPTextModel', List[Union['CLIPTextModel', 'CLIPTextModelWithProjection']]]
-    tokenizer: Union[None, 'CLIPTokenizer', List['CLIPTokenizer']]
-    noise_scheduler: Union[None, 'KarrasDiffusionSchedulers', 'DDPMScheduler']
-    device: str
-    dtype: str
-    torch_dtype: torch.dtype
-    device_torch: torch.device
-    model_config: ModelConfig
 
     def __init__(
             self,
             device,
             model_config: ModelConfig,
             dtype='fp16',
-            custom_pipeline=None
+            custom_pipeline=None,
+            noise_scheduler=None,
     ):
         self.custom_pipeline = custom_pipeline
         self.device = device
@@ -122,6 +115,17 @@ class StableDiffusion:
         self.model_config = model_config
         self.prediction_type = "v_prediction" if self.model_config.is_v_pred else "epsilon"
 
+        self.device_state = None
+
+        self.pipeline: Union[None, 'StableDiffusionPipeline', 'CustomStableDiffusionXLPipeline']
+        self.vae: Union[None, 'AutoencoderKL']
+        self.unet: Union[None, 'UNet2DConditionModel']
+        self.text_encoder: Union[None, 'CLIPTextModel', List[Union['CLIPTextModel', 'CLIPTextModelWithProjection']]]
+        self.tokenizer: Union[None, 'CLIPTokenizer', List['CLIPTokenizer']]
+        self.noise_scheduler: Union[None, 'DDPMScheduler'] = noise_scheduler
+
+        self.refiner_unet: Union[None, 'UNet2DConditionModel'] = None
+
         # sdxl stuff
         self.logit_scale = None
         self.ckppt_info = None
@@ -129,36 +133,29 @@ class StableDiffusion:
 
         # to hold network if there is one
         self.network = None
+        self.adapter: Union['T2IAdapter', 'IPAdapter', None] = None
         self.is_xl = model_config.is_xl
         self.is_v2 = model_config.is_v2
+        self.is_ssd = model_config.is_ssd
 
         self.use_text_encoder_1 = model_config.use_text_encoder_1
         self.use_text_encoder_2 = model_config.use_text_encoder_2
 
+        self.config_file = None
+
     def load_model(self):
         if self.is_loaded:
             return
         dtype = get_torch_dtype(self.dtype)
-
-        # TODO handle other schedulers
         # sch = KDPM2DiscreteScheduler
-        sch = DDPMScheduler
-        # do our own scheduler
-        prediction_type = "v_prediction" if self.model_config.is_v_pred else "epsilon"
-        scheduler = sch(
-            num_train_timesteps=1000,
-            beta_start=0.00085,
-            beta_end=0.0120,
-            beta_schedule="scaled_linear",
-            clip_sample=False,
-            prediction_type=prediction_type,
-            steps_offset=1
-        )
+        if self.noise_scheduler is None:
+            scheduler = get_sampler('ddpm')
+            self.noise_scheduler = scheduler
 
         # move the betas alphas and  alphas_cumprod to device. Sometimed they get stuck on cpu, not sure why
-        scheduler.betas = scheduler.betas.to(self.device_torch)
-        scheduler.alphas = scheduler.alphas.to(self.device_torch)
-        scheduler.alphas_cumprod = scheduler.alphas_cumprod.to(self.device_torch)
+        # self.noise_scheduler.betas = self.noise_scheduler.betas.to(self.device_torch)
+        # self.noise_scheduler.alphas = self.noise_scheduler.alphas.to(self.device_torch)
+        # self.noise_scheduler.alphas_cumprod = self.noise_scheduler.alphas_cumprod.to(self.device_torch)
 
         model_path = self.model_config.name_or_path
         if 'civitai.com' in self.model_config.name_or_path:
@@ -166,28 +163,40 @@ class StableDiffusion:
             from toolkit.civitai import get_model_path_from_url
             model_path = get_model_path_from_url(self.model_config.name_or_path)
 
-        if self.model_config.is_xl:
+        load_args = {
+            'scheduler': self.noise_scheduler,
+        }
+        if self.model_config.vae_path is not None:
+            load_args['vae'] = load_vae(self.model_config.vae_path, dtype)
+
+        if self.model_config.is_xl or self.model_config.is_ssd:
             if self.custom_pipeline is not None:
                 pipln = self.custom_pipeline
             else:
-                pipln = CustomStableDiffusionXLPipeline
+                pipln = StableDiffusionXLPipeline
+                # pipln = StableDiffusionKDiffusionXLPipeline
 
             # see if path exists
-            if not os.path.exists(model_path):
+            if not os.path.exists(model_path) or os.path.isdir(model_path):
                 # try to load with default diffusers
                 pipe = pipln.from_pretrained(
                     model_path,
                     dtype=dtype,
-                    scheduler_type='ddpm',
                     device=self.device_torch,
-                ).to(self.device_torch)
+                    variant="fp16",
+                    use_safetensors=True,
+                    **load_args
+                )
             else:
                 pipe = pipln.from_single_file(
                     model_path,
-                    dtype=dtype,
-                    scheduler_type='ddpm',
                     device=self.device_torch,
-                ).to(self.device_torch)
+                    torch_dtype=self.torch_dtype,
+                )
+
+            if 'vae' in load_args and load_args['vae'] is not None:
+                pipe.vae = load_args['vae']
+            flush()
 
             text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
             tokenizer = [pipe.tokenizer, pipe.tokenizer_2]
@@ -196,34 +205,52 @@ class StableDiffusion:
                 text_encoder.requires_grad_(False)
                 text_encoder.eval()
             text_encoder = text_encoders
+
+            if self.model_config.experimental_xl:
+                print("Experimental XL mode enabled")
+                print("Loading and injecting alt weights")
+                # load the mismatched weight and force it in
+                raw_state_dict = load_file(model_path)
+                replacement_weight = raw_state_dict['conditioner.embedders.1.model.text_projection'].clone()
+                del raw_state_dict
+                # get the state dict for the 2nd text encoder
+                te1_state_dict = text_encoders[1].state_dict()
+                # replace weight with mismatched weight
+                te1_state_dict['text_projection.weight'] = replacement_weight.to(self.device_torch, dtype=dtype)
+                flush()
+                print("Injecting alt weights")
+
         else:
             if self.custom_pipeline is not None:
                 pipln = self.custom_pipeline
             else:
-                pipln = CustomStableDiffusionPipeline
+                pipln = StableDiffusionPipeline
 
             # see if path exists
-            if not os.path.exists(model_path):
+            if not os.path.exists(model_path) or os.path.isdir(model_path):
                 # try to load with default diffusers
                 pipe = pipln.from_pretrained(
                     model_path,
                     dtype=dtype,
-                    scheduler_type='dpm',
                     device=self.device_torch,
                     load_safety_checker=False,
                     requires_safety_checker=False,
-                    safety_checker=None
+                    safety_checker=None,
+                    variant="fp16",
+                    **load_args
                 ).to(self.device_torch)
             else:
                 pipe = pipln.from_single_file(
                     model_path,
                     dtype=dtype,
-                    scheduler_type='dpm',
                     device=self.device_torch,
                     load_safety_checker=False,
                     requires_safety_checker=False,
-                    safety_checker=False
+                    torch_dtype=self.torch_dtype,
+                    safety_checker=False,
+                    **load_args
                 ).to(self.device_torch)
+            flush()
 
             pipe.register_to_config(requires_safety_checker=False)
             text_encoder = pipe.text_encoder
@@ -233,15 +260,13 @@ class StableDiffusion:
             tokenizer = pipe.tokenizer
 
         # scheduler doesn't get set sometimes, so we set it here
-        pipe.scheduler = scheduler
+        pipe.scheduler = self.noise_scheduler
 
-        if self.model_config.vae_path is not None:
-            external_vae = load_vae(self.model_config.vae_path, dtype)
-            pipe.vae = external_vae
+        # add hacks to unet to help training
+        # pipe.unet = prepare_unet_for_training(pipe.unet)
 
-        self.unet = pipe.unet
-        self.noise_scheduler = pipe.scheduler
-        self.vae = pipe.vae.to(self.device_torch, dtype=dtype)
+        self.unet: 'UNet2DConditionModel' = pipe.unet
+        self.vae: 'AutoencoderKL' = pipe.vae.to(self.device_torch, dtype=dtype)
         self.vae.eval()
         self.vae.requires_grad_(False)
         self.unet.to(self.device_torch, dtype=dtype)
@@ -251,77 +276,160 @@ class StableDiffusion:
         self.tokenizer = tokenizer
         self.text_encoder = text_encoder
         self.pipeline = pipe
+        self.load_refiner()
         self.is_loaded = True
 
-    def generate_images(self, image_configs: List[GenerateImageConfig]):
+    def load_refiner(self):
+        # for now, we are just going to rely on the TE from the base model
+        # which is TE2 for SDXL and TE for SD (no refiner currently)
+        # and completely ignore a TE that may or may not be packaged with the refiner
+        if self.model_config.refiner_name_or_path is not None:
+            refiner_config_path = os.path.join(ORIG_CONFIGS_ROOT, 'sd_xl_refiner.yaml')
+            # load the refiner model
+            dtype = get_torch_dtype(self.dtype)
+            model_path = self.model_config.refiner_name_or_path
+            if not os.path.exists(model_path) or os.path.isdir(model_path):
+                # TODO only load unet??
+                refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+                    model_path,
+                    dtype=dtype,
+                    device=self.device_torch,
+                    variant="fp16",
+                    use_safetensors=True,
+                ).to(self.device_torch)
+            else:
+                refiner = StableDiffusionXLImg2ImgPipeline.from_single_file(
+                    model_path,
+                    dtype=dtype,
+                    device=self.device_torch,
+                    torch_dtype=self.torch_dtype,
+                    original_config_file=refiner_config_path,
+                ).to(self.device_torch)
+
+            self.refiner_unet = refiner.unet
+            del refiner
+            flush()
+
+    @torch.no_grad()
+    def generate_images(
+            self,
+            image_configs: List[GenerateImageConfig],
+            sampler=None,
+            pipeline: Union[None, StableDiffusionPipeline, StableDiffusionXLPipeline] = None,
+    ):
+        merge_multiplier = 1.0
         # sample_folder = os.path.join(self.save_root, 'samples')
         if self.network is not None:
             self.network.eval()
             network = self.network
+            # check if we have the same network weight for all samples. If we do, we can merge in
+            # the network to drastically speed up inference
+            unique_network_weights = set([x.network_multiplier for x in image_configs])
+            if len(unique_network_weights) == 1 and self.network.can_merge_in:
+                can_merge_in = True
+                merge_multiplier = unique_network_weights.pop()
+                network.merge_in(merge_weight=merge_multiplier)
         else:
             network = BlankNetwork()
 
-        was_network_normalizing = network.is_normalizing
-        # apply the normalizer if it is normalizing before inference and disable it
-        if network.is_normalizing:
-            network.apply_stored_normalizer()
-            network.is_normalizing = False
+        self.save_device_state()
+        self.set_device_state_preset('generate')
 
         # save current seed state for training
         rng_state = torch.get_rng_state()
         cuda_rng_state = torch.cuda.get_rng_state() if torch.cuda.is_available() else None
 
-        original_device_dict = {
-            'vae': self.vae.device,
-            'unet': self.unet.device,
-            # 'tokenizer': self.tokenizer.device,
-        }
+        if pipeline is None:
+            noise_scheduler = self.noise_scheduler
+            if sampler is not None:
+                if sampler.startswith("sample_"):  # sample_dpmpp_2m
+                    # using ksampler
+                    noise_scheduler = get_sampler('lms')
+                else:
+                    noise_scheduler = get_sampler(sampler)
 
-        # handle sdxl text encoder
-        if isinstance(self.text_encoder, list):
-            for encoder, i in zip(self.text_encoder, range(len(self.text_encoder))):
-                original_device_dict[f'text_encoder_{i}'] = encoder.device
-                encoder.to(self.device_torch)
-        else:
-            original_device_dict['text_encoder'] = self.text_encoder.device
-            self.text_encoder.to(self.device_torch)
+                try:
+                    noise_scheduler = noise_scheduler.to(self.device_torch, self.torch_dtype)
+                except:
+                    pass
 
-        self.vae.to(self.device_torch)
-        self.unet.to(self.device_torch)
+            if sampler.startswith("sample_") and self.is_xl:
+                # using kdiffusion
+                Pipe = StableDiffusionKDiffusionXLPipeline
+            elif self.is_xl:
+                Pipe = StableDiffusionXLPipeline
+            else:
+                Pipe = StableDiffusionPipeline
 
-        # TODO add clip skip
-        if self.is_xl:
-            pipeline = StableDiffusionXLPipeline(
-                vae=self.vae,
-                unet=self.unet,
-                text_encoder=self.text_encoder[0],
-                text_encoder_2=self.text_encoder[1],
-                tokenizer=self.tokenizer[0],
-                tokenizer_2=self.tokenizer[1],
-                scheduler=self.noise_scheduler,
+            extra_args = {}
+            if self.adapter is not None:
+                if isinstance(self.adapter, T2IAdapter):
+                    if self.is_xl:
+                        Pipe = StableDiffusionXLAdapterPipeline
+                    else:
+                        Pipe = StableDiffusionAdapterPipeline
+                    extra_args['adapter'] = self.adapter
+                else:
+                    if self.is_xl:
+                        extra_args['add_watermarker'] = False
+
+            # TODO add clip skip
+            if self.is_xl:
+                pipeline = Pipe(
+                    vae=self.vae,
+                    unet=self.unet,
+                    text_encoder=self.text_encoder[0],
+                    text_encoder_2=self.text_encoder[1],
+                    tokenizer=self.tokenizer[0],
+                    tokenizer_2=self.tokenizer[1],
+                    scheduler=noise_scheduler,
+                    **extra_args
+                ).to(self.device_torch)
+                pipeline.watermark = None
+            else:
+                pipeline = Pipe(
+                    vae=self.vae,
+                    unet=self.unet,
+                    text_encoder=self.text_encoder,
+                    tokenizer=self.tokenizer,
+                    scheduler=noise_scheduler,
+                    safety_checker=None,
+                    feature_extractor=None,
+                    requires_safety_checker=False,
+                    **extra_args
+                ).to(self.device_torch)
+            flush()
+            # disable progress bar
+            pipeline.set_progress_bar_config(disable=True)
+
+            if sampler.startswith("sample_"):
+                pipeline.set_scheduler(sampler)
+
+        refiner_pipeline = None
+        if self.refiner_unet:
+            # build refiner pipeline
+            refiner_pipeline = StableDiffusionXLImg2ImgPipeline(
+                vae=pipeline.vae,
+                unet=self.refiner_unet,
+                text_encoder=None,
+                text_encoder_2=pipeline.text_encoder_2,
+                tokenizer=None,
+                tokenizer_2=pipeline.tokenizer_2,
+                scheduler=pipeline.scheduler,
                 add_watermarker=False,
+                requires_aesthetics_score=True,
             ).to(self.device_torch)
-            # force turn that (ruin your images with obvious green and red dots) the #$@@ off!!!
-            pipeline.watermark = None
-        else:
-            pipeline = StableDiffusionPipeline(
-                vae=self.vae,
-                unet=self.unet,
-                text_encoder=self.text_encoder,
-                tokenizer=self.tokenizer,
-                scheduler=self.noise_scheduler,
-                safety_checker=None,
-                feature_extractor=None,
-                requires_safety_checker=False,
-            ).to(self.device_torch)
-        # disable progress bar
-        pipeline.set_progress_bar_config(disable=True)
+            # refiner_pipeline.register_to_config(requires_aesthetics_score=False)
+            refiner_pipeline.watermark = None
+            refiner_pipeline.set_progress_bar_config(disable=True)
+            flush()
 
         start_multiplier = 1.0
         if self.network is not None:
             start_multiplier = self.network.multiplier
 
         pipeline.to(self.device_torch)
+
         with network:
             with torch.no_grad():
                 if self.network is not None:
@@ -330,45 +438,134 @@ class StableDiffusion:
                 for i in tqdm(range(len(image_configs)), desc=f"Generating Images", leave=False):
                     gen_config = image_configs[i]
 
+                    extra = {}
+                    if self.adapter is not None and gen_config.adapter_image_path is not None:
+                        validation_image = Image.open(gen_config.adapter_image_path).convert("RGB")
+                        if isinstance(self.adapter, T2IAdapter):
+                            # not sure why this is double??
+                            validation_image = validation_image.resize((gen_config.width * 2, gen_config.height * 2))
+                            extra['image'] = validation_image
+                            extra['adapter_conditioning_scale'] = gen_config.adapter_conditioning_scale
+                        if isinstance(self.adapter, IPAdapter):
+                            transform = transforms.Compose([
+                                transforms.Resize(gen_config.width,
+                                                  interpolation=transforms.InterpolationMode.BILINEAR),
+                                transforms.PILToTensor(),
+                            ])
+                            validation_image = transform(validation_image)
+
                     if self.network is not None:
                         self.network.multiplier = gen_config.network_multiplier
                     torch.manual_seed(gen_config.seed)
                     torch.cuda.manual_seed(gen_config.seed)
 
-                    # todo do we disable text encoder here as well if disabled for model, or only do that for training?
+                    # encode the prompt ourselves so we can do fun stuff with embeddings
+                    conditional_embeds = self.encode_prompt(gen_config.prompt, gen_config.prompt_2, force_all=True)
+
+                    unconditional_embeds = self.encode_prompt(
+                        gen_config.negative_prompt, gen_config.negative_prompt_2, force_all=True
+                    )
+
+                    # allow any manipulations to take place to embeddings
+                    gen_config.post_process_embeddings(
+                        conditional_embeds,
+                        unconditional_embeds,
+                    )
+
+                    if self.adapter is not None and isinstance(self.adapter,
+                                                               IPAdapter) and gen_config.adapter_image_path is not None:
+                        # apply the image projection
+                        conditional_clip_embeds = self.adapter.get_clip_image_embeds_from_tensors(validation_image)
+                        unconditional_clip_embeds = self.adapter.get_clip_image_embeds_from_tensors(validation_image,
+                                                                                                    True)
+                        conditional_embeds = self.adapter(conditional_embeds, conditional_clip_embeds)
+                        unconditional_embeds = self.adapter(unconditional_embeds, unconditional_clip_embeds)
+
+                    if self.refiner_unet is not None and gen_config.refiner_start_at < 1.0:
+                        # if we have a refiner loaded, set the denoising end at the refiner start
+                        extra['denoising_end'] = gen_config.refiner_start_at
+                        extra['output_type'] = 'latent'
+                        if not self.is_xl:
+                            raise ValueError("Refiner is only supported for XL models")
+
                     if self.is_xl:
                         # fix guidance rescale for sdxl
                         # was trained on 0.7 (I believe)
 
                         grs = gen_config.guidance_rescale
-                        if grs is None or grs < 0.00001:
-                            grs = 0.7
+                        # if grs is None or grs < 0.00001:
+                        #     grs = 0.7
+                        # grs = 0.0
+
+                        if sampler.startswith("sample_"):
+                            extra['use_karras_sigmas'] = True
+                            extra = {
+                                **extra,
+                                **gen_config.extra_kwargs,
+                            }
 
                         img = pipeline(
-                            prompt=gen_config.prompt,
-                            prompt_2=gen_config.prompt_2,
-                            negative_prompt=gen_config.negative_prompt,
-                            negative_prompt_2=gen_config.negative_prompt_2,
+                            # prompt=gen_config.prompt,
+                            # prompt_2=gen_config.prompt_2,
+                            prompt_embeds=conditional_embeds.text_embeds,
+                            pooled_prompt_embeds=conditional_embeds.pooled_embeds,
+                            negative_prompt_embeds=unconditional_embeds.text_embeds,
+                            negative_pooled_prompt_embeds=unconditional_embeds.pooled_embeds,
+                            # negative_prompt=gen_config.negative_prompt,
+                            # negative_prompt_2=gen_config.negative_prompt_2,
                             height=gen_config.height,
                             width=gen_config.width,
                             num_inference_steps=gen_config.num_inference_steps,
                             guidance_scale=gen_config.guidance_scale,
                             guidance_rescale=grs,
+                            latents=gen_config.latents,
+                            **extra
                         ).images[0]
                     else:
                         img = pipeline(
-                            prompt=gen_config.prompt,
-                            negative_prompt=gen_config.negative_prompt,
+                            # prompt=gen_config.prompt,
+                            prompt_embeds=conditional_embeds.text_embeds,
+                            negative_prompt_embeds=unconditional_embeds.text_embeds,
+                            # negative_prompt=gen_config.negative_prompt,
                             height=gen_config.height,
                             width=gen_config.width,
                             num_inference_steps=gen_config.num_inference_steps,
                             guidance_scale=gen_config.guidance_scale,
+                            latents=gen_config.latents,
+                            **extra
                         ).images[0]
 
-                    gen_config.save_image(img)
+                    if self.refiner_unet is not None and gen_config.refiner_start_at < 1.0:
+                        # slice off just the last 1280 on the last dim as refiner does not use first text encoder
+                        # todo, should we just use the Text encoder for the refiner? Fine tuned versions will differ
+                        refiner_text_embeds = conditional_embeds.text_embeds[:, :, -1280:]
+                        refiner_unconditional_text_embeds = unconditional_embeds.text_embeds[:, :, -1280:]
+                        # run through refiner
+                        img = refiner_pipeline(
+                            # prompt=gen_config.prompt,
+                            # prompt_2=gen_config.prompt_2,
+
+                            # slice these as it does not use both text encoders
+                            # height=gen_config.height,
+                            # width=gen_config.width,
+                            prompt_embeds=refiner_text_embeds,
+                            pooled_prompt_embeds=conditional_embeds.pooled_embeds,
+                            negative_prompt_embeds=refiner_unconditional_text_embeds,
+                            negative_pooled_prompt_embeds=unconditional_embeds.pooled_embeds,
+                            num_inference_steps=gen_config.num_inference_steps,
+                            guidance_scale=gen_config.guidance_scale,
+                            guidance_rescale=grs,
+                            denoising_start=gen_config.refiner_start_at,
+                            denoising_end=gen_config.num_inference_steps,
+                            image=img.unsqueeze(0)
+                        ).images[0]
+
+                    gen_config.save_image(img, i)
 
         # clear pipeline and cache to reduce vram usage
         del pipeline
+        if refiner_pipeline is not None:
+            del refiner_pipeline
         torch.cuda.empty_cache()
 
         # restore training state
@@ -376,17 +573,13 @@ class StableDiffusion:
         if cuda_rng_state is not None:
             torch.cuda.set_rng_state(cuda_rng_state)
 
-        self.vae.to(original_device_dict['vae'])
-        self.unet.to(original_device_dict['unet'])
-        if isinstance(self.text_encoder, list):
-            for encoder, i in zip(self.text_encoder, range(len(self.text_encoder))):
-                encoder.to(original_device_dict[f'text_encoder_{i}'])
-        else:
-            self.text_encoder.to(original_device_dict['text_encoder'])
+        self.restore_device_state()
         if self.network is not None:
             self.network.train()
             self.network.multiplier = start_multiplier
-            self.network.is_normalizing = was_network_normalizing
+
+        if network.is_merged_in:
+            network.merge_out(merge_multiplier)
         # self.tokenizer.to(original_device_dict['tokenizer'])
 
     def get_latent_noise(
@@ -398,6 +591,7 @@ class StableDiffusion:
             batch_size=1,
             noise_offset=0.0,
     ):
+        VAE_SCALE_FACTOR = 2 ** (len(self.vae.config['block_out_channels']) - 1)
         if height is None and pixel_height is None:
             raise ValueError("height or pixel_height must be specified")
         if width is None and pixel_width is None:
@@ -410,31 +604,42 @@ class StableDiffusion:
         noise = torch.randn(
             (
                 batch_size,
-                UNET_IN_CHANNELS,
+                self.unet.config['in_channels'],
                 height,
                 width,
             ),
-            device="cpu",
+            device=self.unet.device,
         )
         noise = apply_noise_offset(noise, noise_offset)
         return noise
 
-    def get_time_ids_from_latents(self, latents: torch.Tensor):
-        bs, ch, h, w = list(latents.shape)
-
-        height = h * VAE_SCALE_FACTOR
-        width = w * VAE_SCALE_FACTOR
-
-        dtype = latents.dtype
-
+    def get_time_ids_from_latents(self, latents: torch.Tensor, requires_aesthetic_score=False):
+        VAE_SCALE_FACTOR = 2 ** (len(self.vae.config['block_out_channels']) - 1)
         if self.is_xl:
-            prompt_ids = train_tools.get_add_time_ids(
-                height,
-                width,
-                dynamic_crops=False,  # look into this
-                dtype=dtype,
-            ).to(self.device_torch, dtype=dtype)
-            return prompt_ids
+            bs, ch, h, w = list(latents.shape)
+
+            height = h * VAE_SCALE_FACTOR
+            width = w * VAE_SCALE_FACTOR
+
+            dtype = latents.dtype
+            # just do it without any cropping nonsense
+            target_size = (height, width)
+            original_size = (height, width)
+            crops_coords_top_left = (0, 0)
+            if requires_aesthetic_score:
+                # refiner
+                # https://huggingface.co/papers/2307.01952
+                aesthetic_score = 6.0  # simulate one
+                add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,))
+            else:
+                add_time_ids = list(original_size + crops_coords_top_left + target_size)
+            add_time_ids = torch.tensor([add_time_ids])
+            add_time_ids = add_time_ids.to(latents.device, dtype=dtype)
+
+            batch_time_ids = torch.cat(
+                [add_time_ids for _ in range(bs)]
+            )
+            return batch_time_ids
         else:
             return None
 
@@ -444,68 +649,154 @@ class StableDiffusion:
             text_embeddings: Union[PromptEmbeds, None] = None,
             timestep: Union[int, torch.Tensor] = 1,
             guidance_scale=7.5,
-            guidance_rescale=0,  # 0.7 sdxl
+            guidance_rescale=0,
             add_time_ids=None,
             conditional_embeddings: Union[PromptEmbeds, None] = None,
             unconditional_embeddings: Union[PromptEmbeds, None] = None,
             **kwargs,
     ):
-        # get the embeddings
-        if text_embeddings is None and conditional_embeddings is None:
-            raise ValueError("Either text_embeddings or conditional_embeddings must be specified")
-        if text_embeddings is None and unconditional_embeddings is not None:
-            text_embeddings = train_tools.concat_prompt_embeddings(
-                unconditional_embeddings,  # negative embedding
-                conditional_embeddings,  # positive embedding
-                1,  # batch size
-            )
-        elif text_embeddings is None and conditional_embeddings is not None:
-            # not doing cfg
-            text_embeddings = conditional_embeddings
+        with torch.no_grad():
+            # get the embeddings
+            if text_embeddings is None and conditional_embeddings is None:
+                raise ValueError("Either text_embeddings or conditional_embeddings must be specified")
+            if text_embeddings is None and unconditional_embeddings is not None:
+                text_embeddings = train_tools.concat_prompt_embeddings(
+                    unconditional_embeddings,  # negative embedding
+                    conditional_embeddings,  # positive embedding
+                    1,  # batch size
+                )
+            elif text_embeddings is None and conditional_embeddings is not None:
+                # not doing cfg
+                text_embeddings = conditional_embeddings
 
-        # CFG is comparing neg and positive, if we have concatenated embeddings
-        # then we are doing it, otherwise we are not and takes half the time.
-        do_classifier_free_guidance = True
+            # CFG is comparing neg and positive, if we have concatenated embeddings
+            # then we are doing it, otherwise we are not and takes half the time.
+            do_classifier_free_guidance = True
 
-        # check if batch size of embeddings matches batch size of latents
-        if latents.shape[0] == text_embeddings.text_embeds.shape[0]:
-            do_classifier_free_guidance = False
-        elif latents.shape[0] * 2 != text_embeddings.text_embeds.shape[0]:
-            raise ValueError("Batch size of latents must be the same or half the batch size of text embeddings")
+            # check if batch size of embeddings matches batch size of latents
+            if latents.shape[0] == text_embeddings.text_embeds.shape[0]:
+                do_classifier_free_guidance = False
+            elif latents.shape[0] * 2 != text_embeddings.text_embeds.shape[0]:
+                raise ValueError("Batch size of latents must be the same or half the batch size of text embeddings")
+        latents = latents.to(self.device_torch)
+        text_embeddings = text_embeddings.to(self.device_torch)
+        timestep = timestep.to(self.device_torch)
+
+        # if timestep is zero dim, unsqueeze it
+        if len(timestep.shape) == 0:
+            timestep = timestep.unsqueeze(0)
+
+        # if we only have 1 timestep, we can just use the same timestep for all
+        if timestep.shape[0] == 1 and latents.shape[0] > 1:
+            # check if it is rank 1 or 2
+            if len(timestep.shape) == 1:
+                timestep = timestep.repeat(latents.shape[0])
+            else:
+                timestep = timestep.repeat(latents.shape[0], 0)
+
+
+        def scale_model_input(model_input, timestep_tensor):
+            mi_chunks = torch.chunk(model_input, model_input.shape[0], dim=0)
+            timestep_chunks = torch.chunk(timestep_tensor, timestep_tensor.shape[0], dim=0)
+            out_chunks = []
+            # scale each sample separately so every one uses its own timestep
+            for idx in range(model_input.shape[0]):
+                # if scheduler has step_index
+                if hasattr(self.noise_scheduler, '_step_index'):
+                    self.noise_scheduler._step_index = None
+                out_chunks.append(
+                    self.noise_scheduler.scale_model_input(mi_chunks[idx], timestep_chunks[idx])
+                )
+            return torch.cat(out_chunks, dim=0)
 
         if self.is_xl:
-            if add_time_ids is None:
-                add_time_ids = self.get_time_ids_from_latents(latents)
+            with torch.no_grad():
+                # 16, 6 for bs of 4
+                if add_time_ids is None:
+                    add_time_ids = self.get_time_ids_from_latents(latents)
+
+                    if do_classifier_free_guidance:
+                        # todo check this with larger batches
+                        add_time_ids = torch.cat([add_time_ids] * 2)
 
                 if do_classifier_free_guidance:
-                    # todo check this with larget batches
-                    add_time_ids = train_tools.concat_embeddings(
-                        add_time_ids, add_time_ids, int(latents.shape[0])
-                    )
+                    latent_model_input = torch.cat([latents] * 2)
+                    timestep = torch.cat([timestep] * 2)
                 else:
-                    # concat to fit batch size
-                    add_time_ids = torch.cat([add_time_ids] * latents.shape[0])
+                    latent_model_input = latents
+
+                latent_model_input = scale_model_input(latent_model_input, timestep)
+
+                added_cond_kwargs = {
+                    # todo can we zero here the second text encoder? or match a blank string?
+                    "text_embeds": text_embeddings.pooled_embeds,
+                    "time_ids": add_time_ids,
+                }
+
+            if self.model_config.refiner_name_or_path is not None:
+                # we have the refiner on the second half of everything. Do Both
+                if do_classifier_free_guidance:
+                    raise ValueError("Refiner is not supported with classifier free guidance")
+
+                if self.unet.training:
+                    input_chunks = torch.chunk(latent_model_input, 2, dim=0)
+                    timestep_chunks = torch.chunk(timestep, 2, dim=0)
+                    added_cond_kwargs_chunked = {
+                        "text_embeds": torch.chunk(text_embeddings.pooled_embeds, 2, dim=0),
+                        "time_ids": torch.chunk(add_time_ids, 2, dim=0),
+                    }
+                    text_embeds_chunks = torch.chunk(text_embeddings.text_embeds, 2, dim=0)
+
+                    # predict the noise residual
+                    base_pred = self.unet(
+                        input_chunks[0],
+                        timestep_chunks[0],
+                        encoder_hidden_states=text_embeds_chunks[0],
+                        added_cond_kwargs={
+                            "text_embeds": added_cond_kwargs_chunked['text_embeds'][0],
+                            "time_ids": added_cond_kwargs_chunked['time_ids'][0],
+                        },
+                        **kwargs,
+                    ).sample
+
+                    refiner_pred = self.refiner_unet(
+                        input_chunks[1],
+                        timestep_chunks[1],
+                        encoder_hidden_states=text_embeds_chunks[1][:, :, -1280:],
+                        # just use the second text encoder (last 1280 features)
+                        added_cond_kwargs={
+                            "text_embeds": added_cond_kwargs_chunked['text_embeds'][1],
+                            # "time_ids": added_cond_kwargs_chunked['time_ids'][1],
+                            "time_ids": self.get_time_ids_from_latents(input_chunks[1], requires_aesthetic_score=True),
+                        },
+                        **kwargs,
+                    ).sample
+
+                    noise_pred = torch.cat([base_pred, refiner_pred], dim=0)
+                else:
+                    noise_pred = self.refiner_unet(
+                        latent_model_input,
+                        timestep,
+                        encoder_hidden_states=text_embeddings.text_embeds[:, :, -1280:],
+                        # just use the second text encoder (last 1280 features)
+                        added_cond_kwargs={
+                            "text_embeds": text_embeddings.pooled_embeds,
+                            "time_ids": self.get_time_ids_from_latents(latent_model_input,
+                                                                       requires_aesthetic_score=True),
+                        },
+                        **kwargs,
+                    ).sample
 
-            if do_classifier_free_guidance:
-                latent_model_input = torch.cat([latents] * 2)
             else:
-                latent_model_input = latents
 
-            latent_model_input = self.noise_scheduler.scale_model_input(latent_model_input, timestep)
-
-            added_cond_kwargs = {
-                # todo can we zero here the second text encoder? or match a blank string?
-                "text_embeds": text_embeddings.pooled_embeds,
-                "time_ids": add_time_ids,
-            }
-
-            # predict the noise residual
-            noise_pred = self.unet(
-                latent_model_input,
-                timestep,
-                encoder_hidden_states=text_embeddings.text_embeds,
-                added_cond_kwargs=added_cond_kwargs,
-            ).sample
+                # predict the noise residual
+                noise_pred = self.unet(
+                    latent_model_input,
+                    timestep,
+                    encoder_hidden_states=text_embeddings.text_embeds,
+                    added_cond_kwargs=added_cond_kwargs,
+                    **kwargs,
+                ).sample
 
             if do_classifier_free_guidance:
                 # perform guidance
@@ -520,31 +811,34 @@ class StableDiffusion:
                     noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
 
         else:
-            if do_classifier_free_guidance:
-                # if we are doing classifier free guidance, need to double up
-                latent_model_input = torch.cat([latents] * 2)
-            else:
-                latent_model_input = latents
+            with torch.no_grad():
+                if do_classifier_free_guidance:
+                    # if we are doing classifier free guidance, need to double up
+                    latent_model_input = torch.cat([latents] * 2)
+                    timestep = torch.cat([timestep] * 2)
+                else:
+                    latent_model_input = latents
 
-            latent_model_input = self.noise_scheduler.scale_model_input(latent_model_input, timestep)
+                latent_model_input = scale_model_input(latent_model_input, timestep)
 
-            # check if we need to concat timesteps
-            if isinstance(timestep, torch.Tensor) and len(timestep.shape) > 1:
-                ts_bs = timestep.shape[0]
-                if ts_bs != latent_model_input.shape[0]:
-                    if ts_bs == 1:
-                        timestep = torch.cat([timestep] * latent_model_input.shape[0])
-                    elif ts_bs * 2 == latent_model_input.shape[0]:
-                        timestep = torch.cat([timestep] * 2)
-                    else:
-                        raise ValueError(
-                            f"Batch size of latents {latent_model_input.shape[0]} must be the same or half the batch size of timesteps {timestep.shape[0]}")
+                # check if we need to concat timesteps
+                if isinstance(timestep, torch.Tensor) and len(timestep.shape) > 1:
+                    ts_bs = timestep.shape[0]
+                    if ts_bs != latent_model_input.shape[0]:
+                        if ts_bs == 1:
+                            timestep = torch.cat([timestep] * latent_model_input.shape[0])
+                        elif ts_bs * 2 == latent_model_input.shape[0]:
+                            timestep = torch.cat([timestep] * 2)
+                        else:
+                            raise ValueError(
+                                f"Batch size of latents {latent_model_input.shape[0]} must be the same or half the batch size of timesteps {timestep.shape[0]}")
 
             # predict the noise residual
             noise_pred = self.unet(
                 latent_model_input,
                 timestep,
                 encoder_hidden_states=text_embeddings.text_embeds,
+                **kwargs,
             ).sample
 
             if do_classifier_free_guidance:
@@ -561,6 +855,27 @@ class StableDiffusion:
 
         return noise_pred
 
+    def step_scheduler(self, model_input, latent_input, timestep_tensor):
+        mi_chunks = torch.chunk(model_input, model_input.shape[0], dim=0)
+        latent_chunks = torch.chunk(latent_input, latent_input.shape[0], dim=0)
+        timestep_chunks = torch.chunk(timestep_tensor, timestep_tensor.shape[0], dim=0)
+        out_chunks = []
+        if len(timestep_chunks) == 1 and len(mi_chunks) > 1:
+            # expand timestep to match
+            timestep_chunks = timestep_chunks * len(mi_chunks)
+
+        for idx in range(model_input.shape[0]):
+            # Reset the step index so each sample is stepped independently
+            if hasattr(self.noise_scheduler, '_step_index'):
+                self.noise_scheduler._step_index = None
+            if hasattr(self.noise_scheduler, 'is_scale_input_called'):
+                self.noise_scheduler.is_scale_input_called = True
+            out_chunks.append(
+                self.noise_scheduler.step(mi_chunks[idx], timestep_chunks[idx], latent_chunks[idx], return_dict=False)[
+                    0]
+            )
+        return torch.cat(out_chunks, dim=0)
+
     # ref: https://github.com/huggingface/diffusers/blob/0bab447670f47c28df60fbd2f6a0f833f75a16f5/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L746
     def diffuse_some_steps(
             self,
@@ -570,10 +885,14 @@ class StableDiffusion:
             start_timesteps=0,
             guidance_scale=1,
             add_time_ids=None,
+            bleed_ratio: float = 0.5,
+            bleed_latents: torch.FloatTensor = None,
             **kwargs,
     ):
+        timesteps_to_run = self.noise_scheduler.timesteps[start_timesteps:total_timesteps]
 
-        for timestep in tqdm(self.noise_scheduler.timesteps[start_timesteps:total_timesteps], leave=False):
+        for timestep in tqdm(timesteps_to_run, leave=False):
+            timestep = timestep.unsqueeze_(0)
             noise_pred = self.predict_noise(
                 latents,
                 text_embeddings,
@@ -582,31 +901,67 @@ class StableDiffusion:
                 add_time_ids=add_time_ids,
                 **kwargs,
             )
-            latents = self.noise_scheduler.step(noise_pred, timestep, latents).prev_sample
+            # some schedulers need to run separately, so do that. (euler for example)
+
+            latents = self.step_scheduler(noise_pred, latents, timestep)
+
+            # if not last step, and bleeding, bleed in some latents
+            if bleed_latents is not None and timestep != self.noise_scheduler.timesteps[-1]:
+                latents = (latents * (1 - bleed_ratio)) + (bleed_latents * bleed_ratio)
 
         # return latents_steps
         return latents
 
-    def encode_prompt(self, prompt, num_images_per_prompt=1) -> PromptEmbeds:
+    def encode_prompt(
+            self,
+            prompt,
+            prompt2=None,
+            num_images_per_prompt=1,
+            force_all=False,
+            long_prompts=False,
+            max_length=None,
+            dropout_prob=0.0,
+    ) -> PromptEmbeds:
+        # sd1.5 embeddings are (bs, 77, 768)
         prompt = prompt
         # if it is not a list, make it one
         if not isinstance(prompt, list):
             prompt = [prompt]
+
+        if prompt2 is not None and not isinstance(prompt2, list):
+            prompt2 = [prompt2]
         if self.is_xl:
+            # todo make this a config
+            # 50% chance to use an encoder anyway even if it is disabled
+            # allows the other TE to compensate for the disabled one
+            # use_encoder_1 = self.use_text_encoder_1 or force_all or random.random() > 0.5
+            # use_encoder_2 = self.use_text_encoder_2 or force_all or random.random() > 0.5
+            use_encoder_1 = True
+            use_encoder_2 = True
+
             return PromptEmbeds(
                 train_tools.encode_prompts_xl(
                     self.tokenizer,
                     self.text_encoder,
                     prompt,
+                    prompt2,
                     num_images_per_prompt=num_images_per_prompt,
-                    use_text_encoder_1=self.use_text_encoder_1,
-                    use_text_encoder_2=self.use_text_encoder_2,
+                    use_text_encoder_1=use_encoder_1,
+                    use_text_encoder_2=use_encoder_2,
+                    truncate=not long_prompts,
+                    max_length=max_length,
+                    dropout_prob=dropout_prob,
                 )
             )
         else:
             return PromptEmbeds(
                 train_tools.encode_prompts(
-                    self.tokenizer, self.text_encoder, prompt
+                    self.tokenizer,
+                    self.text_encoder,
+                    prompt,
+                    truncate=not long_prompts,
+                    max_length=max_length,
+                    dropout_prob=dropout_prob
                 )
             )
 
@@ -638,7 +993,6 @@ class StableDiffusion:
                 image_list[i] = Resize((image.shape[1] // 8 * 8, image.shape[2] // 8 * 8))(image)
 
         images = torch.stack(image_list)
-        flush()
         latents = self.vae.encode(images).latent_dist.sample()
         latents = latents * self.vae.config['scaling_factor']
         latents = latents.to(device, dtype=dtype)
@@ -660,7 +1014,7 @@ class StableDiffusion:
         if self.vae.device == 'cpu':
             self.vae.to(self.device)
         latents = latents.to(device, dtype=dtype)
-        latents = latents / 0.18215
+        latents = latents / self.vae.config['scaling_factor']
         images = self.vae.decode(latents).sample
         images = images.to(device, dtype=dtype)
 
@@ -707,45 +1061,19 @@ class StableDiffusion:
 
         raise ValueError(f"Unknown weight name: {name}")
 
-    def inject_trigger_into_prompt(self, prompt, trigger=None, to_replace_list=None, add_if_not_present=True):
-        if trigger is None:
-            return prompt
-        output_prompt = prompt
-        default_replacements = ["[name]", "[trigger]"]
-        num_times_trigger_exists = prompt.count(trigger)
-
-        replace_with = trigger
-        if to_replace_list is None:
-            to_replace_list = default_replacements
-        else:
-            to_replace_list += default_replacements
-
-        # remove duplicates
-        to_replace_list = list(set(to_replace_list))
-
-        # replace them all
-        for to_replace in to_replace_list:
-            # replace it
-            output_prompt = output_prompt.replace(to_replace, replace_with)
-
-        # see how many times replace_with is in the prompt
-        num_instances = output_prompt.count(replace_with)
-
-        if num_instances == 0 and add_if_not_present:
-            # add it to the beginning of the prompt
-            output_prompt = replace_with + " " + output_prompt
-
-        if num_instances > 1:
-            print(
-                f"Warning: {trigger} token appears {num_instances} times in prompt {output_prompt}. This may cause issues.")
-
-        return output_prompt
+    def inject_trigger_into_prompt(self, prompt, trigger=None, to_replace_list=None, add_if_not_present=False):
+        return inject_trigger_into_prompt(
+            prompt,
+            trigger=trigger,
+            to_replace_list=to_replace_list,
+            add_if_not_present=add_if_not_present,
+        )
 
     def state_dict(self, vae=True, text_encoder=True, unet=True):
         state_dict = OrderedDict()
         if vae:
             for k, v in self.vae.state_dict().items():
-                new_key = k if k.startswith(f"{VAE_PREFIX_UNET}") else f"{VAE_PREFIX_UNET}_{k}"
+                new_key = k if k.startswith(f"{SD_PREFIX_VAE}") else f"{SD_PREFIX_VAE}_{k}"
                 state_dict[new_key] = v
         if text_encoder:
             if isinstance(self.text_encoder, list):
@@ -764,16 +1092,382 @@ class StableDiffusion:
                 state_dict[new_key] = v
         return state_dict
 
+    def named_parameters(self, vae=True, text_encoder=True, unet=True, refiner=False, state_dict_keys=False) -> \
+            OrderedDict[
+                str, Parameter]:
+        named_params: OrderedDict[str, Parameter] = OrderedDict()
+        if vae:
+            for name, param in self.vae.named_parameters(recurse=True, prefix=f"{SD_PREFIX_VAE}"):
+                named_params[name] = param
+        if text_encoder:
+            if isinstance(self.text_encoder, list):
+                for i, encoder in enumerate(self.text_encoder):
+                    if self.is_xl and not self.model_config.use_text_encoder_1 and i == 0:
+                        # dont add these params
+                        continue
+                    if self.is_xl and not self.model_config.use_text_encoder_2 and i == 1:
+                        # dont add these params
+                        continue
+
+                    for name, param in encoder.named_parameters(recurse=True, prefix=f"{SD_PREFIX_TEXT_ENCODER}{i}"):
+                        named_params[name] = param
+            else:
+                for name, param in self.text_encoder.named_parameters(recurse=True, prefix=f"{SD_PREFIX_TEXT_ENCODER}"):
+                    named_params[name] = param
+        if unet:
+            for name, param in self.unet.named_parameters(recurse=True, prefix=f"{SD_PREFIX_UNET}"):
+                named_params[name] = param
+
+        if refiner:
+            for name, param in self.refiner_unet.named_parameters(recurse=True, prefix=f"{SD_PREFIX_REFINER_UNET}"):
+                named_params[name] = param
+
+        # convert to state dict keys, just replace the first . with _ on each key
+        if state_dict_keys:
+            new_named_params = OrderedDict()
+            for k, v in named_params.items():
+                # replace only the first . with an _
+                new_key = k.replace('.', '_', 1)
+                new_named_params[new_key] = v
+            named_params = new_named_params
+
+        return named_params
+
+    def save_refiner(self, output_file: str, meta: OrderedDict, save_dtype=get_torch_dtype('fp16')):
+
+        # load the full refiner since we only train unet
+        if self.model_config.refiner_name_or_path is None:
+            raise ValueError("Refiner must be specified to save it")
+        refiner_config_path = os.path.join(ORIG_CONFIGS_ROOT, 'sd_xl_refiner.yaml')
+        # load the refiner model
+        dtype = get_torch_dtype(self.dtype)
+        model_path = self.model_config._original_refiner_name_or_path
+        if not os.path.exists(model_path) or os.path.isdir(model_path):
+            # TODO only load unet??
+            refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+                model_path,
+                dtype=dtype,
+                device='cpu',
+                variant="fp16",
+                use_safetensors=True,
+            )
+        else:
+            refiner = StableDiffusionXLImg2ImgPipeline.from_single_file(
+                model_path,
+                dtype=dtype,
+                device='cpu',
+                torch_dtype=self.torch_dtype,
+                original_config_file=refiner_config_path,
+            )
+        # replace original unet
+        refiner.unet = self.refiner_unet
+        flush()
+
+        diffusers_state_dict = OrderedDict()
+        for k, v in refiner.vae.state_dict().items():
+            new_key = k if k.startswith(f"{SD_PREFIX_VAE}") else f"{SD_PREFIX_VAE}_{k}"
+            diffusers_state_dict[new_key] = v
+        for k, v in refiner.text_encoder_2.state_dict().items():
+            new_key = k if k.startswith(f"{SD_PREFIX_TEXT_ENCODER2}_") else f"{SD_PREFIX_TEXT_ENCODER2}_{k}"
+            diffusers_state_dict[new_key] = v
+        for k, v in refiner.unet.state_dict().items():
+            new_key = k if k.startswith(f"{SD_PREFIX_UNET}_") else f"{SD_PREFIX_UNET}_{k}"
+            diffusers_state_dict[new_key] = v
+
+        converted_state_dict = get_ldm_state_dict_from_diffusers(
+            diffusers_state_dict,
+            'sdxl_refiner',
+            device='cpu',
+            dtype=save_dtype
+        )
+
+        # make sure parent folder exists
+        os.makedirs(os.path.dirname(output_file), exist_ok=True)
+        save_file(converted_state_dict, output_file, metadata=meta)
+
+        if self.config_file is not None:
+            output_path_no_ext = os.path.splitext(output_file)[0]
+            output_config_path = f"{output_path_no_ext}.yaml"
+            shutil.copyfile(self.config_file, output_config_path)
+
+
     def save(self, output_file: str, meta: OrderedDict, save_dtype=get_torch_dtype('fp16'), logit_scale=None):
         version_string = '1'
         if self.is_v2:
             version_string = '2'
         if self.is_xl:
             version_string = 'sdxl'
-        save_ldm_model_from_diffusers(
-            sd=self,
-            output_file=output_file,
-            meta=meta,
-            save_dtype=save_dtype,
-            sd_version=version_string,
-        )
+        if self.is_ssd:
+            # overwrite sdxl because both will be true here
+            version_string = 'ssd'
+        # if output file does not end in .safetensors, then it is a directory and we are
+        # saving in diffusers format
+        if not output_file.endswith('.safetensors'):
+            # diffusers
+            self.pipeline.save_pretrained(
+                save_directory=output_file,
+                safe_serialization=True,
+            )
+            # save out meta config
+            meta_path = os.path.join(output_file, 'aitk_meta.yaml')
+            with open(meta_path, 'w') as f:
+                yaml.dump(meta, f)
+
+        else:
+            save_ldm_model_from_diffusers(
+                sd=self,
+                output_file=output_file,
+                meta=meta,
+                save_dtype=save_dtype,
+                sd_version=version_string,
+            )
+            if self.config_file is not None:
+                output_path_no_ext = os.path.splitext(output_file)[0]
+                output_config_path = f"{output_path_no_ext}.yaml"
+                shutil.copyfile(self.config_file, output_config_path)
+
+    def prepare_optimizer_params(
+            self,
+            unet=False,
+            text_encoder=False,
+            text_encoder_lr=None,
+            unet_lr=None,
+            refiner_lr=None,
+            refiner=False,
+            default_lr=1e-6,
+    ):
+        # todo maybe only get locon ones?
+        # not all items are saved, to make it match, we need to match our save mappings
+        # and not train anything not mapped. Also add learning rate
+        version = 'sd1'
+        if self.is_xl:
+            version = 'sdxl'
+        if self.is_v2:
+            version = 'sd2'
+        mapping_filename = f"stable_diffusion_{version}.json"
+        mapping_path = os.path.join(KEYMAPS_ROOT, mapping_filename)
+        with open(mapping_path, 'r') as f:
+            mapping = json.load(f)
+        ldm_diffusers_keymap = mapping['ldm_diffusers_keymap']
+
+        trainable_parameters = []
+
+        # we use state dict to find params
+
+        if unet:
+            named_params = self.named_parameters(vae=False, unet=unet, text_encoder=False, state_dict_keys=True)
+            unet_lr = unet_lr if unet_lr is not None else default_lr
+            params = []
+            for key, diffusers_key in ldm_diffusers_keymap.items():
+                if diffusers_key in named_params and diffusers_key not in DO_NOT_TRAIN_WEIGHTS:
+                    if named_params[diffusers_key].requires_grad:
+                        params.append(named_params[diffusers_key])
+            param_data = {"params": params, "lr": unet_lr}
+            trainable_parameters.append(param_data)
+            print(f"Found {len(params)} trainable parameter in unet")
+
+        if text_encoder:
+            named_params = self.named_parameters(vae=False, unet=False, text_encoder=text_encoder, state_dict_keys=True)
+            text_encoder_lr = text_encoder_lr if text_encoder_lr is not None else default_lr
+            params = []
+            for key, diffusers_key in ldm_diffusers_keymap.items():
+                if diffusers_key in named_params and diffusers_key not in DO_NOT_TRAIN_WEIGHTS:
+                    if named_params[diffusers_key].requires_grad:
+                        params.append(named_params[diffusers_key])
+            param_data = {"params": params, "lr": text_encoder_lr}
+            trainable_parameters.append(param_data)
+
+            print(f"Found {len(params)} trainable parameter in text encoder")
+
+        if refiner:
+            named_params = self.named_parameters(vae=False, unet=False, text_encoder=False, refiner=True,
+                                                 state_dict_keys=True)
+            refiner_lr = refiner_lr if refiner_lr is not None else default_lr
+            params = []
+            for key, diffusers_key in ldm_diffusers_keymap.items():
+                diffusers_key = f"refiner_{diffusers_key}"
+                if diffusers_key in named_params and diffusers_key not in DO_NOT_TRAIN_WEIGHTS:
+                    if named_params[diffusers_key].requires_grad:
+                        params.append(named_params[diffusers_key])
+            param_data = {"params": params, "lr": refiner_lr}
+            trainable_parameters.append(param_data)
+
+            print(f"Found {len(params)} trainable parameter in refiner")
+
+        return trainable_parameters
+
+    def save_device_state(self):
+        """Snapshot the train/eval mode and device (plus requires_grad where recorded) of each
+        sub-module into self.device_state so restore_device_state() can re-apply them later."""
+        self.device_state = {
+            **empty_preset,  # defaults come from empty_preset, which is defined outside this block
+            'vae': {
+                'training': self.vae.training,
+                'device': self.vae.device,
+            },
+            'unet': {
+                'training': self.unet.training,
+                'device': self.unet.device,
+                'requires_grad': self.unet.conv_in.weight.requires_grad,  # one weight stands in for the whole unet
+            },
+        }
+        if isinstance(self.text_encoder, list):
+            self.device_state['text_encoder']: List[dict] = []  # one entry per encoder, in list order
+            for encoder in self.text_encoder:
+                self.device_state['text_encoder'].append({
+                    'training': encoder.training,
+                    'device': encoder.device,
+                    # todo there has to be a better way to do this
+                    'requires_grad': encoder.text_model.final_layer_norm.weight.requires_grad
+                })
+        else:
+            self.device_state['text_encoder'] = {
+                'training': self.text_encoder.training,
+                'device': self.text_encoder.device,
+                'requires_grad': self.text_encoder.text_model.final_layer_norm.weight.requires_grad
+            }
+        if self.adapter is not None:
+            if isinstance(self.adapter, IPAdapter):
+                requires_grad = self.adapter.adapter_modules.training  # NOTE(review): training flag stored as requires_grad - confirm
+                adapter_device = self.unet.device  # the IP adapter's device is read from the unet
+            elif isinstance(self.adapter, T2IAdapter):
+                requires_grad = self.adapter.adapter.conv_in.weight.requires_grad
+                adapter_device = self.adapter.device
+            else:
+                raise ValueError(f"Unknown adapter type: {type(self.adapter)}")
+            self.device_state['adapter'] = {
+                'training': self.adapter.training,
+                'device': adapter_device,
+                'requires_grad': requires_grad,
+            }
+
+        if self.refiner_unet is not None:
+            self.device_state['refiner_unet'] = {
+                'training': self.refiner_unet.training,
+                'device': self.refiner_unet.device,
+                'requires_grad': self.refiner_unet.conv_in.weight.requires_grad,
+            }
+
+    def restore_device_state(self):
+        """Re-apply the state captured by save_device_state(), then forget it."""
+        saved_state = self.device_state
+        if saved_state is None:
+            return  # nothing was saved, so there is nothing to restore
+        self.set_device_state(saved_state)
+        self.device_state = None
+
+    def set_device_state(self, state):  # apply a saved or preset state dict to every sub-module
+        if state['vae']['training']:
+            self.vae.train()
+        else:
+            self.vae.eval()
+        self.vae.to(state['vae']['device'])  # no requires_grad entry is read for the vae
+        if state['unet']['training']:
+            self.unet.train()
+        else:
+            self.unet.eval()
+        self.unet.to(state['unet']['device'])
+        if state['unet']['requires_grad']:
+            self.unet.requires_grad_(True)
+        else:
+            self.unet.requires_grad_(False)
+        if isinstance(self.text_encoder, list):
+            for i, encoder in enumerate(self.text_encoder):
+                if isinstance(state['text_encoder'], list):  # per-encoder entries, matched by index
+                    if state['text_encoder'][i]['training']:
+                        encoder.train()
+                    else:
+                        encoder.eval()
+                    encoder.to(state['text_encoder'][i]['device'])
+                    encoder.requires_grad_(state['text_encoder'][i]['requires_grad'])
+                else:  # a single state dict is applied to every encoder
+                    if state['text_encoder']['training']:
+                        encoder.train()
+                    else:
+                        encoder.eval()
+                    encoder.to(state['text_encoder']['device'])
+                    encoder.requires_grad_(state['text_encoder']['requires_grad'])
+        else:
+            if state['text_encoder']['training']:
+                self.text_encoder.train()
+            else:
+                self.text_encoder.eval()
+            self.text_encoder.to(state['text_encoder']['device'])
+            self.text_encoder.requires_grad_(state['text_encoder']['requires_grad'])
+
+        if self.adapter is not None:  # state['adapter'] must exist whenever an adapter is attached
+            self.adapter.to(state['adapter']['device'])
+            self.adapter.requires_grad_(state['adapter']['requires_grad'])
+            if state['adapter']['training']:
+                self.adapter.train()
+            else:
+                self.adapter.eval()
+
+        if self.refiner_unet is not None:  # likewise state['refiner_unet'] is required here
+            self.refiner_unet.to(state['refiner_unet']['device'])
+            self.refiner_unet.requires_grad_(state['refiner_unet']['requires_grad'])
+            if state['refiner_unet']['training']:
+                self.refiner_unet.train()
+            else:
+                self.refiner_unet.eval()
+        flush()  # flush is defined outside this block; presumably releases memory after the moves - confirm
+
+    def set_device_state_preset(self, device_state_preset: DeviceStatePreset):
+        """Save the current state, then apply the module placement that the named preset needs."""
+
+        # save current state first
+        self.save_device_state()
+
+        active_modules = []  # modules placed on self.device_torch; every other module is sent to 'cpu'
+        training_modules = []  # never filled in this method, so each 'training'/'requires_grad' below is False
+        if device_state_preset in ['cache_latents']:
+            active_modules = ['vae']
+        if device_state_preset in ['generate']:
+            active_modules = ['vae', 'unet', 'text_encoder', 'adapter', 'refiner_unet']
+
+        state = copy.deepcopy(empty_preset)  # deep copy so the shared preset is not mutated
+        # vae
+        state['vae'] = {
+            'training': 'vae' in training_modules,
+            'device': self.device_torch if 'vae' in active_modules else 'cpu',
+            'requires_grad': 'vae' in training_modules,
+        }
+
+        # unet
+        state['unet'] = {
+            'training': 'unet' in training_modules,
+            'device': self.device_torch if 'unet' in active_modules else 'cpu',
+            'requires_grad': 'unet' in training_modules,
+        }
+
+        if self.refiner_unet is not None:
+            state['refiner_unet'] = {
+                'training': 'refiner_unet' in training_modules,
+                'device': self.device_torch if 'refiner_unet' in active_modules else 'cpu',
+                'requires_grad': 'refiner_unet' in training_modules,
+            }
+
+        # text encoder
+        if isinstance(self.text_encoder, list):
+            state['text_encoder'] = []  # one identical entry per encoder
+            for i, encoder in enumerate(self.text_encoder):
+                state['text_encoder'].append({
+                    'training': 'text_encoder' in training_modules,
+                    'device': self.device_torch if 'text_encoder' in active_modules else 'cpu',
+                    'requires_grad': 'text_encoder' in training_modules,
+                })
+        else:
+            state['text_encoder'] = {
+                'training': 'text_encoder' in training_modules,
+                'device': self.device_torch if 'text_encoder' in active_modules else 'cpu',
+                'requires_grad': 'text_encoder' in training_modules,
+            }
+
+        if self.adapter is not None:
+            state['adapter'] = {
+                'training': 'adapter' in training_modules,
+                'device': self.device_torch if 'adapter' in active_modules else 'cpu',
+                'requires_grad': 'adapter' in training_modules,
+            }
+
+        self.set_device_state(state)  # the state saved above stays in self.device_state for a later restore
diff --git a/toolkit/style.py b/toolkit/style.py
index 4282a230..b08214a9 100644
--- a/toolkit/style.py
+++ b/toolkit/style.py
@@ -33,12 +33,17 @@ class ContentLoss(nn.Module):
 
         # Define the separate loss function
         def separated_loss(y_pred, y_true):
+            y_pred = y_pred.float()
+            y_true = y_true.float()
             diff = torch.abs(y_pred - y_true)
             l2 = torch.sum(diff ** 2, dim=[1, 2, 3], keepdim=True) / 2.0
             return 2. * l2 / content_size
 
         # Calculate itemized loss
         pred_itemized_loss = separated_loss(pred_layer, target_layer)
+        # check if is nan
+        if torch.isnan(pred_itemized_loss).any():
+            print('pred_itemized_loss is nan')
 
         # Calculate the mean of itemized loss
         loss = torch.mean(pred_itemized_loss, dim=(1, 2, 3), keepdim=True)
@@ -48,6 +53,7 @@ class ContentLoss(nn.Module):
 
 
 def convert_to_gram_matrix(inputs):
+    inputs = inputs.float()
     shape = inputs.size()
     batch, filters, height, width = shape[0], shape[1], shape[2], shape[3]
     size = height * width * filters
@@ -93,11 +99,14 @@ class StyleLoss(nn.Module):
         target_grams = convert_to_gram_matrix(style_target)
         pred_grams = convert_to_gram_matrix(preds)
         itemized_loss = separated_loss(pred_grams, target_grams)
+        # check if is nan
+        if torch.isnan(itemized_loss).any():
+            print('itemized_loss is nan')
         # reshape itemized loss to be (batch, 1, 1, 1)
         itemized_loss = torch.unsqueeze(itemized_loss, dim=1)
         # gram_size = (tf.shape(target_grams)[1] * tf.shape(target_grams)[2])
         loss = torch.mean(itemized_loss, dim=(1, 2), keepdim=True)
-        self.loss = loss.to(input_dtype)
+        self.loss = loss.to(input_dtype).float()
         return stacked_input.to(input_dtype)
 
 
@@ -149,7 +158,7 @@ def get_style_model_and_losses(
 ):
     # content_layers = ['conv_4']
     # style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
-    content_layers = ['conv4_2']
+    content_layers = ['conv2_2', 'conv3_2', 'conv4_2', 'conv5_2']
     style_layers = ['conv2_1', 'conv3_1', 'conv4_1']
     cnn = models.vgg19(pretrained=True).features.to(device, dtype=dtype).eval()
     # set all weights in the model to our dtype
diff --git a/toolkit/timer.py b/toolkit/timer.py
new file mode 100644
index 00000000..ca4fecba
--- /dev/null
+++ b/toolkit/timer.py
@@ -0,0 +1,65 @@
+import time
+from collections import OrderedDict, deque
+
+
+class Timer:
+    """Named stopwatches that keep the last `max_buffer` durations of each timer."""
+
+    def __init__(self, name='Timer', max_buffer=10):
+        self.name = name
+        self.max_buffer = max_buffer
+        self.timers = OrderedDict()  # timer name -> deque of elapsed seconds
+        self.active_timers = {}  # timer name -> start timestamp
+        self.current_timer = None  # most recent name passed to __call__
+        self._context_stack = []  # names of the currently open `with` blocks
+
+    def start(self, timer_name):
+        """Start (or restart) the timer called `timer_name`."""
+        if timer_name not in self.timers:
+            self.timers[timer_name] = deque(maxlen=self.max_buffer)
+        self.active_timers[timer_name] = time.perf_counter()
+
+    def cancel(self, timer_name):
+        """Cancel an active timer without recording a duration."""
+        self.active_timers.pop(timer_name, None)
+
+    def stop(self, timer_name):
+        """Record the elapsed time of `timer_name`; ValueError if it is not running."""
+        if timer_name not in self.active_timers:
+            raise ValueError(f"Timer '{timer_name}' was not started!")
+
+        started_at = self.active_timers.pop(timer_name)
+        self.timers[timer_name].append(time.perf_counter() - started_at)
+
+    def print(self):
+        """Print the average of each timer, largest accumulated time first."""
+        print(f"\nTimer '{self.name}':")
+        for timer_name, timings in sorted(self.timers.items(), key=lambda x: sum(x[1]), reverse=True):
+            if not timings:
+                continue  # started but never stopped: nothing to average yet
+            avg_time = sum(timings) / len(timings)
+            print(f" - {avg_time:.4f}s avg - {timer_name}, num = {len(timings)}")
+        print('')
+
+    def reset(self):
+        """Forget every recorded duration and every running timer."""
+        self.timers.clear()
+        self.active_timers.clear()
+
+    def __call__(self, timer_name):
+        """Start `timer_name` and return self so it can be used in a `with` statement."""
+        self.current_timer = timer_name
+        self._context_stack.append(timer_name)
+        self.start(timer_name)
+        return self
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # pop so that nested `with timer(...)` blocks each close their own timer
+        timer_name = self._context_stack.pop() if self._context_stack else self.current_timer
+        if exc_type is None:
+            self.stop(timer_name)
+        else:
+            self.cancel(timer_name)
diff --git a/toolkit/train_tools.py b/toolkit/train_tools.py
index 8bcea8a9..4ea23b69 100644
--- a/toolkit/train_tools.py
+++ b/toolkit/train_tools.py
@@ -5,6 +5,9 @@ import os
 import time
 from typing import TYPE_CHECKING, Union
 import sys
+
+from torch.cuda.amp import GradScaler
+
 from toolkit.paths import SD_SCRIPTS_ROOT
 
 sys.path.append(SD_SCRIPTS_ROOT)
@@ -444,29 +447,78 @@ if TYPE_CHECKING:
 
 
 def text_tokenize(
-        tokenizer: 'CLIPTokenizer',  # 普通ならひとつ、XLならふたつ！
+        tokenizer: 'CLIPTokenizer',
         prompts: list[str],
+        truncate: bool = True,  # False pads up to several model_max_length-wide chunks instead of one
+        max_length: int = None,  # None derives the length from truncate and max_length_multiplier
+        max_length_multiplier: int = 4,
 ):
-    return tokenizer(
+    # pick the padded length when the caller did not pass one
+    if max_length is None:
+        if truncate:
+            max_length = tokenizer.model_max_length
+        else:
+            # allow up to 4x the max length for long prompts
+            max_length = tokenizer.model_max_length * max_length_multiplier
+
+    input_ids = tokenizer(
         prompts,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
+        padding='max_length',
+        max_length=max_length,
         truncation=True,
         return_tensors="pt",
     ).input_ids
 
+    if truncate or max_length == tokenizer.model_max_length:
+        return input_ids
+    else:
+        # remove additional padding: drop every chunk that holds a single repeated token id
+        num_chunks = input_ids.shape[1] // tokenizer.model_max_length
+        chunks = torch.chunk(input_ids, chunks=num_chunks, dim=1)
+
+        # New list to store non-redundant chunks
+        non_redundant_chunks = []
+
+        for chunk in chunks:
+            if not chunk.eq(chunk[0, 0]).all():  # Check if all elements in the chunk are the same as the first element
+                non_redundant_chunks.append(chunk)
+
+        input_ids = torch.cat(non_redundant_chunks, dim=1)  # torch.cat raises if every chunk was dropped
+        return input_ids
+
 
 # https://github.com/huggingface/diffusers/blob/78922ed7c7e66c20aa95159c7b7a6057ba7d590d/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py#L334-L348
 def text_encode_xl(
         text_encoder: Union['CLIPTextModel', 'CLIPTextModelWithProjection'],
         tokens: torch.FloatTensor,
         num_images_per_prompt: int = 1,
+        max_length: int = 77,  # not sure what default to put here, always pass one?
+        truncate: bool = True,
 ):
-    prompt_embeds = text_encoder(
-        tokens.to(text_encoder.device), output_hidden_states=True
-    )
-    pooled_prompt_embeds = prompt_embeds[0]
-    prompt_embeds = prompt_embeds.hidden_states[-2]  # always penultimate layer
+    if truncate:
+        # normal short prompt 77 tokens max
+        prompt_embeds = text_encoder(
+            tokens.to(text_encoder.device), output_hidden_states=True
+        )
+        pooled_prompt_embeds = prompt_embeds[0]
+        prompt_embeds = prompt_embeds.hidden_states[-2]  # always penultimate layer
+    else:
+        # handle long prompts
+        prompt_embeds_list = []
+        tokens = tokens.to(text_encoder.device)
+        pooled_prompt_embeds = None
+        for i in range(0, tokens.shape[-1], max_length):
+            # todo run it through the in a single batch
+            section_tokens = tokens[:, i: i + max_length]
+            embeds = text_encoder(section_tokens, output_hidden_states=True)
+            pooled_prompt_embed = embeds[0]
+            if pooled_prompt_embeds is None:
+                # we only want the first ( I think??)
+                pooled_prompt_embeds = pooled_prompt_embed
+            prompt_embed = embeds.hidden_states[-2]  # always penultimate layer
+            prompt_embeds_list.append(prompt_embed)
+
+        prompt_embeds = torch.cat(prompt_embeds_list, dim=1)
 
     bs_embed, seq_len, _ = prompt_embeds.shape
     prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
@@ -479,26 +531,43 @@ def encode_prompts_xl(
         tokenizers: list['CLIPTokenizer'],
         text_encoders: list[Union['CLIPTextModel', 'CLIPTextModelWithProjection']],
         prompts: list[str],
+        prompts2: Union[list[str], None],
         num_images_per_prompt: int = 1,
         use_text_encoder_1: bool = True,  # sdxl
-        use_text_encoder_2: bool = True  # sdxl
+        use_text_encoder_2: bool = True,  # sdxl
+        truncate: bool = True,
+        max_length=None,
+        dropout_prob=0.0,
 ) -> tuple[torch.FloatTensor, torch.FloatTensor]:
     # text_encoder and text_encoder_2's penuultimate layer's output
     text_embeds_list = []
     pooled_text_embeds = None  # always text_encoder_2's pool
+    if prompts2 is None:
+        prompts2 = prompts
 
     for idx, (tokenizer, text_encoder) in enumerate(zip(tokenizers, text_encoders)):
         # todo, we are using a blank string to ignore that encoder for now.
         # find a better way to do this (zeroing?, removing it from the unet?)
-        prompt_list_to_use = prompts
+        prompt_list_to_use = prompts if idx == 0 else prompts2
         if idx == 0 and not use_text_encoder_1:
             prompt_list_to_use = ["" for _ in prompts]
         if idx == 1 and not use_text_encoder_2:
             prompt_list_to_use = ["" for _ in prompts]
 
-        text_tokens_input_ids = text_tokenize(tokenizer, prompt_list_to_use)
+        if dropout_prob > 0.0:
+            # randomly drop out prompts
+            prompt_list_to_use = [
+                prompt if torch.rand(1).item() > dropout_prob else "" for prompt in prompt_list_to_use
+            ]
+
+        text_tokens_input_ids = text_tokenize(tokenizer, prompt_list_to_use, truncate=truncate, max_length=max_length)
+        # set the max length for the next one
+        if idx == 0:
+            max_length = text_tokens_input_ids.shape[-1]
+
         text_embeds, pooled_text_embeds = text_encode_xl(
-            text_encoder, text_tokens_input_ids, num_images_per_prompt
+            text_encoder, text_tokens_input_ids, num_images_per_prompt, max_length=tokenizer.model_max_length,
+            truncate=truncate
         )
 
         text_embeds_list.append(text_embeds)
@@ -511,17 +580,49 @@ def encode_prompts_xl(
     return torch.concat(text_embeds_list, dim=-1), pooled_text_embeds
 
 
-def text_encode(text_encoder: 'CLIPTextModel', tokens):
-    return text_encoder(tokens.to(text_encoder.device))[0]
+# ref for long prompts https://github.com/huggingface/diffusers/issues/2136
+def text_encode(text_encoder: 'CLIPTextModel', tokens, truncate: bool = True, max_length=None):
+    """Encode tokens; with truncate=False, encode max_length-wide windows and concat them on dim 1."""
+    if max_length is None and not truncate:
+        raise ValueError("max_length must be set if truncate is False")
+    try:
+        tokens = tokens.to(text_encoder.device)
+    except Exception as e:
+        print(e)
+        print("tokens.device", tokens.device)
+        print("text_encoder.device", text_encoder.device)
+        raise
+
+    if truncate:
+        return text_encoder(tokens)[0]
+
+    # handle long prompts: the encoder is called once per max_length-wide window
+    prompt_embeds_list = [
+        text_encoder(tokens[:, i: i + max_length])[0]
+        for i in range(0, tokens.shape[-1], max_length)
+    ]
+    return torch.cat(prompt_embeds_list, dim=1)
 
 
 def encode_prompts(
         tokenizer: 'CLIPTokenizer',
-        text_encoder: 'CLIPTokenizer',
+        text_encoder: 'CLIPTextModel',
         prompts: list[str],
+        truncate: bool = True,
+        max_length=None,
+        dropout_prob=0.0,
 ):
-    text_tokens = text_tokenize(tokenizer, prompts)
-    text_embeddings = text_encode(text_encoder, text_tokens)
+    if max_length is None:
+        max_length = tokenizer.model_max_length  # NOTE(review): with this default, truncate=False still yields one window - confirm
+
+    if dropout_prob > 0.0:
+        # each prompt is independently replaced by "" with probability dropout_prob
+        prompts = [
+            prompt if torch.rand(1).item() > dropout_prob else "" for prompt in prompts
+        ]
+
+    text_tokens = text_tokenize(tokenizer, prompts, truncate=truncate, max_length=max_length)
+    text_embeddings = text_encode(text_encoder, text_tokens, truncate=truncate, max_length=max_length)
 
     return text_embeddings
 
@@ -602,18 +703,86 @@ def get_all_snr(noise_scheduler, device):
         all_snr.requires_grad = False
     return all_snr.to(device)
 
+class LearnableSNRGamma:
+    """
+    This is a trainer for learnable snr gamma
+    It will adapt to the dataset and attempt to adjust the snr multiplier to balance the loss over the timesteps
+    """
+    def __init__(self, noise_scheduler: Union['DDPMScheduler'], device='cuda'):
+        self.device = device
+        self.noise_scheduler: Union['DDPMScheduler'] = noise_scheduler
+        self.offset_1 = torch.nn.Parameter(torch.tensor(0.0, dtype=torch.float32, device=device))
+        self.offset_2 = torch.nn.Parameter(torch.tensor(0.777, dtype=torch.float32, device=device))
+        self.scale = torch.nn.Parameter(torch.tensor(4.14, dtype=torch.float32, device=device))
+        self.gamma = torch.nn.Parameter(torch.tensor(2.03, dtype=torch.float32, device=device))
+        self.optimizer = torch.optim.AdamW([self.offset_1, self.offset_2, self.gamma, self.scale], lr=0.01)
+        self.buffer = []  # rolling window of recent per-sample mean losses
+        self.max_buffer_size = 20
+
+    def forward(self, loss, timesteps):
+        # run one optimizer step for the learnable snr parameters here and return our values detached
+        loss = loss.detach()
+        with torch.no_grad():
+            loss_chunks = torch.chunk(loss, loss.shape[0], dim=0)  # one chunk per item along dim 0
+            for loss_chunk in loss_chunks:
+                self.buffer.append(loss_chunk.mean().detach())
+                if len(self.buffer) > self.max_buffer_size:
+                    self.buffer.pop(0)  # drop the oldest entry
+            all_snr = get_all_snr(self.noise_scheduler, loss.device)
+            snr: torch.Tensor = torch.stack([all_snr[t] for t in timesteps]).detach().float().to(loss.device)  # NOTE(review): indexed by raw timestep - confirm
+        base_snrs = snr.clone().detach()  # unmodified snr values handed back to the caller
+        snr.requires_grad = True
+        snr = (snr + self.offset_1) * self.scale + self.offset_2
+
+        gamma_over_snr = torch.div(torch.ones_like(snr) * self.gamma, snr)
+        snr_weight = torch.abs(gamma_over_snr).float().to(loss.device)  # directly using gamma over snr
+        snr_adjusted_loss = loss * snr_weight
+        with torch.no_grad():
+            target = torch.mean(torch.stack(self.buffer)).detach()  # mean of the buffered losses
+
+        # local_loss = torch.mean(torch.abs(snr_adjusted_loss - target))
+        squared_differences = (snr_adjusted_loss - target) ** 2
+        local_loss = torch.mean(squared_differences)  # mean squared distance of the weighted loss from the target
+        local_loss.backward()
+        self.optimizer.step()
+        self.optimizer.zero_grad()
+
+        return base_snrs, self.gamma.detach(), self.offset_1.detach(), self.offset_2.detach(), self.scale.detach()
+
+
+def apply_learnable_snr_gos(
+        loss,
+        timesteps,
+        learnable_snr_trainer: LearnableSNRGamma
+):
+    """Weight `loss` by |gamma / remapped snr| using the values returned by the trainer."""
+    # forward() hands back the base snr plus detached gamma / offsets / scale
+    learned = learnable_snr_trainer.forward(loss, timesteps)
+    base_snr, gamma, offset_1, offset_2, scale = learned
+    remapped_snr = (base_snr + offset_1) * scale + offset_2
+
+    numerator = torch.ones_like(remapped_snr) * gamma
+    weight = torch.div(numerator, remapped_snr).abs().float().to(loss.device)
+
+    return loss * weight
+
 
 def apply_snr_weight(
         loss,
         timesteps,
         noise_scheduler: Union['DDPMScheduler'],
-        gamma
+        gamma,
+        fixed=False,  # True skips the clamp at 1 and uses gamma / snr directly
 ):
-    # will get it form noise scheduler if exist or will calculate it if not
+    # will get it from noise scheduler if exist or will calculate it if not
     all_snr = get_all_snr(noise_scheduler, loss.device)
-
-    snr = torch.stack([all_snr[t] for t in timesteps])
+    step_indices = [(noise_scheduler.timesteps == t).nonzero().item() for t in timesteps]  # .item() raises unless exactly one match
+    snr = torch.stack([all_snr[t] for t in step_indices])  # NOTE(review): all_snr indexed by schedule position - confirm ordering
     gamma_over_snr = torch.div(torch.ones_like(snr) * gamma, snr)
-    snr_weight = torch.minimum(gamma_over_snr, torch.ones_like(gamma_over_snr)).float().to(loss.device)  # from paper
-    loss = loss * snr_weight
-    return loss
+    if fixed:
+        snr_weight = gamma_over_snr.float().to(loss.device)  # directly using gamma over snr
+    else:
+        snr_weight = torch.minimum(gamma_over_snr, torch.ones_like(gamma_over_snr)).float().to(loss.device)  # min(gamma / snr, 1)
+    snr_adjusted_loss = loss * snr_weight
+
+    return snr_adjusted_loss