diff --git a/testing/generate_weight_mappings.py b/testing/generate_weight_mappings.py
index cf544fb..17425a5 100644
--- a/testing/generate_weight_mappings.py
+++ b/testing/generate_weight_mappings.py
@@ -2,6 +2,10 @@ import argparse
 import gc
 import os
 import re
+# add project root to sys path
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import torch
 from diffusers.loaders import LoraLoaderMixin
@@ -90,90 +94,134 @@ matched_diffusers_keys = []
 error_margin = 1e-4
 
+te_suffix = ''
+proj_pattern_weight = None
+proj_pattern_bias = None
+text_proj_layer = None
 if args.sdxl:
+    te_suffix = '1'
+    ldm_res_block_prefix = "conditioner.embedders.1.model.transformer.resblocks"
+    proj_pattern_weight = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight"
+    proj_pattern_bias = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias"
+    text_proj_layer = "conditioner.embedders.1.model.text_projection"
+if args.sd2:
+    te_suffix = ''
+    ldm_res_block_prefix = "cond_stage_model.model.transformer.resblocks"
+    proj_pattern_weight = r"cond_stage_model\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight"
+    proj_pattern_bias = r"cond_stage_model\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias"
+    text_proj_layer = "cond_stage_model.model.text_projection"
+
+if args.sdxl or args.sd2:
+    if text_proj_layer in ldm_dict_keys:
+        # d_model = int(checkpoint[prefix + "text_projection"].shape[0]))
+        d_model = int(ldm_state_dict[text_proj_layer].shape[0])
+    else:
+        d_model = 1024
+
     # do pre known merging
     for ldm_key in ldm_dict_keys:
-        pattern = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight"
-        match = re.match(pattern, ldm_key)
-        if match:
-            number = int(match.group(1))
-            new_val = torch.cat([
-                diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.weight"],
-                diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.weight"],
-                diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.weight"],
-            ], dim=0)
-            # add to matched so we dont check them
-            matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.weight")
-            matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.weight")
-            matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.weight")
-            # make diffusers convertable_dict
-            diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.MERGED.weight"] = new_val
+        try:
+            match = re.match(proj_pattern_weight, ldm_key)
+            if match:
+                number = int(match.group(1))
+                new_val = torch.cat([
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.weight"],
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight"],
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight"],
+                ], dim=0)
+                # add to matched so we don't check them
+                matched_diffusers_keys.append(
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.weight")
+                matched_diffusers_keys.append(
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight")
+                matched_diffusers_keys.append(
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight")
+                # make diffusers convertible dict
+                diffusers_state_dict[
+                    f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight"] = new_val
 
-            # add operator
-            ldm_operator_map[ldm_key] = {
-                "cat": [
-                    f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.weight",
-                    f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.weight",
-                    f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.weight",
-                ],
-                "target": f"te1_text_model.encoder.layers.{number}.self_attn.MERGED.weight"
-            }
+                # add operator
+                ldm_operator_map[ldm_key] = {
+                    "cat": [
+                        f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.weight",
+                        f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight",
+                        f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight",
+                    ],
+                    "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight"
+                }
 
-            if "conditioner.embedders.1.model.text_projection" in ldm_dict_keys:
-                # d_model = int(checkpoint[prefix + "text_projection"].shape[0]))
-                d_model = int(ldm_state_dict["conditioner.embedders.1.model.text_projection"].shape[0])
-            else:
-                d_model = 1024
+                # text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :]
+                # text_model_dict[new_key + ".k_proj.weight"] = checkpoint[key][d_model: d_model * 2, :]
+                # text_model_dict[new_key + ".v_proj.weight"] = checkpoint[key][d_model * 2:, :]
 
-            # text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :]
-            # text_model_dict[new_key + ".k_proj.weight"] = checkpoint[key][d_model: d_model * 2, :]
-            # text_model_dict[new_key + ".v_proj.weight"] = checkpoint[key][d_model * 2:, :]
+                # add diffusers operators
+                diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.weight"] = {
+                    "slice": [
+                        f"{ldm_res_block_prefix}.{number}.attn.in_proj_weight",
+                        f"0:{d_model}, :"
+                    ]
+                }
+                diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight"] = {
+                    "slice": [
+                        f"{ldm_res_block_prefix}.{number}.attn.in_proj_weight",
+                        f"{d_model}:{d_model * 2}, :"
+                    ]
+                }
+                diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight"] = {
+                    "slice": [
+                        f"{ldm_res_block_prefix}.{number}.attn.in_proj_weight",
+                        f"{d_model * 2}:, :"
+                    ]
+                }
 
-            # add diffusers operators
-            diffusers_operator_map[f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.weight"] = {
-                "slice": [
-                    f"conditioner.embedders.1.model.transformer.resblocks.{number}.attn.in_proj_weight",
-                    f"0:{d_model}, :"
-                ]
-            }
-            diffusers_operator_map[f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.weight"] = {
-                "slice": [
-                    f"conditioner.embedders.1.model.transformer.resblocks.{number}.attn.in_proj_weight",
-                    f"{d_model}:{d_model * 2}, :"
-                ]
-            }
-            diffusers_operator_map[f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.weight"] = {
-                "slice": [
-                    f"conditioner.embedders.1.model.transformer.resblocks.{number}.attn.in_proj_weight",
-                    f"{d_model * 2}:, :"
-                ]
-            }
 
+            match = re.match(proj_pattern_bias, ldm_key)
+            if match:
+                number = int(match.group(1))
+                new_val = torch.cat([
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.bias"],
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias"],
+                    diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias"],
+                ], dim=0)
+                # add to matched so we don't check them
matched_diffusers_keys.append(f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.bias") + matched_diffusers_keys.append(f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias") + matched_diffusers_keys.append(f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias") + # make diffusers convertable_dict + diffusers_state_dict[ + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias"] = new_val - pattern = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias" - match = re.match(pattern, ldm_key) - if match: - number = int(match.group(1)) - new_val = torch.cat([ - diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.bias"], - diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.bias"], - diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.bias"], - ], dim=0) - # add to matched so we dont check them - matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.bias") - matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.bias") - matched_diffusers_keys.append(f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.bias") - # make diffusers convertable_dict - diffusers_state_dict[f"te1_text_model.encoder.layers.{number}.self_attn.MERGED.bias"] = new_val + # add operator + ldm_operator_map[ldm_key] = { + "cat": [ + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.bias", + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias", + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias", + ], + # "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias" + } - # add operator - ldm_operator_map[ldm_key] = { - "cat": [ - f"te1_text_model.encoder.layers.{number}.self_attn.q_proj.bias", - f"te1_text_model.encoder.layers.{number}.self_attn.k_proj.bias", - f"te1_text_model.encoder.layers.{number}.self_attn.v_proj.bias", - ], - "target": f"te1_text_model.encoder.layers.{number}.self_attn.MERGED.bias" - } + # add diffusers operators + diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.bias"] = { + "slice": [ + f"{ldm_res_block_prefix}.{number}.attn.in_proj_bias", + f"0:{d_model}, :" + ] + } + diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias"] = { + "slice": [ + f"{ldm_res_block_prefix}.{number}.attn.in_proj_bias", + f"{d_model}:{d_model * 2}, :" + ] + } + diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias"] = { + "slice": [ + f"{ldm_res_block_prefix}.{number}.attn.in_proj_bias", + f"{d_model * 2}:, :" + ] + } + except Exception as e: + print(f"Error on key {ldm_key}") + print(e) # update keys diffusers_dict_keys = list(diffusers_state_dict.keys()) @@ -275,14 +324,10 @@ if has_unmatched_keys: weight = ldm_state_dict[key] weight_min = weight.min().item() weight_max = weight.max().item() - weight_mean = weight.mean().item() - weight_std = weight.std().item() unmatched_obj['ldm'][key] = { 'shape': weight.shape, "min": weight_min, "max": weight_max, - "mean": weight_mean, - "std": weight_std, } del weight flush() @@ -292,14 +337,10 @@ if has_unmatched_keys: weight = diffusers_state_dict[key] weight_min = weight.min().item() weight_max = weight.max().item() - weight_mean = weight.mean().item() - weight_std = weight.std().item() unmatched_obj['diffusers'][key] = 
{ "shape": weight.shape, "min": weight_min, "max": weight_max, - "mean": weight_mean, - "std": weight_std, } del weight flush() @@ -318,7 +359,6 @@ for key in unmatched_ldm_keys: save_file(remaining_ldm_values, os.path.join(KEYMAPS_FOLDER, f'{name}_ldm_base.safetensors')) print(f'Saved remaining ldm values to {os.path.join(KEYMAPS_FOLDER, f"{name}_ldm_base.safetensors")}') - dest_path = os.path.join(KEYMAPS_FOLDER, f'{name}.json') save_obj = OrderedDict() save_obj["ldm_diffusers_keymap"] = ldm_diffusers_keymap diff --git a/testing/test_model_load_save.py b/testing/test_model_load_save.py index ddfec50..dd4883c 100644 --- a/testing/test_model_load_save.py +++ b/testing/test_model_load_save.py @@ -13,7 +13,7 @@ import json from toolkit.config_modules import ModelConfig from toolkit.paths import KEYMAPS_ROOT -from toolkit.saving import convert_state_dict_to_ldm_with_mapping +from toolkit.saving import convert_state_dict_to_ldm_with_mapping, get_ldm_state_dict_from_diffusers from toolkit.stable_diffusion_model import StableDiffusion # this was just used to match the vae keys to the diffusers keys @@ -39,6 +39,12 @@ parser.add_argument( help='Is the model an XL model' ) +parser.add_argument( + '--is_v2', + action='store_true', + help='Is the model a v2 model' +) + args = parser.parse_args() find_matches = False @@ -58,19 +64,20 @@ sd = StableDiffusion( ) sd.load_model() -if not args.is_xl: - # not supported yet - raise NotImplementedError("Only SDXL is supported at this time with this method") # load our base base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl_ldm_base.safetensors') mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl.json') print("Converting model back to LDM") +version_string = '1' +if args.is_v2: + version_string = '2' +if args.is_xl: + version_string = 'sdxl' # convert the state dict -state_dict_file_2 = convert_state_dict_to_ldm_with_mapping( +state_dict_file_2 = get_ldm_state_dict_from_diffusers( sd.state_dict(), - mapping_path, - base_path, + version_string, device='cpu', dtype=dtype ) diff --git a/toolkit/keymaps/stable_diffusion_sd1.json b/toolkit/keymaps/stable_diffusion_sd1.json new file mode 100644 index 0000000..bc00f08 --- /dev/null +++ b/toolkit/keymaps/stable_diffusion_sd1.json @@ -0,0 +1,1962 @@ +{ + "ldm_diffusers_keymap": { + "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight": "te_text_model.embeddings.position_embedding.weight", + "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight": "te_text_model.embeddings.token_embedding.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias": "te_text_model.encoder.layers.0.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight": "te_text_model.encoder.layers.0.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias": "te_text_model.encoder.layers.0.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight": "te_text_model.encoder.layers.0.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias": "te_text_model.encoder.layers.0.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight": "te_text_model.encoder.layers.0.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias": "te_text_model.encoder.layers.0.mlp.fc2.bias", + 
"cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight": "te_text_model.encoder.layers.0.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": "te_text_model.encoder.layers.0.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": "te_text_model.encoder.layers.0.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": "te_text_model.encoder.layers.0.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": "te_text_model.encoder.layers.0.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": "te_text_model.encoder.layers.0.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": "te_text_model.encoder.layers.0.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": "te_text_model.encoder.layers.0.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": "te_text_model.encoder.layers.0.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias": "te_text_model.encoder.layers.1.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight": "te_text_model.encoder.layers.1.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias": "te_text_model.encoder.layers.1.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight": "te_text_model.encoder.layers.1.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias": "te_text_model.encoder.layers.1.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight": "te_text_model.encoder.layers.1.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias": "te_text_model.encoder.layers.1.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight": "te_text_model.encoder.layers.1.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": "te_text_model.encoder.layers.1.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": "te_text_model.encoder.layers.1.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": "te_text_model.encoder.layers.1.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": "te_text_model.encoder.layers.1.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": "te_text_model.encoder.layers.1.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": "te_text_model.encoder.layers.1.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": "te_text_model.encoder.layers.1.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": "te_text_model.encoder.layers.1.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias": 
"te_text_model.encoder.layers.10.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight": "te_text_model.encoder.layers.10.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias": "te_text_model.encoder.layers.10.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight": "te_text_model.encoder.layers.10.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias": "te_text_model.encoder.layers.10.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight": "te_text_model.encoder.layers.10.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias": "te_text_model.encoder.layers.10.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight": "te_text_model.encoder.layers.10.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": "te_text_model.encoder.layers.10.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": "te_text_model.encoder.layers.10.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": "te_text_model.encoder.layers.10.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": "te_text_model.encoder.layers.10.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": "te_text_model.encoder.layers.10.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": "te_text_model.encoder.layers.10.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": "te_text_model.encoder.layers.10.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": "te_text_model.encoder.layers.10.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias": "te_text_model.encoder.layers.11.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight": "te_text_model.encoder.layers.11.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias": "te_text_model.encoder.layers.11.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight": "te_text_model.encoder.layers.11.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias": "te_text_model.encoder.layers.11.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight": "te_text_model.encoder.layers.11.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias": "te_text_model.encoder.layers.11.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight": "te_text_model.encoder.layers.11.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te_text_model.encoder.layers.2.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": "te_text_model.encoder.layers.11.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": 
"te_text_model.encoder.layers.11.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": "te_text_model.encoder.layers.11.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": "te_text_model.encoder.layers.11.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": "te_text_model.encoder.layers.11.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": "te_text_model.encoder.layers.11.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": "te_text_model.encoder.layers.11.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias": "te_text_model.encoder.layers.2.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight": "te_text_model.encoder.layers.2.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias": "te_text_model.encoder.layers.2.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight": "te_text_model.encoder.layers.2.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias": "te_text_model.encoder.layers.2.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight": "te_text_model.encoder.layers.2.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias": "te_text_model.encoder.layers.2.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight": "te_text_model.encoder.layers.2.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te_text_model.encoder.layers.3.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": "te_text_model.encoder.layers.2.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": "te_text_model.encoder.layers.2.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": "te_text_model.encoder.layers.2.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": "te_text_model.encoder.layers.2.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": "te_text_model.encoder.layers.2.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": "te_text_model.encoder.layers.2.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": "te_text_model.encoder.layers.2.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias": "te_text_model.encoder.layers.3.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight": "te_text_model.encoder.layers.3.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias": "te_text_model.encoder.layers.3.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight": "te_text_model.encoder.layers.3.layer_norm2.weight", + 
"cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias": "te_text_model.encoder.layers.3.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight": "te_text_model.encoder.layers.3.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias": "te_text_model.encoder.layers.3.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight": "te_text_model.encoder.layers.3.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te_text_model.encoder.layers.11.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": "te_text_model.encoder.layers.3.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": "te_text_model.encoder.layers.3.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": "te_text_model.encoder.layers.3.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": "te_text_model.encoder.layers.3.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": "te_text_model.encoder.layers.3.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": "te_text_model.encoder.layers.3.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": "te_text_model.encoder.layers.3.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias": "te_text_model.encoder.layers.4.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight": "te_text_model.encoder.layers.4.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias": "te_text_model.encoder.layers.4.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight": "te_text_model.encoder.layers.4.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias": "te_text_model.encoder.layers.4.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight": "te_text_model.encoder.layers.4.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias": "te_text_model.encoder.layers.4.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight": "te_text_model.encoder.layers.4.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": "te_text_model.encoder.layers.4.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": "te_text_model.encoder.layers.4.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": "te_text_model.encoder.layers.4.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": "te_text_model.encoder.layers.4.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": "te_text_model.encoder.layers.4.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": "te_text_model.encoder.layers.4.self_attn.q_proj.weight", + 
"cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": "te_text_model.encoder.layers.4.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": "te_text_model.encoder.layers.4.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias": "te_text_model.encoder.layers.5.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight": "te_text_model.encoder.layers.5.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias": "te_text_model.encoder.layers.5.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight": "te_text_model.encoder.layers.5.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias": "te_text_model.encoder.layers.5.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight": "te_text_model.encoder.layers.5.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias": "te_text_model.encoder.layers.5.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight": "te_text_model.encoder.layers.5.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": "te_text_model.encoder.layers.5.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": "te_text_model.encoder.layers.5.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": "te_text_model.encoder.layers.5.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": "te_text_model.encoder.layers.5.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": "te_text_model.encoder.layers.5.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": "te_text_model.encoder.layers.5.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": "te_text_model.encoder.layers.5.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": "te_text_model.encoder.layers.5.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias": "te_text_model.encoder.layers.6.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight": "te_text_model.encoder.layers.6.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias": "te_text_model.encoder.layers.6.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight": "te_text_model.encoder.layers.6.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias": "te_text_model.encoder.layers.6.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight": "te_text_model.encoder.layers.6.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias": "te_text_model.encoder.layers.6.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight": "te_text_model.encoder.layers.6.mlp.fc2.weight", + 
"cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": "te_text_model.encoder.layers.6.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": "te_text_model.encoder.layers.6.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": "te_text_model.encoder.layers.6.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": "te_text_model.encoder.layers.6.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": "te_text_model.encoder.layers.6.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": "te_text_model.encoder.layers.6.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": "te_text_model.encoder.layers.6.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": "te_text_model.encoder.layers.6.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias": "te_text_model.encoder.layers.7.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight": "te_text_model.encoder.layers.7.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias": "te_text_model.encoder.layers.7.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight": "te_text_model.encoder.layers.7.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias": "te_text_model.encoder.layers.7.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight": "te_text_model.encoder.layers.7.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias": "te_text_model.encoder.layers.7.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight": "te_text_model.encoder.layers.7.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": "te_text_model.encoder.layers.7.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": "te_text_model.encoder.layers.7.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": "te_text_model.encoder.layers.7.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": "te_text_model.encoder.layers.7.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": "te_text_model.encoder.layers.7.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": "te_text_model.encoder.layers.7.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": "te_text_model.encoder.layers.7.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": "te_text_model.encoder.layers.7.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias": "te_text_model.encoder.layers.8.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight": 
"te_text_model.encoder.layers.8.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias": "te_text_model.encoder.layers.8.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight": "te_text_model.encoder.layers.8.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias": "te_text_model.encoder.layers.8.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight": "te_text_model.encoder.layers.8.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias": "te_text_model.encoder.layers.8.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight": "te_text_model.encoder.layers.8.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": "te_text_model.encoder.layers.8.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": "te_text_model.encoder.layers.8.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": "te_text_model.encoder.layers.8.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": "te_text_model.encoder.layers.8.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": "te_text_model.encoder.layers.8.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": "te_text_model.encoder.layers.8.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": "te_text_model.encoder.layers.8.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": "te_text_model.encoder.layers.8.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias": "te_text_model.encoder.layers.9.layer_norm1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight": "te_text_model.encoder.layers.9.layer_norm1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias": "te_text_model.encoder.layers.9.layer_norm2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight": "te_text_model.encoder.layers.9.layer_norm2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias": "te_text_model.encoder.layers.9.mlp.fc1.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight": "te_text_model.encoder.layers.9.mlp.fc1.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias": "te_text_model.encoder.layers.9.mlp.fc2.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight": "te_text_model.encoder.layers.9.mlp.fc2.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": "te_text_model.encoder.layers.9.self_attn.k_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": "te_text_model.encoder.layers.9.self_attn.k_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": "te_text_model.encoder.layers.9.self_attn.out_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": 
"te_text_model.encoder.layers.9.self_attn.out_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": "te_text_model.encoder.layers.9.self_attn.q_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": "te_text_model.encoder.layers.9.self_attn.q_proj.weight", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": "te_text_model.encoder.layers.9.self_attn.v_proj.bias", + "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": "te_text_model.encoder.layers.9.self_attn.v_proj.weight", + "cond_stage_model.transformer.text_model.final_layer_norm.bias": "te_text_model.final_layer_norm.bias", + "cond_stage_model.transformer.text_model.final_layer_norm.weight": "te_text_model.final_layer_norm.weight", + "first_stage_model.decoder.conv_in.bias": "vae_decoder.conv_in.bias", + "first_stage_model.decoder.conv_in.weight": "vae_decoder.conv_in.weight", + "first_stage_model.decoder.conv_out.bias": "vae_decoder.conv_out.bias", + "first_stage_model.decoder.conv_out.weight": "vae_decoder.conv_out.weight", + "first_stage_model.decoder.mid.attn_1.k.bias": "vae_decoder.mid_block.attentions.0.to_k.bias", + "first_stage_model.decoder.mid.attn_1.k.weight": "vae_decoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.decoder.mid.attn_1.norm.bias": "vae_decoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.decoder.mid.attn_1.norm.weight": "vae_decoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.decoder.mid.attn_1.proj_out.bias": "vae_decoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.decoder.mid.attn_1.proj_out.weight": "vae_decoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.decoder.mid.attn_1.q.bias": "vae_decoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.decoder.mid.attn_1.q.weight": "vae_decoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.decoder.mid.attn_1.v.bias": "vae_decoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.decoder.mid.attn_1.v.weight": "vae_decoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.decoder.mid.block_1.conv1.bias": "vae_decoder.mid_block.resnets.0.conv1.bias", + "first_stage_model.decoder.mid.block_1.conv1.weight": "vae_decoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.decoder.mid.block_1.conv2.bias": "vae_decoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.decoder.mid.block_1.conv2.weight": "vae_decoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.decoder.mid.block_1.norm1.bias": "vae_decoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.decoder.mid.block_1.norm1.weight": "vae_decoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.decoder.mid.block_1.norm2.bias": "vae_decoder.mid_block.resnets.0.norm2.bias", + "first_stage_model.decoder.mid.block_1.norm2.weight": "vae_decoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.decoder.mid.block_2.conv1.bias": "vae_decoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.decoder.mid.block_2.conv1.weight": "vae_decoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.decoder.mid.block_2.conv2.bias": "vae_decoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.decoder.mid.block_2.conv2.weight": "vae_decoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.decoder.mid.block_2.norm1.bias": "vae_decoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.decoder.mid.block_2.norm1.weight": 
"vae_decoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.decoder.mid.block_2.norm2.bias": "vae_decoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.decoder.mid.block_2.norm2.weight": "vae_decoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.decoder.norm_out.bias": "vae_decoder.conv_norm_out.bias", + "first_stage_model.decoder.norm_out.weight": "vae_decoder.conv_norm_out.weight", + "first_stage_model.decoder.up.0.block.0.conv1.bias": "vae_decoder.up_blocks.3.resnets.0.conv1.bias", + "first_stage_model.decoder.up.0.block.0.conv1.weight": "vae_decoder.up_blocks.3.resnets.0.conv1.weight", + "first_stage_model.decoder.up.0.block.0.conv2.bias": "vae_decoder.up_blocks.3.resnets.0.conv2.bias", + "first_stage_model.decoder.up.0.block.0.conv2.weight": "vae_decoder.up_blocks.3.resnets.0.conv2.weight", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.0.block.0.norm1.bias": "vae_decoder.up_blocks.3.resnets.0.norm1.bias", + "first_stage_model.decoder.up.0.block.0.norm1.weight": "vae_decoder.up_blocks.3.resnets.0.norm1.weight", + "first_stage_model.decoder.up.0.block.0.norm2.bias": "vae_decoder.up_blocks.3.resnets.0.norm2.bias", + "first_stage_model.decoder.up.0.block.0.norm2.weight": "vae_decoder.up_blocks.3.resnets.0.norm2.weight", + "first_stage_model.decoder.up.0.block.1.conv1.bias": "vae_decoder.up_blocks.3.resnets.1.conv1.bias", + "first_stage_model.decoder.up.0.block.1.conv1.weight": "vae_decoder.up_blocks.3.resnets.1.conv1.weight", + "first_stage_model.decoder.up.0.block.1.conv2.bias": "vae_decoder.up_blocks.3.resnets.1.conv2.bias", + "first_stage_model.decoder.up.0.block.1.conv2.weight": "vae_decoder.up_blocks.3.resnets.1.conv2.weight", + "first_stage_model.decoder.up.0.block.1.norm1.bias": "vae_decoder.up_blocks.3.resnets.1.norm1.bias", + "first_stage_model.decoder.up.0.block.1.norm1.weight": "vae_decoder.up_blocks.3.resnets.1.norm1.weight", + "first_stage_model.decoder.up.0.block.1.norm2.bias": "vae_decoder.up_blocks.3.resnets.1.norm2.bias", + "first_stage_model.decoder.up.0.block.1.norm2.weight": "vae_decoder.up_blocks.3.resnets.1.norm2.weight", + "first_stage_model.decoder.up.0.block.2.conv1.bias": "vae_decoder.up_blocks.3.resnets.2.conv1.bias", + "first_stage_model.decoder.up.0.block.2.conv1.weight": "vae_decoder.up_blocks.3.resnets.2.conv1.weight", + "first_stage_model.decoder.up.0.block.2.conv2.bias": "vae_decoder.up_blocks.3.resnets.2.conv2.bias", + "first_stage_model.decoder.up.0.block.2.conv2.weight": "vae_decoder.up_blocks.3.resnets.2.conv2.weight", + "first_stage_model.decoder.up.0.block.2.norm1.bias": "vae_decoder.up_blocks.3.resnets.2.norm1.bias", + "first_stage_model.decoder.up.0.block.2.norm1.weight": "vae_decoder.up_blocks.3.resnets.2.norm1.weight", + "first_stage_model.decoder.up.0.block.2.norm2.bias": "vae_decoder.up_blocks.3.resnets.2.norm2.bias", + "first_stage_model.decoder.up.0.block.2.norm2.weight": "vae_decoder.up_blocks.3.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.block.0.conv1.bias": "vae_decoder.up_blocks.2.resnets.0.conv1.bias", + "first_stage_model.decoder.up.1.block.0.conv1.weight": "vae_decoder.up_blocks.2.resnets.0.conv1.weight", + "first_stage_model.decoder.up.1.block.0.conv2.bias": "vae_decoder.up_blocks.2.resnets.0.conv2.bias", + "first_stage_model.decoder.up.1.block.0.conv2.weight": 
"vae_decoder.up_blocks.2.resnets.0.conv2.weight", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.1.block.0.norm1.bias": "vae_decoder.up_blocks.2.resnets.0.norm1.bias", + "first_stage_model.decoder.up.1.block.0.norm1.weight": "vae_decoder.up_blocks.2.resnets.0.norm1.weight", + "first_stage_model.decoder.up.1.block.0.norm2.bias": "vae_decoder.up_blocks.2.resnets.0.norm2.bias", + "first_stage_model.decoder.up.1.block.0.norm2.weight": "vae_decoder.up_blocks.2.resnets.0.norm2.weight", + "first_stage_model.decoder.up.1.block.1.conv1.bias": "vae_decoder.up_blocks.2.resnets.1.conv1.bias", + "first_stage_model.decoder.up.1.block.1.conv1.weight": "vae_decoder.up_blocks.2.resnets.1.conv1.weight", + "first_stage_model.decoder.up.1.block.1.conv2.bias": "vae_decoder.up_blocks.2.resnets.1.conv2.bias", + "first_stage_model.decoder.up.1.block.1.conv2.weight": "vae_decoder.up_blocks.2.resnets.1.conv2.weight", + "first_stage_model.decoder.up.1.block.1.norm1.bias": "vae_decoder.up_blocks.2.resnets.1.norm1.bias", + "first_stage_model.decoder.up.1.block.1.norm1.weight": "vae_decoder.up_blocks.2.resnets.1.norm1.weight", + "first_stage_model.decoder.up.1.block.1.norm2.bias": "vae_decoder.up_blocks.2.resnets.1.norm2.bias", + "first_stage_model.decoder.up.1.block.1.norm2.weight": "vae_decoder.up_blocks.2.resnets.1.norm2.weight", + "first_stage_model.decoder.up.1.block.2.conv1.bias": "vae_decoder.up_blocks.2.resnets.2.conv1.bias", + "first_stage_model.decoder.up.1.block.2.conv1.weight": "vae_decoder.up_blocks.2.resnets.2.conv1.weight", + "first_stage_model.decoder.up.1.block.2.conv2.bias": "vae_decoder.up_blocks.2.resnets.2.conv2.bias", + "first_stage_model.decoder.up.1.block.2.conv2.weight": "vae_decoder.up_blocks.2.resnets.2.conv2.weight", + "first_stage_model.decoder.up.1.block.2.norm1.bias": "vae_decoder.up_blocks.2.resnets.2.norm1.bias", + "first_stage_model.decoder.up.1.block.2.norm1.weight": "vae_decoder.up_blocks.2.resnets.2.norm1.weight", + "first_stage_model.decoder.up.1.block.2.norm2.bias": "vae_decoder.up_blocks.2.resnets.2.norm2.bias", + "first_stage_model.decoder.up.1.block.2.norm2.weight": "vae_decoder.up_blocks.2.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.upsample.conv.bias": "vae_decoder.up_blocks.2.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.1.upsample.conv.weight": "vae_decoder.up_blocks.2.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.2.block.0.conv1.bias": "vae_decoder.up_blocks.1.resnets.0.conv1.bias", + "first_stage_model.decoder.up.2.block.0.conv1.weight": "vae_decoder.up_blocks.1.resnets.0.conv1.weight", + "first_stage_model.decoder.up.2.block.0.conv2.bias": "vae_decoder.up_blocks.1.resnets.0.conv2.bias", + "first_stage_model.decoder.up.2.block.0.conv2.weight": "vae_decoder.up_blocks.1.resnets.0.conv2.weight", + "first_stage_model.decoder.up.2.block.0.norm1.bias": "vae_decoder.up_blocks.1.resnets.0.norm1.bias", + "first_stage_model.decoder.up.2.block.0.norm1.weight": "vae_decoder.up_blocks.1.resnets.0.norm1.weight", + "first_stage_model.decoder.up.2.block.0.norm2.bias": "vae_decoder.up_blocks.1.resnets.0.norm2.bias", + "first_stage_model.decoder.up.2.block.0.norm2.weight": "vae_decoder.up_blocks.1.resnets.0.norm2.weight", + "first_stage_model.decoder.up.2.block.1.conv1.bias": "vae_decoder.up_blocks.1.resnets.1.conv1.bias", + 
"first_stage_model.decoder.up.2.block.1.conv1.weight": "vae_decoder.up_blocks.1.resnets.1.conv1.weight", + "first_stage_model.decoder.up.2.block.1.conv2.bias": "vae_decoder.up_blocks.1.resnets.1.conv2.bias", + "first_stage_model.decoder.up.2.block.1.conv2.weight": "vae_decoder.up_blocks.1.resnets.1.conv2.weight", + "first_stage_model.decoder.up.2.block.1.norm1.bias": "vae_decoder.up_blocks.1.resnets.1.norm1.bias", + "first_stage_model.decoder.up.2.block.1.norm1.weight": "vae_decoder.up_blocks.1.resnets.1.norm1.weight", + "first_stage_model.decoder.up.2.block.1.norm2.bias": "vae_decoder.up_blocks.1.resnets.1.norm2.bias", + "first_stage_model.decoder.up.2.block.1.norm2.weight": "vae_decoder.up_blocks.1.resnets.1.norm2.weight", + "first_stage_model.decoder.up.2.block.2.conv1.bias": "vae_decoder.up_blocks.1.resnets.2.conv1.bias", + "first_stage_model.decoder.up.2.block.2.conv1.weight": "vae_decoder.up_blocks.1.resnets.2.conv1.weight", + "first_stage_model.decoder.up.2.block.2.conv2.bias": "vae_decoder.up_blocks.1.resnets.2.conv2.bias", + "first_stage_model.decoder.up.2.block.2.conv2.weight": "vae_decoder.up_blocks.1.resnets.2.conv2.weight", + "first_stage_model.decoder.up.2.block.2.norm1.bias": "vae_decoder.up_blocks.1.resnets.2.norm1.bias", + "first_stage_model.decoder.up.2.block.2.norm1.weight": "vae_decoder.up_blocks.1.resnets.2.norm1.weight", + "first_stage_model.decoder.up.2.block.2.norm2.bias": "vae_decoder.up_blocks.1.resnets.2.norm2.bias", + "first_stage_model.decoder.up.2.block.2.norm2.weight": "vae_decoder.up_blocks.1.resnets.2.norm2.weight", + "first_stage_model.decoder.up.2.upsample.conv.bias": "vae_decoder.up_blocks.1.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.2.upsample.conv.weight": "vae_decoder.up_blocks.1.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.3.block.0.conv1.bias": "vae_decoder.up_blocks.0.resnets.0.conv1.bias", + "first_stage_model.decoder.up.3.block.0.conv1.weight": "vae_decoder.up_blocks.0.resnets.0.conv1.weight", + "first_stage_model.decoder.up.3.block.0.conv2.bias": "vae_decoder.up_blocks.0.resnets.0.conv2.bias", + "first_stage_model.decoder.up.3.block.0.conv2.weight": "vae_decoder.up_blocks.0.resnets.0.conv2.weight", + "first_stage_model.decoder.up.3.block.0.norm1.bias": "vae_decoder.up_blocks.0.resnets.0.norm1.bias", + "first_stage_model.decoder.up.3.block.0.norm1.weight": "vae_decoder.up_blocks.0.resnets.0.norm1.weight", + "first_stage_model.decoder.up.3.block.0.norm2.bias": "vae_decoder.up_blocks.0.resnets.0.norm2.bias", + "first_stage_model.decoder.up.3.block.0.norm2.weight": "vae_decoder.up_blocks.0.resnets.0.norm2.weight", + "first_stage_model.decoder.up.3.block.1.conv1.bias": "vae_decoder.up_blocks.0.resnets.1.conv1.bias", + "first_stage_model.decoder.up.3.block.1.conv1.weight": "vae_decoder.up_blocks.0.resnets.1.conv1.weight", + "first_stage_model.decoder.up.3.block.1.conv2.bias": "vae_decoder.up_blocks.0.resnets.1.conv2.bias", + "first_stage_model.decoder.up.3.block.1.conv2.weight": "vae_decoder.up_blocks.0.resnets.1.conv2.weight", + "first_stage_model.decoder.up.3.block.1.norm1.bias": "vae_decoder.up_blocks.0.resnets.1.norm1.bias", + "first_stage_model.decoder.up.3.block.1.norm1.weight": "vae_decoder.up_blocks.0.resnets.1.norm1.weight", + "first_stage_model.decoder.up.3.block.1.norm2.bias": "vae_decoder.up_blocks.0.resnets.1.norm2.bias", + "first_stage_model.decoder.up.3.block.1.norm2.weight": "vae_decoder.up_blocks.0.resnets.1.norm2.weight", + "first_stage_model.decoder.up.3.block.2.conv1.bias": 
"vae_decoder.up_blocks.0.resnets.2.conv1.bias", + "first_stage_model.decoder.up.3.block.2.conv1.weight": "vae_decoder.up_blocks.0.resnets.2.conv1.weight", + "first_stage_model.decoder.up.3.block.2.conv2.bias": "vae_decoder.up_blocks.0.resnets.2.conv2.bias", + "first_stage_model.decoder.up.3.block.2.conv2.weight": "vae_decoder.up_blocks.0.resnets.2.conv2.weight", + "first_stage_model.decoder.up.3.block.2.norm1.bias": "vae_decoder.up_blocks.0.resnets.2.norm1.bias", + "first_stage_model.decoder.up.3.block.2.norm1.weight": "vae_decoder.up_blocks.0.resnets.2.norm1.weight", + "first_stage_model.decoder.up.3.block.2.norm2.bias": "vae_decoder.up_blocks.0.resnets.2.norm2.bias", + "first_stage_model.decoder.up.3.block.2.norm2.weight": "vae_decoder.up_blocks.0.resnets.2.norm2.weight", + "first_stage_model.decoder.up.3.upsample.conv.bias": "vae_decoder.up_blocks.0.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.3.upsample.conv.weight": "vae_decoder.up_blocks.0.upsamplers.0.conv.weight", + "first_stage_model.encoder.conv_in.bias": "vae_encoder.conv_in.bias", + "first_stage_model.encoder.conv_in.weight": "vae_encoder.conv_in.weight", + "first_stage_model.encoder.conv_out.bias": "vae_encoder.conv_out.bias", + "first_stage_model.encoder.conv_out.weight": "vae_encoder.conv_out.weight", + "first_stage_model.encoder.down.0.block.0.conv1.bias": "vae_encoder.down_blocks.0.resnets.0.conv1.bias", + "first_stage_model.encoder.down.0.block.0.conv1.weight": "vae_encoder.down_blocks.0.resnets.0.conv1.weight", + "first_stage_model.encoder.down.0.block.0.conv2.bias": "vae_encoder.down_blocks.0.resnets.0.conv2.bias", + "first_stage_model.encoder.down.0.block.0.conv2.weight": "vae_encoder.down_blocks.0.resnets.0.conv2.weight", + "first_stage_model.encoder.down.0.block.0.norm1.bias": "vae_encoder.down_blocks.0.resnets.0.norm1.bias", + "first_stage_model.encoder.down.0.block.0.norm1.weight": "vae_encoder.down_blocks.0.resnets.0.norm1.weight", + "first_stage_model.encoder.down.0.block.0.norm2.bias": "vae_encoder.down_blocks.0.resnets.0.norm2.bias", + "first_stage_model.encoder.down.0.block.0.norm2.weight": "vae_encoder.down_blocks.0.resnets.0.norm2.weight", + "first_stage_model.encoder.down.0.block.1.conv1.bias": "vae_encoder.down_blocks.0.resnets.1.conv1.bias", + "first_stage_model.encoder.down.0.block.1.conv1.weight": "vae_encoder.down_blocks.0.resnets.1.conv1.weight", + "first_stage_model.encoder.down.0.block.1.conv2.bias": "vae_encoder.down_blocks.0.resnets.1.conv2.bias", + "first_stage_model.encoder.down.0.block.1.conv2.weight": "vae_encoder.down_blocks.0.resnets.1.conv2.weight", + "first_stage_model.encoder.down.0.block.1.norm1.bias": "vae_encoder.down_blocks.0.resnets.1.norm1.bias", + "first_stage_model.encoder.down.0.block.1.norm1.weight": "vae_encoder.down_blocks.0.resnets.1.norm1.weight", + "first_stage_model.encoder.down.0.block.1.norm2.bias": "vae_encoder.down_blocks.0.resnets.1.norm2.bias", + "first_stage_model.encoder.down.0.block.1.norm2.weight": "vae_encoder.down_blocks.0.resnets.1.norm2.weight", + "first_stage_model.encoder.down.0.downsample.conv.bias": "vae_encoder.down_blocks.0.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.0.downsample.conv.weight": "vae_encoder.down_blocks.0.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.1.block.0.conv1.bias": "vae_encoder.down_blocks.1.resnets.0.conv1.bias", + "first_stage_model.encoder.down.1.block.0.conv1.weight": "vae_encoder.down_blocks.1.resnets.0.conv1.weight", + "first_stage_model.encoder.down.1.block.0.conv2.bias": 
"vae_encoder.down_blocks.1.resnets.0.conv2.bias", + "first_stage_model.encoder.down.1.block.0.conv2.weight": "vae_encoder.down_blocks.1.resnets.0.conv2.weight", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.bias", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.1.block.0.norm1.bias": "vae_encoder.down_blocks.1.resnets.0.norm1.bias", + "first_stage_model.encoder.down.1.block.0.norm1.weight": "vae_encoder.down_blocks.1.resnets.0.norm1.weight", + "first_stage_model.encoder.down.1.block.0.norm2.bias": "vae_encoder.down_blocks.1.resnets.0.norm2.bias", + "first_stage_model.encoder.down.1.block.0.norm2.weight": "vae_encoder.down_blocks.1.resnets.0.norm2.weight", + "first_stage_model.encoder.down.1.block.1.conv1.bias": "vae_encoder.down_blocks.1.resnets.1.conv1.bias", + "first_stage_model.encoder.down.1.block.1.conv1.weight": "vae_encoder.down_blocks.1.resnets.1.conv1.weight", + "first_stage_model.encoder.down.1.block.1.conv2.bias": "vae_encoder.down_blocks.1.resnets.1.conv2.bias", + "first_stage_model.encoder.down.1.block.1.conv2.weight": "vae_encoder.down_blocks.1.resnets.1.conv2.weight", + "first_stage_model.encoder.down.1.block.1.norm1.bias": "vae_encoder.down_blocks.1.resnets.1.norm1.bias", + "first_stage_model.encoder.down.1.block.1.norm1.weight": "vae_encoder.down_blocks.1.resnets.1.norm1.weight", + "first_stage_model.encoder.down.1.block.1.norm2.bias": "vae_encoder.down_blocks.1.resnets.1.norm2.bias", + "first_stage_model.encoder.down.1.block.1.norm2.weight": "vae_encoder.down_blocks.1.resnets.1.norm2.weight", + "first_stage_model.encoder.down.1.downsample.conv.bias": "vae_encoder.down_blocks.1.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.1.downsample.conv.weight": "vae_encoder.down_blocks.1.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.2.block.0.conv1.bias": "vae_encoder.down_blocks.2.resnets.0.conv1.bias", + "first_stage_model.encoder.down.2.block.0.conv1.weight": "vae_encoder.down_blocks.2.resnets.0.conv1.weight", + "first_stage_model.encoder.down.2.block.0.conv2.bias": "vae_encoder.down_blocks.2.resnets.0.conv2.bias", + "first_stage_model.encoder.down.2.block.0.conv2.weight": "vae_encoder.down_blocks.2.resnets.0.conv2.weight", + "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.bias", + "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.2.block.0.norm1.bias": "vae_encoder.down_blocks.2.resnets.0.norm1.bias", + "first_stage_model.encoder.down.2.block.0.norm1.weight": "vae_encoder.down_blocks.2.resnets.0.norm1.weight", + "first_stage_model.encoder.down.2.block.0.norm2.bias": "vae_encoder.down_blocks.2.resnets.0.norm2.bias", + "first_stage_model.encoder.down.2.block.0.norm2.weight": "vae_encoder.down_blocks.2.resnets.0.norm2.weight", + "first_stage_model.encoder.down.2.block.1.conv1.bias": "vae_encoder.down_blocks.2.resnets.1.conv1.bias", + "first_stage_model.encoder.down.2.block.1.conv1.weight": "vae_encoder.down_blocks.2.resnets.1.conv1.weight", + "first_stage_model.encoder.down.2.block.1.conv2.bias": "vae_encoder.down_blocks.2.resnets.1.conv2.bias", + "first_stage_model.encoder.down.2.block.1.conv2.weight": "vae_encoder.down_blocks.2.resnets.1.conv2.weight", + 
"first_stage_model.encoder.down.2.block.1.norm1.bias": "vae_encoder.down_blocks.2.resnets.1.norm1.bias", + "first_stage_model.encoder.down.2.block.1.norm1.weight": "vae_encoder.down_blocks.2.resnets.1.norm1.weight", + "first_stage_model.encoder.down.2.block.1.norm2.bias": "vae_encoder.down_blocks.2.resnets.1.norm2.bias", + "first_stage_model.encoder.down.2.block.1.norm2.weight": "vae_encoder.down_blocks.2.resnets.1.norm2.weight", + "first_stage_model.encoder.down.2.downsample.conv.bias": "vae_encoder.down_blocks.2.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.2.downsample.conv.weight": "vae_encoder.down_blocks.2.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.3.block.0.conv1.bias": "vae_encoder.down_blocks.3.resnets.0.conv1.bias", + "first_stage_model.encoder.down.3.block.0.conv1.weight": "vae_encoder.down_blocks.3.resnets.0.conv1.weight", + "first_stage_model.encoder.down.3.block.0.conv2.bias": "vae_encoder.down_blocks.3.resnets.0.conv2.bias", + "first_stage_model.encoder.down.3.block.0.conv2.weight": "vae_encoder.down_blocks.3.resnets.0.conv2.weight", + "first_stage_model.encoder.down.3.block.0.norm1.bias": "vae_encoder.down_blocks.3.resnets.0.norm1.bias", + "first_stage_model.encoder.down.3.block.0.norm1.weight": "vae_encoder.down_blocks.3.resnets.0.norm1.weight", + "first_stage_model.encoder.down.3.block.0.norm2.bias": "vae_encoder.down_blocks.3.resnets.0.norm2.bias", + "first_stage_model.encoder.down.3.block.0.norm2.weight": "vae_encoder.down_blocks.3.resnets.0.norm2.weight", + "first_stage_model.encoder.down.3.block.1.conv1.bias": "vae_encoder.down_blocks.3.resnets.1.conv1.bias", + "first_stage_model.encoder.down.3.block.1.conv1.weight": "vae_encoder.down_blocks.3.resnets.1.conv1.weight", + "first_stage_model.encoder.down.3.block.1.conv2.bias": "vae_encoder.down_blocks.3.resnets.1.conv2.bias", + "first_stage_model.encoder.down.3.block.1.conv2.weight": "vae_encoder.down_blocks.3.resnets.1.conv2.weight", + "first_stage_model.encoder.down.3.block.1.norm1.bias": "vae_encoder.down_blocks.3.resnets.1.norm1.bias", + "first_stage_model.encoder.down.3.block.1.norm1.weight": "vae_encoder.down_blocks.3.resnets.1.norm1.weight", + "first_stage_model.encoder.down.3.block.1.norm2.bias": "vae_encoder.down_blocks.3.resnets.1.norm2.bias", + "first_stage_model.encoder.down.3.block.1.norm2.weight": "vae_encoder.down_blocks.3.resnets.1.norm2.weight", + "first_stage_model.encoder.mid.attn_1.k.bias": "vae_encoder.mid_block.attentions.0.to_k.bias", + "first_stage_model.encoder.mid.attn_1.k.weight": "vae_encoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.encoder.mid.attn_1.norm.bias": "vae_encoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.encoder.mid.attn_1.norm.weight": "vae_encoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.encoder.mid.attn_1.proj_out.bias": "vae_encoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.encoder.mid.attn_1.proj_out.weight": "vae_encoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.encoder.mid.attn_1.q.bias": "vae_encoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.encoder.mid.attn_1.q.weight": "vae_encoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.encoder.mid.attn_1.v.bias": "vae_encoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.encoder.mid.attn_1.v.weight": "vae_encoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.encoder.mid.block_1.conv1.bias": "vae_encoder.mid_block.resnets.0.conv1.bias", + 
"first_stage_model.encoder.mid.block_1.conv1.weight": "vae_encoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.encoder.mid.block_1.conv2.bias": "vae_encoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.encoder.mid.block_1.conv2.weight": "vae_encoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.encoder.mid.block_1.norm1.bias": "vae_encoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.encoder.mid.block_1.norm1.weight": "vae_encoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.encoder.mid.block_1.norm2.bias": "vae_encoder.mid_block.resnets.0.norm2.bias", + "first_stage_model.encoder.mid.block_1.norm2.weight": "vae_encoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.encoder.mid.block_2.conv1.bias": "vae_encoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.encoder.mid.block_2.conv1.weight": "vae_encoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.encoder.mid.block_2.conv2.bias": "vae_encoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.encoder.mid.block_2.conv2.weight": "vae_encoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.encoder.mid.block_2.norm1.bias": "vae_encoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.encoder.mid.block_2.norm1.weight": "vae_encoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.encoder.mid.block_2.norm2.bias": "vae_encoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.encoder.mid.block_2.norm2.weight": "vae_encoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.encoder.norm_out.bias": "vae_encoder.conv_norm_out.bias", + "first_stage_model.encoder.norm_out.weight": "vae_encoder.conv_norm_out.weight", + "first_stage_model.post_quant_conv.bias": "vae_post_quant_conv.bias", + "first_stage_model.post_quant_conv.weight": "vae_post_quant_conv.weight", + "first_stage_model.quant_conv.bias": "vae_quant_conv.bias", + "first_stage_model.quant_conv.weight": "vae_quant_conv.weight", + "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias", + "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "unet_down_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "unet_down_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.1.1.norm.bias": "unet_down_blocks.0.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.1.1.norm.weight": "unet_down_blocks.0.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.1.1.proj_in.bias": "unet_down_blocks.0.attentions.0.proj_in.bias", + 
"model.diffusion_model.input_blocks.1.1.proj_in.weight": "unet_down_blocks.0.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.1.1.proj_out.bias": "unet_down_blocks.0.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.1.1.proj_out.weight": "unet_down_blocks.0.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.conv1.bias", + 
"model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_time_embedding.linear_2.weight", + "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "unet_down_blocks.3.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "unet_down_blocks.3.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "unet_down_blocks.3.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "unet_down_blocks.3.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "unet_down_blocks.3.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "unet_down_blocks.3.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "unet_down_blocks.3.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "unet_down_blocks.3.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "unet_down_blocks.3.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "unet_down_blocks.3.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "unet_down_blocks.3.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "unet_down_blocks.3.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "unet_down_blocks.3.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "unet_down_blocks.3.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "unet_down_blocks.0.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "unet_down_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.2.1.norm.bias": "unet_down_blocks.0.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.2.1.norm.weight": "unet_down_blocks.0.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.2.1.proj_in.bias": "unet_down_blocks.0.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.2.1.proj_in.weight": 
"unet_down_blocks.0.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.2.1.proj_out.bias": "unet_down_blocks.0.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.2.1.proj_out.weight": "unet_down_blocks.0.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.3.0.op.weight": 
"unet_down_blocks.0.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "unet_down_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "unet_down_blocks.1.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "unet_down_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "unet_down_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.4.1.norm.bias": "unet_down_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.4.1.norm.weight": "unet_down_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.4.1.proj_in.bias": "unet_down_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.4.1.proj_in.weight": "unet_down_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.4.1.proj_out.bias": "unet_down_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.4.1.proj_out.weight": "unet_down_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": 
"unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "unet_down_blocks.1.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "unet_down_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.5.1.norm.bias": "unet_down_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.5.1.norm.weight": "unet_down_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.5.1.proj_in.bias": "unet_down_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.5.1.proj_in.weight": "unet_down_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.5.1.proj_out.bias": "unet_down_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.5.1.proj_out.weight": "unet_down_blocks.1.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": 
"unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": 
"unet_down_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "unet_down_blocks.2.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "unet_down_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "unet_down_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "unet_down_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.7.1.norm.bias": "unet_down_blocks.2.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.7.1.norm.weight": "unet_down_blocks.2.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.7.1.proj_in.bias": "unet_down_blocks.2.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.7.1.proj_in.weight": "unet_down_blocks.2.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.7.1.proj_out.bias": "unet_down_blocks.2.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.7.1.proj_out.weight": "unet_down_blocks.2.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", + 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "unet_down_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "unet_down_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.8.1.norm.bias": "unet_down_blocks.2.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.8.1.norm.weight": "unet_down_blocks.2.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.8.1.proj_in.bias": "unet_down_blocks.2.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.8.1.proj_in.weight": "unet_down_blocks.2.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.8.1.proj_out.bias": "unet_down_blocks.2.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.8.1.proj_out.weight": "unet_down_blocks.2.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", + 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.9.0.op.bias": "unet_down_blocks.2.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.9.0.op.weight": "unet_down_blocks.2.downsamplers.0.conv.weight", + "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.conv1.bias", + "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight", + "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias", + "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight", + "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.time_emb_proj.bias", + "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight", + "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias", + "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight", + "model.diffusion_model.middle_block.0.out_layers.3.bias": "unet_mid_block.resnets.0.conv2.bias", + "model.diffusion_model.middle_block.0.out_layers.3.weight": "unet_mid_block.resnets.0.conv2.weight", + 
"model.diffusion_model.middle_block.1.norm.bias": "unet_mid_block.attentions.0.norm.bias", + "model.diffusion_model.middle_block.1.norm.weight": "unet_mid_block.attentions.0.norm.weight", + "model.diffusion_model.middle_block.1.proj_in.bias": "unet_mid_block.attentions.0.proj_in.bias", + "model.diffusion_model.middle_block.1.proj_in.weight": "unet_mid_block.attentions.0.proj_in.weight", + "model.diffusion_model.middle_block.1.proj_out.bias": "unet_mid_block.attentions.0.proj_out.bias", + "model.diffusion_model.middle_block.1.proj_out.weight": "unet_mid_block.attentions.0.proj_out.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": 
"unet_mid_block.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.conv1.bias", + "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.middle_block.2.in_layers.0.bias": "unet_mid_block.resnets.1.norm1.bias", + "model.diffusion_model.middle_block.2.in_layers.0.weight": "unet_mid_block.resnets.1.norm1.weight", + "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.time_emb_proj.bias", + "model.diffusion_model.middle_block.2.in_layers.2.weight": "unet_mid_block.resnets.1.conv1.weight", + "model.diffusion_model.middle_block.2.out_layers.0.bias": "unet_mid_block.resnets.1.norm2.bias", + "model.diffusion_model.middle_block.2.out_layers.0.weight": "unet_mid_block.resnets.1.norm2.weight", + "model.diffusion_model.middle_block.2.out_layers.3.bias": "unet_mid_block.resnets.1.conv2.bias", + "model.diffusion_model.middle_block.2.out_layers.3.weight": "unet_mid_block.resnets.1.conv2.weight", + "model.diffusion_model.out.0.bias": "unet_conv_norm_out.bias", + "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight", + "model.diffusion_model.out.2.bias": "unet_conv_out.bias", + "model.diffusion_model.out.2.weight": "unet_conv_out.weight", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "unet_up_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight", + 
"model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "unet_up_blocks.0.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "unet_up_blocks.3.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "unet_up_blocks.3.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "unet_up_blocks.3.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "unet_up_blocks.3.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "unet_up_blocks.3.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": "unet_up_blocks.3.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": "unet_up_blocks.3.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.10.0.skip_connection.bias": "unet_up_blocks.3.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.10.0.skip_connection.weight": "unet_up_blocks.3.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.10.1.norm.bias": "unet_up_blocks.3.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.10.1.norm.weight": "unet_up_blocks.3.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.10.1.proj_in.bias": "unet_up_blocks.3.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.10.1.proj_in.weight": "unet_up_blocks.3.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.10.1.proj_out.bias": "unet_up_blocks.3.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.10.1.proj_out.weight": "unet_up_blocks.3.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "unet_up_blocks.3.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "unet_up_blocks.3.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": "unet_up_blocks.3.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "unet_up_blocks.3.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "unet_up_blocks.3.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": "unet_up_blocks.3.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": "unet_up_blocks.3.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.11.0.skip_connection.bias": "unet_up_blocks.3.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.11.0.skip_connection.weight": "unet_up_blocks.3.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.11.1.norm.bias": "unet_up_blocks.3.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.11.1.norm.weight": "unet_up_blocks.3.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.11.1.proj_in.bias": 
"unet_up_blocks.3.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.11.1.proj_in.weight": "unet_up_blocks.3.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.11.1.proj_out.bias": "unet_up_blocks.3.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.11.1.proj_out.weight": "unet_up_blocks.3.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": 
"unet_up_blocks.0.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "unet_up_blocks.0.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "unet_up_blocks.0.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "unet_up_blocks.0.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.2.1.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.2.1.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "unet_up_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "unet_up_blocks.1.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.3.0.skip_connection.bias": "unet_up_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "unet_up_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.3.1.norm.bias": "unet_up_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.3.1.norm.weight": "unet_up_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.3.1.proj_in.bias": "unet_up_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.3.1.proj_in.weight": "unet_up_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.3.1.proj_out.bias": "unet_up_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.3.1.proj_out.weight": "unet_up_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": 
"unet_up_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "unet_up_blocks.1.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "unet_up_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "unet_up_blocks.1.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "unet_up_blocks.1.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.4.1.norm.bias": "unet_up_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.4.1.norm.weight": "unet_up_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.4.1.proj_in.bias": "unet_up_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.4.1.proj_in.weight": "unet_up_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.4.1.proj_out.bias": "unet_up_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.4.1.proj_out.weight": "unet_up_blocks.1.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "unet_up_blocks.1.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": "unet_up_blocks.1.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "unet_up_blocks.1.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "unet_up_blocks.1.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.5.1.norm.bias": "unet_up_blocks.1.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.5.1.norm.weight": "unet_up_blocks.1.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.5.1.proj_in.bias": "unet_up_blocks.1.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.5.1.proj_in.weight": "unet_up_blocks.1.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.5.1.proj_out.bias": "unet_up_blocks.1.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.5.1.proj_out.weight": "unet_up_blocks.1.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": 
"unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "unet_up_blocks.2.resnets.0.conv2.bias", + 
"model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "unet_up_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "unet_up_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "unet_up_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.6.1.norm.bias": "unet_up_blocks.2.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.6.1.norm.weight": "unet_up_blocks.2.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.6.1.proj_in.bias": "unet_up_blocks.2.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.6.1.proj_in.weight": "unet_up_blocks.2.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.6.1.proj_out.bias": "unet_up_blocks.2.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.6.1.proj_out.weight": "unet_up_blocks.2.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": 
"unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "unet_up_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "unet_up_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "unet_up_blocks.2.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "unet_up_blocks.2.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.7.1.norm.bias": "unet_up_blocks.2.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.7.1.norm.weight": "unet_up_blocks.2.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.7.1.proj_in.bias": "unet_up_blocks.2.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.7.1.proj_in.weight": "unet_up_blocks.2.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.7.1.proj_out.bias": "unet_up_blocks.2.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.7.1.proj_out.weight": "unet_up_blocks.2.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "unet_up_blocks.2.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "unet_up_blocks.2.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "unet_up_blocks.2.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "unet_up_blocks.2.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.8.1.norm.bias": "unet_up_blocks.2.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.8.1.norm.weight": "unet_up_blocks.2.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.8.1.proj_in.bias": 
"unet_up_blocks.2.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.8.1.proj_in.weight": "unet_up_blocks.2.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.8.1.proj_out.bias": "unet_up_blocks.2.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.8.1.proj_out.weight": "unet_up_blocks.2.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.8.2.conv.bias": "unet_up_blocks.2.upsamplers.0.conv.bias", + 
"model.diffusion_model.output_blocks.8.2.conv.weight": "unet_up_blocks.2.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "unet_up_blocks.3.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "unet_up_blocks.3.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "unet_up_blocks.3.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "unet_up_blocks.3.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "unet_up_blocks.3.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": "unet_up_blocks.3.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": "unet_up_blocks.3.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.9.0.skip_connection.bias": "unet_up_blocks.3.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.9.0.skip_connection.weight": "unet_up_blocks.3.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.9.1.norm.bias": "unet_up_blocks.3.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.9.1.norm.weight": "unet_up_blocks.3.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.9.1.proj_in.bias": "unet_up_blocks.3.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.9.1.proj_in.weight": "unet_up_blocks.3.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.9.1.proj_out.bias": "unet_up_blocks.3.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.9.1.proj_out.weight": "unet_up_blocks.3.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": 
"unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias", + "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight", + "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias", + "model.diffusion_model.time_embed.2.weight": "unet_mid_block.resnets.1.time_emb_proj.weight" + }, + "ldm_diffusers_shape_map": { + "first_stage_model.decoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [ + [ + 128, + 256, + 1, + 1 + ], + [ + 128, + 256, + 1, + 1 + ] + ], + "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [ + [ + 256, + 512, + 1, + 1 + ], + [ + 256, + 512, + 1, + 1 + ] + ], + "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [ + [ + 256, + 128, + 1, + 1 + ], + [ + 256, + 128, + 1, + 1 + ] + ], + "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [ + [ + 512, + 256, + 1, + 1 + ], + [ + 512, + 256, + 1, + 1 + ] + ], + "first_stage_model.encoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.post_quant_conv.weight": [ + [ + 4, + 4, + 1, + 1 + ], + [ + 4, + 4, + 1, + 1 + ] + ], + "first_stage_model.quant_conv.weight": [ + [ + 8, + 8, + 1, + 1 + 
], + [ + 8, + 8, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.1.1.proj_in.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.1.1.proj_out.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.2.1.proj_in.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.2.1.proj_out.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [ + [ + 640, + 320, + 1, + 1 + ], + [ + 640, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.4.1.proj_in.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.4.1.proj_out.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.5.1.proj_in.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.5.1.proj_out.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [ + [ + 1280, + 640, + 1, + 1 + ], + [ + 1280, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.7.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.7.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.8.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.8.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.middle_block.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.middle_block.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [ + [ + 320, + 640, + 1, + 1 + ], + [ + 320, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.10.1.proj_in.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.10.1.proj_out.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [ + [ + 320, + 640, + 1, + 1 + ], + [ + 320, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.11.1.proj_in.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.11.1.proj_out.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + 
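Each `ldm_diffusers_shape_map` entry pairs the LDM shape with the diffusers shape for tensors that differ between the two layouts: the VAE attention projections above are `[512, 512, 1, 1]` 1x1 convolutions on the LDM side but `[512, 512]` linear weights in diffusers, while entries whose two shapes are identical need no reshape. A sketch of the reshape step, assuming the JSON layout shown here (function name illustrative):

```python
def reshape_for_diffusers(tensor, ldm_key, shape_map):
    # shape_map[ldm_key] == [ldm_shape, diffusers_shape]; e.g. a
    # [512, 512, 1, 1] 1x1 conv weight flattens to a [512, 512] linear
    if ldm_key in shape_map:
        _, diffusers_shape = shape_map[ldm_key]
        tensor = tensor.reshape(diffusers_shape)
    return tensor
```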
"model.diffusion_model.output_blocks.3.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.3.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.4.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.4.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [ + [ + 1280, + 1920, + 1, + 1 + ], + [ + 1280, + 1920, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.5.1.proj_in.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.5.1.proj_out.weight": [ + [ + 1280, + 1280, + 1, + 1 + ], + [ + 1280, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [ + [ + 640, + 1920, + 1, + 1 + ], + [ + 640, + 1920, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.6.1.proj_in.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.6.1.proj_out.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [ + [ + 640, + 1280, + 1, + 1 + ], + [ + 640, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.7.1.proj_in.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.7.1.proj_out.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [ + [ + 640, + 960, + 1, + 1 + ], + [ + 640, + 960, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.8.1.proj_in.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.8.1.proj_out.weight": [ + [ + 640, + 640, + 1, + 1 + ], + [ + 640, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [ + [ + 320, + 960, + 1, + 1 + ], + [ + 320, + 960, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.9.1.proj_in.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.9.1.proj_out.weight": [ + [ + 320, + 320, + 1, + 1 + ], + [ + 320, + 320, + 1, + 1 + ] + ] + }, + "ldm_diffusers_operator_map": {}, + "diffusers_ldm_operator_map": {} +} \ No newline at end of file diff --git a/toolkit/keymaps/stable_diffusion_sd1_ldm_base.safetensors b/toolkit/keymaps/stable_diffusion_sd1_ldm_base.safetensors new file mode 100644 index 0000000..3969499 Binary files /dev/null and b/toolkit/keymaps/stable_diffusion_sd1_ldm_base.safetensors differ diff --git a/toolkit/keymaps/stable_diffusion_sd2.json b/toolkit/keymaps/stable_diffusion_sd2.json new file mode 100644 index 0000000..59bc151 --- /dev/null +++ b/toolkit/keymaps/stable_diffusion_sd2.json @@ -0,0 +1,2773 @@ +{ + "ldm_diffusers_keymap": { + "cond_stage_model.model.ln_final.bias": "te_text_model.final_layer_norm.bias", + "cond_stage_model.model.ln_final.weight": "te_text_model.final_layer_norm.weight", + "cond_stage_model.model.positional_embedding": 
"te_text_model.embeddings.position_embedding.weight", + "cond_stage_model.model.token_embedding.weight": "te_text_model.embeddings.token_embedding.weight", + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": "te_text_model.encoder.layers.0.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": "te_text_model.encoder.layers.0.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": "te_text_model.encoder.layers.0.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": "te_text_model.encoder.layers.0.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": "te_text_model.encoder.layers.0.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.0.ln_1.weight": "te_text_model.encoder.layers.0.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.0.ln_2.bias": "te_text_model.encoder.layers.0.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.0.ln_2.weight": "te_text_model.encoder.layers.0.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.bias": "te_text_model.encoder.layers.0.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": "te_text_model.encoder.layers.0.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": "te_text_model.encoder.layers.0.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": "te_text_model.encoder.layers.0.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": "te_text_model.encoder.layers.1.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": "te_text_model.encoder.layers.1.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": "te_text_model.encoder.layers.1.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": "te_text_model.encoder.layers.1.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": "te_text_model.encoder.layers.1.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.1.ln_1.weight": "te_text_model.encoder.layers.1.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.1.ln_2.bias": "te_text_model.encoder.layers.1.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.1.ln_2.weight": "te_text_model.encoder.layers.1.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.bias": "te_text_model.encoder.layers.1.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": "te_text_model.encoder.layers.1.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": "te_text_model.encoder.layers.1.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": "te_text_model.encoder.layers.1.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": "te_text_model.encoder.layers.10.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": "te_text_model.encoder.layers.10.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": "te_text_model.encoder.layers.10.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": 
"te_text_model.encoder.layers.10.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": "te_text_model.encoder.layers.10.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.10.ln_1.weight": "te_text_model.encoder.layers.10.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.10.ln_2.bias": "te_text_model.encoder.layers.10.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.10.ln_2.weight": "te_text_model.encoder.layers.10.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.bias": "te_text_model.encoder.layers.10.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": "te_text_model.encoder.layers.10.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": "te_text_model.encoder.layers.10.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": "te_text_model.encoder.layers.10.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": "te_text_model.encoder.layers.11.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": "te_text_model.encoder.layers.11.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": "te_text_model.encoder.layers.11.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": "te_text_model.encoder.layers.11.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": "te_text_model.encoder.layers.11.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.11.ln_1.weight": "te_text_model.encoder.layers.11.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.11.ln_2.bias": "te_text_model.encoder.layers.11.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.11.ln_2.weight": "te_text_model.encoder.layers.11.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.bias": "te_text_model.encoder.layers.11.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": "te_text_model.encoder.layers.11.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": "te_text_model.encoder.layers.11.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": "te_text_model.encoder.layers.11.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": "te_text_model.encoder.layers.12.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": "te_text_model.encoder.layers.12.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": "te_text_model.encoder.layers.12.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": "te_text_model.encoder.layers.12.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": "te_text_model.encoder.layers.12.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.12.ln_1.weight": "te_text_model.encoder.layers.12.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.12.ln_2.bias": "te_text_model.encoder.layers.12.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.12.ln_2.weight": "te_text_model.encoder.layers.12.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.bias": 
"te_text_model.encoder.layers.12.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": "te_text_model.encoder.layers.12.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": "te_text_model.encoder.layers.12.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": "te_text_model.encoder.layers.12.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": "te_text_model.encoder.layers.13.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": "te_text_model.encoder.layers.13.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": "te_text_model.encoder.layers.13.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": "te_text_model.encoder.layers.13.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": "te_text_model.encoder.layers.13.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.13.ln_1.weight": "te_text_model.encoder.layers.13.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.13.ln_2.bias": "te_text_model.encoder.layers.13.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.13.ln_2.weight": "te_text_model.encoder.layers.13.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.bias": "te_text_model.encoder.layers.13.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": "te_text_model.encoder.layers.13.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": "te_text_model.encoder.layers.13.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": "te_text_model.encoder.layers.13.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": "te_text_model.encoder.layers.14.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": "te_text_model.encoder.layers.14.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": "te_text_model.encoder.layers.14.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": "te_text_model.encoder.layers.14.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": "te_text_model.encoder.layers.14.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.14.ln_1.weight": "te_text_model.encoder.layers.14.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.14.ln_2.bias": "te_text_model.encoder.layers.14.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.14.ln_2.weight": "te_text_model.encoder.layers.14.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.bias": "te_text_model.encoder.layers.14.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": "te_text_model.encoder.layers.14.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": "te_text_model.encoder.layers.14.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": "te_text_model.encoder.layers.14.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": "te_text_model.encoder.layers.15.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": 
"te_text_model.encoder.layers.15.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": "te_text_model.encoder.layers.15.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": "te_text_model.encoder.layers.15.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.15.ln_1.bias": "te_text_model.encoder.layers.15.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.15.ln_1.weight": "te_text_model.encoder.layers.15.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.15.ln_2.bias": "te_text_model.encoder.layers.15.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.15.ln_2.weight": "te_text_model.encoder.layers.15.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.bias": "te_text_model.encoder.layers.15.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": "te_text_model.encoder.layers.15.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": "te_text_model.encoder.layers.15.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": "te_text_model.encoder.layers.15.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": "te_text_model.encoder.layers.16.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": "te_text_model.encoder.layers.16.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": "te_text_model.encoder.layers.16.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": "te_text_model.encoder.layers.16.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": "te_text_model.encoder.layers.16.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.16.ln_1.weight": "te_text_model.encoder.layers.16.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.16.ln_2.bias": "te_text_model.encoder.layers.16.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.16.ln_2.weight": "te_text_model.encoder.layers.16.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.bias": "te_text_model.encoder.layers.16.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": "te_text_model.encoder.layers.16.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": "te_text_model.encoder.layers.16.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": "te_text_model.encoder.layers.16.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": "te_text_model.encoder.layers.17.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": "te_text_model.encoder.layers.17.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": "te_text_model.encoder.layers.17.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": "te_text_model.encoder.layers.17.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": "te_text_model.encoder.layers.17.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.17.ln_1.weight": "te_text_model.encoder.layers.17.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.17.ln_2.bias": 
"te_text_model.encoder.layers.17.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.17.ln_2.weight": "te_text_model.encoder.layers.17.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.bias": "te_text_model.encoder.layers.17.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": "te_text_model.encoder.layers.17.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": "te_text_model.encoder.layers.17.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": "te_text_model.encoder.layers.17.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": "te_text_model.encoder.layers.18.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": "te_text_model.encoder.layers.18.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": "te_text_model.encoder.layers.18.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": "te_text_model.encoder.layers.18.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": "te_text_model.encoder.layers.18.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.18.ln_1.weight": "te_text_model.encoder.layers.18.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.18.ln_2.bias": "te_text_model.encoder.layers.18.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.18.ln_2.weight": "te_text_model.encoder.layers.18.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.bias": "te_text_model.encoder.layers.18.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": "te_text_model.encoder.layers.18.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": "te_text_model.encoder.layers.18.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": "te_text_model.encoder.layers.18.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": "te_text_model.encoder.layers.19.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": "te_text_model.encoder.layers.19.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": "te_text_model.encoder.layers.19.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": "te_text_model.encoder.layers.19.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.19.ln_1.bias": "te_text_model.encoder.layers.19.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.19.ln_1.weight": "te_text_model.encoder.layers.19.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.19.ln_2.bias": "te_text_model.encoder.layers.19.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.19.ln_2.weight": "te_text_model.encoder.layers.19.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.bias": "te_text_model.encoder.layers.19.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": "te_text_model.encoder.layers.19.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": "te_text_model.encoder.layers.19.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": 
"te_text_model.encoder.layers.19.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": "te_text_model.encoder.layers.2.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": "te_text_model.encoder.layers.2.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": "te_text_model.encoder.layers.2.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": "te_text_model.encoder.layers.2.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": "te_text_model.encoder.layers.2.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.2.ln_1.weight": "te_text_model.encoder.layers.2.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.2.ln_2.bias": "te_text_model.encoder.layers.2.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.2.ln_2.weight": "te_text_model.encoder.layers.2.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.bias": "te_text_model.encoder.layers.2.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": "te_text_model.encoder.layers.2.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": "te_text_model.encoder.layers.2.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": "te_text_model.encoder.layers.2.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": "te_text_model.encoder.layers.20.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": "te_text_model.encoder.layers.20.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": "te_text_model.encoder.layers.20.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": "te_text_model.encoder.layers.20.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": "te_text_model.encoder.layers.20.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.20.ln_1.weight": "te_text_model.encoder.layers.20.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.20.ln_2.bias": "te_text_model.encoder.layers.20.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.20.ln_2.weight": "te_text_model.encoder.layers.20.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.bias": "te_text_model.encoder.layers.20.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": "te_text_model.encoder.layers.20.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": "te_text_model.encoder.layers.20.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": "te_text_model.encoder.layers.20.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": "te_text_model.encoder.layers.21.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": "te_text_model.encoder.layers.21.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": "te_text_model.encoder.layers.21.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": "te_text_model.encoder.layers.21.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.21.ln_1.bias": 
"te_text_model.encoder.layers.21.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.21.ln_1.weight": "te_text_model.encoder.layers.21.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.21.ln_2.bias": "te_text_model.encoder.layers.21.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.21.ln_2.weight": "te_text_model.encoder.layers.21.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.bias": "te_text_model.encoder.layers.21.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": "te_text_model.encoder.layers.21.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": "te_text_model.encoder.layers.21.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": "te_text_model.encoder.layers.21.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": "te_text_model.encoder.layers.22.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": "te_text_model.encoder.layers.22.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": "te_text_model.encoder.layers.22.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": "te_text_model.encoder.layers.22.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": "te_text_model.encoder.layers.22.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.22.ln_1.weight": "te_text_model.encoder.layers.22.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.22.ln_2.bias": "te_text_model.encoder.layers.22.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.22.ln_2.weight": "te_text_model.encoder.layers.22.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.bias": "te_text_model.encoder.layers.22.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": "te_text_model.encoder.layers.22.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": "te_text_model.encoder.layers.22.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": "te_text_model.encoder.layers.22.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": "te_text_model.encoder.layers.3.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": "te_text_model.encoder.layers.3.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": "te_text_model.encoder.layers.3.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": "te_text_model.encoder.layers.3.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": "te_text_model.encoder.layers.3.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.3.ln_1.weight": "te_text_model.encoder.layers.3.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.3.ln_2.bias": "te_text_model.encoder.layers.3.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.3.ln_2.weight": "te_text_model.encoder.layers.3.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.bias": "te_text_model.encoder.layers.3.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": "te_text_model.encoder.layers.3.mlp.fc1.weight", + 
"cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": "te_text_model.encoder.layers.3.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": "te_text_model.encoder.layers.3.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": "te_text_model.encoder.layers.4.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": "te_text_model.encoder.layers.4.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": "te_text_model.encoder.layers.4.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": "te_text_model.encoder.layers.4.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": "te_text_model.encoder.layers.4.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.4.ln_1.weight": "te_text_model.encoder.layers.4.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.4.ln_2.bias": "te_text_model.encoder.layers.4.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.4.ln_2.weight": "te_text_model.encoder.layers.4.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.bias": "te_text_model.encoder.layers.4.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": "te_text_model.encoder.layers.4.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": "te_text_model.encoder.layers.4.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": "te_text_model.encoder.layers.4.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": "te_text_model.encoder.layers.5.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": "te_text_model.encoder.layers.5.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": "te_text_model.encoder.layers.5.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": "te_text_model.encoder.layers.5.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": "te_text_model.encoder.layers.5.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.5.ln_1.weight": "te_text_model.encoder.layers.5.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.5.ln_2.bias": "te_text_model.encoder.layers.5.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.5.ln_2.weight": "te_text_model.encoder.layers.5.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.bias": "te_text_model.encoder.layers.5.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": "te_text_model.encoder.layers.5.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": "te_text_model.encoder.layers.5.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": "te_text_model.encoder.layers.5.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": "te_text_model.encoder.layers.6.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": "te_text_model.encoder.layers.6.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": "te_text_model.encoder.layers.6.self_attn.out_proj.bias", + 
"cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": "te_text_model.encoder.layers.6.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": "te_text_model.encoder.layers.6.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.6.ln_1.weight": "te_text_model.encoder.layers.6.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.6.ln_2.bias": "te_text_model.encoder.layers.6.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.6.ln_2.weight": "te_text_model.encoder.layers.6.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.bias": "te_text_model.encoder.layers.6.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": "te_text_model.encoder.layers.6.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": "te_text_model.encoder.layers.6.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": "te_text_model.encoder.layers.6.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": "te_text_model.encoder.layers.7.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": "te_text_model.encoder.layers.7.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": "te_text_model.encoder.layers.7.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": "te_text_model.encoder.layers.7.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": "te_text_model.encoder.layers.7.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.7.ln_1.weight": "te_text_model.encoder.layers.7.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.7.ln_2.bias": "te_text_model.encoder.layers.7.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.7.ln_2.weight": "te_text_model.encoder.layers.7.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.bias": "te_text_model.encoder.layers.7.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": "te_text_model.encoder.layers.7.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": "te_text_model.encoder.layers.7.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": "te_text_model.encoder.layers.7.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": "te_text_model.encoder.layers.8.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": "te_text_model.encoder.layers.8.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": "te_text_model.encoder.layers.8.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": "te_text_model.encoder.layers.8.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": "te_text_model.encoder.layers.8.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.8.ln_1.weight": "te_text_model.encoder.layers.8.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.8.ln_2.bias": "te_text_model.encoder.layers.8.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.8.ln_2.weight": "te_text_model.encoder.layers.8.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.bias": 
"te_text_model.encoder.layers.8.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": "te_text_model.encoder.layers.8.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": "te_text_model.encoder.layers.8.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": "te_text_model.encoder.layers.8.mlp.fc2.weight", + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": "te_text_model.encoder.layers.9.self_attn.MERGED.bias", + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": "te_text_model.encoder.layers.9.self_attn.MERGED.weight", + "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": "te_text_model.encoder.layers.9.self_attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": "te_text_model.encoder.layers.9.self_attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": "te_text_model.encoder.layers.9.layer_norm1.bias", + "cond_stage_model.model.transformer.resblocks.9.ln_1.weight": "te_text_model.encoder.layers.9.layer_norm1.weight", + "cond_stage_model.model.transformer.resblocks.9.ln_2.bias": "te_text_model.encoder.layers.9.layer_norm2.bias", + "cond_stage_model.model.transformer.resblocks.9.ln_2.weight": "te_text_model.encoder.layers.9.layer_norm2.weight", + "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.bias": "te_text_model.encoder.layers.9.mlp.fc1.bias", + "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.weight": "te_text_model.encoder.layers.9.mlp.fc1.weight", + "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.bias": "te_text_model.encoder.layers.9.mlp.fc2.bias", + "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.weight": "te_text_model.encoder.layers.9.mlp.fc2.weight", + "first_stage_model.decoder.conv_in.bias": "vae_decoder.conv_in.bias", + "first_stage_model.decoder.conv_in.weight": "vae_decoder.conv_in.weight", + "first_stage_model.decoder.conv_out.bias": "vae_decoder.conv_out.bias", + "first_stage_model.decoder.conv_out.weight": "vae_decoder.conv_out.weight", + "first_stage_model.decoder.mid.attn_1.k.bias": "vae_decoder.mid_block.attentions.0.to_k.bias", + "first_stage_model.decoder.mid.attn_1.k.weight": "vae_decoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.decoder.mid.attn_1.norm.bias": "vae_decoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.decoder.mid.attn_1.norm.weight": "vae_decoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.decoder.mid.attn_1.proj_out.bias": "vae_decoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.decoder.mid.attn_1.proj_out.weight": "vae_decoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.decoder.mid.attn_1.q.bias": "vae_decoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.decoder.mid.attn_1.q.weight": "vae_decoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.decoder.mid.attn_1.v.bias": "vae_decoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.decoder.mid.attn_1.v.weight": "vae_decoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.decoder.mid.block_1.conv1.bias": "vae_decoder.mid_block.resnets.0.conv1.bias", + "first_stage_model.decoder.mid.block_1.conv1.weight": "vae_decoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.decoder.mid.block_1.conv2.bias": "vae_decoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.decoder.mid.block_1.conv2.weight": 
"vae_decoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.decoder.mid.block_1.norm1.bias": "vae_decoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.decoder.mid.block_1.norm1.weight": "vae_decoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.decoder.mid.block_1.norm2.bias": "vae_decoder.mid_block.resnets.0.norm2.bias", + "first_stage_model.decoder.mid.block_1.norm2.weight": "vae_decoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.decoder.mid.block_2.conv1.bias": "vae_decoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.decoder.mid.block_2.conv1.weight": "vae_decoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.decoder.mid.block_2.conv2.bias": "vae_decoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.decoder.mid.block_2.conv2.weight": "vae_decoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.decoder.mid.block_2.norm1.bias": "vae_decoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.decoder.mid.block_2.norm1.weight": "vae_decoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.decoder.mid.block_2.norm2.bias": "vae_decoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.decoder.mid.block_2.norm2.weight": "vae_decoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.decoder.norm_out.bias": "vae_decoder.conv_norm_out.bias", + "first_stage_model.decoder.norm_out.weight": "vae_decoder.conv_norm_out.weight", + "first_stage_model.decoder.up.0.block.0.conv1.bias": "vae_decoder.up_blocks.3.resnets.0.conv1.bias", + "first_stage_model.decoder.up.0.block.0.conv1.weight": "vae_decoder.up_blocks.3.resnets.0.conv1.weight", + "first_stage_model.decoder.up.0.block.0.conv2.bias": "vae_decoder.up_blocks.3.resnets.0.conv2.bias", + "first_stage_model.decoder.up.0.block.0.conv2.weight": "vae_decoder.up_blocks.3.resnets.0.conv2.weight", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.0.block.0.norm1.bias": "vae_decoder.up_blocks.3.resnets.0.norm1.bias", + "first_stage_model.decoder.up.0.block.0.norm1.weight": "vae_decoder.up_blocks.3.resnets.0.norm1.weight", + "first_stage_model.decoder.up.0.block.0.norm2.bias": "vae_decoder.up_blocks.3.resnets.0.norm2.bias", + "first_stage_model.decoder.up.0.block.0.norm2.weight": "vae_decoder.up_blocks.3.resnets.0.norm2.weight", + "first_stage_model.decoder.up.0.block.1.conv1.bias": "vae_decoder.up_blocks.3.resnets.1.conv1.bias", + "first_stage_model.decoder.up.0.block.1.conv1.weight": "vae_decoder.up_blocks.3.resnets.1.conv1.weight", + "first_stage_model.decoder.up.0.block.1.conv2.bias": "vae_decoder.up_blocks.3.resnets.1.conv2.bias", + "first_stage_model.decoder.up.0.block.1.conv2.weight": "vae_decoder.up_blocks.3.resnets.1.conv2.weight", + "first_stage_model.decoder.up.0.block.1.norm1.bias": "vae_decoder.up_blocks.3.resnets.1.norm1.bias", + "first_stage_model.decoder.up.0.block.1.norm1.weight": "vae_decoder.up_blocks.3.resnets.1.norm1.weight", + "first_stage_model.decoder.up.0.block.1.norm2.bias": "vae_decoder.up_blocks.3.resnets.1.norm2.bias", + "first_stage_model.decoder.up.0.block.1.norm2.weight": "vae_decoder.up_blocks.3.resnets.1.norm2.weight", + "first_stage_model.decoder.up.0.block.2.conv1.bias": "vae_decoder.up_blocks.3.resnets.2.conv1.bias", + "first_stage_model.decoder.up.0.block.2.conv1.weight": 
"vae_decoder.up_blocks.3.resnets.2.conv1.weight", + "first_stage_model.decoder.up.0.block.2.conv2.bias": "vae_decoder.up_blocks.3.resnets.2.conv2.bias", + "first_stage_model.decoder.up.0.block.2.conv2.weight": "vae_decoder.up_blocks.3.resnets.2.conv2.weight", + "first_stage_model.decoder.up.0.block.2.norm1.bias": "vae_decoder.up_blocks.3.resnets.2.norm1.bias", + "first_stage_model.decoder.up.0.block.2.norm1.weight": "vae_decoder.up_blocks.3.resnets.2.norm1.weight", + "first_stage_model.decoder.up.0.block.2.norm2.bias": "vae_decoder.up_blocks.3.resnets.2.norm2.bias", + "first_stage_model.decoder.up.0.block.2.norm2.weight": "vae_decoder.up_blocks.3.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.block.0.conv1.bias": "vae_decoder.up_blocks.2.resnets.0.conv1.bias", + "first_stage_model.decoder.up.1.block.0.conv1.weight": "vae_decoder.up_blocks.2.resnets.0.conv1.weight", + "first_stage_model.decoder.up.1.block.0.conv2.bias": "vae_decoder.up_blocks.2.resnets.0.conv2.bias", + "first_stage_model.decoder.up.1.block.0.conv2.weight": "vae_decoder.up_blocks.2.resnets.0.conv2.weight", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.1.block.0.norm1.bias": "vae_decoder.up_blocks.2.resnets.0.norm1.bias", + "first_stage_model.decoder.up.1.block.0.norm1.weight": "vae_decoder.up_blocks.2.resnets.0.norm1.weight", + "first_stage_model.decoder.up.1.block.0.norm2.bias": "vae_decoder.up_blocks.2.resnets.0.norm2.bias", + "first_stage_model.decoder.up.1.block.0.norm2.weight": "vae_decoder.up_blocks.2.resnets.0.norm2.weight", + "first_stage_model.decoder.up.1.block.1.conv1.bias": "vae_decoder.up_blocks.2.resnets.1.conv1.bias", + "first_stage_model.decoder.up.1.block.1.conv1.weight": "vae_decoder.up_blocks.2.resnets.1.conv1.weight", + "first_stage_model.decoder.up.1.block.1.conv2.bias": "vae_decoder.up_blocks.2.resnets.1.conv2.bias", + "first_stage_model.decoder.up.1.block.1.conv2.weight": "vae_decoder.up_blocks.2.resnets.1.conv2.weight", + "first_stage_model.decoder.up.1.block.1.norm1.bias": "vae_decoder.up_blocks.2.resnets.1.norm1.bias", + "first_stage_model.decoder.up.1.block.1.norm1.weight": "vae_decoder.up_blocks.2.resnets.1.norm1.weight", + "first_stage_model.decoder.up.1.block.1.norm2.bias": "vae_decoder.up_blocks.2.resnets.1.norm2.bias", + "first_stage_model.decoder.up.1.block.1.norm2.weight": "vae_decoder.up_blocks.2.resnets.1.norm2.weight", + "first_stage_model.decoder.up.1.block.2.conv1.bias": "vae_decoder.up_blocks.2.resnets.2.conv1.bias", + "first_stage_model.decoder.up.1.block.2.conv1.weight": "vae_decoder.up_blocks.2.resnets.2.conv1.weight", + "first_stage_model.decoder.up.1.block.2.conv2.bias": "vae_decoder.up_blocks.2.resnets.2.conv2.bias", + "first_stage_model.decoder.up.1.block.2.conv2.weight": "vae_decoder.up_blocks.2.resnets.2.conv2.weight", + "first_stage_model.decoder.up.1.block.2.norm1.bias": "vae_decoder.up_blocks.2.resnets.2.norm1.bias", + "first_stage_model.decoder.up.1.block.2.norm1.weight": "vae_decoder.up_blocks.2.resnets.2.norm1.weight", + "first_stage_model.decoder.up.1.block.2.norm2.bias": "vae_decoder.up_blocks.2.resnets.2.norm2.bias", + "first_stage_model.decoder.up.1.block.2.norm2.weight": "vae_decoder.up_blocks.2.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.upsample.conv.bias": "vae_decoder.up_blocks.2.upsamplers.0.conv.bias", + 
"first_stage_model.decoder.up.1.upsample.conv.weight": "vae_decoder.up_blocks.2.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.2.block.0.conv1.bias": "vae_decoder.up_blocks.1.resnets.0.conv1.bias", + "first_stage_model.decoder.up.2.block.0.conv1.weight": "vae_decoder.up_blocks.1.resnets.0.conv1.weight", + "first_stage_model.decoder.up.2.block.0.conv2.bias": "vae_decoder.up_blocks.1.resnets.0.conv2.bias", + "first_stage_model.decoder.up.2.block.0.conv2.weight": "vae_decoder.up_blocks.1.resnets.0.conv2.weight", + "first_stage_model.decoder.up.2.block.0.norm1.bias": "vae_decoder.up_blocks.1.resnets.0.norm1.bias", + "first_stage_model.decoder.up.2.block.0.norm1.weight": "vae_decoder.up_blocks.1.resnets.0.norm1.weight", + "first_stage_model.decoder.up.2.block.0.norm2.bias": "vae_decoder.up_blocks.1.resnets.0.norm2.bias", + "first_stage_model.decoder.up.2.block.0.norm2.weight": "vae_decoder.up_blocks.1.resnets.0.norm2.weight", + "first_stage_model.decoder.up.2.block.1.conv1.bias": "vae_decoder.up_blocks.1.resnets.1.conv1.bias", + "first_stage_model.decoder.up.2.block.1.conv1.weight": "vae_decoder.up_blocks.1.resnets.1.conv1.weight", + "first_stage_model.decoder.up.2.block.1.conv2.bias": "vae_decoder.up_blocks.1.resnets.1.conv2.bias", + "first_stage_model.decoder.up.2.block.1.conv2.weight": "vae_decoder.up_blocks.1.resnets.1.conv2.weight", + "first_stage_model.decoder.up.2.block.1.norm1.bias": "vae_decoder.up_blocks.1.resnets.1.norm1.bias", + "first_stage_model.decoder.up.2.block.1.norm1.weight": "vae_decoder.up_blocks.1.resnets.1.norm1.weight", + "first_stage_model.decoder.up.2.block.1.norm2.bias": "vae_decoder.up_blocks.1.resnets.1.norm2.bias", + "first_stage_model.decoder.up.2.block.1.norm2.weight": "vae_decoder.up_blocks.1.resnets.1.norm2.weight", + "first_stage_model.decoder.up.2.block.2.conv1.bias": "vae_decoder.up_blocks.1.resnets.2.conv1.bias", + "first_stage_model.decoder.up.2.block.2.conv1.weight": "vae_decoder.up_blocks.1.resnets.2.conv1.weight", + "first_stage_model.decoder.up.2.block.2.conv2.bias": "vae_decoder.up_blocks.1.resnets.2.conv2.bias", + "first_stage_model.decoder.up.2.block.2.conv2.weight": "vae_decoder.up_blocks.1.resnets.2.conv2.weight", + "first_stage_model.decoder.up.2.block.2.norm1.bias": "vae_decoder.up_blocks.1.resnets.2.norm1.bias", + "first_stage_model.decoder.up.2.block.2.norm1.weight": "vae_decoder.up_blocks.1.resnets.2.norm1.weight", + "first_stage_model.decoder.up.2.block.2.norm2.bias": "vae_decoder.up_blocks.1.resnets.2.norm2.bias", + "first_stage_model.decoder.up.2.block.2.norm2.weight": "vae_decoder.up_blocks.1.resnets.2.norm2.weight", + "first_stage_model.decoder.up.2.upsample.conv.bias": "vae_decoder.up_blocks.1.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.2.upsample.conv.weight": "vae_decoder.up_blocks.1.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.3.block.0.conv1.bias": "vae_decoder.up_blocks.0.resnets.0.conv1.bias", + "first_stage_model.decoder.up.3.block.0.conv1.weight": "vae_decoder.up_blocks.0.resnets.0.conv1.weight", + "first_stage_model.decoder.up.3.block.0.conv2.bias": "vae_decoder.up_blocks.0.resnets.0.conv2.bias", + "first_stage_model.decoder.up.3.block.0.conv2.weight": "vae_decoder.up_blocks.0.resnets.0.conv2.weight", + "first_stage_model.decoder.up.3.block.0.norm1.bias": "vae_decoder.up_blocks.0.resnets.0.norm1.bias", + "first_stage_model.decoder.up.3.block.0.norm1.weight": "vae_decoder.up_blocks.0.resnets.0.norm1.weight", + "first_stage_model.decoder.up.3.block.0.norm2.bias": 
"vae_decoder.up_blocks.0.resnets.0.norm2.bias", + "first_stage_model.decoder.up.3.block.0.norm2.weight": "vae_decoder.up_blocks.0.resnets.0.norm2.weight", + "first_stage_model.decoder.up.3.block.1.conv1.bias": "vae_decoder.up_blocks.0.resnets.1.conv1.bias", + "first_stage_model.decoder.up.3.block.1.conv1.weight": "vae_decoder.up_blocks.0.resnets.1.conv1.weight", + "first_stage_model.decoder.up.3.block.1.conv2.bias": "vae_decoder.up_blocks.0.resnets.1.conv2.bias", + "first_stage_model.decoder.up.3.block.1.conv2.weight": "vae_decoder.up_blocks.0.resnets.1.conv2.weight", + "first_stage_model.decoder.up.3.block.1.norm1.bias": "vae_decoder.up_blocks.0.resnets.1.norm1.bias", + "first_stage_model.decoder.up.3.block.1.norm1.weight": "vae_decoder.up_blocks.0.resnets.1.norm1.weight", + "first_stage_model.decoder.up.3.block.1.norm2.bias": "vae_decoder.up_blocks.0.resnets.1.norm2.bias", + "first_stage_model.decoder.up.3.block.1.norm2.weight": "vae_decoder.up_blocks.0.resnets.1.norm2.weight", + "first_stage_model.decoder.up.3.block.2.conv1.bias": "vae_decoder.up_blocks.0.resnets.2.conv1.bias", + "first_stage_model.decoder.up.3.block.2.conv1.weight": "vae_decoder.up_blocks.0.resnets.2.conv1.weight", + "first_stage_model.decoder.up.3.block.2.conv2.bias": "vae_decoder.up_blocks.0.resnets.2.conv2.bias", + "first_stage_model.decoder.up.3.block.2.conv2.weight": "vae_decoder.up_blocks.0.resnets.2.conv2.weight", + "first_stage_model.decoder.up.3.block.2.norm1.bias": "vae_decoder.up_blocks.0.resnets.2.norm1.bias", + "first_stage_model.decoder.up.3.block.2.norm1.weight": "vae_decoder.up_blocks.0.resnets.2.norm1.weight", + "first_stage_model.decoder.up.3.block.2.norm2.bias": "vae_decoder.up_blocks.0.resnets.2.norm2.bias", + "first_stage_model.decoder.up.3.block.2.norm2.weight": "vae_decoder.up_blocks.0.resnets.2.norm2.weight", + "first_stage_model.decoder.up.3.upsample.conv.bias": "vae_decoder.up_blocks.0.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.3.upsample.conv.weight": "vae_decoder.up_blocks.0.upsamplers.0.conv.weight", + "first_stage_model.encoder.conv_in.bias": "vae_encoder.conv_in.bias", + "first_stage_model.encoder.conv_in.weight": "vae_encoder.conv_in.weight", + "first_stage_model.encoder.conv_out.bias": "vae_encoder.conv_out.bias", + "first_stage_model.encoder.conv_out.weight": "vae_encoder.conv_out.weight", + "first_stage_model.encoder.down.0.block.0.conv1.bias": "vae_encoder.down_blocks.0.resnets.0.conv1.bias", + "first_stage_model.encoder.down.0.block.0.conv1.weight": "vae_encoder.down_blocks.0.resnets.0.conv1.weight", + "first_stage_model.encoder.down.0.block.0.conv2.bias": "vae_encoder.down_blocks.0.resnets.0.conv2.bias", + "first_stage_model.encoder.down.0.block.0.conv2.weight": "vae_encoder.down_blocks.0.resnets.0.conv2.weight", + "first_stage_model.encoder.down.0.block.0.norm1.bias": "vae_encoder.down_blocks.0.resnets.0.norm1.bias", + "first_stage_model.encoder.down.0.block.0.norm1.weight": "vae_encoder.down_blocks.0.resnets.0.norm1.weight", + "first_stage_model.encoder.down.0.block.0.norm2.bias": "vae_encoder.down_blocks.0.resnets.0.norm2.bias", + "first_stage_model.encoder.down.0.block.0.norm2.weight": "vae_encoder.down_blocks.0.resnets.0.norm2.weight", + "first_stage_model.encoder.down.0.block.1.conv1.bias": "vae_encoder.down_blocks.0.resnets.1.conv1.bias", + "first_stage_model.encoder.down.0.block.1.conv1.weight": "vae_encoder.down_blocks.0.resnets.1.conv1.weight", + "first_stage_model.encoder.down.0.block.1.conv2.bias": "vae_encoder.down_blocks.0.resnets.1.conv2.bias", + 
"first_stage_model.encoder.down.0.block.1.conv2.weight": "vae_encoder.down_blocks.0.resnets.1.conv2.weight", + "first_stage_model.encoder.down.0.block.1.norm1.bias": "vae_encoder.down_blocks.0.resnets.1.norm1.bias", + "first_stage_model.encoder.down.0.block.1.norm1.weight": "vae_encoder.down_blocks.0.resnets.1.norm1.weight", + "first_stage_model.encoder.down.0.block.1.norm2.bias": "vae_encoder.down_blocks.0.resnets.1.norm2.bias", + "first_stage_model.encoder.down.0.block.1.norm2.weight": "vae_encoder.down_blocks.0.resnets.1.norm2.weight", + "first_stage_model.encoder.down.0.downsample.conv.bias": "vae_encoder.down_blocks.0.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.0.downsample.conv.weight": "vae_encoder.down_blocks.0.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.1.block.0.conv1.bias": "vae_encoder.down_blocks.1.resnets.0.conv1.bias", + "first_stage_model.encoder.down.1.block.0.conv1.weight": "vae_encoder.down_blocks.1.resnets.0.conv1.weight", + "first_stage_model.encoder.down.1.block.0.conv2.bias": "vae_encoder.down_blocks.1.resnets.0.conv2.bias", + "first_stage_model.encoder.down.1.block.0.conv2.weight": "vae_encoder.down_blocks.1.resnets.0.conv2.weight", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.bias", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.1.block.0.norm1.bias": "vae_encoder.down_blocks.1.resnets.0.norm1.bias", + "first_stage_model.encoder.down.1.block.0.norm1.weight": "vae_encoder.down_blocks.1.resnets.0.norm1.weight", + "first_stage_model.encoder.down.1.block.0.norm2.bias": "vae_encoder.down_blocks.1.resnets.0.norm2.bias", + "first_stage_model.encoder.down.1.block.0.norm2.weight": "vae_encoder.down_blocks.1.resnets.0.norm2.weight", + "first_stage_model.encoder.down.1.block.1.conv1.bias": "vae_encoder.down_blocks.1.resnets.1.conv1.bias", + "first_stage_model.encoder.down.1.block.1.conv1.weight": "vae_encoder.down_blocks.1.resnets.1.conv1.weight", + "first_stage_model.encoder.down.1.block.1.conv2.bias": "vae_encoder.down_blocks.1.resnets.1.conv2.bias", + "first_stage_model.encoder.down.1.block.1.conv2.weight": "vae_encoder.down_blocks.1.resnets.1.conv2.weight", + "first_stage_model.encoder.down.1.block.1.norm1.bias": "vae_encoder.down_blocks.1.resnets.1.norm1.bias", + "first_stage_model.encoder.down.1.block.1.norm1.weight": "vae_encoder.down_blocks.1.resnets.1.norm1.weight", + "first_stage_model.encoder.down.1.block.1.norm2.bias": "vae_encoder.down_blocks.1.resnets.1.norm2.bias", + "first_stage_model.encoder.down.1.block.1.norm2.weight": "vae_encoder.down_blocks.1.resnets.1.norm2.weight", + "first_stage_model.encoder.down.1.downsample.conv.bias": "vae_encoder.down_blocks.1.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.1.downsample.conv.weight": "vae_encoder.down_blocks.1.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.2.block.0.conv1.bias": "vae_encoder.down_blocks.2.resnets.0.conv1.bias", + "first_stage_model.encoder.down.2.block.0.conv1.weight": "vae_encoder.down_blocks.2.resnets.0.conv1.weight", + "first_stage_model.encoder.down.2.block.0.conv2.bias": "vae_encoder.down_blocks.2.resnets.0.conv2.bias", + "first_stage_model.encoder.down.2.block.0.conv2.weight": "vae_encoder.down_blocks.2.resnets.0.conv2.weight", + "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.bias", 
+ "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.2.block.0.norm1.bias": "vae_encoder.down_blocks.2.resnets.0.norm1.bias", + "first_stage_model.encoder.down.2.block.0.norm1.weight": "vae_encoder.down_blocks.2.resnets.0.norm1.weight", + "first_stage_model.encoder.down.2.block.0.norm2.bias": "vae_encoder.down_blocks.2.resnets.0.norm2.bias", + "first_stage_model.encoder.down.2.block.0.norm2.weight": "vae_encoder.down_blocks.2.resnets.0.norm2.weight", + "first_stage_model.encoder.down.2.block.1.conv1.bias": "vae_encoder.down_blocks.2.resnets.1.conv1.bias", + "first_stage_model.encoder.down.2.block.1.conv1.weight": "vae_encoder.down_blocks.2.resnets.1.conv1.weight", + "first_stage_model.encoder.down.2.block.1.conv2.bias": "vae_encoder.down_blocks.2.resnets.1.conv2.bias", + "first_stage_model.encoder.down.2.block.1.conv2.weight": "vae_encoder.down_blocks.2.resnets.1.conv2.weight", + "first_stage_model.encoder.down.2.block.1.norm1.bias": "vae_encoder.down_blocks.2.resnets.1.norm1.bias", + "first_stage_model.encoder.down.2.block.1.norm1.weight": "vae_encoder.down_blocks.2.resnets.1.norm1.weight", + "first_stage_model.encoder.down.2.block.1.norm2.bias": "vae_encoder.down_blocks.2.resnets.1.norm2.bias", + "first_stage_model.encoder.down.2.block.1.norm2.weight": "vae_encoder.down_blocks.2.resnets.1.norm2.weight", + "first_stage_model.encoder.down.2.downsample.conv.bias": "vae_encoder.down_blocks.2.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.2.downsample.conv.weight": "vae_encoder.down_blocks.2.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.3.block.0.conv1.bias": "vae_encoder.down_blocks.3.resnets.0.conv1.bias", + "first_stage_model.encoder.down.3.block.0.conv1.weight": "vae_encoder.down_blocks.3.resnets.0.conv1.weight", + "first_stage_model.encoder.down.3.block.0.conv2.bias": "vae_encoder.down_blocks.3.resnets.0.conv2.bias", + "first_stage_model.encoder.down.3.block.0.conv2.weight": "vae_encoder.down_blocks.3.resnets.0.conv2.weight", + "first_stage_model.encoder.down.3.block.0.norm1.bias": "vae_encoder.down_blocks.3.resnets.0.norm1.bias", + "first_stage_model.encoder.down.3.block.0.norm1.weight": "vae_encoder.down_blocks.3.resnets.0.norm1.weight", + "first_stage_model.encoder.down.3.block.0.norm2.bias": "vae_encoder.down_blocks.3.resnets.0.norm2.bias", + "first_stage_model.encoder.down.3.block.0.norm2.weight": "vae_encoder.down_blocks.3.resnets.0.norm2.weight", + "first_stage_model.encoder.down.3.block.1.conv1.bias": "vae_encoder.down_blocks.3.resnets.1.conv1.bias", + "first_stage_model.encoder.down.3.block.1.conv1.weight": "vae_encoder.down_blocks.3.resnets.1.conv1.weight", + "first_stage_model.encoder.down.3.block.1.conv2.bias": "vae_encoder.down_blocks.3.resnets.1.conv2.bias", + "first_stage_model.encoder.down.3.block.1.conv2.weight": "vae_encoder.down_blocks.3.resnets.1.conv2.weight", + "first_stage_model.encoder.down.3.block.1.norm1.bias": "vae_encoder.down_blocks.3.resnets.1.norm1.bias", + "first_stage_model.encoder.down.3.block.1.norm1.weight": "vae_encoder.down_blocks.3.resnets.1.norm1.weight", + "first_stage_model.encoder.down.3.block.1.norm2.bias": "vae_encoder.down_blocks.3.resnets.1.norm2.bias", + "first_stage_model.encoder.down.3.block.1.norm2.weight": "vae_encoder.down_blocks.3.resnets.1.norm2.weight", + "first_stage_model.encoder.mid.attn_1.k.bias": "vae_encoder.mid_block.attentions.0.to_k.bias", + 
"first_stage_model.encoder.mid.attn_1.k.weight": "vae_encoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.encoder.mid.attn_1.norm.bias": "vae_encoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.encoder.mid.attn_1.norm.weight": "vae_encoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.encoder.mid.attn_1.proj_out.bias": "vae_encoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.encoder.mid.attn_1.proj_out.weight": "vae_encoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.encoder.mid.attn_1.q.bias": "vae_encoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.encoder.mid.attn_1.q.weight": "vae_encoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.encoder.mid.attn_1.v.bias": "vae_encoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.encoder.mid.attn_1.v.weight": "vae_encoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.encoder.mid.block_1.conv1.bias": "vae_encoder.mid_block.resnets.0.conv1.bias", + "first_stage_model.encoder.mid.block_1.conv1.weight": "vae_encoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.encoder.mid.block_1.conv2.bias": "vae_encoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.encoder.mid.block_1.conv2.weight": "vae_encoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.encoder.mid.block_1.norm1.bias": "vae_encoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.encoder.mid.block_1.norm1.weight": "vae_encoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.encoder.mid.block_1.norm2.bias": "vae_encoder.mid_block.resnets.0.norm2.bias", + "first_stage_model.encoder.mid.block_1.norm2.weight": "vae_encoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.encoder.mid.block_2.conv1.bias": "vae_encoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.encoder.mid.block_2.conv1.weight": "vae_encoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.encoder.mid.block_2.conv2.bias": "vae_encoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.encoder.mid.block_2.conv2.weight": "vae_encoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.encoder.mid.block_2.norm1.bias": "vae_encoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.encoder.mid.block_2.norm1.weight": "vae_encoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.encoder.mid.block_2.norm2.bias": "vae_encoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.encoder.mid.block_2.norm2.weight": "vae_encoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.encoder.norm_out.bias": "vae_encoder.conv_norm_out.bias", + "first_stage_model.encoder.norm_out.weight": "vae_encoder.conv_norm_out.weight", + "first_stage_model.post_quant_conv.bias": "vae_post_quant_conv.bias", + "first_stage_model.post_quant_conv.weight": "vae_post_quant_conv.weight", + "first_stage_model.quant_conv.bias": "vae_quant_conv.bias", + "first_stage_model.quant_conv.weight": "vae_quant_conv.weight", + "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias", + "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": 
"unet_down_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "unet_down_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "unet_down_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.1.1.norm.bias": "unet_down_blocks.0.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.1.1.norm.weight": "unet_down_blocks.0.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.1.1.proj_in.bias": "unet_down_blocks.0.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.1.1.proj_in.weight": "unet_down_blocks.0.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.1.1.proj_out.bias": "unet_down_blocks.0.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.1.1.proj_out.weight": "unet_down_blocks.0.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight", + 
"model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "unet_time_embedding.linear_2.weight", + "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "unet_down_blocks.3.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "unet_down_blocks.3.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "unet_down_blocks.3.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "unet_down_blocks.3.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "unet_down_blocks.3.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "unet_down_blocks.3.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "unet_down_blocks.3.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "unet_down_blocks.3.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "unet_down_blocks.3.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "unet_down_blocks.3.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "unet_down_blocks.3.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "unet_down_blocks.3.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "unet_down_blocks.3.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "unet_down_blocks.3.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "unet_down_blocks.3.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "unet_down_blocks.3.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "unet_down_blocks.3.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": 
"unet_down_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "unet_down_blocks.0.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "unet_down_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.2.1.norm.bias": "unet_down_blocks.0.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.2.1.norm.weight": "unet_down_blocks.0.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.2.1.proj_in.bias": "unet_down_blocks.0.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.2.1.proj_in.weight": "unet_down_blocks.0.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.2.1.proj_out.bias": "unet_down_blocks.0.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.2.1.proj_out.weight": "unet_down_blocks.0.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": 
"unet_down_blocks.0.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.0.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "unet_down_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "unet_down_blocks.1.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "unet_down_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "unet_down_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.4.1.norm.bias": "unet_down_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.4.1.norm.weight": "unet_down_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.4.1.proj_in.bias": "unet_down_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.4.1.proj_in.weight": "unet_down_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.4.1.proj_out.bias": "unet_down_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.4.1.proj_out.weight": "unet_down_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": 
"unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "unet_down_blocks.1.resnets.1.conv2.bias", + 
"model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "unet_down_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.5.1.norm.bias": "unet_down_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.5.1.norm.weight": "unet_down_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.5.1.proj_in.bias": "unet_down_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.5.1.proj_in.weight": "unet_down_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.5.1.proj_out.bias": "unet_down_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.5.1.proj_out.weight": "unet_down_blocks.1.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "unet_down_blocks.2.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "unet_down_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "unet_down_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "unet_down_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.7.1.norm.bias": "unet_down_blocks.2.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.7.1.norm.weight": "unet_down_blocks.2.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.7.1.proj_in.bias": "unet_down_blocks.2.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.7.1.proj_in.weight": "unet_down_blocks.2.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.7.1.proj_out.bias": "unet_down_blocks.2.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.7.1.proj_out.weight": "unet_down_blocks.2.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": 
"unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "unet_down_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "unet_down_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.8.1.norm.bias": "unet_down_blocks.2.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.8.1.norm.weight": "unet_down_blocks.2.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.8.1.proj_in.bias": "unet_down_blocks.2.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.8.1.proj_in.weight": "unet_down_blocks.2.attentions.1.proj_in.weight", + 
"model.diffusion_model.input_blocks.8.1.proj_out.bias": "unet_down_blocks.2.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.8.1.proj_out.weight": "unet_down_blocks.2.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.9.0.op.bias": "unet_down_blocks.2.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.9.0.op.weight": "unet_down_blocks.2.downsamplers.0.conv.weight", + 
"model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.conv1.bias", + "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_down_blocks.3.resnets.1.time_emb_proj.weight", + "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias", + "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight", + "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.time_emb_proj.bias", + "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight", + "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias", + "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight", + "model.diffusion_model.middle_block.0.out_layers.3.bias": "unet_mid_block.resnets.0.conv2.bias", + "model.diffusion_model.middle_block.0.out_layers.3.weight": "unet_mid_block.resnets.0.conv2.weight", + "model.diffusion_model.middle_block.1.norm.bias": "unet_mid_block.attentions.0.norm.bias", + "model.diffusion_model.middle_block.1.norm.weight": "unet_mid_block.attentions.0.norm.weight", + "model.diffusion_model.middle_block.1.proj_in.bias": "unet_mid_block.attentions.0.proj_in.bias", + "model.diffusion_model.middle_block.1.proj_in.weight": "unet_mid_block.attentions.0.proj_in.weight", + "model.diffusion_model.middle_block.1.proj_out.bias": "unet_mid_block.attentions.0.proj_out.bias", + "model.diffusion_model.middle_block.1.proj_out.weight": "unet_mid_block.attentions.0.proj_out.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": "unet_mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": 
"unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": "unet_mid_block.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.middle_block.2.emb_layers.1.bias": "unet_mid_block.resnets.1.conv1.bias", + "model.diffusion_model.middle_block.2.emb_layers.1.weight": "unet_up_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.middle_block.2.in_layers.0.bias": "unet_mid_block.resnets.1.norm1.bias", + "model.diffusion_model.middle_block.2.in_layers.0.weight": "unet_mid_block.resnets.1.norm1.weight", + "model.diffusion_model.middle_block.2.in_layers.2.bias": "unet_mid_block.resnets.1.time_emb_proj.bias", + "model.diffusion_model.middle_block.2.in_layers.2.weight": "unet_mid_block.resnets.1.conv1.weight", + "model.diffusion_model.middle_block.2.out_layers.0.bias": "unet_mid_block.resnets.1.norm2.bias", + "model.diffusion_model.middle_block.2.out_layers.0.weight": "unet_mid_block.resnets.1.norm2.weight", + "model.diffusion_model.middle_block.2.out_layers.3.bias": "unet_mid_block.resnets.1.conv2.bias", + "model.diffusion_model.middle_block.2.out_layers.3.weight": "unet_mid_block.resnets.1.conv2.weight", + "model.diffusion_model.out.0.bias": "unet_conv_norm_out.bias", + "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight", + "model.diffusion_model.out.2.bias": "unet_conv_out.bias", + "model.diffusion_model.out.2.weight": "unet_conv_out.weight", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "unet_up_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias", + 
"model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "unet_up_blocks.0.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "unet_up_blocks.3.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "unet_up_blocks.3.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "unet_up_blocks.3.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "unet_up_blocks.3.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "unet_up_blocks.3.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "unet_up_blocks.3.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": "unet_up_blocks.3.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": "unet_up_blocks.3.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.10.0.skip_connection.bias": "unet_up_blocks.3.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.10.0.skip_connection.weight": "unet_up_blocks.3.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.10.1.norm.bias": "unet_up_blocks.3.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.10.1.norm.weight": "unet_up_blocks.3.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.10.1.proj_in.bias": "unet_up_blocks.3.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.10.1.proj_in.weight": "unet_up_blocks.3.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.10.1.proj_out.bias": "unet_up_blocks.3.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.10.1.proj_out.weight": "unet_up_blocks.3.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k.weight", + 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "unet_up_blocks.3.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "unet_up_blocks.3.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "unet_up_blocks.3.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": 
"unet_up_blocks.3.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "unet_up_blocks.3.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "unet_up_blocks.3.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": "unet_up_blocks.3.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": "unet_up_blocks.3.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.11.0.skip_connection.bias": "unet_up_blocks.3.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.11.0.skip_connection.weight": "unet_up_blocks.3.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.11.1.norm.bias": "unet_up_blocks.3.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.11.1.norm.weight": "unet_up_blocks.3.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.11.1.proj_in.bias": "unet_up_blocks.3.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.11.1.proj_in.weight": "unet_up_blocks.3.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.11.1.proj_out.bias": "unet_up_blocks.3.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.11.1.proj_out.weight": "unet_up_blocks.3.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": 
"unet_up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "unet_up_blocks.0.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "unet_up_blocks.0.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "unet_up_blocks.0.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.2.1.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.2.1.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "unet_up_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "unet_up_blocks.1.resnets.0.conv2.weight", + 
"model.diffusion_model.output_blocks.3.0.skip_connection.bias": "unet_up_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "unet_up_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.3.1.norm.bias": "unet_up_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.3.1.norm.weight": "unet_up_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.3.1.proj_in.bias": "unet_up_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.3.1.proj_in.weight": "unet_up_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.3.1.proj_out.bias": "unet_up_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.3.1.proj_out.weight": "unet_up_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "unet_up_blocks.1.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "unet_up_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "unet_up_blocks.1.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "unet_up_blocks.1.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.4.1.norm.bias": "unet_up_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.4.1.norm.weight": "unet_up_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.4.1.proj_in.bias": "unet_up_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.4.1.proj_in.weight": "unet_up_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.4.1.proj_out.bias": "unet_up_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.4.1.proj_out.weight": "unet_up_blocks.1.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "unet_up_blocks.1.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": "unet_up_blocks.1.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "unet_up_blocks.1.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "unet_up_blocks.1.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.5.1.norm.bias": "unet_up_blocks.1.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.5.1.norm.weight": "unet_up_blocks.1.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.5.1.proj_in.bias": 
"unet_up_blocks.1.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.5.1.proj_in.weight": "unet_up_blocks.1.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.5.1.proj_out.bias": "unet_up_blocks.1.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.5.1.proj_out.weight": "unet_up_blocks.1.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias", + 
"model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "unet_up_blocks.2.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "unet_up_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "unet_up_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "unet_up_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.6.1.norm.bias": "unet_up_blocks.2.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.6.1.norm.weight": "unet_up_blocks.2.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.6.1.proj_in.bias": "unet_up_blocks.2.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.6.1.proj_in.weight": "unet_up_blocks.2.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.6.1.proj_out.bias": "unet_up_blocks.2.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.6.1.proj_out.weight": "unet_up_blocks.2.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": 
"unet_up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "unet_up_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "unet_up_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "unet_up_blocks.2.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "unet_up_blocks.2.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.7.1.norm.bias": "unet_up_blocks.2.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.7.1.norm.weight": "unet_up_blocks.2.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.7.1.proj_in.bias": "unet_up_blocks.2.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.7.1.proj_in.weight": "unet_up_blocks.2.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.7.1.proj_out.bias": "unet_up_blocks.2.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.7.1.proj_out.weight": "unet_up_blocks.2.attentions.1.proj_out.weight", + 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "unet_up_blocks.2.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight", + 
"model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "unet_up_blocks.2.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "unet_up_blocks.2.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "unet_up_blocks.2.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "unet_up_blocks.2.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.8.1.norm.bias": "unet_up_blocks.2.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.8.1.norm.weight": "unet_up_blocks.2.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.8.1.proj_in.bias": "unet_up_blocks.2.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.8.1.proj_in.weight": "unet_up_blocks.2.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.8.1.proj_out.bias": "unet_up_blocks.2.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.8.1.proj_out.weight": "unet_up_blocks.2.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.bias", 
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.8.2.conv.bias": "unet_up_blocks.2.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.8.2.conv.weight": "unet_up_blocks.2.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "unet_up_blocks.3.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "unet_up_blocks.3.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "unet_up_blocks.3.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "unet_up_blocks.3.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "unet_up_blocks.3.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "unet_up_blocks.3.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "unet_up_blocks.3.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "unet_up_blocks.3.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": "unet_up_blocks.3.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": "unet_up_blocks.3.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.9.0.skip_connection.bias": "unet_up_blocks.3.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.9.0.skip_connection.weight": "unet_up_blocks.3.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.9.1.norm.bias": "unet_up_blocks.3.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.9.1.norm.weight": "unet_up_blocks.3.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.9.1.proj_in.bias": "unet_up_blocks.3.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.9.1.proj_in.weight": "unet_up_blocks.3.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.9.1.proj_out.bias": "unet_up_blocks.3.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.9.1.proj_out.weight": "unet_up_blocks.3.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": 
"unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.3.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias", + "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight", + "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias", + "model.diffusion_model.time_embed.2.weight": "unet_mid_block.resnets.1.time_emb_proj.weight" + }, + "ldm_diffusers_shape_map": { + "first_stage_model.decoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + 
"first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [ + [ + 128, + 256, + 1, + 1 + ], + [ + 128, + 256, + 1, + 1 + ] + ], + "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [ + [ + 256, + 512, + 1, + 1 + ], + [ + 256, + 512, + 1, + 1 + ] + ], + "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [ + [ + 256, + 128, + 1, + 1 + ], + [ + 256, + 128, + 1, + 1 + ] + ], + "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [ + [ + 512, + 256, + 1, + 1 + ], + [ + 512, + 256, + 1, + 1 + ] + ], + "first_stage_model.encoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.post_quant_conv.weight": [ + [ + 4, + 4, + 1, + 1 + ], + [ + 4, + 4, + 1, + 1 + ] + ], + "first_stage_model.quant_conv.weight": [ + [ + 8, + 8, + 1, + 1 + ], + [ + 8, + 8, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [ + [ + 640, + 320, + 1, + 1 + ], + [ + 640, + 320, + 1, + 1 + ] + ], + "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [ + [ + 1280, + 640, + 1, + 1 + ], + [ + 1280, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [ + [ + 320, + 640, + 1, + 1 + ], + [ + 320, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [ + [ + 320, + 640, + 1, + 1 + ], + [ + 320, + 640, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [ + [ + 1280, + 2560, + 1, + 1 + ], + [ + 1280, + 2560, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [ + [ + 1280, + 1920, + 1, + 1 + ], + [ + 1280, + 1920, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [ + [ + 640, + 1920, + 1, + 1 + ], + [ + 640, + 1920, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [ + [ + 640, + 1280, + 1, + 1 + ], + [ + 640, + 1280, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [ + [ + 640, + 960, + 1, + 1 + ], + [ + 640, + 960, + 1, + 1 + ] + ], + "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [ + [ + 320, + 960, + 1, + 1 + ], + [ + 320, + 960, + 1, + 1 + ] + ] + }, + "ldm_diffusers_operator_map": { + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.0.self_attn.q_proj.bias", + "te_text_model.encoder.layers.0.self_attn.k_proj.bias", + "te_text_model.encoder.layers.0.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": { + "cat": [ + 
"te_text_model.encoder.layers.0.self_attn.q_proj.weight", + "te_text_model.encoder.layers.0.self_attn.k_proj.weight", + "te_text_model.encoder.layers.0.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.0.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.1.self_attn.q_proj.bias", + "te_text_model.encoder.layers.1.self_attn.k_proj.bias", + "te_text_model.encoder.layers.1.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.1.self_attn.q_proj.weight", + "te_text_model.encoder.layers.1.self_attn.k_proj.weight", + "te_text_model.encoder.layers.1.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.1.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.10.self_attn.q_proj.bias", + "te_text_model.encoder.layers.10.self_attn.k_proj.bias", + "te_text_model.encoder.layers.10.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.10.self_attn.q_proj.weight", + "te_text_model.encoder.layers.10.self_attn.k_proj.weight", + "te_text_model.encoder.layers.10.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.10.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.11.self_attn.q_proj.bias", + "te_text_model.encoder.layers.11.self_attn.k_proj.bias", + "te_text_model.encoder.layers.11.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.11.self_attn.q_proj.weight", + "te_text_model.encoder.layers.11.self_attn.k_proj.weight", + "te_text_model.encoder.layers.11.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.11.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.12.self_attn.q_proj.bias", + "te_text_model.encoder.layers.12.self_attn.k_proj.bias", + "te_text_model.encoder.layers.12.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.12.self_attn.q_proj.weight", + "te_text_model.encoder.layers.12.self_attn.k_proj.weight", + "te_text_model.encoder.layers.12.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.12.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.13.self_attn.q_proj.bias", + "te_text_model.encoder.layers.13.self_attn.k_proj.bias", + "te_text_model.encoder.layers.13.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.13.self_attn.q_proj.weight", + "te_text_model.encoder.layers.13.self_attn.k_proj.weight", + "te_text_model.encoder.layers.13.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.13.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.14.self_attn.q_proj.bias", + "te_text_model.encoder.layers.14.self_attn.k_proj.bias", + 
"te_text_model.encoder.layers.14.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.14.self_attn.q_proj.weight", + "te_text_model.encoder.layers.14.self_attn.k_proj.weight", + "te_text_model.encoder.layers.14.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.14.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.15.self_attn.q_proj.bias", + "te_text_model.encoder.layers.15.self_attn.k_proj.bias", + "te_text_model.encoder.layers.15.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.15.self_attn.q_proj.weight", + "te_text_model.encoder.layers.15.self_attn.k_proj.weight", + "te_text_model.encoder.layers.15.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.15.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.16.self_attn.q_proj.bias", + "te_text_model.encoder.layers.16.self_attn.k_proj.bias", + "te_text_model.encoder.layers.16.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.16.self_attn.q_proj.weight", + "te_text_model.encoder.layers.16.self_attn.k_proj.weight", + "te_text_model.encoder.layers.16.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.16.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.17.self_attn.q_proj.bias", + "te_text_model.encoder.layers.17.self_attn.k_proj.bias", + "te_text_model.encoder.layers.17.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.17.self_attn.q_proj.weight", + "te_text_model.encoder.layers.17.self_attn.k_proj.weight", + "te_text_model.encoder.layers.17.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.17.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.18.self_attn.q_proj.bias", + "te_text_model.encoder.layers.18.self_attn.k_proj.bias", + "te_text_model.encoder.layers.18.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.18.self_attn.q_proj.weight", + "te_text_model.encoder.layers.18.self_attn.k_proj.weight", + "te_text_model.encoder.layers.18.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.18.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.19.self_attn.q_proj.bias", + "te_text_model.encoder.layers.19.self_attn.k_proj.bias", + "te_text_model.encoder.layers.19.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.19.self_attn.q_proj.weight", + "te_text_model.encoder.layers.19.self_attn.k_proj.weight", + "te_text_model.encoder.layers.19.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.19.self_attn.MERGED.weight" + }, + 
"cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.2.self_attn.q_proj.bias", + "te_text_model.encoder.layers.2.self_attn.k_proj.bias", + "te_text_model.encoder.layers.2.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.2.self_attn.q_proj.weight", + "te_text_model.encoder.layers.2.self_attn.k_proj.weight", + "te_text_model.encoder.layers.2.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.2.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.20.self_attn.q_proj.bias", + "te_text_model.encoder.layers.20.self_attn.k_proj.bias", + "te_text_model.encoder.layers.20.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.20.self_attn.q_proj.weight", + "te_text_model.encoder.layers.20.self_attn.k_proj.weight", + "te_text_model.encoder.layers.20.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.20.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.21.self_attn.q_proj.bias", + "te_text_model.encoder.layers.21.self_attn.k_proj.bias", + "te_text_model.encoder.layers.21.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.21.self_attn.q_proj.weight", + "te_text_model.encoder.layers.21.self_attn.k_proj.weight", + "te_text_model.encoder.layers.21.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.21.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.22.self_attn.q_proj.bias", + "te_text_model.encoder.layers.22.self_attn.k_proj.bias", + "te_text_model.encoder.layers.22.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.22.self_attn.q_proj.weight", + "te_text_model.encoder.layers.22.self_attn.k_proj.weight", + "te_text_model.encoder.layers.22.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.22.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.3.self_attn.q_proj.bias", + "te_text_model.encoder.layers.3.self_attn.k_proj.bias", + "te_text_model.encoder.layers.3.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.3.self_attn.q_proj.weight", + "te_text_model.encoder.layers.3.self_attn.k_proj.weight", + "te_text_model.encoder.layers.3.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.3.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.4.self_attn.q_proj.bias", + "te_text_model.encoder.layers.4.self_attn.k_proj.bias", + "te_text_model.encoder.layers.4.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.4.self_attn.q_proj.weight", + "te_text_model.encoder.layers.4.self_attn.k_proj.weight", + 
"te_text_model.encoder.layers.4.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.4.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.5.self_attn.q_proj.bias", + "te_text_model.encoder.layers.5.self_attn.k_proj.bias", + "te_text_model.encoder.layers.5.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.5.self_attn.q_proj.weight", + "te_text_model.encoder.layers.5.self_attn.k_proj.weight", + "te_text_model.encoder.layers.5.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.5.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.6.self_attn.q_proj.bias", + "te_text_model.encoder.layers.6.self_attn.k_proj.bias", + "te_text_model.encoder.layers.6.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.6.self_attn.q_proj.weight", + "te_text_model.encoder.layers.6.self_attn.k_proj.weight", + "te_text_model.encoder.layers.6.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.6.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.7.self_attn.q_proj.bias", + "te_text_model.encoder.layers.7.self_attn.k_proj.bias", + "te_text_model.encoder.layers.7.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.7.self_attn.q_proj.weight", + "te_text_model.encoder.layers.7.self_attn.k_proj.weight", + "te_text_model.encoder.layers.7.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.7.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.8.self_attn.q_proj.bias", + "te_text_model.encoder.layers.8.self_attn.k_proj.bias", + "te_text_model.encoder.layers.8.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.8.self_attn.q_proj.weight", + "te_text_model.encoder.layers.8.self_attn.k_proj.weight", + "te_text_model.encoder.layers.8.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.8.self_attn.MERGED.weight" + }, + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": { + "cat": [ + "te_text_model.encoder.layers.9.self_attn.q_proj.bias", + "te_text_model.encoder.layers.9.self_attn.k_proj.bias", + "te_text_model.encoder.layers.9.self_attn.v_proj.bias" + ] + }, + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": { + "cat": [ + "te_text_model.encoder.layers.9.self_attn.q_proj.weight", + "te_text_model.encoder.layers.9.self_attn.k_proj.weight", + "te_text_model.encoder.layers.9.self_attn.v_proj.weight" + ], + "target": "te_text_model.encoder.layers.9.self_attn.MERGED.weight" + } + }, + "diffusers_ldm_operator_map": { + "te_text_model.encoder.layers.0.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.0.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias", + "1024:2048, :" + ] + }, + 
"te_text_model.encoder.layers.0.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.0.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.0.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.0.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.1.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.10.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.11.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.11.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.11.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.11.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.11.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight", + "1024:2048, :" 
+ ] + }, + "te_text_model.encoder.layers.11.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.12.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.13.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.14.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.k_proj.bias": { + "slice": [ + 
"cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.15.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.16.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.17.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.18.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.18.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.18.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.18.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight", + "0:1024, :" + ] + }, + 
"te_text_model.encoder.layers.18.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.18.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.19.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.2.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.20.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias", + 
"0:1024, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.21.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.22.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.3.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.q_proj.weight": { + "slice": [ + 
"cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.4.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.5.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.6.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.7.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.q_proj.bias": { + "slice": [ + 
"cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.8.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.q_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.k_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.v_proj.bias": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias", + "2048:, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.q_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight", + "0:1024, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.k_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight", + "1024:2048, :" + ] + }, + "te_text_model.encoder.layers.9.self_attn.v_proj.weight": { + "slice": [ + "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight", + "2048:, :" + ] + } + } +} \ No newline at end of file diff --git a/toolkit/keymaps/stable_diffusion_sd2_ldm_base.safetensors b/toolkit/keymaps/stable_diffusion_sd2_ldm_base.safetensors new file mode 100644 index 0000000..59e937f Binary files /dev/null and b/toolkit/keymaps/stable_diffusion_sd2_ldm_base.safetensors differ diff --git a/toolkit/keymaps/stable_diffusion_sd2_unmatched.json b/toolkit/keymaps/stable_diffusion_sd2_unmatched.json new file mode 100644 index 0000000..3814d87 --- /dev/null +++ b/toolkit/keymaps/stable_diffusion_sd2_unmatched.json @@ -0,0 +1,200 @@ +{ + "ldm": { + "alphas_cumprod": { + "shape": [ + 1000 + ], + "min": 0.00466156005859375, + "max": 0.9990234375 + }, + "alphas_cumprod_prev": { + "shape": [ + 1000 + ], + "min": 0.0047149658203125, + "max": 1.0 + }, + "betas": { + "shape": [ + 1000 + ], + "min": 0.0008502006530761719, + "max": 0.01200103759765625 + }, + "cond_stage_model.model.logit_scale": { + "shape": [], + "min": 4.60546875, + "max": 4.60546875 + }, + "cond_stage_model.model.text_projection": { + "shape": [ + 1024, + 1024 + ], + "min": -0.109130859375, + "max": 0.09271240234375 + }, + "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_bias": { + "shape": [ + 3072 + ], + "min": -2.525390625, + "max": 2.591796875 + }, + "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_weight": { + "shape": [ + 3072, + 1024 + ], + "min": -0.12261962890625, + "max": 0.1258544921875 + }, + "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.bias": { + "shape": [ + 1024 + ], + "min": -0.422607421875, + "max": 1.17578125 + }, + 
"cond_stage_model.model.transformer.resblocks.23.attn.out_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "min": -0.0738525390625, + "max": 0.08673095703125 + }, + "cond_stage_model.model.transformer.resblocks.23.ln_1.bias": { + "shape": [ + 1024 + ], + "min": -3.392578125, + "max": 0.90625 + }, + "cond_stage_model.model.transformer.resblocks.23.ln_1.weight": { + "shape": [ + 1024 + ], + "min": 0.379638671875, + "max": 2.02734375 + }, + "cond_stage_model.model.transformer.resblocks.23.ln_2.bias": { + "shape": [ + 1024 + ], + "min": -0.833984375, + "max": 2.525390625 + }, + "cond_stage_model.model.transformer.resblocks.23.ln_2.weight": { + "shape": [ + 1024 + ], + "min": 1.17578125, + "max": 2.037109375 + }, + "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.bias": { + "shape": [ + 4096 + ], + "min": -1.619140625, + "max": 0.5595703125 + }, + "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.weight": { + "shape": [ + 4096, + 1024 + ], + "min": -0.08953857421875, + "max": 0.13232421875 + }, + "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.bias": { + "shape": [ + 1024 + ], + "min": -1.8662109375, + "max": 0.74658203125 + }, + "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.weight": { + "shape": [ + 1024, + 4096 + ], + "min": -0.12939453125, + "max": 0.1009521484375 + }, + "log_one_minus_alphas_cumprod": { + "shape": [ + 1000 + ], + "min": -7.0703125, + "max": -0.004669189453125 + }, + "model_ema.decay": { + "shape": [], + "min": 1.0, + "max": 1.0 + }, + "model_ema.num_updates": { + "shape": [], + "min": 219996, + "max": 219996 + }, + "posterior_log_variance_clipped": { + "shape": [ + 1000 + ], + "min": -46.0625, + "max": -4.421875 + }, + "posterior_mean_coef1": { + "shape": [ + 1000 + ], + "min": 0.000827789306640625, + "max": 1.0 + }, + "posterior_mean_coef2": { + "shape": [ + 1000 + ], + "min": 0.0, + "max": 0.99560546875 + }, + "posterior_variance": { + "shape": [ + 1000 + ], + "min": 0.0, + "max": 0.01200103759765625 + }, + "sqrt_alphas_cumprod": { + "shape": [ + 1000 + ], + "min": 0.0682373046875, + "max": 0.99951171875 + }, + "sqrt_one_minus_alphas_cumprod": { + "shape": [ + 1000 + ], + "min": 0.0291595458984375, + "max": 0.99755859375 + }, + "sqrt_recip_alphas_cumprod": { + "shape": [ + 1000 + ], + "min": 1.0, + "max": 14.6484375 + }, + "sqrt_recipm1_alphas_cumprod": { + "shape": [ + 1000 + ], + "min": 0.0291595458984375, + "max": 14.6171875 + } + }, + "diffusers": {} +} \ No newline at end of file diff --git a/toolkit/saving.py b/toolkit/saving.py index 44e295e..aab0d4b 100644 --- a/toolkit/saving.py +++ b/toolkit/saving.py @@ -56,7 +56,7 @@ def convert_state_dict_to_ldm_with_mapping( tensor_to_slice = diffusers_state_dict[ldm_diffusers_operator_map[ldm_key]['slice'][0]] slice_text = diffusers_state_dict[ldm_diffusers_operator_map[ldm_key]['slice'][1]] converted_state_dict[ldm_key] = tensor_to_slice[get_slices_from_string(slice_text)].detach().to(device, - dtype=dtype) + dtype=dtype) # process the rest of the keys for ldm_key in ldm_diffusers_keymap: @@ -71,6 +71,35 @@ def convert_state_dict_to_ldm_with_mapping( return converted_state_dict +def get_ldm_state_dict_from_diffusers( + state_dict: 'OrderedDict', + sd_version: Literal['1', '2', 'sdxl'] = '2', + device='cpu', + dtype=get_torch_dtype('fp32'), +): + if sd_version == '1': + base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sd1_ldm_base.safetensors') + mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sd1.json') + elif sd_version == '2': + base_path = 
os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sd2_ldm_base.safetensors') + mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sd2.json') + elif sd_version == 'sdxl': + # load our base + base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl_ldm_base.safetensors') + mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl.json') + else: + raise ValueError(f"Invalid sd_version {sd_version}") + + # convert the state dict + return convert_state_dict_to_ldm_with_mapping( + state_dict, + mapping_path, + base_path, + device=device, + dtype=dtype + ) + + def save_ldm_model_from_diffusers( sd: 'StableDiffusion', output_file: str, @@ -78,21 +107,13 @@ def save_ldm_model_from_diffusers( save_dtype=get_torch_dtype('fp16'), sd_version: Literal['1', '2', 'sdxl'] = '2' ): - if sd_version != 'sdxl': - # not supported yet - raise NotImplementedError("Only SDXL is supported at this time with this method") - # load our base - base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl_ldm_base.safetensors') - mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_sdxl.json') - - # convert the state dict - converted_state_dict = convert_state_dict_to_ldm_with_mapping( + converted_state_dict = get_ldm_state_dict_from_diffusers( sd.state_dict(), - mapping_path, - base_path, + sd_version, device='cpu', dtype=save_dtype ) + # make sure parent folder exists os.makedirs(os.path.dirname(output_file), exist_ok=True) save_file(converted_state_dict, output_file, metadata=meta) diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py index b831cdd..d74e4c7 100644 --- a/toolkit/stable_diffusion_model.py +++ b/toolkit/stable_diffusion_model.py @@ -752,61 +752,28 @@ class StableDiffusion: for i, encoder in enumerate(self.text_encoder): for k, v in encoder.state_dict().items(): new_key = k if k.startswith( - f"{SD_PREFIX_TEXT_ENCODER}{i}") else f"{SD_PREFIX_TEXT_ENCODER}{i}_{k}" + f"{SD_PREFIX_TEXT_ENCODER}{i}_") else f"{SD_PREFIX_TEXT_ENCODER}{i}_{k}" state_dict[new_key] = v else: for k, v in self.text_encoder.state_dict().items(): - new_key = k if k.startswith(f"{SD_PREFIX_TEXT_ENCODER}") else f"{SD_PREFIX_TEXT_ENCODER}_{k}" + new_key = k if k.startswith(f"{SD_PREFIX_TEXT_ENCODER}_") else f"{SD_PREFIX_TEXT_ENCODER}_{k}" state_dict[new_key] = v if unet: for k, v in self.unet.state_dict().items(): - new_key = k if k.startswith(f"{SD_PREFIX_UNET}") else f"{SD_PREFIX_UNET}_{k}" + new_key = k if k.startswith(f"{SD_PREFIX_UNET}_") else f"{SD_PREFIX_UNET}_{k}" state_dict[new_key] = v return state_dict def save(self, output_file: str, meta: OrderedDict, save_dtype=get_torch_dtype('fp16'), logit_scale=None): - state_dict = {} - # prepare metadata - meta = get_meta_for_safetensors(meta) - - def update_sd(prefix, sd): - for k, v in sd.items(): - key = prefix + k - v = v.detach().clone() - state_dict[key] = v.to("cpu", dtype=get_torch_dtype(save_dtype)) - # make sure there are not nan values - if torch.isnan(state_dict[key]).any(): - raise ValueError(f"NaN value in state dict: {key}") - - # todo see what logit scale is + version_string = '1' + if self.is_v2: + version_string = '2' if self.is_xl: - save_ldm_model_from_diffusers( - sd=self, - output_file=output_file, - meta=meta, - save_dtype=save_dtype, - sd_version='sdxl', - ) - - else: - # Convert the UNet model - unet_state_dict = convert_unet_state_dict_to_sd(self.is_v2, self.unet.state_dict()) - update_sd("model.diffusion_model.", unet_state_dict) - - # Convert the text encoder model - if self.is_v2: - make_dummy = True - 
text_enc_dict = convert_text_encoder_state_dict_to_sd_v2(self.text_encoder.state_dict(), make_dummy) - update_sd("cond_stage_model.model.", text_enc_dict) - else: - text_enc_dict = self.text_encoder.state_dict() - update_sd("cond_stage_model.transformer.", text_enc_dict) - - # Convert the VAE - if self.vae is not None: - vae_dict = model_util.convert_vae_state_dict(self.vae.state_dict()) - update_sd("first_stage_model.", vae_dict) - - # make sure parent folder exists - os.makedirs(os.path.dirname(output_file), exist_ok=True) - save_file(state_dict, output_file, metadata=meta) + version_string = 'sdxl' + save_ldm_model_from_diffusers( + sd=self, + output_file=output_file, + meta=meta, + save_dtype=save_dtype, + sd_version=version_string, + )
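Editor's note on the operator maps above (an illustrative sketch, not part of the diff): the "cat" entries rebuild an LDM in_proj tensor by concatenating the diffusers q/k/v projections along dim 0, and the "slice" entries invert that by cutting the merged tensor apart with slice strings such as "0:1024, :" (1024 is d_model for the SD2 text encoder, so in_proj_weight is [3072, 1024]). The hunk in toolkit/saving.py resolves those strings through get_slices_from_string, whose implementation is not shown in this diff; the parser below is an assumed minimal equivalent, for illustration only. Note also that the bias entries carry the same two-axis slice strings even though in_proj_bias is one-dimensional, so the real helper presumably ignores axes that do not apply; that detail is omitted here.

import torch

# Hypothetical stand-in for toolkit.saving.get_slices_from_string;
# the repo's actual implementation may differ.
def get_slices_from_string(slice_text: str) -> tuple:
    # "0:1024, :" -> (slice(0, 1024), slice(None, None))
    dims = []
    for part in slice_text.split(","):
        start, _, stop = part.strip().partition(":")
        dims.append(slice(int(start) if start else None,
                          int(stop) if stop else None))
    return tuple(dims)

# Round trip with SD2 text-encoder shapes: merged in_proj_weight is [3 * 1024, 1024].
in_proj_weight = torch.randn(3072, 1024)

# "slice" direction (diffusers_ldm_operator_map): recover q/k/v from the merged tensor.
q = in_proj_weight[get_slices_from_string("0:1024, :")]
k = in_proj_weight[get_slices_from_string("1024:2048, :")]
v = in_proj_weight[get_slices_from_string("2048:, :")]

# "cat" direction (ldm_diffusers_operator_map): rebuild the merged tensor exactly.
assert torch.equal(torch.cat([q, k, v], dim=0), in_proj_weight)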