From 14ff51ceb4b961f0d7eddb489261c06601e5b286 Mon Sep 17 00:00:00 2001 From: Jaret Burkett Date: Tue, 29 Aug 2023 12:31:19 -0600 Subject: [PATCH] Fixed issues with converting and saving models. Cleaned keys. Improved cycle load/save testing. --- jobs/process/BaseSDTrainProcess.py | 24 +- testing/generate_weight_mappings.py | 32 +- toolkit/config_modules.py | 2 + toolkit/keymaps/stable_diffusion_sd1.json | 728 ----------- toolkit/keymaps/stable_diffusion_sd2.json | 395 +----- toolkit/keymaps/stable_diffusion_sdxl.json | 1070 ++++++++++------- .../stable_diffusion_sdxl_unmatched.json | 16 +- toolkit/saving.py | 24 + toolkit/stable_diffusion_model.py | 61 +- 9 files changed, 784 insertions(+), 1568 deletions(-) diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py index 1d48a010..583b9264 100644 --- a/jobs/process/BaseSDTrainProcess.py +++ b/jobs/process/BaseSDTrainProcess.py @@ -481,22 +481,14 @@ class BaseSDTrainProcess(BaseTrainProcess): params = self.embedding.get_trainable_params() else: - params = [] - # assume dreambooth/finetune - if self.train_config.train_text_encoder: - if self.sd.is_xl: - for te in text_encoder: - te.requires_grad_(True) - te.train() - params += te.parameters() - else: - text_encoder.requires_grad_(True) - text_encoder.train() - params += text_encoder.parameters() - if self.train_config.train_unet: - unet.requires_grad_(True) - unet.train() - params += unet.parameters() + params = self.sd.prepare_optimizer_params( + vae=False, + unet=self.train_config.train_unet, + text_encoder=self.train_config.train_text_encoder, + text_encoder_lr=self.train_config.text_encoder_lr, + unet_lr=self.train_config.unet_lr, + default_lr=self.train_config.lr ) ### HOOK ### params = self.hook_add_extra_train_params(params) diff --git a/testing/generate_weight_mappings.py b/testing/generate_weight_mappings.py index 17425a58..4f407847 100644 --- a/testing/generate_weight_mappings.py +++ b/testing/generate_weight_mappings.py @@ -95,6 +95,8 @@ matched_diffusers_keys = [] error_margin = 1e-4 +tmp_merge_key = "TMP___MERGE" + te_suffix = '' proj_pattern_weight = None proj_pattern_bias = None @@ -139,7 +141,7 @@ if args.sdxl or args.sd2: f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight") # make diffusers convertable_dict diffusers_state_dict[ - f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight"] = new_val + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.{tmp_merge_key}.weight"] = new_val # add operator ldm_operator_map[ldm_key] = { @@ -148,7 +150,6 @@ if args.sdxl or args.sd2: f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.weight", f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.weight", ], - "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.weight" } # text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :] @@ -189,7 +190,7 @@ if args.sdxl or args.sd2: matched_diffusers_keys.append(f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias") # make diffusers convertable_dict diffusers_state_dict[ - f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias"] = new_val + f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.{tmp_merge_key}.bias"] = new_val # add operator ldm_operator_map[ldm_key] = { @@ -198,7 +199,6 @@ if args.sdxl or args.sd2: f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.k_proj.bias",
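[Editor's note on the BaseSDTrainProcess hunk above: prepare_optimizer_params lives in toolkit/stable_diffusion_model.py, whose diff is not shown in full in this excerpt. The sketch below is only a reconstruction from the call site and the removed inline code; the signature and attribute names are assumptions, not the toolkit's actual implementation. The point is that it returns per-module optimizer param groups, each with its own learning rate:

    def prepare_optimizer_params(self, vae=False, unet=True, text_encoder=True,
                                 text_encoder_lr=None, unet_lr=None,
                                 default_lr=1e-6):
        # hedged sketch: build optimizer param groups for the requested
        # modules, falling back to default_lr when a module lr is unset
        param_groups = []
        if text_encoder:
            # SDXL carries two text encoders, so normalize to a list first
            encoders = self.text_encoder if isinstance(self.text_encoder, list) else [self.text_encoder]
            for te in encoders:
                te.requires_grad_(True)
                te.train()
                param_groups.append({
                    "params": list(te.parameters()),
                    "lr": text_encoder_lr if text_encoder_lr is not None else default_lr,
                })
        if unet:
            self.unet.requires_grad_(True)
            self.unet.train()
            param_groups.append({
                "params": list(self.unet.parameters()),
                "lr": unet_lr if unet_lr is not None else default_lr,
            })
        return param_groups

]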
f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.v_proj.bias", ], - # "target": f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.MERGED.bias" } # add diffusers operators @@ -359,13 +359,35 @@ for key in unmatched_ldm_keys: save_file(remaining_ldm_values, os.path.join(KEYMAPS_FOLDER, f'{name}_ldm_base.safetensors')) print(f'Saved remaining ldm values to {os.path.join(KEYMAPS_FOLDER, f"{name}_ldm_base.safetensors")}') +# do cleanup of some left overs and bugs +to_remove = [] +for ldm_key, diffusers_key in ldm_diffusers_keymap.items(): + # get rid of tmp merge keys used to slicing + if tmp_merge_key in diffusers_key or tmp_merge_key in ldm_key: + to_remove.append(ldm_key) + +for key in to_remove: + del ldm_diffusers_keymap[key] + +to_remove = [] +# remove identical shape mappings. Not sure why they exist but they do +for ldm_key, shape_list in ldm_diffusers_shape_map.items(): + # remove identical shape mappings. Not sure why they exist but they do + # convert to json string to make it easier to compare + ldm_shape = json.dumps(shape_list[0]) + diffusers_shape = json.dumps(shape_list[1]) + if ldm_shape == diffusers_shape: + to_remove.append(ldm_key) + +for key in to_remove: + del ldm_diffusers_shape_map[key] + dest_path = os.path.join(KEYMAPS_FOLDER, f'{name}.json') save_obj = OrderedDict() save_obj["ldm_diffusers_keymap"] = ldm_diffusers_keymap save_obj["ldm_diffusers_shape_map"] = ldm_diffusers_shape_map save_obj["ldm_diffusers_operator_map"] = ldm_operator_map save_obj["diffusers_ldm_operator_map"] = diffusers_operator_map - with open(dest_path, 'w') as f: f.write(json.dumps(save_obj, indent=4)) diff --git a/toolkit/config_modules.py b/toolkit/config_modules.py index a06b6a94..58e2c26e 100644 --- a/toolkit/config_modules.py +++ b/toolkit/config_modules.py @@ -67,6 +67,8 @@ class TrainConfig: self.noise_scheduler = kwargs.get('noise_scheduler', 'ddpm') self.steps: int = kwargs.get('steps', 1000) self.lr = kwargs.get('lr', 1e-6) + self.unet_lr = kwargs.get('unet_lr', self.lr) + self.text_encoder_lr = kwargs.get('text_encoder_lr', self.lr) self.optimizer = kwargs.get('optimizer', 'adamw') self.lr_scheduler = kwargs.get('lr_scheduler', 'constant') self.max_denoising_steps: int = kwargs.get('max_denoising_steps', 50) diff --git a/toolkit/keymaps/stable_diffusion_sd1.json b/toolkit/keymaps/stable_diffusion_sd1.json index bc00f08d..83d83fef 100644 --- a/toolkit/keymaps/stable_diffusion_sd1.json +++ b/toolkit/keymaps/stable_diffusion_sd1.json @@ -1180,62 +1180,6 @@ 512 ] ], - "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [ - [ - 128, - 256, - 1, - 1 - ], - [ - 128, - 256, - 1, - 1 - ] - ], - "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [ - [ - 256, - 512, - 1, - 1 - ], - [ - 256, - 512, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [ - [ - 256, - 128, - 1, - 1 - ], - [ - 256, - 128, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [ - [ - 512, - 256, - 1, - 1 - ], - [ - 512, - 256, - 1, - 1 - ] - ], "first_stage_model.encoder.mid.attn_1.k.weight": [ [ 512, @@ -1283,678 +1227,6 @@ 512, 512 ] - ], - "first_stage_model.post_quant_conv.weight": [ - [ - 4, - 4, - 1, - 1 - ], - [ - 4, - 4, - 1, - 1 - ] - ], - "first_stage_model.quant_conv.weight": [ - [ - 8, - 8, - 1, - 1 - ], - [ - 8, - 8, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.1.1.proj_in.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - 
"model.diffusion_model.input_blocks.1.1.proj_out.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.2.1.proj_in.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.2.1.proj_out.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [ - [ - 640, - 320, - 1, - 1 - ], - [ - 640, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.4.1.proj_in.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.4.1.proj_out.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.5.1.proj_in.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.5.1.proj_out.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [ - [ - 1280, - 640, - 1, - 1 - ], - [ - 1280, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.7.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.7.1.proj_out.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.8.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.8.1.proj_out.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.middle_block.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.middle_block.1.proj_out.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.10.1.proj_in.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.10.1.proj_out.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.11.1.proj_in.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.11.1.proj_out.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.3.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.3.1.proj_out.weight": [ - [ - 
1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.4.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.4.1.proj_out.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [ - [ - 1280, - 1920, - 1, - 1 - ], - [ - 1280, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.5.1.proj_in.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.5.1.proj_out.weight": [ - [ - 1280, - 1280, - 1, - 1 - ], - [ - 1280, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [ - [ - 640, - 1920, - 1, - 1 - ], - [ - 640, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.6.1.proj_in.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.6.1.proj_out.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [ - [ - 640, - 1280, - 1, - 1 - ], - [ - 640, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.7.1.proj_in.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.7.1.proj_out.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [ - [ - 640, - 960, - 1, - 1 - ], - [ - 640, - 960, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.8.1.proj_in.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.8.1.proj_out.weight": [ - [ - 640, - 640, - 1, - 1 - ], - [ - 640, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [ - [ - 320, - 960, - 1, - 1 - ], - [ - 320, - 960, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.9.1.proj_in.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.9.1.proj_out.weight": [ - [ - 320, - 320, - 1, - 1 - ], - [ - 320, - 320, - 1, - 1 - ] ] }, "ldm_diffusers_operator_map": {}, diff --git a/toolkit/keymaps/stable_diffusion_sd2.json b/toolkit/keymaps/stable_diffusion_sd2.json index 59bc1511..e5390717 100644 --- a/toolkit/keymaps/stable_diffusion_sd2.json +++ b/toolkit/keymaps/stable_diffusion_sd2.json @@ -4,8 +4,6 @@ "cond_stage_model.model.ln_final.weight": "te_text_model.final_layer_norm.weight", "cond_stage_model.model.positional_embedding": "te_text_model.embeddings.position_embedding.weight", "cond_stage_model.model.token_embedding.weight": "te_text_model.embeddings.token_embedding.weight", - "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": "te_text_model.encoder.layers.0.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": "te_text_model.encoder.layers.0.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": "te_text_model.encoder.layers.0.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": 
"te_text_model.encoder.layers.0.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": "te_text_model.encoder.layers.0.layer_norm1.bias", @@ -16,8 +14,6 @@ "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": "te_text_model.encoder.layers.0.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": "te_text_model.encoder.layers.0.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": "te_text_model.encoder.layers.0.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": "te_text_model.encoder.layers.1.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": "te_text_model.encoder.layers.1.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": "te_text_model.encoder.layers.1.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": "te_text_model.encoder.layers.1.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": "te_text_model.encoder.layers.1.layer_norm1.bias", @@ -28,8 +24,6 @@ "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": "te_text_model.encoder.layers.1.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": "te_text_model.encoder.layers.1.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": "te_text_model.encoder.layers.1.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": "te_text_model.encoder.layers.10.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": "te_text_model.encoder.layers.10.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": "te_text_model.encoder.layers.10.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": "te_text_model.encoder.layers.10.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": "te_text_model.encoder.layers.10.layer_norm1.bias", @@ -40,8 +34,6 @@ "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": "te_text_model.encoder.layers.10.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": "te_text_model.encoder.layers.10.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": "te_text_model.encoder.layers.10.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": "te_text_model.encoder.layers.11.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": "te_text_model.encoder.layers.11.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": "te_text_model.encoder.layers.11.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": "te_text_model.encoder.layers.11.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": "te_text_model.encoder.layers.11.layer_norm1.bias", @@ -52,8 +44,6 @@ "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": "te_text_model.encoder.layers.11.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": "te_text_model.encoder.layers.11.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": "te_text_model.encoder.layers.11.mlp.fc2.weight", - 
"cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": "te_text_model.encoder.layers.12.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": "te_text_model.encoder.layers.12.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": "te_text_model.encoder.layers.12.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": "te_text_model.encoder.layers.12.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": "te_text_model.encoder.layers.12.layer_norm1.bias", @@ -64,8 +54,6 @@ "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": "te_text_model.encoder.layers.12.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": "te_text_model.encoder.layers.12.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": "te_text_model.encoder.layers.12.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": "te_text_model.encoder.layers.13.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": "te_text_model.encoder.layers.13.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": "te_text_model.encoder.layers.13.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": "te_text_model.encoder.layers.13.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": "te_text_model.encoder.layers.13.layer_norm1.bias", @@ -76,8 +64,6 @@ "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": "te_text_model.encoder.layers.13.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": "te_text_model.encoder.layers.13.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": "te_text_model.encoder.layers.13.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": "te_text_model.encoder.layers.14.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": "te_text_model.encoder.layers.14.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": "te_text_model.encoder.layers.14.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": "te_text_model.encoder.layers.14.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": "te_text_model.encoder.layers.14.layer_norm1.bias", @@ -88,8 +74,6 @@ "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": "te_text_model.encoder.layers.14.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": "te_text_model.encoder.layers.14.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": "te_text_model.encoder.layers.14.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": "te_text_model.encoder.layers.15.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": "te_text_model.encoder.layers.15.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": "te_text_model.encoder.layers.15.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": "te_text_model.encoder.layers.15.self_attn.out_proj.weight", 
"cond_stage_model.model.transformer.resblocks.15.ln_1.bias": "te_text_model.encoder.layers.15.layer_norm1.bias", @@ -100,8 +84,6 @@ "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": "te_text_model.encoder.layers.15.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": "te_text_model.encoder.layers.15.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": "te_text_model.encoder.layers.15.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": "te_text_model.encoder.layers.16.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": "te_text_model.encoder.layers.16.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": "te_text_model.encoder.layers.16.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": "te_text_model.encoder.layers.16.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": "te_text_model.encoder.layers.16.layer_norm1.bias", @@ -112,8 +94,6 @@ "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": "te_text_model.encoder.layers.16.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": "te_text_model.encoder.layers.16.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": "te_text_model.encoder.layers.16.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": "te_text_model.encoder.layers.17.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": "te_text_model.encoder.layers.17.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": "te_text_model.encoder.layers.17.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": "te_text_model.encoder.layers.17.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": "te_text_model.encoder.layers.17.layer_norm1.bias", @@ -124,8 +104,6 @@ "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": "te_text_model.encoder.layers.17.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": "te_text_model.encoder.layers.17.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": "te_text_model.encoder.layers.17.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": "te_text_model.encoder.layers.18.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": "te_text_model.encoder.layers.18.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": "te_text_model.encoder.layers.18.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": "te_text_model.encoder.layers.18.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": "te_text_model.encoder.layers.18.layer_norm1.bias", @@ -136,8 +114,6 @@ "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": "te_text_model.encoder.layers.18.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": "te_text_model.encoder.layers.18.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": "te_text_model.encoder.layers.18.mlp.fc2.weight", - 
"cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": "te_text_model.encoder.layers.19.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": "te_text_model.encoder.layers.19.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": "te_text_model.encoder.layers.19.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": "te_text_model.encoder.layers.19.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.19.ln_1.bias": "te_text_model.encoder.layers.19.layer_norm1.bias", @@ -148,8 +124,6 @@ "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": "te_text_model.encoder.layers.19.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": "te_text_model.encoder.layers.19.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": "te_text_model.encoder.layers.19.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": "te_text_model.encoder.layers.2.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": "te_text_model.encoder.layers.2.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": "te_text_model.encoder.layers.2.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": "te_text_model.encoder.layers.2.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": "te_text_model.encoder.layers.2.layer_norm1.bias", @@ -160,8 +134,6 @@ "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": "te_text_model.encoder.layers.2.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": "te_text_model.encoder.layers.2.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": "te_text_model.encoder.layers.2.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": "te_text_model.encoder.layers.20.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": "te_text_model.encoder.layers.20.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": "te_text_model.encoder.layers.20.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": "te_text_model.encoder.layers.20.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": "te_text_model.encoder.layers.20.layer_norm1.bias", @@ -172,8 +144,6 @@ "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": "te_text_model.encoder.layers.20.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": "te_text_model.encoder.layers.20.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": "te_text_model.encoder.layers.20.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": "te_text_model.encoder.layers.21.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": "te_text_model.encoder.layers.21.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": "te_text_model.encoder.layers.21.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": "te_text_model.encoder.layers.21.self_attn.out_proj.weight", 
"cond_stage_model.model.transformer.resblocks.21.ln_1.bias": "te_text_model.encoder.layers.21.layer_norm1.bias", @@ -184,8 +154,6 @@ "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": "te_text_model.encoder.layers.21.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": "te_text_model.encoder.layers.21.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": "te_text_model.encoder.layers.21.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": "te_text_model.encoder.layers.22.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": "te_text_model.encoder.layers.22.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": "te_text_model.encoder.layers.22.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": "te_text_model.encoder.layers.22.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": "te_text_model.encoder.layers.22.layer_norm1.bias", @@ -196,8 +164,6 @@ "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": "te_text_model.encoder.layers.22.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": "te_text_model.encoder.layers.22.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": "te_text_model.encoder.layers.22.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": "te_text_model.encoder.layers.3.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": "te_text_model.encoder.layers.3.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": "te_text_model.encoder.layers.3.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": "te_text_model.encoder.layers.3.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": "te_text_model.encoder.layers.3.layer_norm1.bias", @@ -208,8 +174,6 @@ "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": "te_text_model.encoder.layers.3.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": "te_text_model.encoder.layers.3.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": "te_text_model.encoder.layers.3.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": "te_text_model.encoder.layers.4.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": "te_text_model.encoder.layers.4.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": "te_text_model.encoder.layers.4.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": "te_text_model.encoder.layers.4.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": "te_text_model.encoder.layers.4.layer_norm1.bias", @@ -220,8 +184,6 @@ "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": "te_text_model.encoder.layers.4.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": "te_text_model.encoder.layers.4.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": "te_text_model.encoder.layers.4.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": 
"te_text_model.encoder.layers.5.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": "te_text_model.encoder.layers.5.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": "te_text_model.encoder.layers.5.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": "te_text_model.encoder.layers.5.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": "te_text_model.encoder.layers.5.layer_norm1.bias", @@ -232,8 +194,6 @@ "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": "te_text_model.encoder.layers.5.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": "te_text_model.encoder.layers.5.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": "te_text_model.encoder.layers.5.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": "te_text_model.encoder.layers.6.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": "te_text_model.encoder.layers.6.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": "te_text_model.encoder.layers.6.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": "te_text_model.encoder.layers.6.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": "te_text_model.encoder.layers.6.layer_norm1.bias", @@ -244,8 +204,6 @@ "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": "te_text_model.encoder.layers.6.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": "te_text_model.encoder.layers.6.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": "te_text_model.encoder.layers.6.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": "te_text_model.encoder.layers.7.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": "te_text_model.encoder.layers.7.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": "te_text_model.encoder.layers.7.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": "te_text_model.encoder.layers.7.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": "te_text_model.encoder.layers.7.layer_norm1.bias", @@ -256,8 +214,6 @@ "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": "te_text_model.encoder.layers.7.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": "te_text_model.encoder.layers.7.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": "te_text_model.encoder.layers.7.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": "te_text_model.encoder.layers.8.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": "te_text_model.encoder.layers.8.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": "te_text_model.encoder.layers.8.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": "te_text_model.encoder.layers.8.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": "te_text_model.encoder.layers.8.layer_norm1.bias", @@ -268,8 +224,6 @@ 
"cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": "te_text_model.encoder.layers.8.mlp.fc1.weight", "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": "te_text_model.encoder.layers.8.mlp.fc2.bias", "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": "te_text_model.encoder.layers.8.mlp.fc2.weight", - "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": "te_text_model.encoder.layers.9.self_attn.MERGED.bias", - "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": "te_text_model.encoder.layers.9.self_attn.MERGED.weight", "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": "te_text_model.encoder.layers.9.self_attn.out_proj.bias", "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": "te_text_model.encoder.layers.9.self_attn.out_proj.weight", "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": "te_text_model.encoder.layers.9.layer_norm1.bias", @@ -1264,62 +1218,6 @@ 512 ] ], - "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [ - [ - 128, - 256, - 1, - 1 - ], - [ - 128, - 256, - 1, - 1 - ] - ], - "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [ - [ - 256, - 512, - 1, - 1 - ], - [ - 256, - 512, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [ - [ - 256, - 128, - 1, - 1 - ], - [ - 256, - 128, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [ - [ - 512, - 256, - 1, - 1 - ], - [ - 512, - 256, - 1, - 1 - ] - ], "first_stage_model.encoder.mid.attn_1.k.weight": [ [ 512, @@ -1367,230 +1265,6 @@ 512, 512 ] - ], - "first_stage_model.post_quant_conv.weight": [ - [ - 4, - 4, - 1, - 1 - ], - [ - 4, - 4, - 1, - 1 - ] - ], - "first_stage_model.quant_conv.weight": [ - [ - 8, - 8, - 1, - 1 - ], - [ - 8, - 8, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [ - [ - 640, - 320, - 1, - 1 - ], - [ - 640, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [ - [ - 1280, - 640, - 1, - 1 - ], - [ - 1280, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [ - [ - 1280, - 1920, - 1, - 1 - ], - [ - 1280, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [ - [ - 640, - 1920, - 1, - 1 - ], - [ - 640, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [ - [ - 640, - 1280, - 1, - 1 - ], - [ - 640, - 1280, - 1, - 1 - ] - ], - 
"model.diffusion_model.output_blocks.8.0.skip_connection.weight": [ - [ - 640, - 960, - 1, - 1 - ], - [ - 640, - 960, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [ - [ - 320, - 960, - 1, - 1 - ], - [ - 320, - 960, - 1, - 1 - ] ] }, "ldm_diffusers_operator_map": { @@ -1606,8 +1280,7 @@ "te_text_model.encoder.layers.0.self_attn.q_proj.weight", "te_text_model.encoder.layers.0.self_attn.k_proj.weight", "te_text_model.encoder.layers.0.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.0.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": { "cat": [ @@ -1621,8 +1294,7 @@ "te_text_model.encoder.layers.1.self_attn.q_proj.weight", "te_text_model.encoder.layers.1.self_attn.k_proj.weight", "te_text_model.encoder.layers.1.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.1.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": { "cat": [ @@ -1636,8 +1308,7 @@ "te_text_model.encoder.layers.10.self_attn.q_proj.weight", "te_text_model.encoder.layers.10.self_attn.k_proj.weight", "te_text_model.encoder.layers.10.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.10.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": { "cat": [ @@ -1651,8 +1322,7 @@ "te_text_model.encoder.layers.11.self_attn.q_proj.weight", "te_text_model.encoder.layers.11.self_attn.k_proj.weight", "te_text_model.encoder.layers.11.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.11.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": { "cat": [ @@ -1666,8 +1336,7 @@ "te_text_model.encoder.layers.12.self_attn.q_proj.weight", "te_text_model.encoder.layers.12.self_attn.k_proj.weight", "te_text_model.encoder.layers.12.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.12.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": { "cat": [ @@ -1681,8 +1350,7 @@ "te_text_model.encoder.layers.13.self_attn.q_proj.weight", "te_text_model.encoder.layers.13.self_attn.k_proj.weight", "te_text_model.encoder.layers.13.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.13.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": { "cat": [ @@ -1696,8 +1364,7 @@ "te_text_model.encoder.layers.14.self_attn.q_proj.weight", "te_text_model.encoder.layers.14.self_attn.k_proj.weight", "te_text_model.encoder.layers.14.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.14.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": { "cat": [ @@ -1711,8 +1378,7 @@ "te_text_model.encoder.layers.15.self_attn.q_proj.weight", "te_text_model.encoder.layers.15.self_attn.k_proj.weight", "te_text_model.encoder.layers.15.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.15.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": { "cat": [ @@ -1726,8 +1392,7 @@ "te_text_model.encoder.layers.16.self_attn.q_proj.weight", "te_text_model.encoder.layers.16.self_attn.k_proj.weight", "te_text_model.encoder.layers.16.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.16.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": { "cat": [ @@ -1741,8 
+1406,7 @@ "te_text_model.encoder.layers.17.self_attn.q_proj.weight", "te_text_model.encoder.layers.17.self_attn.k_proj.weight", "te_text_model.encoder.layers.17.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.17.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": { "cat": [ @@ -1756,8 +1420,7 @@ "te_text_model.encoder.layers.18.self_attn.q_proj.weight", "te_text_model.encoder.layers.18.self_attn.k_proj.weight", "te_text_model.encoder.layers.18.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.18.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": { "cat": [ @@ -1771,8 +1434,7 @@ "te_text_model.encoder.layers.19.self_attn.q_proj.weight", "te_text_model.encoder.layers.19.self_attn.k_proj.weight", "te_text_model.encoder.layers.19.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.19.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": { "cat": [ @@ -1786,8 +1448,7 @@ "te_text_model.encoder.layers.2.self_attn.q_proj.weight", "te_text_model.encoder.layers.2.self_attn.k_proj.weight", "te_text_model.encoder.layers.2.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.2.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": { "cat": [ @@ -1801,8 +1462,7 @@ "te_text_model.encoder.layers.20.self_attn.q_proj.weight", "te_text_model.encoder.layers.20.self_attn.k_proj.weight", "te_text_model.encoder.layers.20.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.20.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": { "cat": [ @@ -1816,8 +1476,7 @@ "te_text_model.encoder.layers.21.self_attn.q_proj.weight", "te_text_model.encoder.layers.21.self_attn.k_proj.weight", "te_text_model.encoder.layers.21.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.21.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": { "cat": [ @@ -1831,8 +1490,7 @@ "te_text_model.encoder.layers.22.self_attn.q_proj.weight", "te_text_model.encoder.layers.22.self_attn.k_proj.weight", "te_text_model.encoder.layers.22.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.22.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": { "cat": [ @@ -1846,8 +1504,7 @@ "te_text_model.encoder.layers.3.self_attn.q_proj.weight", "te_text_model.encoder.layers.3.self_attn.k_proj.weight", "te_text_model.encoder.layers.3.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.3.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": { "cat": [ @@ -1861,8 +1518,7 @@ "te_text_model.encoder.layers.4.self_attn.q_proj.weight", "te_text_model.encoder.layers.4.self_attn.k_proj.weight", "te_text_model.encoder.layers.4.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.4.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": { "cat": [ @@ -1876,8 +1532,7 @@ "te_text_model.encoder.layers.5.self_attn.q_proj.weight", "te_text_model.encoder.layers.5.self_attn.k_proj.weight", "te_text_model.encoder.layers.5.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.5.self_attn.MERGED.weight" + ] }, 
"cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": { "cat": [ @@ -1891,8 +1546,7 @@ "te_text_model.encoder.layers.6.self_attn.q_proj.weight", "te_text_model.encoder.layers.6.self_attn.k_proj.weight", "te_text_model.encoder.layers.6.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.6.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": { "cat": [ @@ -1906,8 +1560,7 @@ "te_text_model.encoder.layers.7.self_attn.q_proj.weight", "te_text_model.encoder.layers.7.self_attn.k_proj.weight", "te_text_model.encoder.layers.7.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.7.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": { "cat": [ @@ -1921,8 +1574,7 @@ "te_text_model.encoder.layers.8.self_attn.q_proj.weight", "te_text_model.encoder.layers.8.self_attn.k_proj.weight", "te_text_model.encoder.layers.8.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.8.self_attn.MERGED.weight" + ] }, "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": { "cat": [ @@ -1936,8 +1588,7 @@ "te_text_model.encoder.layers.9.self_attn.q_proj.weight", "te_text_model.encoder.layers.9.self_attn.k_proj.weight", "te_text_model.encoder.layers.9.self_attn.v_proj.weight" - ], - "target": "te_text_model.encoder.layers.9.self_attn.MERGED.weight" + ] } }, "diffusers_ldm_operator_map": { diff --git a/toolkit/keymaps/stable_diffusion_sdxl.json b/toolkit/keymaps/stable_diffusion_sdxl.json index baab7ae2..639b69cc 100644 --- a/toolkit/keymaps/stable_diffusion_sdxl.json +++ b/toolkit/keymaps/stable_diffusion_sdxl.json @@ -200,8 +200,6 @@ "conditioner.embedders.1.model.ln_final.weight": "te1_text_model.final_layer_norm.weight", "conditioner.embedders.1.model.positional_embedding": "te1_text_model.embeddings.position_embedding.weight", "conditioner.embedders.1.model.token_embedding.weight": "te1_text_model.embeddings.token_embedding.weight", - "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias": "te1_text_model.encoder.layers.0.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": "te1_text_model.encoder.layers.0.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.bias": "te1_text_model.encoder.layers.0.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.weight": "te1_text_model.encoder.layers.0.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.bias": "te1_text_model.encoder.layers.0.layer_norm1.bias", @@ -212,8 +210,6 @@ "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.weight": "te1_text_model.encoder.layers.0.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.bias": "te1_text_model.encoder.layers.0.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.weight": "te1_text_model.encoder.layers.0.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": "te1_text_model.encoder.layers.1.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": "te1_text_model.encoder.layers.1.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.bias": "te1_text_model.encoder.layers.1.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.weight": 
"te1_text_model.encoder.layers.1.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.bias": "te1_text_model.encoder.layers.1.layer_norm1.bias", @@ -224,8 +220,6 @@ "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.weight": "te1_text_model.encoder.layers.1.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.bias": "te1_text_model.encoder.layers.1.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.weight": "te1_text_model.encoder.layers.1.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": "te1_text_model.encoder.layers.10.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": "te1_text_model.encoder.layers.10.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.bias": "te1_text_model.encoder.layers.10.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.bias": "te1_text_model.encoder.layers.10.layer_norm1.bias", @@ -236,8 +230,6 @@ "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.weight": "te1_text_model.encoder.layers.10.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.bias": "te1_text_model.encoder.layers.10.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.weight": "te1_text_model.encoder.layers.10.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": "te1_text_model.encoder.layers.11.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": "te1_text_model.encoder.layers.11.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.bias": "te1_text_model.encoder.layers.11.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.bias": "te1_text_model.encoder.layers.11.layer_norm1.bias", @@ -248,8 +240,6 @@ "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.weight": "te1_text_model.encoder.layers.11.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.bias": "te1_text_model.encoder.layers.11.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.weight": "te1_text_model.encoder.layers.11.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": "te1_text_model.encoder.layers.12.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": "te1_text_model.encoder.layers.12.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.bias": "te1_text_model.encoder.layers.12.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.bias": "te1_text_model.encoder.layers.12.layer_norm1.bias", @@ -260,8 +250,6 @@ "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.weight": "te1_text_model.encoder.layers.12.mlp.fc1.weight", 
"conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.bias": "te1_text_model.encoder.layers.12.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.weight": "te1_text_model.encoder.layers.12.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": "te1_text_model.encoder.layers.13.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": "te1_text_model.encoder.layers.13.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.bias": "te1_text_model.encoder.layers.13.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.bias": "te1_text_model.encoder.layers.13.layer_norm1.bias", @@ -272,8 +260,6 @@ "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.weight": "te1_text_model.encoder.layers.13.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.bias": "te1_text_model.encoder.layers.13.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.weight": "te1_text_model.encoder.layers.13.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": "te1_text_model.encoder.layers.14.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": "te1_text_model.encoder.layers.14.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.bias": "te1_text_model.encoder.layers.14.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.weight": "te1_text_model.encoder.layers.14.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.bias": "te1_text_model.encoder.layers.14.layer_norm1.bias", @@ -284,8 +270,6 @@ "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.weight": "te1_text_model.encoder.layers.14.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.bias": "te1_text_model.encoder.layers.14.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.weight": "te1_text_model.encoder.layers.14.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": "te1_text_model.encoder.layers.15.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": "te1_text_model.encoder.layers.15.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.bias": "te1_text_model.encoder.layers.15.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.bias": "te1_text_model.encoder.layers.15.layer_norm1.bias", @@ -296,8 +280,6 @@ "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.weight": "te1_text_model.encoder.layers.15.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.bias": "te1_text_model.encoder.layers.15.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.weight": "te1_text_model.encoder.layers.15.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": 
"te1_text_model.encoder.layers.16.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": "te1_text_model.encoder.layers.16.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.bias": "te1_text_model.encoder.layers.16.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.weight": "te1_text_model.encoder.layers.16.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.bias": "te1_text_model.encoder.layers.16.layer_norm1.bias", @@ -308,8 +290,6 @@ "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.weight": "te1_text_model.encoder.layers.16.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.bias": "te1_text_model.encoder.layers.16.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.weight": "te1_text_model.encoder.layers.16.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": "te1_text_model.encoder.layers.17.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": "te1_text_model.encoder.layers.17.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.bias": "te1_text_model.encoder.layers.17.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.weight": "te1_text_model.encoder.layers.17.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.bias": "te1_text_model.encoder.layers.17.layer_norm1.bias", @@ -320,8 +300,6 @@ "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.weight": "te1_text_model.encoder.layers.17.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.bias": "te1_text_model.encoder.layers.17.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.weight": "te1_text_model.encoder.layers.17.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": "te1_text_model.encoder.layers.18.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": "te1_text_model.encoder.layers.18.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.bias": "te1_text_model.encoder.layers.18.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.weight": "te1_text_model.encoder.layers.18.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.bias": "te1_text_model.encoder.layers.18.layer_norm1.bias", @@ -332,8 +310,6 @@ "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.weight": "te1_text_model.encoder.layers.18.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.bias": "te1_text_model.encoder.layers.18.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.weight": "te1_text_model.encoder.layers.18.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": "te1_text_model.encoder.layers.19.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": "te1_text_model.encoder.layers.19.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.bias": "te1_text_model.encoder.layers.19.self_attn.out_proj.bias", 
"conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.weight": "te1_text_model.encoder.layers.19.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.bias": "te1_text_model.encoder.layers.19.layer_norm1.bias", @@ -344,8 +320,6 @@ "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.weight": "te1_text_model.encoder.layers.19.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.bias": "te1_text_model.encoder.layers.19.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.weight": "te1_text_model.encoder.layers.19.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": "te1_text_model.encoder.layers.2.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": "te1_text_model.encoder.layers.2.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.bias": "te1_text_model.encoder.layers.2.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.bias": "te1_text_model.encoder.layers.2.layer_norm1.bias", @@ -356,8 +330,6 @@ "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.weight": "te1_text_model.encoder.layers.2.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.bias": "te1_text_model.encoder.layers.2.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.weight": "te1_text_model.encoder.layers.2.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": "te1_text_model.encoder.layers.20.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": "te1_text_model.encoder.layers.20.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.bias": "te1_text_model.encoder.layers.20.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.weight": "te1_text_model.encoder.layers.20.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.bias": "te1_text_model.encoder.layers.20.layer_norm1.bias", @@ -368,8 +340,6 @@ "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.weight": "te1_text_model.encoder.layers.20.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.bias": "te1_text_model.encoder.layers.20.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.weight": "te1_text_model.encoder.layers.20.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": "te1_text_model.encoder.layers.21.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": "te1_text_model.encoder.layers.21.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.bias": "te1_text_model.encoder.layers.21.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.weight": "te1_text_model.encoder.layers.21.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.bias": "te1_text_model.encoder.layers.21.layer_norm1.bias", @@ -380,8 +350,6 @@ "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.weight": 
"te1_text_model.encoder.layers.21.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.bias": "te1_text_model.encoder.layers.21.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.weight": "te1_text_model.encoder.layers.21.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": "te1_text_model.encoder.layers.22.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": "te1_text_model.encoder.layers.22.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.bias": "te1_text_model.encoder.layers.22.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.weight": "te1_text_model.encoder.layers.22.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.bias": "te1_text_model.encoder.layers.22.layer_norm1.bias", @@ -392,8 +360,6 @@ "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.weight": "te1_text_model.encoder.layers.22.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.bias": "te1_text_model.encoder.layers.22.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.weight": "te1_text_model.encoder.layers.22.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": "te1_text_model.encoder.layers.23.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": "te1_text_model.encoder.layers.23.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.bias": "te1_text_model.encoder.layers.23.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.weight": "te1_text_model.encoder.layers.23.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.bias": "te1_text_model.encoder.layers.23.layer_norm1.bias", @@ -404,8 +370,6 @@ "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.weight": "te1_text_model.encoder.layers.23.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.bias": "te1_text_model.encoder.layers.23.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.weight": "te1_text_model.encoder.layers.23.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": "te1_text_model.encoder.layers.24.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": "te1_text_model.encoder.layers.24.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.bias": "te1_text_model.encoder.layers.24.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.weight": "te1_text_model.encoder.layers.24.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.bias": "te1_text_model.encoder.layers.24.layer_norm1.bias", @@ -416,8 +380,6 @@ "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.weight": "te1_text_model.encoder.layers.24.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.bias": "te1_text_model.encoder.layers.24.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.weight": "te1_text_model.encoder.layers.24.mlp.fc2.weight", - 
"conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": "te1_text_model.encoder.layers.25.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": "te1_text_model.encoder.layers.25.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.bias": "te1_text_model.encoder.layers.25.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.weight": "te1_text_model.encoder.layers.25.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.bias": "te1_text_model.encoder.layers.25.layer_norm1.bias", @@ -428,8 +390,6 @@ "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.weight": "te1_text_model.encoder.layers.25.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.bias": "te1_text_model.encoder.layers.25.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.weight": "te1_text_model.encoder.layers.25.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": "te1_text_model.encoder.layers.26.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": "te1_text_model.encoder.layers.26.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.bias": "te1_text_model.encoder.layers.26.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.weight": "te1_text_model.encoder.layers.26.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.bias": "te1_text_model.encoder.layers.26.layer_norm1.bias", @@ -440,8 +400,6 @@ "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.weight": "te1_text_model.encoder.layers.26.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.bias": "te1_text_model.encoder.layers.26.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.weight": "te1_text_model.encoder.layers.26.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": "te1_text_model.encoder.layers.27.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": "te1_text_model.encoder.layers.27.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.bias": "te1_text_model.encoder.layers.27.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.weight": "te1_text_model.encoder.layers.27.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.bias": "te1_text_model.encoder.layers.27.layer_norm1.bias", @@ -452,8 +410,6 @@ "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.weight": "te1_text_model.encoder.layers.27.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.bias": "te1_text_model.encoder.layers.27.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.weight": "te1_text_model.encoder.layers.27.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": "te1_text_model.encoder.layers.28.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": "te1_text_model.encoder.layers.28.self_attn.MERGED.weight", 
"conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.bias": "te1_text_model.encoder.layers.28.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.weight": "te1_text_model.encoder.layers.28.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.bias": "te1_text_model.encoder.layers.28.layer_norm1.bias", @@ -464,8 +420,6 @@ "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.weight": "te1_text_model.encoder.layers.28.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.bias": "te1_text_model.encoder.layers.28.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.weight": "te1_text_model.encoder.layers.28.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": "te1_text_model.encoder.layers.29.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": "te1_text_model.encoder.layers.29.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.bias": "te1_text_model.encoder.layers.29.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.weight": "te1_text_model.encoder.layers.29.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.bias": "te1_text_model.encoder.layers.29.layer_norm1.bias", @@ -476,8 +430,6 @@ "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.weight": "te1_text_model.encoder.layers.29.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.bias": "te1_text_model.encoder.layers.29.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.weight": "te1_text_model.encoder.layers.29.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": "te1_text_model.encoder.layers.3.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": "te1_text_model.encoder.layers.3.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.bias": "te1_text_model.encoder.layers.3.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.bias": "te1_text_model.encoder.layers.3.layer_norm1.bias", @@ -488,8 +440,6 @@ "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.weight": "te1_text_model.encoder.layers.3.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.bias": "te1_text_model.encoder.layers.3.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.weight": "te1_text_model.encoder.layers.3.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": "te1_text_model.encoder.layers.30.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": "te1_text_model.encoder.layers.30.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.bias": "te1_text_model.encoder.layers.30.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.weight": "te1_text_model.encoder.layers.30.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.bias": 
"te1_text_model.encoder.layers.30.layer_norm1.bias", @@ -500,8 +450,6 @@ "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.weight": "te1_text_model.encoder.layers.30.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.bias": "te1_text_model.encoder.layers.30.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.weight": "te1_text_model.encoder.layers.30.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": "te1_text_model.encoder.layers.31.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": "te1_text_model.encoder.layers.31.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.bias": "te1_text_model.encoder.layers.31.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.weight": "te1_text_model.encoder.layers.31.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.bias": "te1_text_model.encoder.layers.31.layer_norm1.bias", @@ -512,8 +460,6 @@ "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.weight": "te1_text_model.encoder.layers.31.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.bias": "te1_text_model.encoder.layers.31.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.weight": "te1_text_model.encoder.layers.31.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": "te1_text_model.encoder.layers.4.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": "te1_text_model.encoder.layers.4.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.bias": "te1_text_model.encoder.layers.4.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.bias": "te1_text_model.encoder.layers.4.layer_norm1.bias", @@ -524,8 +470,6 @@ "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.weight": "te1_text_model.encoder.layers.4.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.bias": "te1_text_model.encoder.layers.4.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.weight": "te1_text_model.encoder.layers.4.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": "te1_text_model.encoder.layers.5.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": "te1_text_model.encoder.layers.5.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.bias": "te1_text_model.encoder.layers.5.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.bias": "te1_text_model.encoder.layers.5.layer_norm1.bias", @@ -536,8 +480,6 @@ "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.weight": "te1_text_model.encoder.layers.5.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.bias": "te1_text_model.encoder.layers.5.mlp.fc2.bias", 
"conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.weight": "te1_text_model.encoder.layers.5.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": "te1_text_model.encoder.layers.6.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": "te1_text_model.encoder.layers.6.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.bias": "te1_text_model.encoder.layers.6.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.bias": "te1_text_model.encoder.layers.6.layer_norm1.bias", @@ -548,8 +490,6 @@ "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.weight": "te1_text_model.encoder.layers.6.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.bias": "te1_text_model.encoder.layers.6.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.weight": "te1_text_model.encoder.layers.6.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": "te1_text_model.encoder.layers.7.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": "te1_text_model.encoder.layers.7.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.bias": "te1_text_model.encoder.layers.7.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.bias": "te1_text_model.encoder.layers.7.layer_norm1.bias", @@ -560,8 +500,6 @@ "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.weight": "te1_text_model.encoder.layers.7.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.bias": "te1_text_model.encoder.layers.7.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.weight": "te1_text_model.encoder.layers.7.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": "te1_text_model.encoder.layers.8.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": "te1_text_model.encoder.layers.8.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.bias": "te1_text_model.encoder.layers.8.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight", "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.bias": "te1_text_model.encoder.layers.8.layer_norm1.bias", @@ -572,8 +510,6 @@ "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.weight": "te1_text_model.encoder.layers.8.mlp.fc1.weight", "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.bias": "te1_text_model.encoder.layers.8.mlp.fc2.bias", "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.weight": "te1_text_model.encoder.layers.8.mlp.fc2.weight", - "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": "te1_text_model.encoder.layers.9.self_attn.MERGED.bias", - "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": 
"te1_text_model.encoder.layers.9.self_attn.MERGED.weight", "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.bias": "te1_text_model.encoder.layers.9.self_attn.out_proj.bias", "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight", "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.bias": "te1_text_model.encoder.layers.9.layer_norm1.bias", @@ -2562,62 +2498,6 @@ 512 ] ], - "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [ - [ - 128, - 256, - 1, - 1 - ], - [ - 128, - 256, - 1, - 1 - ] - ], - "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [ - [ - 256, - 512, - 1, - 1 - ], - [ - 256, - 512, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [ - [ - 256, - 128, - 1, - 1 - ], - [ - 256, - 128, - 1, - 1 - ] - ], - "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [ - [ - 512, - 256, - 1, - 1 - ], - [ - 512, - 256, - 1, - 1 - ] - ], "first_stage_model.encoder.mid.attn_1.k.weight": [ [ 512, @@ -2665,188 +2545,6 @@ 512, 512 ] - ], - "first_stage_model.post_quant_conv.weight": [ - [ - 4, - 4, - 1, - 1 - ], - [ - 4, - 4, - 1, - 1 - ] - ], - "first_stage_model.quant_conv.weight": [ - [ - 8, - 8, - 1, - 1 - ], - [ - 8, - 8, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [ - [ - 640, - 320, - 1, - 1 - ], - [ - 640, - 320, - 1, - 1 - ] - ], - "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [ - [ - 1280, - 640, - 1, - 1 - ], - [ - 1280, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [ - [ - 1280, - 2560, - 1, - 1 - ], - [ - 1280, - 2560, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [ - [ - 1280, - 1920, - 1, - 1 - ], - [ - 1280, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [ - [ - 640, - 1920, - 1, - 1 - ], - [ - 640, - 1920, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [ - [ - 640, - 1280, - 1, - 1 - ], - [ - 640, - 1280, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [ - [ - 640, - 960, - 1, - 1 - ], - [ - 640, - 960, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [ - [ - 320, - 960, - 1, - 1 - ], - [ - 320, - 960, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] - ], - "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [ - [ - 320, - 640, - 1, - 1 - ], - [ - 320, - 640, - 1, - 1 - ] ] }, "ldm_diffusers_operator_map": { @@ -2855,515 +2553,469 @@ "te1_text_model.encoder.layers.0.self_attn.q_proj.bias", "te1_text_model.encoder.layers.0.self_attn.k_proj.bias", "te1_text_model.encoder.layers.0.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.0.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.0.self_attn.q_proj.weight", "te1_text_model.encoder.layers.0.self_attn.k_proj.weight", "te1_text_model.encoder.layers.0.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.0.self_attn.MERGED.weight" + ] }, 
"conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.1.self_attn.q_proj.bias", "te1_text_model.encoder.layers.1.self_attn.k_proj.bias", "te1_text_model.encoder.layers.1.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.1.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.1.self_attn.q_proj.weight", "te1_text_model.encoder.layers.1.self_attn.k_proj.weight", "te1_text_model.encoder.layers.1.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.1.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.10.self_attn.q_proj.bias", "te1_text_model.encoder.layers.10.self_attn.k_proj.bias", "te1_text_model.encoder.layers.10.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.10.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.10.self_attn.q_proj.weight", "te1_text_model.encoder.layers.10.self_attn.k_proj.weight", "te1_text_model.encoder.layers.10.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.10.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.11.self_attn.q_proj.bias", "te1_text_model.encoder.layers.11.self_attn.k_proj.bias", "te1_text_model.encoder.layers.11.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.11.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.11.self_attn.q_proj.weight", "te1_text_model.encoder.layers.11.self_attn.k_proj.weight", "te1_text_model.encoder.layers.11.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.11.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.12.self_attn.q_proj.bias", "te1_text_model.encoder.layers.12.self_attn.k_proj.bias", "te1_text_model.encoder.layers.12.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.12.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.12.self_attn.q_proj.weight", "te1_text_model.encoder.layers.12.self_attn.k_proj.weight", "te1_text_model.encoder.layers.12.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.12.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.13.self_attn.q_proj.bias", "te1_text_model.encoder.layers.13.self_attn.k_proj.bias", "te1_text_model.encoder.layers.13.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.13.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.13.self_attn.q_proj.weight", "te1_text_model.encoder.layers.13.self_attn.k_proj.weight", "te1_text_model.encoder.layers.13.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.13.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": { "cat": [ 
"te1_text_model.encoder.layers.14.self_attn.q_proj.bias", "te1_text_model.encoder.layers.14.self_attn.k_proj.bias", "te1_text_model.encoder.layers.14.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.14.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.14.self_attn.q_proj.weight", "te1_text_model.encoder.layers.14.self_attn.k_proj.weight", "te1_text_model.encoder.layers.14.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.14.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.15.self_attn.q_proj.bias", "te1_text_model.encoder.layers.15.self_attn.k_proj.bias", "te1_text_model.encoder.layers.15.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.15.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.15.self_attn.q_proj.weight", "te1_text_model.encoder.layers.15.self_attn.k_proj.weight", "te1_text_model.encoder.layers.15.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.15.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.16.self_attn.q_proj.bias", "te1_text_model.encoder.layers.16.self_attn.k_proj.bias", "te1_text_model.encoder.layers.16.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.16.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.16.self_attn.q_proj.weight", "te1_text_model.encoder.layers.16.self_attn.k_proj.weight", "te1_text_model.encoder.layers.16.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.16.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.17.self_attn.q_proj.bias", "te1_text_model.encoder.layers.17.self_attn.k_proj.bias", "te1_text_model.encoder.layers.17.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.17.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.17.self_attn.q_proj.weight", "te1_text_model.encoder.layers.17.self_attn.k_proj.weight", "te1_text_model.encoder.layers.17.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.17.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.18.self_attn.q_proj.bias", "te1_text_model.encoder.layers.18.self_attn.k_proj.bias", "te1_text_model.encoder.layers.18.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.18.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.18.self_attn.q_proj.weight", "te1_text_model.encoder.layers.18.self_attn.k_proj.weight", "te1_text_model.encoder.layers.18.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.18.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.19.self_attn.q_proj.bias", 
"te1_text_model.encoder.layers.19.self_attn.k_proj.bias", "te1_text_model.encoder.layers.19.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.19.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.19.self_attn.q_proj.weight", "te1_text_model.encoder.layers.19.self_attn.k_proj.weight", "te1_text_model.encoder.layers.19.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.19.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.2.self_attn.q_proj.bias", "te1_text_model.encoder.layers.2.self_attn.k_proj.bias", "te1_text_model.encoder.layers.2.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.2.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.2.self_attn.q_proj.weight", "te1_text_model.encoder.layers.2.self_attn.k_proj.weight", "te1_text_model.encoder.layers.2.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.2.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.20.self_attn.q_proj.bias", "te1_text_model.encoder.layers.20.self_attn.k_proj.bias", "te1_text_model.encoder.layers.20.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.20.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.20.self_attn.q_proj.weight", "te1_text_model.encoder.layers.20.self_attn.k_proj.weight", "te1_text_model.encoder.layers.20.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.20.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.21.self_attn.q_proj.bias", "te1_text_model.encoder.layers.21.self_attn.k_proj.bias", "te1_text_model.encoder.layers.21.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.21.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.21.self_attn.q_proj.weight", "te1_text_model.encoder.layers.21.self_attn.k_proj.weight", "te1_text_model.encoder.layers.21.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.21.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.22.self_attn.q_proj.bias", "te1_text_model.encoder.layers.22.self_attn.k_proj.bias", "te1_text_model.encoder.layers.22.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.22.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.22.self_attn.q_proj.weight", "te1_text_model.encoder.layers.22.self_attn.k_proj.weight", "te1_text_model.encoder.layers.22.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.22.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.23.self_attn.q_proj.bias", "te1_text_model.encoder.layers.23.self_attn.k_proj.bias", 
"te1_text_model.encoder.layers.23.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.23.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.23.self_attn.q_proj.weight", "te1_text_model.encoder.layers.23.self_attn.k_proj.weight", "te1_text_model.encoder.layers.23.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.23.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.24.self_attn.q_proj.bias", "te1_text_model.encoder.layers.24.self_attn.k_proj.bias", "te1_text_model.encoder.layers.24.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.24.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.24.self_attn.q_proj.weight", "te1_text_model.encoder.layers.24.self_attn.k_proj.weight", "te1_text_model.encoder.layers.24.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.24.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.25.self_attn.q_proj.bias", "te1_text_model.encoder.layers.25.self_attn.k_proj.bias", "te1_text_model.encoder.layers.25.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.25.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.25.self_attn.q_proj.weight", "te1_text_model.encoder.layers.25.self_attn.k_proj.weight", "te1_text_model.encoder.layers.25.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.25.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.26.self_attn.q_proj.bias", "te1_text_model.encoder.layers.26.self_attn.k_proj.bias", "te1_text_model.encoder.layers.26.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.26.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.26.self_attn.q_proj.weight", "te1_text_model.encoder.layers.26.self_attn.k_proj.weight", "te1_text_model.encoder.layers.26.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.26.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.27.self_attn.q_proj.bias", "te1_text_model.encoder.layers.27.self_attn.k_proj.bias", "te1_text_model.encoder.layers.27.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.27.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.27.self_attn.q_proj.weight", "te1_text_model.encoder.layers.27.self_attn.k_proj.weight", "te1_text_model.encoder.layers.27.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.27.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.28.self_attn.q_proj.bias", "te1_text_model.encoder.layers.28.self_attn.k_proj.bias", "te1_text_model.encoder.layers.28.self_attn.v_proj.bias" - ], - "target": 
"te1_text_model.encoder.layers.28.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.28.self_attn.q_proj.weight", "te1_text_model.encoder.layers.28.self_attn.k_proj.weight", "te1_text_model.encoder.layers.28.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.28.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.29.self_attn.q_proj.bias", "te1_text_model.encoder.layers.29.self_attn.k_proj.bias", "te1_text_model.encoder.layers.29.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.29.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.29.self_attn.q_proj.weight", "te1_text_model.encoder.layers.29.self_attn.k_proj.weight", "te1_text_model.encoder.layers.29.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.29.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.3.self_attn.q_proj.bias", "te1_text_model.encoder.layers.3.self_attn.k_proj.bias", "te1_text_model.encoder.layers.3.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.3.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.3.self_attn.q_proj.weight", "te1_text_model.encoder.layers.3.self_attn.k_proj.weight", "te1_text_model.encoder.layers.3.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.3.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.30.self_attn.q_proj.bias", "te1_text_model.encoder.layers.30.self_attn.k_proj.bias", "te1_text_model.encoder.layers.30.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.30.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.30.self_attn.q_proj.weight", "te1_text_model.encoder.layers.30.self_attn.k_proj.weight", "te1_text_model.encoder.layers.30.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.30.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.31.self_attn.q_proj.bias", "te1_text_model.encoder.layers.31.self_attn.k_proj.bias", "te1_text_model.encoder.layers.31.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.31.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.31.self_attn.q_proj.weight", "te1_text_model.encoder.layers.31.self_attn.k_proj.weight", "te1_text_model.encoder.layers.31.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.31.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.4.self_attn.q_proj.bias", "te1_text_model.encoder.layers.4.self_attn.k_proj.bias", "te1_text_model.encoder.layers.4.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.4.self_attn.MERGED.bias" + ] }, 
"conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.4.self_attn.q_proj.weight", "te1_text_model.encoder.layers.4.self_attn.k_proj.weight", "te1_text_model.encoder.layers.4.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.4.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.5.self_attn.q_proj.bias", "te1_text_model.encoder.layers.5.self_attn.k_proj.bias", "te1_text_model.encoder.layers.5.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.5.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.5.self_attn.q_proj.weight", "te1_text_model.encoder.layers.5.self_attn.k_proj.weight", "te1_text_model.encoder.layers.5.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.5.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.6.self_attn.q_proj.bias", "te1_text_model.encoder.layers.6.self_attn.k_proj.bias", "te1_text_model.encoder.layers.6.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.6.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.6.self_attn.q_proj.weight", "te1_text_model.encoder.layers.6.self_attn.k_proj.weight", "te1_text_model.encoder.layers.6.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.6.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.7.self_attn.q_proj.bias", "te1_text_model.encoder.layers.7.self_attn.k_proj.bias", "te1_text_model.encoder.layers.7.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.7.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.7.self_attn.q_proj.weight", "te1_text_model.encoder.layers.7.self_attn.k_proj.weight", "te1_text_model.encoder.layers.7.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.7.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.8.self_attn.q_proj.bias", "te1_text_model.encoder.layers.8.self_attn.k_proj.bias", "te1_text_model.encoder.layers.8.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.8.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": { "cat": [ "te1_text_model.encoder.layers.8.self_attn.q_proj.weight", "te1_text_model.encoder.layers.8.self_attn.k_proj.weight", "te1_text_model.encoder.layers.8.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.8.self_attn.MERGED.weight" + ] }, "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": { "cat": [ "te1_text_model.encoder.layers.9.self_attn.q_proj.bias", "te1_text_model.encoder.layers.9.self_attn.k_proj.bias", "te1_text_model.encoder.layers.9.self_attn.v_proj.bias" - ], - "target": "te1_text_model.encoder.layers.9.self_attn.MERGED.bias" + ] }, "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": { "cat": [ 
"te1_text_model.encoder.layers.9.self_attn.q_proj.weight", "te1_text_model.encoder.layers.9.self_attn.k_proj.weight", "te1_text_model.encoder.layers.9.self_attn.v_proj.weight" - ], - "target": "te1_text_model.encoder.layers.9.self_attn.MERGED.weight" + ] } }, "diffusers_ldm_operator_map": { + "te1_text_model.encoder.layers.0.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", @@ -3382,6 +3034,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.1.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.1.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", @@ -3400,6 +3070,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.10.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.10.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight", @@ -3418,6 +3106,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.11.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.11.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight", @@ -3436,6 +3142,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.12.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "1280:2560, :" + ] + }, + 
"te1_text_model.encoder.layers.12.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.12.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight", @@ -3454,6 +3178,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.13.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.13.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight", @@ -3472,6 +3214,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.14.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.14.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight", @@ -3490,6 +3250,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.15.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.15.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight", @@ -3508,6 +3286,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.16.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.16.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight", @@ -3526,6 +3322,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.17.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.k_proj.bias": { + "slice": [ + 
"conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.17.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight", @@ -3544,6 +3358,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.18.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.18.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight", @@ -3562,6 +3394,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.19.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.19.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight", @@ -3580,6 +3430,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.2.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.2.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight", @@ -3598,6 +3466,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.20.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.20.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight", @@ -3616,6 +3502,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.21.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "0:1280, :" + ] + }, + 
"te1_text_model.encoder.layers.21.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.21.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight", @@ -3634,6 +3538,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.22.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.22.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight", @@ -3652,6 +3574,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.23.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.23.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight", @@ -3670,6 +3610,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.24.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.24.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight", @@ -3688,6 +3646,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.25.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.25.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight", @@ -3706,6 +3682,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.26.self_attn.q_proj.bias": { + "slice": [ + 
"conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.26.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight", @@ -3724,6 +3718,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.27.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.27.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight", @@ -3742,6 +3754,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.28.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.28.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight", @@ -3760,6 +3790,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.29.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.29.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight", @@ -3778,6 +3826,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.3.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.3.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight", @@ -3796,6 +3862,24 @@ "2560:, :" ] }, + 
"te1_text_model.encoder.layers.30.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.30.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight", @@ -3814,6 +3898,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.31.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.31.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight", @@ -3832,6 +3934,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.4.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.4.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight", @@ -3850,6 +3970,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.5.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.5.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight", @@ -3868,6 +4006,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.6.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.6.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight", @@ -3886,6 +4042,24 
@@ "2560:, :" ] }, + "te1_text_model.encoder.layers.7.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.7.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight", @@ -3904,6 +4078,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.8.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.8.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight", @@ -3922,6 +4114,24 @@ "2560:, :" ] }, + "te1_text_model.encoder.layers.9.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "2560:, :" + ] + }, "te1_text_model.encoder.layers.9.self_attn.q_proj.weight": { "slice": [ "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight", diff --git a/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json b/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json index 5af96059..d0b2554a 100644 --- a/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json +++ b/toolkit/keymaps/stable_diffusion_sdxl_unmatched.json @@ -6,16 +6,12 @@ 77 ], "min": 0.0, - "max": 76.0, - "mean": 38.0, - "std": 22.375 + "max": 76.0 }, "conditioner.embedders.1.model.logit_scale": { "shape": [], "min": 4.60546875, - "max": 4.60546875, - "mean": 4.60546875, - "std": NaN + "max": 4.60546875 }, "conditioner.embedders.1.model.text_projection": { "shape": [ @@ -23,9 +19,7 @@ 1280 ], "min": -0.15966796875, - "max": 0.230712890625, - "mean": 0.0, - "std": 0.0181732177734375 + "max": 0.230712890625 } }, "diffusers": { @@ -35,9 +29,7 @@ 1280 ], "min": -0.15966796875, - "max": 0.230712890625, - "mean": 2.128152846125886e-05, - "std": 0.018169498071074486 + "max": 0.230712890625 } } } \ No newline at end of file diff --git a/toolkit/saving.py b/toolkit/saving.py index aab0d4bc..a83ee6dc 100644 --- a/toolkit/saving.py +++ b/toolkit/saving.py @@ -32,6 +32,10 @@ def convert_state_dict_to_ldm_with_mapping( with open(mapping_path, 'r') as f: mapping = json.load(f, object_pairs_hook=OrderedDict) + # keep track of keys not matched + ldm_matched_keys = [] + diffusers_matched_keys = [] + ldm_diffusers_keymap = mapping['ldm_diffusers_keymap'] ldm_diffusers_shape_map = mapping['ldm_diffusers_shape_map'] 
    ldm_diffusers_operator_map = mapping['ldm_diffusers_operator_map']
@@ -52,11 +56,15 @@ def convert_state_dict_to_ldm_with_mapping(
             for diffusers_key in ldm_diffusers_operator_map[ldm_key]['cat']:
                 cat_list.append(diffusers_state_dict[diffusers_key].detach())
             converted_state_dict[ldm_key] = torch.cat(cat_list, dim=0).to(device, dtype=dtype)
+            diffusers_matched_keys.extend(ldm_diffusers_operator_map[ldm_key]['cat'])
+            ldm_matched_keys.append(ldm_key)
         if 'slice' in ldm_diffusers_operator_map[ldm_key]:
             tensor_to_slice = diffusers_state_dict[ldm_diffusers_operator_map[ldm_key]['slice'][0]]
             slice_text = ldm_diffusers_operator_map[ldm_key]['slice'][1]
             converted_state_dict[ldm_key] = tensor_to_slice[get_slices_from_string(slice_text)].detach().to(device, dtype=dtype)
+            diffusers_matched_keys.append(ldm_diffusers_operator_map[ldm_key]['slice'][0])
+            ldm_matched_keys.append(ldm_key)
 
     # process the rest of the keys
     for ldm_key in ldm_diffusers_keymap:
@@ -67,6 +75,22 @@ def convert_state_dict_to_ldm_with_mapping(
             if ldm_key in ldm_diffusers_shape_map:
                 tensor = tensor.view(ldm_diffusers_shape_map[ldm_key][0])
             converted_state_dict[ldm_key] = tensor
+            diffusers_matched_keys.append(ldm_diffusers_keymap[ldm_key])
+            ldm_matched_keys.append(ldm_key)
+
+    # see if any keys are missing from the known mapping
+    mapped_diffusers_keys = list(ldm_diffusers_keymap.values())
+    mapped_ldm_keys = list(ldm_diffusers_keymap.keys())
+
+    missing_diffusers_keys = [x for x in mapped_diffusers_keys if x not in diffusers_matched_keys]
+    missing_ldm_keys = [x for x in mapped_ldm_keys if x not in ldm_matched_keys]
+
+    if len(missing_diffusers_keys) > 0:
+        print(f"WARNING!!!! Missing {len(missing_diffusers_keys)} diffusers keys")
+        print(missing_diffusers_keys)
+    if len(missing_ldm_keys) > 0:
+        print(f"WARNING!!!! Missing {len(missing_ldm_keys)} ldm keys")
+        print(missing_ldm_keys)
 
     return converted_state_dict
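For reference, the "slice" operator entries in the keymaps above pair a fused LDM tensor name with a slice string such as "0:1280, :". Below is a minimal sketch of how such a string could be parsed and applied; the real get_slices_from_string lives elsewhere in the toolkit and may differ, so treat parse_slice_string as an illustration of the format, not the actual implementation.

import torch

def parse_slice_string(slice_text: str) -> tuple:
    # "0:1280, :" -> (slice(0, 1280), slice(None)); empty bounds are open-ended
    slices = []
    for part in slice_text.split(','):
        start, _, stop = part.strip().partition(':')
        slices.append(slice(int(start) if start else None,
                            int(stop) if stop else None))
    return tuple(slices)

# q, k and v are stacked row-wise in the OpenCLIP fused in_proj_weight
fused = torch.randn(3840, 1280)
q = fused[parse_slice_string("0:1280, :")]     # rows 0..1279
k = fused[parse_slice_string("1280:2560, :")]  # rows 1280..2559
v = fused[parse_slice_string("2560:, :")]      # rows 2560..3839
assert q.shape == k.shape == v.shape == (1280, 1280)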
diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py
index 9ae8d494..0b25aa02 100644
--- a/toolkit/stable_diffusion_model.py
+++ b/toolkit/stable_diffusion_model.py
@@ -1,4 +1,5 @@
 import gc
+import json
 import typing
 from typing import Union, List, Tuple
 import sys
@@ -15,7 +16,7 @@ from library.model_util import convert_unet_state_dict_to_sd, convert_text_encod
 from toolkit import train_tools
 from toolkit.config_modules import ModelConfig, GenerateImageConfig
 from toolkit.metadata import get_meta_for_safetensors
-from toolkit.paths import REPOS_ROOT
+from toolkit.paths import REPOS_ROOT, KEYMAPS_ROOT
 from toolkit.prompt_utils import inject_trigger_into_prompt, PromptEmbeds
 from toolkit.saving import save_ldm_model_from_diffusers
 from toolkit.train_tools import get_torch_dtype, apply_noise_offset
@@ -37,6 +38,14 @@ SD_PREFIX_TEXT_ENCODER = "te"
 SD_PREFIX_TEXT_ENCODER1 = "te1"
 SD_PREFIX_TEXT_ENCODER2 = "te2"
 
+# prefixed diffusers keys
+DO_NOT_TRAIN_WEIGHTS = [
+    "unet_time_embedding.linear_1.bias",
+    "unet_time_embedding.linear_1.weight",
+    "unet_time_embedding.linear_2.bias",
+    "unet_time_embedding.linear_2.weight",
+]
+
 
 class BlankNetwork:
@@ -63,10 +72,6 @@ def flush():
 UNET_IN_CHANNELS = 4  # Stable Diffusion's in_channels is fixed at 4; the same holds for XL.
 VAE_SCALE_FACTOR = 8  # 2 ** (len(vae.config.block_out_channels) - 1) = 8
 
-
-
-
-
 # if is type checking
 if typing.TYPE_CHECKING:
     from diffusers import \
@@ -734,3 +739,49 @@ class StableDiffusion:
             save_dtype=save_dtype,
             sd_version=version_string,
         )
+
+    def prepare_optimizer_params(
+            self,
+            unet=False,
+            text_encoder=False,
+            text_encoder_lr=None,
+            unet_lr=None,
+            default_lr=1e-6,
+    ):
+        # todo maybe only get locon ones?
+        # not every weight survives conversion when saving, so to keep trained
+        # weights consistent with the save mappings, only train weights that
+        # are mapped. Also attach a learning rate to each param group.
+        version = 'sd1'
+        if self.is_xl:
+            version = 'sdxl'
+        if self.is_v2:
+            version = 'sd2'
+        mapping_filename = f"stable_diffusion_{version}.json"
+        mapping_path = os.path.join(KEYMAPS_ROOT, mapping_filename)
+        with open(mapping_path, 'r') as f:
+            mapping = json.load(f)
+        ldm_diffusers_keymap = mapping['ldm_diffusers_keymap']
+
+        trainable_parameters = []
+
+        if unet:
+            state_dict = self.state_dict(vae=False, unet=True, text_encoder=False)
+            unet_lr = unet_lr if unet_lr is not None else default_lr
+            params = []
+            for ldm_key, diffusers_key in ldm_diffusers_keymap.items():
+                if diffusers_key in state_dict and diffusers_key not in DO_NOT_TRAIN_WEIGHTS:
+                    params.append(state_dict[diffusers_key])
+            param_data = {"params": params, "lr": unet_lr}
+            trainable_parameters.append(param_data)
+
+        if text_encoder:
+            state_dict = self.state_dict(vae=False, unet=False, text_encoder=True)
+            text_encoder_lr = text_encoder_lr if text_encoder_lr is not None else default_lr
+            params = []
+            for ldm_key, diffusers_key in ldm_diffusers_keymap.items():
+                if diffusers_key in state_dict and diffusers_key not in DO_NOT_TRAIN_WEIGHTS:
+                    params.append(state_dict[diffusers_key])
+            param_data = {"params": params, "lr": text_encoder_lr}
+            trainable_parameters.append(param_data)
+
+        return trainable_parameters
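The param groups returned by prepare_optimizer_params are already in the layout torch.optim expects, so they can be handed straight to an optimizer. A hypothetical usage sketch, with sd standing in for a loaded StableDiffusion instance and the learning rates chosen purely for illustration:

import torch

params = sd.prepare_optimizer_params(
    unet=True,
    text_encoder=True,
    unet_lr=1e-5,
    text_encoder_lr=5e-6,
    default_lr=1e-6,
)
# each group is {"params": [...], "lr": float}, the standard
# torch.optim param-group format
optimizer = torch.optim.AdamW(params)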