diff --git a/extensions_built_in/sd_trainer/SDTrainer.py b/extensions_built_in/sd_trainer/SDTrainer.py index 67624c98..452e028c 100644 --- a/extensions_built_in/sd_trainer/SDTrainer.py +++ b/extensions_built_in/sd_trainer/SDTrainer.py @@ -68,6 +68,8 @@ class SDTrainer(BaseSDTrainProcess): self.sd.vae.to('cpu') flush() add_all_snr_to_noise_scheduler(self.sd.noise_scheduler, self.device_torch) + if self.adapter is not None: + self.adapter.to(self.device_torch) # you can expand these in a child class to make customization easier def calculate_loss( @@ -507,8 +509,8 @@ class SDTrainer(BaseSDTrainProcess): self.sd.unet.train() prior_pred = prior_pred.detach() # remove the residuals as we wont use them on prediction when matching control - if match_adapter_assist and 'down_block_additional_residuals' in pred_kwargs: - del pred_kwargs['down_block_additional_residuals'] + if match_adapter_assist and 'down_intrablock_additional_residuals' in pred_kwargs: + del pred_kwargs['down_intrablock_additional_residuals'] # restore network # self.network.multiplier = network_weight_list self.network.is_active = was_network_active @@ -746,7 +748,7 @@ class SDTrainer(BaseSDTrainProcess): down_block_additional_residuals ] - pred_kwargs['down_block_additional_residuals'] = down_block_additional_residuals + pred_kwargs['down_intrablock_additional_residuals'] = down_block_additional_residuals prior_pred = None if (has_adapter_img and self.assistant_adapter and match_adapter_assist) or self.do_prior_prediction: diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py index 8c0a9a2e..8bf45ca3 100644 --- a/jobs/process/BaseSDTrainProcess.py +++ b/jobs/process/BaseSDTrainProcess.py @@ -913,7 +913,12 @@ class BaseSDTrainProcess(BaseTrainProcess): self.load_training_state_from_metadata(latest_save_path) # get the noise scheduler - sampler = get_sampler(self.train_config.noise_scheduler) + sampler = get_sampler( + self.train_config.noise_scheduler, + { + "prediction_type": "v_prediction" if self.model_config.is_v_pred else "epsilon", + } + ) if self.train_config.train_refiner and self.model_config.refiner_name_or_path is not None and self.network_config is None: previous_refiner_save = self.get_latest_save_path(self.job.name + '_refiner') @@ -1051,6 +1056,7 @@ class BaseSDTrainProcess(BaseTrainProcess): is_sdxl=self.model_config.is_xl or self.model_config.is_ssd, is_v2=self.model_config.is_v2, is_ssd=self.model_config.is_ssd, + is_vega=self.model_config.is_vega, dropout=self.network_config.dropout, use_text_encoder_1=self.model_config.use_text_encoder_1, use_text_encoder_2=self.model_config.use_text_encoder_2, diff --git a/testing/generate_weight_mappings.py b/testing/generate_weight_mappings.py index c063a9f7..346fe09d 100644 --- a/testing/generate_weight_mappings.py +++ b/testing/generate_weight_mappings.py @@ -54,6 +54,7 @@ parser.add_argument('--name', type=str, default='stable_diffusion', help='name f parser.add_argument('--sdxl', action='store_true', help='is sdxl model') parser.add_argument('--refiner', action='store_true', help='is refiner model') parser.add_argument('--ssd', action='store_true', help='is ssd model') +parser.add_argument('--vega', action='store_true', help='is vega model') parser.add_argument('--sd2', action='store_true', help='is sd 2 model') args = parser.parse_args() @@ -66,15 +67,15 @@ print(f'Loading diffusers model') ignore_ldm_begins_with = [] -diffusers_file_path = file_path +diffusers_file_path = file_path if len(args.file_1) == 1 else args.file_1[1] if 
args.ssd: diffusers_file_path = "segmind/SSD-1B" +if args.vega: + diffusers_file_path = "segmind/Segmind-Vega" # if args.refiner: # diffusers_file_path = "stabilityai/stable-diffusion-xl-refiner-1.0" -diffusers_file_path = file_path if len(args.file_1) == 1 else args.file_1[1] - if not args.refiner: diffusers_model_config = ModelConfig( @@ -82,6 +83,7 @@ if not args.refiner: is_xl=args.sdxl, is_v2=args.sd2, is_ssd=args.ssd, + is_vega=args.vega, dtype=dtype, ) diffusers_sd = StableDiffusion( @@ -157,7 +159,7 @@ te_suffix = '' proj_pattern_weight = None proj_pattern_bias = None text_proj_layer = None -if args.sdxl or args.ssd: +if args.sdxl or args.ssd or args.vega: te_suffix = '1' ldm_res_block_prefix = "conditioner.embedders.1.model.transformer.resblocks" proj_pattern_weight = r"conditioner\.embedders\.1\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_weight" @@ -176,10 +178,13 @@ if args.sd2: proj_pattern_bias = r"cond_stage_model\.model\.transformer\.resblocks\.(\d+)\.attn\.in_proj_bias" text_proj_layer = "cond_stage_model.model.text_projection" -if args.sdxl or args.sd2 or args.ssd or args.refiner: +if args.sdxl or args.sd2 or args.ssd or args.refiner or args.vega: if "conditioner.embedders.1.model.text_projection" in ldm_dict_keys: # d_model = int(checkpoint[prefix + "text_projection"].shape[0])) d_model = int(ldm_state_dict["conditioner.embedders.1.model.text_projection"].shape[0]) + elif "conditioner.embedders.1.model.text_projection.weight" in ldm_dict_keys: + # d_model = int(checkpoint[prefix + "text_projection"].shape[0])) + d_model = int(ldm_state_dict["conditioner.embedders.1.model.text_projection.weight"].shape[0]) elif "conditioner.embedders.0.model.text_projection" in ldm_dict_keys: # d_model = int(checkpoint[prefix + "text_projection"].shape[0])) d_model = int(ldm_state_dict["conditioner.embedders.0.model.text_projection"].shape[0]) @@ -191,6 +196,8 @@ if args.sdxl or args.sd2 or args.ssd or args.refiner: try: match = re.match(proj_pattern_weight, ldm_key) if match: + if ldm_key == "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": + print("here") number = int(match.group(1)) new_val = torch.cat([ diffusers_state_dict[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.weight"], @@ -217,6 +224,8 @@ if args.sdxl or args.sd2 or args.ssd or args.refiner: ], } + matched_ldm_keys.append(ldm_key) + # text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :] # text_model_dict[new_key + ".k_proj.weight"] = checkpoint[key][d_model: d_model * 2, :] # text_model_dict[new_key + ".v_proj.weight"] = checkpoint[key][d_model * 2:, :] @@ -266,6 +275,8 @@ if args.sdxl or args.sd2 or args.ssd or args.refiner: ], } + matched_ldm_keys.append(ldm_key) + # add diffusers operators diffusers_operator_map[f"te{te_suffix}_text_model.encoder.layers.{number}.self_attn.q_proj.bias"] = { "slice": [ @@ -298,6 +309,9 @@ for ldm_key in ldm_dict_keys: ldm_shape_tuple = ldm_state_dict[ldm_key].shape ldm_reduced_shape_tuple = get_reduced_shape(ldm_shape_tuple) for diffusers_key in diffusers_dict_keys: + if ldm_key == "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight" and diffusers_key == "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": + print("here") + diffusers_shape_tuple = diffusers_state_dict[diffusers_key].shape diffusers_reduced_shape_tuple = get_reduced_shape(diffusers_shape_tuple) @@ -356,6 +370,8 @@ if args.sdxl: name += '_sdxl' elif args.ssd: name += '_ssd' +elif args.vega: + name += '_vega' elif 
args.refiner: name += '_refiner' elif args.sd2: diff --git a/toolkit/config_modules.py b/toolkit/config_modules.py index 01525963..454b923d 100644 --- a/toolkit/config_modules.py +++ b/toolkit/config_modules.py @@ -247,6 +247,7 @@ class ModelConfig: self.is_v2: bool = kwargs.get('is_v2', False) self.is_xl: bool = kwargs.get('is_xl', False) self.is_ssd: bool = kwargs.get('is_ssd', False) + self.is_vega: bool = kwargs.get('is_vega', False) self.is_v_pred: bool = kwargs.get('is_v_pred', False) self.dtype: str = kwargs.get('dtype', 'float16') self.vae_path = kwargs.get('vae_path', None) @@ -267,6 +268,9 @@ class ModelConfig: # sed sdxl as true since it is mostly the same architecture self.is_xl = True + if self.is_vega: + self.is_xl = True + class ReferenceDatasetConfig: def __init__(self, **kwargs): @@ -402,7 +406,7 @@ class DatasetConfig: if legacy_caption_type: self.caption_ext = legacy_caption_type self.caption_type = self.caption_ext - self.guidance_type: GuidanceType = kwargs.get('guidance_type', 'targeted_polarity') + self.guidance_type: GuidanceType = kwargs.get('guidance_type', 'targeted') def preprocess_dataset_raw_config(raw_config: List[dict]) -> List[dict]: diff --git a/toolkit/guidance.py b/toolkit/guidance.py index 6efdfcca..d07e13af 100644 --- a/toolkit/guidance.py +++ b/toolkit/guidance.py @@ -7,7 +7,7 @@ from toolkit.prompt_utils import PromptEmbeds, concat_prompt_embeds from toolkit.stable_diffusion_model import StableDiffusion from toolkit.train_tools import get_torch_dtype -GuidanceType = Literal["targeted", "polarity", "targeted_polarity"] +GuidanceType = Literal["targeted", "polarity", "targeted_polarity", "direct"] DIFFERENTIAL_SCALER = 0.2 @@ -118,8 +118,8 @@ def get_targeted_polarity_loss( # ) # Disable the LoRA network so we can predict parent network knowledge without it - sd.network.is_active = False - sd.unet.eval() + # sd.network.is_active = False + # sd.unet.eval() # Predict noise to get a baseline of what the parent network wants to do with the latents + noise. # This acts as our control to preserve the unaltered parts of the image. @@ -133,15 +133,15 @@ def get_targeted_polarity_loss( # conditional_baseline_prediction, unconditional_baseline_prediction = torch.chunk(baseline_prediction, 2, dim=0) - negative_network_weights = [weight * -1.0 for weight in network_weight_list] - positive_network_weights = [weight * 1.0 for weight in network_weight_list] - cat_network_weight_list = positive_network_weights + negative_network_weights + # negative_network_weights = [weight * -1.0 for weight in network_weight_list] + # positive_network_weights = [weight * 1.0 for weight in network_weight_list] + # cat_network_weight_list = positive_network_weights + negative_network_weights # turn the LoRA network back on. 
sd.unet.train() - sd.network.is_active = True + # sd.network.is_active = True - sd.network.multiplier = cat_network_weight_list + # sd.network.multiplier = cat_network_weight_list # do our prediction with LoRA active on the scaled guidance latents prediction = sd.predict_noise( @@ -183,9 +183,7 @@ def get_targeted_polarity_loss( return loss - -# targeted -def get_targeted_guidance_loss( +def get_direct_guidance_loss( noisy_latents: torch.Tensor, conditional_embeds: 'PromptEmbeds', match_adapter_assist: bool, @@ -206,81 +204,45 @@ def get_targeted_guidance_loss( conditional_latents = batch.latents.to(device, dtype=dtype).detach() unconditional_latents = batch.unconditional_latents.to(device, dtype=dtype).detach() - # # apply random offset to both latents - # offset = torch.randn((conditional_latents.shape[0], 1, 1, 1), device=device, dtype=dtype) - # offset = offset * 0.1 - # conditional_latents = conditional_latents + offset - # unconditional_latents = unconditional_latents + offset - # - # # get random scale 0f 0.8 to 1.2 - # scale = torch.rand((conditional_latents.shape[0], 1, 1, 1), device=device, dtype=dtype) - # scale = scale * 0.4 - # scale = scale + 0.8 - # conditional_latents = conditional_latents * scale - # unconditional_latents = unconditional_latents * scale - - unconditional_diff = (unconditional_latents - conditional_latents) - - # scale it to the timestep - unconditional_diff_noise = sd.add_noise( - torch.zeros_like(unconditional_latents), - unconditional_diff, - timesteps - ) - unconditional_diff_noise = unconditional_diff_noise.detach().requires_grad_(False) - - target_noise = noise + unconditional_diff_noise - - noisy_latents = sd.add_noise( + conditional_noisy_latents = sd.add_noise( conditional_latents, - target_noise, - # noise, + # target_noise, + noise, timesteps ).detach() - # Disable the LoRA network so we can predict parent network knowledge without it - sd.network.is_active = False - sd.unet.eval() - - # Predict noise to get a baseline of what the parent network wants to do with the latents + noise. - # This acts as our control to preserve the unaltered parts of the image. - baseline_prediction = sd.predict_noise( - latents=noisy_latents.to(device, dtype=dtype).detach(), - conditional_embeddings=conditional_embeds.to(device, dtype=dtype).detach(), - timestep=timesteps, - guidance_scale=1.0, - **pred_kwargs # adapter residuals in here - ).detach().requires_grad_(False) - - # determine the error for the baseline prediction - baseline_prediction_error = baseline_prediction - noise - - prediction_target = baseline_prediction_error + unconditional_diff_noise - - prediction_target = prediction_target.detach().requires_grad_(False) - + unconditional_noisy_latents = sd.add_noise( + unconditional_latents, + noise, + timesteps + ).detach() # turn the LoRA network back on. 
sd.unet.train() - sd.network.is_active = True + # sd.network.is_active = True - sd.network.multiplier = network_weight_list + # sd.network.multiplier = network_weight_list # do our prediction with LoRA active on the scaled guidance latents prediction = sd.predict_noise( - latents=noisy_latents.to(device, dtype=dtype).detach(), - conditional_embeddings=conditional_embeds.to(device, dtype=dtype).detach(), - timestep=timesteps, + latents=torch.cat([unconditional_noisy_latents, conditional_noisy_latents]).to(device, dtype=dtype).detach(), + conditional_embeddings=concat_prompt_embeds([conditional_embeds,conditional_embeds]).to(device, dtype=dtype).detach(), + timestep=torch.cat([timesteps, timesteps]), guidance_scale=1.0, **pred_kwargs # adapter residuals in here ) - prediction_error = prediction - noise + noise_pred_uncond, noise_pred_cond = torch.chunk(prediction, 2, dim=0) + + guidance_scale = 1.0 + guidance_pred = noise_pred_uncond + guidance_scale * ( + noise_pred_cond - noise_pred_uncond + ) guidance_loss = torch.nn.functional.mse_loss( - prediction_error.float(), - # unconditional_diff_noise.float(), - prediction_target.float(), + guidance_pred.float(), + noise.detach().float(), reduction="none" ) + guidance_loss = guidance_loss.mean([1, 2, 3]) guidance_loss = guidance_loss.mean() @@ -297,6 +259,242 @@ def get_targeted_guidance_loss( return loss +# targeted +def get_targeted_guidance_loss( + noisy_latents: torch.Tensor, + conditional_embeds: 'PromptEmbeds', + match_adapter_assist: bool, + network_weight_list: list, + timesteps: torch.Tensor, + pred_kwargs: dict, + batch: 'DataLoaderBatchDTO', + noise: torch.Tensor, + sd: 'StableDiffusion', + **kwargs +): + with torch.no_grad(): + dtype = get_torch_dtype(sd.torch_dtype) + device = sd.device_torch + + # create the differential mask from the actual tensors + conditional_imgs = batch.tensor.to(device, dtype=dtype).detach() + unconditional_imgs = batch.unconditional_tensor.to(device, dtype=dtype).detach() + differential_mask = torch.abs(conditional_imgs - unconditional_imgs) + differential_mask = differential_mask - differential_mask.min(dim=1, keepdim=True)[0].min(dim=2, keepdim=True)[0].min(dim=3, keepdim=True)[0] + differential_mask = differential_mask / differential_mask.max(dim=1, keepdim=True)[0].max(dim=2, keepdim=True)[0].max(dim=3, keepdim=True)[0] + + # differential_mask is (bs, 3, width, height) + # latents are (bs, 4, width, height) + # reduce the mean on dim 1 to get a single channel mask and stack it to match latents + differential_mask = differential_mask.mean(dim=1, keepdim=True) + differential_mask = torch.cat([differential_mask] * 4, dim=1) + + # scale the mask down to latent size + differential_mask = torch.nn.functional.interpolate( + differential_mask, + size=noisy_latents.shape[2:], + mode="nearest" + ) + + conditional_noisy_latents = noisy_latents + + conditional_latents = batch.latents.to(device, dtype=dtype).detach() + unconditional_latents = batch.unconditional_latents.to(device, dtype=dtype).detach() + + # unconditional_as_noise = unconditional_latents - conditional_latents + # conditional_as_noise = conditional_latents - unconditional_latents + + # Encode the unconditional image into latents + unconditional_noisy_latents = sd.noise_scheduler.add_noise( + unconditional_latents, + noise, + timesteps + ) + conditional_noisy_latents = sd.noise_scheduler.add_noise( + conditional_latents, + noise, + timesteps + ) + + # was_network_active = self.network.is_active + sd.network.is_active = False + sd.unet.eval() + + + # 
calculate the differential between our conditional (target image) and out unconditional ("bad" image) + # target_differential = unconditional_noisy_latents - conditional_noisy_latents + target_differential = unconditional_latents - conditional_latents + # target_differential = conditional_latents - unconditional_latents + + # scale the target differential by the scheduler + # todo, scale it the right way + # target_differential = sd.noise_scheduler.add_noise( + # torch.zeros_like(target_differential), + # target_differential, + # timesteps + # ) + + # noise_abs_mean = torch.abs(noise + 1e-6).mean(dim=[1, 2, 3], keepdim=True) + + # target_differential = target_differential.detach() + # target_differential_abs_mean = torch.abs(target_differential + 1e-6).mean(dim=[1, 2, 3], keepdim=True) + # # determins scaler to adjust to same abs mean as noise + # scaler = noise_abs_mean / target_differential_abs_mean + + + target_differential_knowledge = target_differential + target_differential_knowledge = target_differential_knowledge.detach() + + # add the target differential to the target latents as if it were noise with the scheduler scaled to + # the current timestep. Scaling the noise here is IMPORTANT and will lead to a blurry targeted area if not done + # properly + # guidance_latents = sd.noise_scheduler.add_noise( + # conditional_noisy_latents, + # target_differential, + # timesteps + # ) + + # guidance_latents = conditional_noisy_latents + target_differential + # target_noise = conditional_noisy_latents + target_differential + + # With LoRA network bypassed, predict noise to get a baseline of what the network + # wants to do with the latents + noise. Pass our target latents here for the input. + target_unconditional = sd.predict_noise( + latents=unconditional_noisy_latents.to(device, dtype=dtype).detach(), + conditional_embeddings=conditional_embeds.to(device, dtype=dtype).detach(), + timestep=timesteps, + guidance_scale=1.0, + **pred_kwargs # adapter residuals in here + ).detach() + # target_conditional = sd.predict_noise( + # latents=conditional_noisy_latents.to(device, dtype=dtype).detach(), + # conditional_embeddings=conditional_embeds.to(device, dtype=dtype).detach(), + # timestep=timesteps, + # guidance_scale=1.0, + # **pred_kwargs # adapter residuals in here + # ).detach() + + # we calculate the networks current knowledge so we do not overlearn what we know + # parent_knowledge = target_unconditional - target_conditional + # parent_knowledge = parent_knowledge.detach() + # del target_conditional + # del target_unconditional + + # we now have the differential noise prediction needed to create our convergence target + # target_unknown_knowledge = target_differential + parent_knowledge + # del parent_knowledge + prior_prediction_loss = torch.nn.functional.mse_loss( + target_unconditional.float(), + noise.float(), + reduction="none" + ).detach().clone() + + # turn the LoRA network back on. + sd.unet.train() + sd.network.is_active = True + sd.network.multiplier = network_weight_list + + # with LoRA active, predict the noise with the scaled differential latents added. This will allow us + # the opportunity to predict the differential + noise that was added to the latents. 
+ prediction_conditional = sd.predict_noise( + latents=conditional_noisy_latents.to(device, dtype=dtype).detach(), + conditional_embeddings=conditional_embeds.to(device, dtype=dtype).detach(), + timestep=timesteps, + guidance_scale=1.0, + **pred_kwargs # adapter residuals in here + ) + + + + # remove the baseline conditional prediction. This will leave only the divergence from the baseline and + # the prediction of the added differential noise + # prediction_positive = prediction_unconditional - target_unconditional + # current_knowledge = target_unconditional - prediction_conditional + # current_differential_knowledge = prediction_conditional - target_unconditional + + # current_unknown_knowledge = parent_knowledge - current_knowledge + # + # current_unknown_knowledge_abs_mean = torch.abs(current_unknown_knowledge + 1e-6).mean(dim=[1, 2, 3], keepdim=True) + # current_unknown_knowledge_std = current_unknown_knowledge / current_unknown_knowledge_abs_mean + + + # for loss, we target ONLY the unscaled differential between our conditional and unconditional latents + # this is the diffusion training process. + # This will guide the network to make identical predictions it previously did for everything EXCEPT our + # differential between the conditional and unconditional images + + # positive_loss = torch.nn.functional.mse_loss( + # current_differential_knowledge.float(), + # target_differential_knowledge.float(), + # reduction="none" + # ) + + normal_loss = torch.nn.functional.mse_loss( + prediction_conditional.float(), + noise.float(), + reduction="none" + ) + # + # # scale positive and neutral loss to the same scale + # positive_loss_abs_mean = torch.abs(positive_loss + 1e-6).mean(dim=[1, 2, 3], keepdim=True) + # normal_loss_abs_mean = torch.abs(normal_loss + 1e-6).mean(dim=[1, 2, 3], keepdim=True) + # scaler = normal_loss_abs_mean / positive_loss_abs_mean + # positive_loss = positive_loss * scaler + + # positive_loss = positive_loss * differential_mask + # positive_loss = positive_loss + # masked_normal_loss = normal_loss * differential_mask + + prior_loss = torch.abs( + normal_loss.float() - prior_prediction_loss.float(), + # ) * (1 - differential_mask) + ) + + decouple = True + + # positive_loss_full = positive_loss + # prior_loss_full = prior_loss + # + # current_scaler = (prior_loss_full.max() / positive_loss_full.max()) + # # positive_loss = positive_loss * current_scaler + # avg_scaler_arr.append(current_scaler.item()) + # avg_scaler = sum(avg_scaler_arr) / len(avg_scaler_arr) + # print(f"avg scaler: {avg_scaler}, current scaler: {current_scaler.item()}") + # # remove extra scalers more than 100 + # if len(avg_scaler_arr) > 100: + # avg_scaler_arr.pop(0) + # + # # positive_loss = positive_loss * avg_scaler + # positive_loss = positive_loss * avg_scaler * 0.1 + + if decouple: + # positive_loss = positive_loss.mean([1, 2, 3]) + prior_loss = prior_loss.mean([1, 2, 3]) + # masked_normal_loss = masked_normal_loss.mean([1, 2, 3]) + positive_loss = prior_loss + # positive_loss = positive_loss + prior_loss + else: + + # positive_loss = positive_loss + prior_loss + positive_loss = prior_loss + positive_loss = positive_loss.mean([1, 2, 3]) + + # positive_loss = positive_loss + adain_loss.mean([1, 2, 3]) + # send it backwards BEFORE switching network polarity + # positive_loss = self.apply_snr(positive_loss, timesteps) + positive_loss = positive_loss.mean() + positive_loss.backward() + # loss = positive_loss.detach() + negative_loss.detach() + loss = positive_loss.detach() + + # add a grad so 
other backward does not fail + loss.requires_grad_(True) + + # restore network + sd.network.multiplier = network_weight_list + + return loss + def get_guided_loss_polarity( noisy_latents: torch.Tensor, conditional_embeds: PromptEmbeds, @@ -360,17 +558,17 @@ def get_guided_loss_polarity( noise.float(), reduction="none" ) - pred_loss = pred_loss.mean([1, 2, 3]) + # pred_loss = pred_loss.mean([1, 2, 3]) pred_neg_loss = torch.nn.functional.mse_loss( pred_neg.float(), noise.float(), reduction="none" ) - pred_neg_loss = pred_neg_loss.mean([1, 2, 3]) loss = pred_loss + pred_neg_loss + loss = loss.mean([1, 2, 3]) loss = loss.mean() loss.backward() @@ -437,5 +635,18 @@ def get_guidance_loss( sd, **kwargs ) + elif guidance_type == "direct": + return get_direct_guidance_loss( + noisy_latents, + conditional_embeds, + match_adapter_assist, + network_weight_list, + timesteps, + pred_kwargs, + batch, + noise, + sd, + **kwargs + ) else: raise NotImplementedError(f"Guidance type {guidance_type} is not implemented") diff --git a/toolkit/keymaps/stable_diffusion_vega.json b/toolkit/keymaps/stable_diffusion_vega.json new file mode 100644 index 00000000..4117c201 --- /dev/null +++ b/toolkit/keymaps/stable_diffusion_vega.json @@ -0,0 +1,3039 @@ +{ + "ldm_diffusers_keymap": { + "conditioner.embedders.0.transformer.text_model.embeddings.position_embedding.weight": "te0_text_model.embeddings.position_embedding.weight", + "conditioner.embedders.0.transformer.text_model.embeddings.token_embedding.weight": "te0_text_model.embeddings.token_embedding.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.bias": "te0_text_model.encoder.layers.0.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.weight": "te0_text_model.encoder.layers.0.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.bias": "te0_text_model.encoder.layers.0.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.weight": "te0_text_model.encoder.layers.0.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.bias": "te0_text_model.encoder.layers.0.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.weight": "te0_text_model.encoder.layers.0.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.bias": "te0_text_model.encoder.layers.0.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.weight": "te0_text_model.encoder.layers.0.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": "te0_text_model.encoder.layers.0.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": "te0_text_model.encoder.layers.0.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": "te0_text_model.encoder.layers.0.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": "te0_text_model.encoder.layers.0.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": "te0_text_model.encoder.layers.0.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": "te0_text_model.encoder.layers.0.self_attn.q_proj.weight", + 
"conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": "te0_text_model.encoder.layers.0.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": "te0_text_model.encoder.layers.0.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.bias": "te0_text_model.encoder.layers.1.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.weight": "te0_text_model.encoder.layers.1.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.bias": "te0_text_model.encoder.layers.1.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.weight": "te0_text_model.encoder.layers.1.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.bias": "te0_text_model.encoder.layers.1.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.weight": "te0_text_model.encoder.layers.1.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.bias": "te0_text_model.encoder.layers.1.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.weight": "te0_text_model.encoder.layers.1.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": "te0_text_model.encoder.layers.1.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": "te0_text_model.encoder.layers.1.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": "te0_text_model.encoder.layers.1.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": "te0_text_model.encoder.layers.1.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": "te0_text_model.encoder.layers.1.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": "te0_text_model.encoder.layers.1.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": "te0_text_model.encoder.layers.1.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": "te0_text_model.encoder.layers.1.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.bias": "te0_text_model.encoder.layers.10.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.weight": "te0_text_model.encoder.layers.10.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.bias": "te0_text_model.encoder.layers.10.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.weight": "te0_text_model.encoder.layers.10.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.bias": "te0_text_model.encoder.layers.10.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.weight": "te0_text_model.encoder.layers.10.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.bias": 
"te0_text_model.encoder.layers.10.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.weight": "te0_text_model.encoder.layers.10.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": "te0_text_model.encoder.layers.10.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": "te0_text_model.encoder.layers.10.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": "te0_text_model.encoder.layers.10.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": "te0_text_model.encoder.layers.10.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": "te0_text_model.encoder.layers.10.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": "te0_text_model.encoder.layers.10.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": "te0_text_model.encoder.layers.10.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": "te0_text_model.encoder.layers.10.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.bias": "te0_text_model.encoder.layers.11.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.weight": "te0_text_model.encoder.layers.11.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.bias": "te0_text_model.encoder.layers.11.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.weight": "te0_text_model.encoder.layers.11.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.bias": "te0_text_model.encoder.layers.11.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.weight": "te0_text_model.encoder.layers.11.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.bias": "te0_text_model.encoder.layers.11.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.weight": "te0_text_model.encoder.layers.11.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": "te0_text_model.encoder.layers.11.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": "te0_text_model.encoder.layers.11.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": "te0_text_model.encoder.layers.11.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": "te0_text_model.encoder.layers.11.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": "te0_text_model.encoder.layers.11.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": "te0_text_model.encoder.layers.11.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": 
"te0_text_model.encoder.layers.11.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": "te0_text_model.encoder.layers.11.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.bias": "te0_text_model.encoder.layers.2.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.weight": "te0_text_model.encoder.layers.2.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.bias": "te0_text_model.encoder.layers.2.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.weight": "te0_text_model.encoder.layers.2.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.bias": "te0_text_model.encoder.layers.2.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.weight": "te0_text_model.encoder.layers.2.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.bias": "te0_text_model.encoder.layers.2.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.weight": "te0_text_model.encoder.layers.2.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": "te0_text_model.encoder.layers.2.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": "te0_text_model.encoder.layers.2.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": "te0_text_model.encoder.layers.2.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": "te0_text_model.encoder.layers.2.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": "te0_text_model.encoder.layers.2.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": "te0_text_model.encoder.layers.2.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": "te0_text_model.encoder.layers.2.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": "te0_text_model.encoder.layers.2.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.bias": "te0_text_model.encoder.layers.3.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.weight": "te0_text_model.encoder.layers.3.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.bias": "te0_text_model.encoder.layers.3.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.weight": "te0_text_model.encoder.layers.3.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.bias": "te0_text_model.encoder.layers.3.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.weight": "te0_text_model.encoder.layers.3.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.bias": "te0_text_model.encoder.layers.3.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.weight": 
"te0_text_model.encoder.layers.3.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": "te0_text_model.encoder.layers.3.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": "te0_text_model.encoder.layers.3.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": "te0_text_model.encoder.layers.3.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": "te0_text_model.encoder.layers.3.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": "te0_text_model.encoder.layers.3.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": "te0_text_model.encoder.layers.3.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": "te0_text_model.encoder.layers.3.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": "te0_text_model.encoder.layers.3.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.bias": "te0_text_model.encoder.layers.4.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.weight": "te0_text_model.encoder.layers.4.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.bias": "te0_text_model.encoder.layers.4.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.weight": "te0_text_model.encoder.layers.4.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.bias": "te0_text_model.encoder.layers.4.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.weight": "te0_text_model.encoder.layers.4.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.bias": "te0_text_model.encoder.layers.4.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.weight": "te0_text_model.encoder.layers.4.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": "te0_text_model.encoder.layers.4.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": "te0_text_model.encoder.layers.4.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": "te0_text_model.encoder.layers.4.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": "te0_text_model.encoder.layers.4.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": "te0_text_model.encoder.layers.4.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": "te0_text_model.encoder.layers.4.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": "te0_text_model.encoder.layers.4.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": "te0_text_model.encoder.layers.4.self_attn.v_proj.weight", 
+ "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.bias": "te0_text_model.encoder.layers.5.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.weight": "te0_text_model.encoder.layers.5.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.bias": "te0_text_model.encoder.layers.5.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.weight": "te0_text_model.encoder.layers.5.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.bias": "te0_text_model.encoder.layers.5.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.weight": "te0_text_model.encoder.layers.5.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.bias": "te0_text_model.encoder.layers.5.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.weight": "te0_text_model.encoder.layers.5.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": "te0_text_model.encoder.layers.5.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": "te0_text_model.encoder.layers.5.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": "te0_text_model.encoder.layers.5.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": "te0_text_model.encoder.layers.5.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": "te0_text_model.encoder.layers.5.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": "te0_text_model.encoder.layers.5.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": "te0_text_model.encoder.layers.5.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": "te0_text_model.encoder.layers.5.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.bias": "te0_text_model.encoder.layers.6.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.weight": "te0_text_model.encoder.layers.6.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.bias": "te0_text_model.encoder.layers.6.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.weight": "te0_text_model.encoder.layers.6.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.bias": "te0_text_model.encoder.layers.6.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.weight": "te0_text_model.encoder.layers.6.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.bias": "te0_text_model.encoder.layers.6.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.weight": "te0_text_model.encoder.layers.6.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": "te0_text_model.encoder.layers.6.self_attn.k_proj.bias", + 
"conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": "te0_text_model.encoder.layers.6.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": "te0_text_model.encoder.layers.6.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": "te0_text_model.encoder.layers.6.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": "te0_text_model.encoder.layers.6.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": "te0_text_model.encoder.layers.6.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": "te0_text_model.encoder.layers.6.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": "te0_text_model.encoder.layers.6.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.bias": "te0_text_model.encoder.layers.7.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.weight": "te0_text_model.encoder.layers.7.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.bias": "te0_text_model.encoder.layers.7.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.weight": "te0_text_model.encoder.layers.7.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.bias": "te0_text_model.encoder.layers.7.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.weight": "te0_text_model.encoder.layers.7.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.bias": "te0_text_model.encoder.layers.7.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.weight": "te0_text_model.encoder.layers.7.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": "te0_text_model.encoder.layers.7.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": "te0_text_model.encoder.layers.7.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": "te0_text_model.encoder.layers.7.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": "te0_text_model.encoder.layers.7.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": "te0_text_model.encoder.layers.7.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": "te0_text_model.encoder.layers.7.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": "te0_text_model.encoder.layers.7.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": "te0_text_model.encoder.layers.7.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.bias": "te0_text_model.encoder.layers.8.layer_norm1.bias", + 
"conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.weight": "te0_text_model.encoder.layers.8.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.bias": "te0_text_model.encoder.layers.8.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.weight": "te0_text_model.encoder.layers.8.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.bias": "te0_text_model.encoder.layers.8.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.weight": "te0_text_model.encoder.layers.8.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.bias": "te0_text_model.encoder.layers.8.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.weight": "te0_text_model.encoder.layers.8.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": "te0_text_model.encoder.layers.8.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": "te0_text_model.encoder.layers.8.self_attn.k_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": "te0_text_model.encoder.layers.8.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": "te0_text_model.encoder.layers.8.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": "te0_text_model.encoder.layers.8.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": "te0_text_model.encoder.layers.8.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": "te0_text_model.encoder.layers.8.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": "te0_text_model.encoder.layers.8.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.bias": "te0_text_model.encoder.layers.9.layer_norm1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.weight": "te0_text_model.encoder.layers.9.layer_norm1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.bias": "te0_text_model.encoder.layers.9.layer_norm2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.weight": "te0_text_model.encoder.layers.9.layer_norm2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.bias": "te0_text_model.encoder.layers.9.mlp.fc1.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.weight": "te0_text_model.encoder.layers.9.mlp.fc1.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.bias": "te0_text_model.encoder.layers.9.mlp.fc2.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.weight": "te0_text_model.encoder.layers.9.mlp.fc2.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": "te0_text_model.encoder.layers.9.self_attn.k_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": "te0_text_model.encoder.layers.9.self_attn.k_proj.weight", + 
"conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": "te0_text_model.encoder.layers.9.self_attn.out_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": "te0_text_model.encoder.layers.9.self_attn.out_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": "te0_text_model.encoder.layers.9.self_attn.q_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": "te0_text_model.encoder.layers.9.self_attn.q_proj.weight", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": "te0_text_model.encoder.layers.9.self_attn.v_proj.bias", + "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": "te0_text_model.encoder.layers.9.self_attn.v_proj.weight", + "conditioner.embedders.0.transformer.text_model.final_layer_norm.bias": "te0_text_model.final_layer_norm.bias", + "conditioner.embedders.0.transformer.text_model.final_layer_norm.weight": "te0_text_model.final_layer_norm.weight", + "conditioner.embedders.1.model.ln_final.bias": "te1_text_model.final_layer_norm.bias", + "conditioner.embedders.1.model.ln_final.weight": "te1_text_model.final_layer_norm.weight", + "conditioner.embedders.1.model.positional_embedding": "te1_text_model.embeddings.position_embedding.weight", + "conditioner.embedders.1.model.text_projection.weight": "te1_text_projection.weight", + "conditioner.embedders.1.model.token_embedding.weight": "te1_text_model.embeddings.token_embedding.weight", + "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.bias": "te1_text_model.encoder.layers.0.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.weight": "te1_text_model.encoder.layers.0.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.bias": "te1_text_model.encoder.layers.0.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.weight": "te1_text_model.encoder.layers.0.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.bias": "te1_text_model.encoder.layers.0.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.weight": "te1_text_model.encoder.layers.0.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.bias": "te1_text_model.encoder.layers.0.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.weight": "te1_text_model.encoder.layers.0.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.bias": "te1_text_model.encoder.layers.0.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.weight": "te1_text_model.encoder.layers.0.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.bias": "te1_text_model.encoder.layers.1.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.weight": "te1_text_model.encoder.layers.1.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.bias": "te1_text_model.encoder.layers.1.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.weight": "te1_text_model.encoder.layers.1.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.1.ln_2.bias": "te1_text_model.encoder.layers.1.layer_norm2.bias", + 
"conditioner.embedders.1.model.transformer.resblocks.1.ln_2.weight": "te1_text_model.encoder.layers.1.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.bias": "te1_text_model.encoder.layers.1.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.weight": "te1_text_model.encoder.layers.1.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.bias": "te1_text_model.encoder.layers.1.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.weight": "te1_text_model.encoder.layers.1.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.bias": "te1_text_model.encoder.layers.10.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": "te1_text_model.encoder.layers.10.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.bias": "te1_text_model.encoder.layers.10.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.weight": "te1_text_model.encoder.layers.10.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.bias": "te1_text_model.encoder.layers.10.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.weight": "te1_text_model.encoder.layers.10.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.bias": "te1_text_model.encoder.layers.10.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.weight": "te1_text_model.encoder.layers.10.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.bias": "te1_text_model.encoder.layers.10.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.weight": "te1_text_model.encoder.layers.10.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.bias": "te1_text_model.encoder.layers.11.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": "te1_text_model.encoder.layers.11.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.bias": "te1_text_model.encoder.layers.11.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.weight": "te1_text_model.encoder.layers.11.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.bias": "te1_text_model.encoder.layers.11.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.weight": "te1_text_model.encoder.layers.11.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.bias": "te1_text_model.encoder.layers.11.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.weight": "te1_text_model.encoder.layers.11.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.bias": "te1_text_model.encoder.layers.11.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.weight": "te1_text_model.encoder.layers.11.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.bias": "te1_text_model.encoder.layers.12.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": "te1_text_model.encoder.layers.12.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.bias": 
"te1_text_model.encoder.layers.12.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.weight": "te1_text_model.encoder.layers.12.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.bias": "te1_text_model.encoder.layers.12.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.weight": "te1_text_model.encoder.layers.12.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.bias": "te1_text_model.encoder.layers.12.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.weight": "te1_text_model.encoder.layers.12.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.bias": "te1_text_model.encoder.layers.12.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.weight": "te1_text_model.encoder.layers.12.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.bias": "te1_text_model.encoder.layers.13.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": "te1_text_model.encoder.layers.13.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.bias": "te1_text_model.encoder.layers.13.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.weight": "te1_text_model.encoder.layers.13.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.bias": "te1_text_model.encoder.layers.13.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.weight": "te1_text_model.encoder.layers.13.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.bias": "te1_text_model.encoder.layers.13.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.weight": "te1_text_model.encoder.layers.13.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.bias": "te1_text_model.encoder.layers.13.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.weight": "te1_text_model.encoder.layers.13.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.bias": "te1_text_model.encoder.layers.14.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.weight": "te1_text_model.encoder.layers.14.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.bias": "te1_text_model.encoder.layers.14.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.weight": "te1_text_model.encoder.layers.14.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.bias": "te1_text_model.encoder.layers.14.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.weight": "te1_text_model.encoder.layers.14.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.bias": "te1_text_model.encoder.layers.14.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.weight": "te1_text_model.encoder.layers.14.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.bias": "te1_text_model.encoder.layers.14.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.weight": "te1_text_model.encoder.layers.14.mlp.fc2.weight", + 
"conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.bias": "te1_text_model.encoder.layers.15.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": "te1_text_model.encoder.layers.15.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.bias": "te1_text_model.encoder.layers.15.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.weight": "te1_text_model.encoder.layers.15.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.bias": "te1_text_model.encoder.layers.15.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.weight": "te1_text_model.encoder.layers.15.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.bias": "te1_text_model.encoder.layers.15.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.weight": "te1_text_model.encoder.layers.15.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.bias": "te1_text_model.encoder.layers.15.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.weight": "te1_text_model.encoder.layers.15.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.bias": "te1_text_model.encoder.layers.16.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.weight": "te1_text_model.encoder.layers.16.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.bias": "te1_text_model.encoder.layers.16.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.weight": "te1_text_model.encoder.layers.16.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.bias": "te1_text_model.encoder.layers.16.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.weight": "te1_text_model.encoder.layers.16.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.bias": "te1_text_model.encoder.layers.16.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.weight": "te1_text_model.encoder.layers.16.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.bias": "te1_text_model.encoder.layers.16.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.weight": "te1_text_model.encoder.layers.16.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.bias": "te1_text_model.encoder.layers.17.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.weight": "te1_text_model.encoder.layers.17.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.bias": "te1_text_model.encoder.layers.17.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.weight": "te1_text_model.encoder.layers.17.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.bias": "te1_text_model.encoder.layers.17.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.weight": "te1_text_model.encoder.layers.17.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.bias": "te1_text_model.encoder.layers.17.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.weight": 
"te1_text_model.encoder.layers.17.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.bias": "te1_text_model.encoder.layers.17.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.weight": "te1_text_model.encoder.layers.17.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.bias": "te1_text_model.encoder.layers.18.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.weight": "te1_text_model.encoder.layers.18.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.bias": "te1_text_model.encoder.layers.18.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.weight": "te1_text_model.encoder.layers.18.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.bias": "te1_text_model.encoder.layers.18.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.weight": "te1_text_model.encoder.layers.18.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.bias": "te1_text_model.encoder.layers.18.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.weight": "te1_text_model.encoder.layers.18.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.bias": "te1_text_model.encoder.layers.18.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.weight": "te1_text_model.encoder.layers.18.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.bias": "te1_text_model.encoder.layers.19.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.weight": "te1_text_model.encoder.layers.19.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.bias": "te1_text_model.encoder.layers.19.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.weight": "te1_text_model.encoder.layers.19.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.bias": "te1_text_model.encoder.layers.19.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.weight": "te1_text_model.encoder.layers.19.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.bias": "te1_text_model.encoder.layers.19.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.weight": "te1_text_model.encoder.layers.19.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.bias": "te1_text_model.encoder.layers.19.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.weight": "te1_text_model.encoder.layers.19.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.bias": "te1_text_model.encoder.layers.2.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": "te1_text_model.encoder.layers.2.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.bias": "te1_text_model.encoder.layers.2.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.weight": "te1_text_model.encoder.layers.2.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.bias": "te1_text_model.encoder.layers.2.layer_norm2.bias", + 
"conditioner.embedders.1.model.transformer.resblocks.2.ln_2.weight": "te1_text_model.encoder.layers.2.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.bias": "te1_text_model.encoder.layers.2.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.weight": "te1_text_model.encoder.layers.2.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.bias": "te1_text_model.encoder.layers.2.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.weight": "te1_text_model.encoder.layers.2.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.bias": "te1_text_model.encoder.layers.20.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.weight": "te1_text_model.encoder.layers.20.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.bias": "te1_text_model.encoder.layers.20.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.weight": "te1_text_model.encoder.layers.20.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.bias": "te1_text_model.encoder.layers.20.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.weight": "te1_text_model.encoder.layers.20.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.bias": "te1_text_model.encoder.layers.20.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.weight": "te1_text_model.encoder.layers.20.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.bias": "te1_text_model.encoder.layers.20.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.weight": "te1_text_model.encoder.layers.20.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.bias": "te1_text_model.encoder.layers.21.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.weight": "te1_text_model.encoder.layers.21.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.bias": "te1_text_model.encoder.layers.21.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.weight": "te1_text_model.encoder.layers.21.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.bias": "te1_text_model.encoder.layers.21.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.weight": "te1_text_model.encoder.layers.21.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.bias": "te1_text_model.encoder.layers.21.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.weight": "te1_text_model.encoder.layers.21.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.bias": "te1_text_model.encoder.layers.21.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.weight": "te1_text_model.encoder.layers.21.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.bias": "te1_text_model.encoder.layers.22.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.weight": "te1_text_model.encoder.layers.22.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.bias": 
"te1_text_model.encoder.layers.22.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.weight": "te1_text_model.encoder.layers.22.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.bias": "te1_text_model.encoder.layers.22.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.weight": "te1_text_model.encoder.layers.22.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.bias": "te1_text_model.encoder.layers.22.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.weight": "te1_text_model.encoder.layers.22.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.bias": "te1_text_model.encoder.layers.22.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.weight": "te1_text_model.encoder.layers.22.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.bias": "te1_text_model.encoder.layers.23.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.weight": "te1_text_model.encoder.layers.23.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.bias": "te1_text_model.encoder.layers.23.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.weight": "te1_text_model.encoder.layers.23.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.bias": "te1_text_model.encoder.layers.23.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.weight": "te1_text_model.encoder.layers.23.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.bias": "te1_text_model.encoder.layers.23.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.weight": "te1_text_model.encoder.layers.23.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.bias": "te1_text_model.encoder.layers.23.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.weight": "te1_text_model.encoder.layers.23.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.bias": "te1_text_model.encoder.layers.24.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.weight": "te1_text_model.encoder.layers.24.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.bias": "te1_text_model.encoder.layers.24.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.weight": "te1_text_model.encoder.layers.24.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.bias": "te1_text_model.encoder.layers.24.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.weight": "te1_text_model.encoder.layers.24.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.bias": "te1_text_model.encoder.layers.24.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.weight": "te1_text_model.encoder.layers.24.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.bias": "te1_text_model.encoder.layers.24.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.weight": "te1_text_model.encoder.layers.24.mlp.fc2.weight", + 
"conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.bias": "te1_text_model.encoder.layers.25.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.weight": "te1_text_model.encoder.layers.25.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.bias": "te1_text_model.encoder.layers.25.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.weight": "te1_text_model.encoder.layers.25.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.bias": "te1_text_model.encoder.layers.25.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.weight": "te1_text_model.encoder.layers.25.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.bias": "te1_text_model.encoder.layers.25.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.weight": "te1_text_model.encoder.layers.25.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.bias": "te1_text_model.encoder.layers.25.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.weight": "te1_text_model.encoder.layers.25.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.bias": "te1_text_model.encoder.layers.26.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.weight": "te1_text_model.encoder.layers.26.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.bias": "te1_text_model.encoder.layers.26.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.weight": "te1_text_model.encoder.layers.26.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.bias": "te1_text_model.encoder.layers.26.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.weight": "te1_text_model.encoder.layers.26.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.bias": "te1_text_model.encoder.layers.26.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.weight": "te1_text_model.encoder.layers.26.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.bias": "te1_text_model.encoder.layers.26.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.weight": "te1_text_model.encoder.layers.26.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.bias": "te1_text_model.encoder.layers.27.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.weight": "te1_text_model.encoder.layers.27.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.bias": "te1_text_model.encoder.layers.27.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.weight": "te1_text_model.encoder.layers.27.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.bias": "te1_text_model.encoder.layers.27.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.weight": "te1_text_model.encoder.layers.27.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.bias": "te1_text_model.encoder.layers.27.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.weight": 
"te1_text_model.encoder.layers.27.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.bias": "te1_text_model.encoder.layers.27.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.weight": "te1_text_model.encoder.layers.27.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.bias": "te1_text_model.encoder.layers.28.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.weight": "te1_text_model.encoder.layers.28.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.bias": "te1_text_model.encoder.layers.28.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.weight": "te1_text_model.encoder.layers.28.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.bias": "te1_text_model.encoder.layers.28.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.weight": "te1_text_model.encoder.layers.28.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.bias": "te1_text_model.encoder.layers.28.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.weight": "te1_text_model.encoder.layers.28.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.bias": "te1_text_model.encoder.layers.28.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.weight": "te1_text_model.encoder.layers.28.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.bias": "te1_text_model.encoder.layers.29.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.weight": "te1_text_model.encoder.layers.29.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.bias": "te1_text_model.encoder.layers.29.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.weight": "te1_text_model.encoder.layers.29.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.bias": "te1_text_model.encoder.layers.29.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.weight": "te1_text_model.encoder.layers.29.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.bias": "te1_text_model.encoder.layers.29.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.weight": "te1_text_model.encoder.layers.29.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.bias": "te1_text_model.encoder.layers.29.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.weight": "te1_text_model.encoder.layers.29.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.bias": "te1_text_model.encoder.layers.3.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": "te1_text_model.encoder.layers.3.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.bias": "te1_text_model.encoder.layers.3.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.weight": "te1_text_model.encoder.layers.3.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.bias": "te1_text_model.encoder.layers.3.layer_norm2.bias", + 
"conditioner.embedders.1.model.transformer.resblocks.3.ln_2.weight": "te1_text_model.encoder.layers.3.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.bias": "te1_text_model.encoder.layers.3.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.weight": "te1_text_model.encoder.layers.3.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.bias": "te1_text_model.encoder.layers.3.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.weight": "te1_text_model.encoder.layers.3.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.bias": "te1_text_model.encoder.layers.30.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.weight": "te1_text_model.encoder.layers.30.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.bias": "te1_text_model.encoder.layers.30.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.weight": "te1_text_model.encoder.layers.30.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.bias": "te1_text_model.encoder.layers.30.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.weight": "te1_text_model.encoder.layers.30.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.bias": "te1_text_model.encoder.layers.30.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.weight": "te1_text_model.encoder.layers.30.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.bias": "te1_text_model.encoder.layers.30.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.weight": "te1_text_model.encoder.layers.30.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.bias": "te1_text_model.encoder.layers.31.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.weight": "te1_text_model.encoder.layers.31.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.bias": "te1_text_model.encoder.layers.31.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.weight": "te1_text_model.encoder.layers.31.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.bias": "te1_text_model.encoder.layers.31.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.weight": "te1_text_model.encoder.layers.31.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.bias": "te1_text_model.encoder.layers.31.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.weight": "te1_text_model.encoder.layers.31.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.bias": "te1_text_model.encoder.layers.31.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.weight": "te1_text_model.encoder.layers.31.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.bias": "te1_text_model.encoder.layers.4.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": "te1_text_model.encoder.layers.4.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.bias": 
"te1_text_model.encoder.layers.4.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.weight": "te1_text_model.encoder.layers.4.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.bias": "te1_text_model.encoder.layers.4.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.weight": "te1_text_model.encoder.layers.4.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.bias": "te1_text_model.encoder.layers.4.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.weight": "te1_text_model.encoder.layers.4.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.bias": "te1_text_model.encoder.layers.4.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.weight": "te1_text_model.encoder.layers.4.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.bias": "te1_text_model.encoder.layers.5.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": "te1_text_model.encoder.layers.5.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.bias": "te1_text_model.encoder.layers.5.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.weight": "te1_text_model.encoder.layers.5.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.bias": "te1_text_model.encoder.layers.5.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.weight": "te1_text_model.encoder.layers.5.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.bias": "te1_text_model.encoder.layers.5.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.weight": "te1_text_model.encoder.layers.5.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.bias": "te1_text_model.encoder.layers.5.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.weight": "te1_text_model.encoder.layers.5.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.bias": "te1_text_model.encoder.layers.6.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": "te1_text_model.encoder.layers.6.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.bias": "te1_text_model.encoder.layers.6.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.weight": "te1_text_model.encoder.layers.6.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.bias": "te1_text_model.encoder.layers.6.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.weight": "te1_text_model.encoder.layers.6.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.bias": "te1_text_model.encoder.layers.6.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.weight": "te1_text_model.encoder.layers.6.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.bias": "te1_text_model.encoder.layers.6.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.weight": "te1_text_model.encoder.layers.6.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.bias": 
"te1_text_model.encoder.layers.7.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": "te1_text_model.encoder.layers.7.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.bias": "te1_text_model.encoder.layers.7.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.weight": "te1_text_model.encoder.layers.7.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.bias": "te1_text_model.encoder.layers.7.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.weight": "te1_text_model.encoder.layers.7.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.bias": "te1_text_model.encoder.layers.7.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.weight": "te1_text_model.encoder.layers.7.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.bias": "te1_text_model.encoder.layers.7.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.weight": "te1_text_model.encoder.layers.7.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.bias": "te1_text_model.encoder.layers.8.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": "te1_text_model.encoder.layers.8.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.bias": "te1_text_model.encoder.layers.8.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.weight": "te1_text_model.encoder.layers.8.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.bias": "te1_text_model.encoder.layers.8.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.weight": "te1_text_model.encoder.layers.8.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.bias": "te1_text_model.encoder.layers.8.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.weight": "te1_text_model.encoder.layers.8.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.bias": "te1_text_model.encoder.layers.8.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.weight": "te1_text_model.encoder.layers.8.mlp.fc2.weight", + "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.bias": "te1_text_model.encoder.layers.9.self_attn.out_proj.bias", + "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": "te1_text_model.encoder.layers.9.self_attn.out_proj.weight", + "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.bias": "te1_text_model.encoder.layers.9.layer_norm1.bias", + "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.weight": "te1_text_model.encoder.layers.9.layer_norm1.weight", + "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.bias": "te1_text_model.encoder.layers.9.layer_norm2.bias", + "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.weight": "te1_text_model.encoder.layers.9.layer_norm2.weight", + "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.bias": "te1_text_model.encoder.layers.9.mlp.fc1.bias", + "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.weight": "te1_text_model.encoder.layers.9.mlp.fc1.weight", + "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias": 
"te1_text_model.encoder.layers.9.mlp.fc2.bias", + "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.weight": "te1_text_model.encoder.layers.9.mlp.fc2.weight", + "first_stage_model.decoder.conv_in.bias": "vae_decoder.conv_in.bias", + "first_stage_model.decoder.conv_in.weight": "vae_decoder.conv_in.weight", + "first_stage_model.decoder.conv_out.bias": "vae_decoder.conv_out.bias", + "first_stage_model.decoder.conv_out.weight": "vae_decoder.conv_out.weight", + "first_stage_model.decoder.mid.attn_1.k.bias": "vae_decoder.mid_block.attentions.0.to_k.bias", + "first_stage_model.decoder.mid.attn_1.k.weight": "vae_decoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.decoder.mid.attn_1.norm.bias": "vae_decoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.decoder.mid.attn_1.norm.weight": "vae_decoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.decoder.mid.attn_1.proj_out.bias": "vae_decoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.decoder.mid.attn_1.proj_out.weight": "vae_decoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.decoder.mid.attn_1.q.bias": "vae_decoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.decoder.mid.attn_1.q.weight": "vae_decoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.decoder.mid.attn_1.v.bias": "vae_decoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.decoder.mid.attn_1.v.weight": "vae_decoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.decoder.mid.block_1.conv1.bias": "vae_decoder.mid_block.resnets.0.conv1.bias", + "first_stage_model.decoder.mid.block_1.conv1.weight": "vae_decoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.decoder.mid.block_1.conv2.bias": "vae_decoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.decoder.mid.block_1.conv2.weight": "vae_decoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.decoder.mid.block_1.norm1.bias": "vae_decoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.decoder.mid.block_1.norm1.weight": "vae_decoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.decoder.mid.block_1.norm2.bias": "vae_decoder.mid_block.resnets.0.norm2.bias", + "first_stage_model.decoder.mid.block_1.norm2.weight": "vae_decoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.decoder.mid.block_2.conv1.bias": "vae_decoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.decoder.mid.block_2.conv1.weight": "vae_decoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.decoder.mid.block_2.conv2.bias": "vae_decoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.decoder.mid.block_2.conv2.weight": "vae_decoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.decoder.mid.block_2.norm1.bias": "vae_decoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.decoder.mid.block_2.norm1.weight": "vae_decoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.decoder.mid.block_2.norm2.bias": "vae_decoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.decoder.mid.block_2.norm2.weight": "vae_decoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.decoder.norm_out.bias": "vae_decoder.conv_norm_out.bias", + "first_stage_model.decoder.norm_out.weight": "vae_decoder.conv_norm_out.weight", + "first_stage_model.decoder.up.0.block.0.conv1.bias": "vae_decoder.up_blocks.3.resnets.0.conv1.bias", + "first_stage_model.decoder.up.0.block.0.conv1.weight": "vae_decoder.up_blocks.3.resnets.0.conv1.weight", + 
"first_stage_model.decoder.up.0.block.0.conv2.bias": "vae_decoder.up_blocks.3.resnets.0.conv2.bias", + "first_stage_model.decoder.up.0.block.0.conv2.weight": "vae_decoder.up_blocks.3.resnets.0.conv2.weight", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.3.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.0.block.0.norm1.bias": "vae_decoder.up_blocks.3.resnets.0.norm1.bias", + "first_stage_model.decoder.up.0.block.0.norm1.weight": "vae_decoder.up_blocks.3.resnets.0.norm1.weight", + "first_stage_model.decoder.up.0.block.0.norm2.bias": "vae_decoder.up_blocks.3.resnets.0.norm2.bias", + "first_stage_model.decoder.up.0.block.0.norm2.weight": "vae_decoder.up_blocks.3.resnets.0.norm2.weight", + "first_stage_model.decoder.up.0.block.1.conv1.bias": "vae_decoder.up_blocks.3.resnets.1.conv1.bias", + "first_stage_model.decoder.up.0.block.1.conv1.weight": "vae_decoder.up_blocks.3.resnets.1.conv1.weight", + "first_stage_model.decoder.up.0.block.1.conv2.bias": "vae_decoder.up_blocks.3.resnets.1.conv2.bias", + "first_stage_model.decoder.up.0.block.1.conv2.weight": "vae_decoder.up_blocks.3.resnets.1.conv2.weight", + "first_stage_model.decoder.up.0.block.1.norm1.bias": "vae_decoder.up_blocks.3.resnets.1.norm1.bias", + "first_stage_model.decoder.up.0.block.1.norm1.weight": "vae_decoder.up_blocks.3.resnets.1.norm1.weight", + "first_stage_model.decoder.up.0.block.1.norm2.bias": "vae_decoder.up_blocks.3.resnets.1.norm2.bias", + "first_stage_model.decoder.up.0.block.1.norm2.weight": "vae_decoder.up_blocks.3.resnets.1.norm2.weight", + "first_stage_model.decoder.up.0.block.2.conv1.bias": "vae_decoder.up_blocks.3.resnets.2.conv1.bias", + "first_stage_model.decoder.up.0.block.2.conv1.weight": "vae_decoder.up_blocks.3.resnets.2.conv1.weight", + "first_stage_model.decoder.up.0.block.2.conv2.bias": "vae_decoder.up_blocks.3.resnets.2.conv2.bias", + "first_stage_model.decoder.up.0.block.2.conv2.weight": "vae_decoder.up_blocks.3.resnets.2.conv2.weight", + "first_stage_model.decoder.up.0.block.2.norm1.bias": "vae_decoder.up_blocks.3.resnets.2.norm1.bias", + "first_stage_model.decoder.up.0.block.2.norm1.weight": "vae_decoder.up_blocks.3.resnets.2.norm1.weight", + "first_stage_model.decoder.up.0.block.2.norm2.bias": "vae_decoder.up_blocks.3.resnets.2.norm2.bias", + "first_stage_model.decoder.up.0.block.2.norm2.weight": "vae_decoder.up_blocks.3.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.block.0.conv1.bias": "vae_decoder.up_blocks.2.resnets.0.conv1.bias", + "first_stage_model.decoder.up.1.block.0.conv1.weight": "vae_decoder.up_blocks.2.resnets.0.conv1.weight", + "first_stage_model.decoder.up.1.block.0.conv2.bias": "vae_decoder.up_blocks.2.resnets.0.conv2.bias", + "first_stage_model.decoder.up.1.block.0.conv2.weight": "vae_decoder.up_blocks.2.resnets.0.conv2.weight", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.bias", + "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "vae_decoder.up_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.decoder.up.1.block.0.norm1.bias": "vae_decoder.up_blocks.2.resnets.0.norm1.bias", + "first_stage_model.decoder.up.1.block.0.norm1.weight": "vae_decoder.up_blocks.2.resnets.0.norm1.weight", + "first_stage_model.decoder.up.1.block.0.norm2.bias": "vae_decoder.up_blocks.2.resnets.0.norm2.bias", + 
"first_stage_model.decoder.up.1.block.0.norm2.weight": "vae_decoder.up_blocks.2.resnets.0.norm2.weight", + "first_stage_model.decoder.up.1.block.1.conv1.bias": "vae_decoder.up_blocks.2.resnets.1.conv1.bias", + "first_stage_model.decoder.up.1.block.1.conv1.weight": "vae_decoder.up_blocks.2.resnets.1.conv1.weight", + "first_stage_model.decoder.up.1.block.1.conv2.bias": "vae_decoder.up_blocks.2.resnets.1.conv2.bias", + "first_stage_model.decoder.up.1.block.1.conv2.weight": "vae_decoder.up_blocks.2.resnets.1.conv2.weight", + "first_stage_model.decoder.up.1.block.1.norm1.bias": "vae_decoder.up_blocks.2.resnets.1.norm1.bias", + "first_stage_model.decoder.up.1.block.1.norm1.weight": "vae_decoder.up_blocks.2.resnets.1.norm1.weight", + "first_stage_model.decoder.up.1.block.1.norm2.bias": "vae_decoder.up_blocks.2.resnets.1.norm2.bias", + "first_stage_model.decoder.up.1.block.1.norm2.weight": "vae_decoder.up_blocks.2.resnets.1.norm2.weight", + "first_stage_model.decoder.up.1.block.2.conv1.bias": "vae_decoder.up_blocks.2.resnets.2.conv1.bias", + "first_stage_model.decoder.up.1.block.2.conv1.weight": "vae_decoder.up_blocks.2.resnets.2.conv1.weight", + "first_stage_model.decoder.up.1.block.2.conv2.bias": "vae_decoder.up_blocks.2.resnets.2.conv2.bias", + "first_stage_model.decoder.up.1.block.2.conv2.weight": "vae_decoder.up_blocks.2.resnets.2.conv2.weight", + "first_stage_model.decoder.up.1.block.2.norm1.bias": "vae_decoder.up_blocks.2.resnets.2.norm1.bias", + "first_stage_model.decoder.up.1.block.2.norm1.weight": "vae_decoder.up_blocks.2.resnets.2.norm1.weight", + "first_stage_model.decoder.up.1.block.2.norm2.bias": "vae_decoder.up_blocks.2.resnets.2.norm2.bias", + "first_stage_model.decoder.up.1.block.2.norm2.weight": "vae_decoder.up_blocks.2.resnets.2.norm2.weight", + "first_stage_model.decoder.up.1.upsample.conv.bias": "vae_decoder.up_blocks.2.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.1.upsample.conv.weight": "vae_decoder.up_blocks.2.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.2.block.0.conv1.bias": "vae_decoder.up_blocks.1.resnets.0.conv1.bias", + "first_stage_model.decoder.up.2.block.0.conv1.weight": "vae_decoder.up_blocks.1.resnets.0.conv1.weight", + "first_stage_model.decoder.up.2.block.0.conv2.bias": "vae_decoder.up_blocks.1.resnets.0.conv2.bias", + "first_stage_model.decoder.up.2.block.0.conv2.weight": "vae_decoder.up_blocks.1.resnets.0.conv2.weight", + "first_stage_model.decoder.up.2.block.0.norm1.bias": "vae_decoder.up_blocks.1.resnets.0.norm1.bias", + "first_stage_model.decoder.up.2.block.0.norm1.weight": "vae_decoder.up_blocks.1.resnets.0.norm1.weight", + "first_stage_model.decoder.up.2.block.0.norm2.bias": "vae_decoder.up_blocks.1.resnets.0.norm2.bias", + "first_stage_model.decoder.up.2.block.0.norm2.weight": "vae_decoder.up_blocks.1.resnets.0.norm2.weight", + "first_stage_model.decoder.up.2.block.1.conv1.bias": "vae_decoder.up_blocks.1.resnets.1.conv1.bias", + "first_stage_model.decoder.up.2.block.1.conv1.weight": "vae_decoder.up_blocks.1.resnets.1.conv1.weight", + "first_stage_model.decoder.up.2.block.1.conv2.bias": "vae_decoder.up_blocks.1.resnets.1.conv2.bias", + "first_stage_model.decoder.up.2.block.1.conv2.weight": "vae_decoder.up_blocks.1.resnets.1.conv2.weight", + "first_stage_model.decoder.up.2.block.1.norm1.bias": "vae_decoder.up_blocks.1.resnets.1.norm1.bias", + "first_stage_model.decoder.up.2.block.1.norm1.weight": "vae_decoder.up_blocks.1.resnets.1.norm1.weight", + "first_stage_model.decoder.up.2.block.1.norm2.bias": 
"vae_decoder.up_blocks.1.resnets.1.norm2.bias", + "first_stage_model.decoder.up.2.block.1.norm2.weight": "vae_decoder.up_blocks.1.resnets.1.norm2.weight", + "first_stage_model.decoder.up.2.block.2.conv1.bias": "vae_decoder.up_blocks.1.resnets.2.conv1.bias", + "first_stage_model.decoder.up.2.block.2.conv1.weight": "vae_decoder.up_blocks.1.resnets.2.conv1.weight", + "first_stage_model.decoder.up.2.block.2.conv2.bias": "vae_decoder.up_blocks.1.resnets.2.conv2.bias", + "first_stage_model.decoder.up.2.block.2.conv2.weight": "vae_decoder.up_blocks.1.resnets.2.conv2.weight", + "first_stage_model.decoder.up.2.block.2.norm1.bias": "vae_decoder.up_blocks.1.resnets.2.norm1.bias", + "first_stage_model.decoder.up.2.block.2.norm1.weight": "vae_decoder.up_blocks.1.resnets.2.norm1.weight", + "first_stage_model.decoder.up.2.block.2.norm2.bias": "vae_decoder.up_blocks.1.resnets.2.norm2.bias", + "first_stage_model.decoder.up.2.block.2.norm2.weight": "vae_decoder.up_blocks.1.resnets.2.norm2.weight", + "first_stage_model.decoder.up.2.upsample.conv.bias": "vae_decoder.up_blocks.1.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.2.upsample.conv.weight": "vae_decoder.up_blocks.1.upsamplers.0.conv.weight", + "first_stage_model.decoder.up.3.block.0.conv1.bias": "vae_decoder.up_blocks.0.resnets.0.conv1.bias", + "first_stage_model.decoder.up.3.block.0.conv1.weight": "vae_decoder.up_blocks.0.resnets.0.conv1.weight", + "first_stage_model.decoder.up.3.block.0.conv2.bias": "vae_decoder.up_blocks.0.resnets.0.conv2.bias", + "first_stage_model.decoder.up.3.block.0.conv2.weight": "vae_decoder.up_blocks.0.resnets.0.conv2.weight", + "first_stage_model.decoder.up.3.block.0.norm1.bias": "vae_decoder.up_blocks.0.resnets.0.norm1.bias", + "first_stage_model.decoder.up.3.block.0.norm1.weight": "vae_decoder.up_blocks.0.resnets.0.norm1.weight", + "first_stage_model.decoder.up.3.block.0.norm2.bias": "vae_decoder.up_blocks.0.resnets.0.norm2.bias", + "first_stage_model.decoder.up.3.block.0.norm2.weight": "vae_decoder.up_blocks.0.resnets.0.norm2.weight", + "first_stage_model.decoder.up.3.block.1.conv1.bias": "vae_decoder.up_blocks.0.resnets.1.conv1.bias", + "first_stage_model.decoder.up.3.block.1.conv1.weight": "vae_decoder.up_blocks.0.resnets.1.conv1.weight", + "first_stage_model.decoder.up.3.block.1.conv2.bias": "vae_decoder.up_blocks.0.resnets.1.conv2.bias", + "first_stage_model.decoder.up.3.block.1.conv2.weight": "vae_decoder.up_blocks.0.resnets.1.conv2.weight", + "first_stage_model.decoder.up.3.block.1.norm1.bias": "vae_decoder.up_blocks.0.resnets.1.norm1.bias", + "first_stage_model.decoder.up.3.block.1.norm1.weight": "vae_decoder.up_blocks.0.resnets.1.norm1.weight", + "first_stage_model.decoder.up.3.block.1.norm2.bias": "vae_decoder.up_blocks.0.resnets.1.norm2.bias", + "first_stage_model.decoder.up.3.block.1.norm2.weight": "vae_decoder.up_blocks.0.resnets.1.norm2.weight", + "first_stage_model.decoder.up.3.block.2.conv1.bias": "vae_decoder.up_blocks.0.resnets.2.conv1.bias", + "first_stage_model.decoder.up.3.block.2.conv1.weight": "vae_decoder.up_blocks.0.resnets.2.conv1.weight", + "first_stage_model.decoder.up.3.block.2.conv2.bias": "vae_decoder.up_blocks.0.resnets.2.conv2.bias", + "first_stage_model.decoder.up.3.block.2.conv2.weight": "vae_decoder.up_blocks.0.resnets.2.conv2.weight", + "first_stage_model.decoder.up.3.block.2.norm1.bias": "vae_decoder.up_blocks.0.resnets.2.norm1.bias", + "first_stage_model.decoder.up.3.block.2.norm1.weight": "vae_decoder.up_blocks.0.resnets.2.norm1.weight", + 
"first_stage_model.decoder.up.3.block.2.norm2.bias": "vae_decoder.up_blocks.0.resnets.2.norm2.bias", + "first_stage_model.decoder.up.3.block.2.norm2.weight": "vae_decoder.up_blocks.0.resnets.2.norm2.weight", + "first_stage_model.decoder.up.3.upsample.conv.bias": "vae_decoder.up_blocks.0.upsamplers.0.conv.bias", + "first_stage_model.decoder.up.3.upsample.conv.weight": "vae_decoder.up_blocks.0.upsamplers.0.conv.weight", + "first_stage_model.encoder.conv_in.bias": "vae_encoder.conv_in.bias", + "first_stage_model.encoder.conv_in.weight": "vae_encoder.conv_in.weight", + "first_stage_model.encoder.conv_out.bias": "vae_encoder.conv_out.bias", + "first_stage_model.encoder.conv_out.weight": "vae_encoder.conv_out.weight", + "first_stage_model.encoder.down.0.block.0.conv1.bias": "vae_encoder.down_blocks.0.resnets.0.conv1.bias", + "first_stage_model.encoder.down.0.block.0.conv1.weight": "vae_encoder.down_blocks.0.resnets.0.conv1.weight", + "first_stage_model.encoder.down.0.block.0.conv2.bias": "vae_encoder.down_blocks.0.resnets.0.conv2.bias", + "first_stage_model.encoder.down.0.block.0.conv2.weight": "vae_encoder.down_blocks.0.resnets.0.conv2.weight", + "first_stage_model.encoder.down.0.block.0.norm1.bias": "vae_encoder.down_blocks.0.resnets.0.norm1.bias", + "first_stage_model.encoder.down.0.block.0.norm1.weight": "vae_encoder.down_blocks.0.resnets.0.norm1.weight", + "first_stage_model.encoder.down.0.block.0.norm2.bias": "vae_encoder.down_blocks.0.resnets.0.norm2.bias", + "first_stage_model.encoder.down.0.block.0.norm2.weight": "vae_encoder.down_blocks.0.resnets.0.norm2.weight", + "first_stage_model.encoder.down.0.block.1.conv1.bias": "vae_encoder.down_blocks.0.resnets.1.conv1.bias", + "first_stage_model.encoder.down.0.block.1.conv1.weight": "vae_encoder.down_blocks.0.resnets.1.conv1.weight", + "first_stage_model.encoder.down.0.block.1.conv2.bias": "vae_encoder.down_blocks.0.resnets.1.conv2.bias", + "first_stage_model.encoder.down.0.block.1.conv2.weight": "vae_encoder.down_blocks.0.resnets.1.conv2.weight", + "first_stage_model.encoder.down.0.block.1.norm1.bias": "vae_encoder.down_blocks.0.resnets.1.norm1.bias", + "first_stage_model.encoder.down.0.block.1.norm1.weight": "vae_encoder.down_blocks.0.resnets.1.norm1.weight", + "first_stage_model.encoder.down.0.block.1.norm2.bias": "vae_encoder.down_blocks.0.resnets.1.norm2.bias", + "first_stage_model.encoder.down.0.block.1.norm2.weight": "vae_encoder.down_blocks.0.resnets.1.norm2.weight", + "first_stage_model.encoder.down.0.downsample.conv.bias": "vae_encoder.down_blocks.0.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.0.downsample.conv.weight": "vae_encoder.down_blocks.0.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.1.block.0.conv1.bias": "vae_encoder.down_blocks.1.resnets.0.conv1.bias", + "first_stage_model.encoder.down.1.block.0.conv1.weight": "vae_encoder.down_blocks.1.resnets.0.conv1.weight", + "first_stage_model.encoder.down.1.block.0.conv2.bias": "vae_encoder.down_blocks.1.resnets.0.conv2.bias", + "first_stage_model.encoder.down.1.block.0.conv2.weight": "vae_encoder.down_blocks.1.resnets.0.conv2.weight", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.bias", + "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.1.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.1.block.0.norm1.bias": "vae_encoder.down_blocks.1.resnets.0.norm1.bias", + "first_stage_model.encoder.down.1.block.0.norm1.weight": 
"vae_encoder.down_blocks.1.resnets.0.norm1.weight", + "first_stage_model.encoder.down.1.block.0.norm2.bias": "vae_encoder.down_blocks.1.resnets.0.norm2.bias", + "first_stage_model.encoder.down.1.block.0.norm2.weight": "vae_encoder.down_blocks.1.resnets.0.norm2.weight", + "first_stage_model.encoder.down.1.block.1.conv1.bias": "vae_encoder.down_blocks.1.resnets.1.conv1.bias", + "first_stage_model.encoder.down.1.block.1.conv1.weight": "vae_encoder.down_blocks.1.resnets.1.conv1.weight", + "first_stage_model.encoder.down.1.block.1.conv2.bias": "vae_encoder.down_blocks.1.resnets.1.conv2.bias", + "first_stage_model.encoder.down.1.block.1.conv2.weight": "vae_encoder.down_blocks.1.resnets.1.conv2.weight", + "first_stage_model.encoder.down.1.block.1.norm1.bias": "vae_encoder.down_blocks.1.resnets.1.norm1.bias", + "first_stage_model.encoder.down.1.block.1.norm1.weight": "vae_encoder.down_blocks.1.resnets.1.norm1.weight", + "first_stage_model.encoder.down.1.block.1.norm2.bias": "vae_encoder.down_blocks.1.resnets.1.norm2.bias", + "first_stage_model.encoder.down.1.block.1.norm2.weight": "vae_encoder.down_blocks.1.resnets.1.norm2.weight", + "first_stage_model.encoder.down.1.downsample.conv.bias": "vae_encoder.down_blocks.1.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.1.downsample.conv.weight": "vae_encoder.down_blocks.1.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.2.block.0.conv1.bias": "vae_encoder.down_blocks.2.resnets.0.conv1.bias", + "first_stage_model.encoder.down.2.block.0.conv1.weight": "vae_encoder.down_blocks.2.resnets.0.conv1.weight", + "first_stage_model.encoder.down.2.block.0.conv2.bias": "vae_encoder.down_blocks.2.resnets.0.conv2.bias", + "first_stage_model.encoder.down.2.block.0.conv2.weight": "vae_encoder.down_blocks.2.resnets.0.conv2.weight", + "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.bias", + "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "vae_encoder.down_blocks.2.resnets.0.conv_shortcut.weight", + "first_stage_model.encoder.down.2.block.0.norm1.bias": "vae_encoder.down_blocks.2.resnets.0.norm1.bias", + "first_stage_model.encoder.down.2.block.0.norm1.weight": "vae_encoder.down_blocks.2.resnets.0.norm1.weight", + "first_stage_model.encoder.down.2.block.0.norm2.bias": "vae_encoder.down_blocks.2.resnets.0.norm2.bias", + "first_stage_model.encoder.down.2.block.0.norm2.weight": "vae_encoder.down_blocks.2.resnets.0.norm2.weight", + "first_stage_model.encoder.down.2.block.1.conv1.bias": "vae_encoder.down_blocks.2.resnets.1.conv1.bias", + "first_stage_model.encoder.down.2.block.1.conv1.weight": "vae_encoder.down_blocks.2.resnets.1.conv1.weight", + "first_stage_model.encoder.down.2.block.1.conv2.bias": "vae_encoder.down_blocks.2.resnets.1.conv2.bias", + "first_stage_model.encoder.down.2.block.1.conv2.weight": "vae_encoder.down_blocks.2.resnets.1.conv2.weight", + "first_stage_model.encoder.down.2.block.1.norm1.bias": "vae_encoder.down_blocks.2.resnets.1.norm1.bias", + "first_stage_model.encoder.down.2.block.1.norm1.weight": "vae_encoder.down_blocks.2.resnets.1.norm1.weight", + "first_stage_model.encoder.down.2.block.1.norm2.bias": "vae_encoder.down_blocks.2.resnets.1.norm2.bias", + "first_stage_model.encoder.down.2.block.1.norm2.weight": "vae_encoder.down_blocks.2.resnets.1.norm2.weight", + "first_stage_model.encoder.down.2.downsample.conv.bias": "vae_encoder.down_blocks.2.downsamplers.0.conv.bias", + "first_stage_model.encoder.down.2.downsample.conv.weight": 
"vae_encoder.down_blocks.2.downsamplers.0.conv.weight", + "first_stage_model.encoder.down.3.block.0.conv1.bias": "vae_encoder.down_blocks.3.resnets.0.conv1.bias", + "first_stage_model.encoder.down.3.block.0.conv1.weight": "vae_encoder.down_blocks.3.resnets.0.conv1.weight", + "first_stage_model.encoder.down.3.block.0.conv2.bias": "vae_encoder.down_blocks.3.resnets.0.conv2.bias", + "first_stage_model.encoder.down.3.block.0.conv2.weight": "vae_encoder.down_blocks.3.resnets.0.conv2.weight", + "first_stage_model.encoder.down.3.block.0.norm1.bias": "vae_encoder.down_blocks.3.resnets.0.norm1.bias", + "first_stage_model.encoder.down.3.block.0.norm1.weight": "vae_encoder.down_blocks.3.resnets.0.norm1.weight", + "first_stage_model.encoder.down.3.block.0.norm2.bias": "vae_encoder.down_blocks.3.resnets.0.norm2.bias", + "first_stage_model.encoder.down.3.block.0.norm2.weight": "vae_encoder.down_blocks.3.resnets.0.norm2.weight", + "first_stage_model.encoder.down.3.block.1.conv1.bias": "vae_encoder.down_blocks.3.resnets.1.conv1.bias", + "first_stage_model.encoder.down.3.block.1.conv1.weight": "vae_encoder.down_blocks.3.resnets.1.conv1.weight", + "first_stage_model.encoder.down.3.block.1.conv2.bias": "vae_encoder.down_blocks.3.resnets.1.conv2.bias", + "first_stage_model.encoder.down.3.block.1.conv2.weight": "vae_encoder.down_blocks.3.resnets.1.conv2.weight", + "first_stage_model.encoder.down.3.block.1.norm1.bias": "vae_encoder.down_blocks.3.resnets.1.norm1.bias", + "first_stage_model.encoder.down.3.block.1.norm1.weight": "vae_encoder.down_blocks.3.resnets.1.norm1.weight", + "first_stage_model.encoder.down.3.block.1.norm2.bias": "vae_encoder.down_blocks.3.resnets.1.norm2.bias", + "first_stage_model.encoder.down.3.block.1.norm2.weight": "vae_encoder.down_blocks.3.resnets.1.norm2.weight", + "first_stage_model.encoder.mid.attn_1.k.bias": "vae_encoder.mid_block.attentions.0.to_k.bias", + "first_stage_model.encoder.mid.attn_1.k.weight": "vae_encoder.mid_block.attentions.0.to_k.weight", + "first_stage_model.encoder.mid.attn_1.norm.bias": "vae_encoder.mid_block.attentions.0.group_norm.bias", + "first_stage_model.encoder.mid.attn_1.norm.weight": "vae_encoder.mid_block.attentions.0.group_norm.weight", + "first_stage_model.encoder.mid.attn_1.proj_out.bias": "vae_encoder.mid_block.attentions.0.to_out.0.bias", + "first_stage_model.encoder.mid.attn_1.proj_out.weight": "vae_encoder.mid_block.attentions.0.to_out.0.weight", + "first_stage_model.encoder.mid.attn_1.q.bias": "vae_encoder.mid_block.attentions.0.to_q.bias", + "first_stage_model.encoder.mid.attn_1.q.weight": "vae_encoder.mid_block.attentions.0.to_q.weight", + "first_stage_model.encoder.mid.attn_1.v.bias": "vae_encoder.mid_block.attentions.0.to_v.bias", + "first_stage_model.encoder.mid.attn_1.v.weight": "vae_encoder.mid_block.attentions.0.to_v.weight", + "first_stage_model.encoder.mid.block_1.conv1.bias": "vae_encoder.mid_block.resnets.0.conv1.bias", + "first_stage_model.encoder.mid.block_1.conv1.weight": "vae_encoder.mid_block.resnets.0.conv1.weight", + "first_stage_model.encoder.mid.block_1.conv2.bias": "vae_encoder.mid_block.resnets.0.conv2.bias", + "first_stage_model.encoder.mid.block_1.conv2.weight": "vae_encoder.mid_block.resnets.0.conv2.weight", + "first_stage_model.encoder.mid.block_1.norm1.bias": "vae_encoder.mid_block.resnets.0.norm1.bias", + "first_stage_model.encoder.mid.block_1.norm1.weight": "vae_encoder.mid_block.resnets.0.norm1.weight", + "first_stage_model.encoder.mid.block_1.norm2.bias": "vae_encoder.mid_block.resnets.0.norm2.bias", + 
"first_stage_model.encoder.mid.block_1.norm2.weight": "vae_encoder.mid_block.resnets.0.norm2.weight", + "first_stage_model.encoder.mid.block_2.conv1.bias": "vae_encoder.mid_block.resnets.1.conv1.bias", + "first_stage_model.encoder.mid.block_2.conv1.weight": "vae_encoder.mid_block.resnets.1.conv1.weight", + "first_stage_model.encoder.mid.block_2.conv2.bias": "vae_encoder.mid_block.resnets.1.conv2.bias", + "first_stage_model.encoder.mid.block_2.conv2.weight": "vae_encoder.mid_block.resnets.1.conv2.weight", + "first_stage_model.encoder.mid.block_2.norm1.bias": "vae_encoder.mid_block.resnets.1.norm1.bias", + "first_stage_model.encoder.mid.block_2.norm1.weight": "vae_encoder.mid_block.resnets.1.norm1.weight", + "first_stage_model.encoder.mid.block_2.norm2.bias": "vae_encoder.mid_block.resnets.1.norm2.bias", + "first_stage_model.encoder.mid.block_2.norm2.weight": "vae_encoder.mid_block.resnets.1.norm2.weight", + "first_stage_model.encoder.norm_out.bias": "vae_encoder.conv_norm_out.bias", + "first_stage_model.encoder.norm_out.weight": "vae_encoder.conv_norm_out.weight", + "first_stage_model.post_quant_conv.bias": "vae_post_quant_conv.bias", + "first_stage_model.post_quant_conv.weight": "vae_post_quant_conv.weight", + "first_stage_model.quant_conv.bias": "vae_quant_conv.bias", + "first_stage_model.quant_conv.weight": "vae_quant_conv.weight", + "model.diffusion_model.input_blocks.0.0.bias": "unet_conv_in.bias", + "model.diffusion_model.input_blocks.0.0.weight": "unet_conv_in.weight", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "unet_down_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "unet_down_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "unet_down_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "unet_down_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "unet_down_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "unet_down_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "unet_down_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "unet_down_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "unet_down_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "unet_down_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "unet_down_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "unet_down_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "unet_down_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "unet_down_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "unet_down_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "unet_down_blocks.0.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "unet_down_blocks.0.resnets.1.conv2.bias", 
+ "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "unet_down_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.3.0.op.bias": "unet_down_blocks.0.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.3.0.op.weight": "unet_down_blocks.0.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "unet_down_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "unet_down_blocks.1.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "unet_down_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "unet_down_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "unet_down_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "unet_down_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "unet_down_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "unet_down_blocks.1.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "unet_down_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "unet_down_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.4.1.norm.bias": "unet_down_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.4.1.norm.weight": "unet_down_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.4.1.proj_in.bias": "unet_down_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.4.1.proj_in.weight": "unet_down_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.4.1.proj_out.bias": "unet_down_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.4.1.proj_out.weight": "unet_down_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "unet_down_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "unet_down_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "unet_down_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "unet_down_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "unet_down_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "unet_down_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "unet_down_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "unet_down_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "unet_down_blocks.1.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "unet_down_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.5.1.norm.bias": "unet_down_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.5.1.norm.weight": "unet_down_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.5.1.proj_in.bias": "unet_down_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.5.1.proj_in.weight": "unet_down_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.5.1.proj_out.bias": "unet_down_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.5.1.proj_out.weight": "unet_down_blocks.1.attentions.1.proj_out.weight", + 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.6.0.op.bias": "unet_down_blocks.1.downsamplers.0.conv.bias", + "model.diffusion_model.input_blocks.6.0.op.weight": "unet_down_blocks.1.downsamplers.0.conv.weight", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.0.time_emb_proj.weight", + 
"model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "unet_down_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "unet_down_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "unet_down_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "unet_down_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "unet_down_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "unet_down_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "unet_down_blocks.2.resnets.0.conv2.bias", + "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "unet_down_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "unet_down_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "unet_down_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.input_blocks.7.1.norm.bias": "unet_down_blocks.2.attentions.0.norm.bias", + "model.diffusion_model.input_blocks.7.1.norm.weight": "unet_down_blocks.2.attentions.0.norm.weight", + "model.diffusion_model.input_blocks.7.1.proj_in.bias": "unet_down_blocks.2.attentions.0.proj_in.bias", + "model.diffusion_model.input_blocks.7.1.proj_in.weight": "unet_down_blocks.2.attentions.0.proj_in.weight", + "model.diffusion_model.input_blocks.7.1.proj_out.bias": "unet_down_blocks.2.attentions.0.proj_out.bias", + "model.diffusion_model.input_blocks.7.1.proj_out.weight": "unet_down_blocks.2.attentions.0.proj_out.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_q.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight", + 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm1.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm2.weight", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.bias", + "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.0.transformer_blocks.1.norm3.weight", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "unet_down_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "unet_down_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "unet_down_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "unet_down_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "unet_down_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "unet_down_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "unet_down_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "unet_down_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "unet_down_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "unet_down_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.input_blocks.8.1.norm.bias": "unet_down_blocks.2.attentions.1.norm.bias", + "model.diffusion_model.input_blocks.8.1.norm.weight": "unet_down_blocks.2.attentions.1.norm.weight", + "model.diffusion_model.input_blocks.8.1.proj_in.bias": "unet_down_blocks.2.attentions.1.proj_in.bias", + "model.diffusion_model.input_blocks.8.1.proj_in.weight": "unet_down_blocks.2.attentions.1.proj_in.weight", + "model.diffusion_model.input_blocks.8.1.proj_out.bias": "unet_down_blocks.2.attentions.1.proj_out.bias", + "model.diffusion_model.input_blocks.8.1.proj_out.weight": "unet_down_blocks.2.attentions.1.proj_out.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", + 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_q.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_k.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_q.weight": 
"unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_v.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm1.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm2.weight", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.bias": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.bias", + "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.weight": "unet_down_blocks.2.attentions.1.transformer_blocks.1.norm3.weight", + "model.diffusion_model.label_emb.0.0.bias": "unet_add_embedding.linear_1.bias", + "model.diffusion_model.label_emb.0.0.weight": "unet_add_embedding.linear_1.weight", + "model.diffusion_model.label_emb.0.2.bias": "unet_add_embedding.linear_2.bias", + "model.diffusion_model.label_emb.0.2.weight": "unet_add_embedding.linear_2.weight", + "model.diffusion_model.middle_block.0.emb_layers.1.bias": "unet_mid_block.resnets.0.time_emb_proj.bias", + "model.diffusion_model.middle_block.0.emb_layers.1.weight": "unet_mid_block.resnets.0.time_emb_proj.weight", + "model.diffusion_model.middle_block.0.in_layers.0.bias": "unet_mid_block.resnets.0.norm1.bias", + "model.diffusion_model.middle_block.0.in_layers.0.weight": "unet_mid_block.resnets.0.norm1.weight", + "model.diffusion_model.middle_block.0.in_layers.2.bias": "unet_mid_block.resnets.0.conv1.bias", + "model.diffusion_model.middle_block.0.in_layers.2.weight": "unet_mid_block.resnets.0.conv1.weight", + "model.diffusion_model.middle_block.0.out_layers.0.bias": "unet_mid_block.resnets.0.norm2.bias", + "model.diffusion_model.middle_block.0.out_layers.0.weight": "unet_mid_block.resnets.0.norm2.weight", + "model.diffusion_model.middle_block.0.out_layers.3.bias": "unet_mid_block.resnets.0.conv2.bias", + "model.diffusion_model.middle_block.0.out_layers.3.weight": "unet_mid_block.resnets.0.conv2.weight", + "model.diffusion_model.out.0.bias": "unet_conv_norm_out.bias", + "model.diffusion_model.out.0.weight": "unet_conv_norm_out.weight", + "model.diffusion_model.out.2.bias": "unet_conv_out.bias", + "model.diffusion_model.out.2.weight": "unet_conv_out.weight", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": 
"unet_up_blocks.0.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "unet_up_blocks.0.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "unet_up_blocks.0.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "unet_up_blocks.0.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "unet_up_blocks.0.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "unet_up_blocks.0.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "unet_up_blocks.0.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "unet_up_blocks.0.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "unet_up_blocks.0.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "unet_up_blocks.0.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "unet_up_blocks.0.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.0.1.norm.bias": "unet_up_blocks.0.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.0.1.norm.weight": "unet_up_blocks.0.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.0.1.proj_in.bias": "unet_up_blocks.0.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.0.1.proj_in.weight": "unet_up_blocks.0.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.0.1.proj_out.bias": "unet_up_blocks.0.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.0.1.proj_out.weight": "unet_up_blocks.0.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight", + 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm1.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm1.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm2.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm2.weight", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm3.bias", + "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.0.transformer_blocks.1.norm3.weight", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "unet_up_blocks.0.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "unet_up_blocks.0.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "unet_up_blocks.0.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "unet_up_blocks.0.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "unet_up_blocks.0.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "unet_up_blocks.0.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "unet_up_blocks.0.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "unet_up_blocks.0.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "unet_up_blocks.0.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "unet_up_blocks.0.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.1.1.norm.bias": "unet_up_blocks.0.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.1.1.norm.weight": "unet_up_blocks.0.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.1.1.proj_in.bias": "unet_up_blocks.0.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.1.1.proj_in.weight": "unet_up_blocks.0.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.1.1.proj_out.bias": "unet_up_blocks.0.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.1.1.proj_out.weight": "unet_up_blocks.0.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": 
"unet_up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.weight": 
"unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm1.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm1.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm2.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm2.weight", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm3.bias", + "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.1.transformer_blocks.1.norm3.weight", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "unet_up_blocks.0.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "unet_up_blocks.0.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "unet_up_blocks.0.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "unet_up_blocks.0.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "unet_up_blocks.0.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "unet_up_blocks.0.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "unet_up_blocks.0.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "unet_up_blocks.0.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "unet_up_blocks.0.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "unet_up_blocks.0.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "unet_up_blocks.0.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "unet_up_blocks.0.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.2.1.norm.bias": "unet_up_blocks.0.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.2.1.norm.weight": "unet_up_blocks.0.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.2.1.proj_in.bias": "unet_up_blocks.0.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.2.1.proj_in.weight": 
"unet_up_blocks.0.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.2.1.proj_out.bias": "unet_up_blocks.0.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.2.1.proj_out.weight": "unet_up_blocks.0.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight", + 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_k.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_q.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_v.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm1.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm1.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm2.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm2.weight", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.bias": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm3.bias", + "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.weight": "unet_up_blocks.0.attentions.2.transformer_blocks.1.norm3.weight", + "model.diffusion_model.output_blocks.2.2.conv.bias": "unet_up_blocks.0.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.2.2.conv.weight": "unet_up_blocks.0.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "unet_up_blocks.1.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "unet_up_blocks.1.resnets.0.norm1.weight", + 
"model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "unet_up_blocks.1.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "unet_up_blocks.1.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "unet_up_blocks.1.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "unet_up_blocks.1.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "unet_up_blocks.1.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "unet_up_blocks.1.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.3.0.skip_connection.bias": "unet_up_blocks.1.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "unet_up_blocks.1.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.3.1.norm.bias": "unet_up_blocks.1.attentions.0.norm.bias", + "model.diffusion_model.output_blocks.3.1.norm.weight": "unet_up_blocks.1.attentions.0.norm.weight", + "model.diffusion_model.output_blocks.3.1.proj_in.bias": "unet_up_blocks.1.attentions.0.proj_in.bias", + "model.diffusion_model.output_blocks.3.1.proj_in.weight": "unet_up_blocks.1.attentions.0.proj_in.weight", + "model.diffusion_model.output_blocks.3.1.proj_out.bias": "unet_up_blocks.1.attentions.0.proj_out.bias", + "model.diffusion_model.output_blocks.3.1.proj_out.weight": "unet_up_blocks.1.attentions.0.proj_out.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", + 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "unet_up_blocks.1.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "unet_up_blocks.1.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "unet_up_blocks.1.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "unet_up_blocks.1.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "unet_up_blocks.1.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "unet_up_blocks.1.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "unet_up_blocks.1.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "unet_up_blocks.1.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "unet_up_blocks.1.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "unet_up_blocks.1.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.4.1.norm.bias": "unet_up_blocks.1.attentions.1.norm.bias", + "model.diffusion_model.output_blocks.4.1.norm.weight": "unet_up_blocks.1.attentions.1.norm.weight", + "model.diffusion_model.output_blocks.4.1.proj_in.bias": "unet_up_blocks.1.attentions.1.proj_in.bias", + "model.diffusion_model.output_blocks.4.1.proj_in.weight": "unet_up_blocks.1.attentions.1.proj_in.weight", + "model.diffusion_model.output_blocks.4.1.proj_out.bias": "unet_up_blocks.1.attentions.1.proj_out.bias", + "model.diffusion_model.output_blocks.4.1.proj_out.weight": "unet_up_blocks.1.attentions.1.proj_out.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": 
"unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "unet_up_blocks.1.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "unet_up_blocks.1.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "unet_up_blocks.1.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "unet_up_blocks.1.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "unet_up_blocks.1.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "unet_up_blocks.1.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "unet_up_blocks.1.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "unet_up_blocks.1.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "unet_up_blocks.1.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": 
"unet_up_blocks.1.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "unet_up_blocks.1.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "unet_up_blocks.1.resnets.2.conv_shortcut.weight", + "model.diffusion_model.output_blocks.5.1.norm.bias": "unet_up_blocks.1.attentions.2.norm.bias", + "model.diffusion_model.output_blocks.5.1.norm.weight": "unet_up_blocks.1.attentions.2.norm.weight", + "model.diffusion_model.output_blocks.5.1.proj_in.bias": "unet_up_blocks.1.attentions.2.proj_in.bias", + "model.diffusion_model.output_blocks.5.1.proj_in.weight": "unet_up_blocks.1.attentions.2.proj_in.weight", + "model.diffusion_model.output_blocks.5.1.proj_out.bias": "unet_up_blocks.1.attentions.2.proj_out.bias", + "model.diffusion_model.output_blocks.5.1.proj_out.weight": "unet_up_blocks.1.attentions.2.proj_out.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias", + 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias", + "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "unet_up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight", + "model.diffusion_model.output_blocks.5.2.conv.bias": "unet_up_blocks.1.upsamplers.0.conv.bias", + "model.diffusion_model.output_blocks.5.2.conv.weight": "unet_up_blocks.1.upsamplers.0.conv.weight", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.0.time_emb_proj.bias", + "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.0.time_emb_proj.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "unet_up_blocks.2.resnets.0.norm1.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "unet_up_blocks.2.resnets.0.norm1.weight", + "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "unet_up_blocks.2.resnets.0.conv1.bias", + "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "unet_up_blocks.2.resnets.0.conv1.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "unet_up_blocks.2.resnets.0.norm2.bias", + "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "unet_up_blocks.2.resnets.0.norm2.weight", + "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "unet_up_blocks.2.resnets.0.conv2.bias", + "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "unet_up_blocks.2.resnets.0.conv2.weight", + "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "unet_up_blocks.2.resnets.0.conv_shortcut.bias", + "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "unet_up_blocks.2.resnets.0.conv_shortcut.weight", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.1.time_emb_proj.bias", + "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.1.time_emb_proj.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "unet_up_blocks.2.resnets.1.norm1.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "unet_up_blocks.2.resnets.1.norm1.weight", + "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "unet_up_blocks.2.resnets.1.conv1.bias", + "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "unet_up_blocks.2.resnets.1.conv1.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "unet_up_blocks.2.resnets.1.norm2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "unet_up_blocks.2.resnets.1.norm2.weight", + "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "unet_up_blocks.2.resnets.1.conv2.bias", + "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "unet_up_blocks.2.resnets.1.conv2.weight", + "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "unet_up_blocks.2.resnets.1.conv_shortcut.bias", + "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "unet_up_blocks.2.resnets.1.conv_shortcut.weight", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "unet_up_blocks.2.resnets.2.time_emb_proj.bias", + "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "unet_up_blocks.2.resnets.2.time_emb_proj.weight", + "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": 
"unet_up_blocks.2.resnets.2.norm1.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "unet_up_blocks.2.resnets.2.norm1.weight", + "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "unet_up_blocks.2.resnets.2.conv1.bias", + "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "unet_up_blocks.2.resnets.2.conv1.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "unet_up_blocks.2.resnets.2.norm2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "unet_up_blocks.2.resnets.2.norm2.weight", + "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "unet_up_blocks.2.resnets.2.conv2.bias", + "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "unet_up_blocks.2.resnets.2.conv2.weight", + "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "unet_up_blocks.2.resnets.2.conv_shortcut.bias", + "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "unet_up_blocks.2.resnets.2.conv_shortcut.weight", + "model.diffusion_model.time_embed.0.bias": "unet_time_embedding.linear_1.bias", + "model.diffusion_model.time_embed.0.weight": "unet_time_embedding.linear_1.weight", + "model.diffusion_model.time_embed.2.bias": "unet_time_embedding.linear_2.bias", + "model.diffusion_model.time_embed.2.weight": "unet_time_embedding.linear_2.weight" + }, + "ldm_diffusers_shape_map": { + "first_stage_model.decoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.decoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.k.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.proj_out.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.q.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ], + "first_stage_model.encoder.mid.attn_1.v.weight": [ + [ + 512, + 512, + 1, + 1 + ], + [ + 512, + 512 + ] + ] + }, + "ldm_diffusers_operator_map": { + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.0.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.0.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.0.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.0.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.0.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.0.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.1.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.1.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.1.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.1.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.1.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.1.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": { + "cat": [ + 
"te1_text_model.encoder.layers.10.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.10.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.10.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.10.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.10.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.10.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.11.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.11.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.11.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.11.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.11.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.11.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.12.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.12.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.12.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.12.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.12.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.12.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.13.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.13.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.13.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.13.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.13.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.13.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.14.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.14.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.14.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.14.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.14.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.14.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.15.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.15.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.15.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.15.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.15.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.15.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.16.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.16.self_attn.k_proj.bias", + 
"te1_text_model.encoder.layers.16.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.16.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.16.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.16.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.17.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.17.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.17.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.17.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.17.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.17.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.18.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.18.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.18.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.18.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.18.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.18.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.19.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.19.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.19.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.19.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.19.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.19.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.2.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.2.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.2.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.2.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.2.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.2.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.20.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.20.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.20.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.20.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.20.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.20.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.21.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.21.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.21.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": { + "cat": [ + 
"te1_text_model.encoder.layers.21.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.21.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.21.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.22.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.22.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.22.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.22.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.22.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.22.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.23.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.23.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.23.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.23.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.23.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.23.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.24.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.24.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.24.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.24.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.24.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.24.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.25.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.25.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.25.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.25.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.25.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.25.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.26.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.26.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.26.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.26.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.26.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.26.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.27.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.27.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.27.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.27.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.27.self_attn.k_proj.weight", + 
"te1_text_model.encoder.layers.27.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.28.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.28.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.28.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.28.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.28.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.28.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.29.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.29.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.29.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.29.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.29.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.29.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.3.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.3.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.3.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.3.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.3.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.3.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.30.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.30.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.30.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.30.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.30.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.30.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.31.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.31.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.31.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.31.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.31.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.31.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.4.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.4.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.4.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.4.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.4.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.4.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": { + "cat": [ + 
"te1_text_model.encoder.layers.5.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.5.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.5.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.5.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.5.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.5.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.6.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.6.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.6.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.6.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.6.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.6.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.7.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.7.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.7.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.7.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.7.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.7.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.8.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.8.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.8.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.8.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.8.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.8.self_attn.v_proj.weight" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": { + "cat": [ + "te1_text_model.encoder.layers.9.self_attn.q_proj.bias", + "te1_text_model.encoder.layers.9.self_attn.k_proj.bias", + "te1_text_model.encoder.layers.9.self_attn.v_proj.bias" + ] + }, + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": { + "cat": [ + "te1_text_model.encoder.layers.9.self_attn.q_proj.weight", + "te1_text_model.encoder.layers.9.self_attn.k_proj.weight", + "te1_text_model.encoder.layers.9.self_attn.v_proj.weight" + ] + } + }, + "diffusers_ldm_operator_map": { + "te1_text_model.encoder.layers.0.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.k_proj.weight": { + "slice": [ + 
"conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.0.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.1.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.10.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.11.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.q_proj.bias": { + 
"slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.12.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.13.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.14.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "1280:2560, :" + ] + }, + 
"te1_text_model.encoder.layers.15.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.15.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.16.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.17.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight", + 
"0:1280, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.18.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.19.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.2.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.20.self_attn.v_proj.weight": { + "slice": [ + 
"conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.21.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.22.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.23.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.k_proj.bias": { 
+ "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.24.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.25.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.26.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias", + "2560:, :" + ] + }, + 
"te1_text_model.encoder.layers.27.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.27.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.28.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.29.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight", + "1280:2560, 
:" + ] + }, + "te1_text_model.encoder.layers.3.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.30.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.31.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.4.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "0:1280, 
:" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.5.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.6.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.7.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias", + "2560:, :" + ] + }, + 
"te1_text_model.encoder.layers.8.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.8.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.q_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.k_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.v_proj.bias": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias", + "2560:, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.q_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight", + "0:1280, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.k_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight", + "1280:2560, :" + ] + }, + "te1_text_model.encoder.layers.9.self_attn.v_proj.weight": { + "slice": [ + "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight", + "2560:, :" + ] + } + } +} \ No newline at end of file diff --git a/toolkit/keymaps/stable_diffusion_vega_ldm_base.safetensors b/toolkit/keymaps/stable_diffusion_vega_ldm_base.safetensors new file mode 100644 index 00000000..39694991 Binary files /dev/null and b/toolkit/keymaps/stable_diffusion_vega_ldm_base.safetensors differ diff --git a/toolkit/lora_special.py b/toolkit/lora_special.py index 8bf1e063..4f566b03 100644 --- a/toolkit/lora_special.py +++ b/toolkit/lora_special.py @@ -111,8 +111,11 @@ class LoRAModule(ToolkitModuleMixin, ExtractableModuleMixin, torch.nn.Module): class LoRASpecialNetwork(ToolkitNetworkMixin, LoRANetwork): NUM_OF_BLOCKS = 12 # フルモデル相当でのup,downの層の数 - UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"] - UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"] + # UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"] + # UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel", "ResnetBlock2D"] + UNET_TARGET_REPLACE_MODULE = ["''UNet2DConditionModel''"] + # UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"] + UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["'UNet2DConditionModel'"] TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"] LORA_PREFIX_UNET = "lora_unet" LORA_PREFIX_TEXT_ENCODER = "lora_te" @@ -230,8 +233,90 @@ class LoRASpecialNetwork(ToolkitNetworkMixin, LoRANetwork): ) loras = [] skipped = [] + attached_modules = [] for name, module in root_module.named_modules(): - if module.__class__.__name__ in target_replace_modules: + if is_unet: + module_name = module.__class__.__name__ + if module not in attached_modules: + # if module.__class__.__name__ in target_replace_modules: + # for child_name, child_module in module.named_modules(): + is_linear = module_name == 'LoRACompatibleLinear' + is_conv2d = module_name == 'LoRACompatibleConv' + + if is_linear and self.lora_dim is None: + continue + if is_conv2d and self.conv_lora_dim is None: + continue + 
+ is_conv2d_1x1 = is_conv2d and module.kernel_size == (1, 1) + + if is_conv2d_1x1: + pass + + skip = False + if any([word in name for word in self.ignore_if_contains]): + skip = True + + # see if it is over threshold + if count_parameters(module) < parameter_threshold: + skip = True + + if (is_linear or is_conv2d) and not skip: + lora_name = prefix + "." + name + lora_name = lora_name.replace(".", "_") + + dim = None + alpha = None + + if modules_dim is not None: + # モジュール指定あり + if lora_name in modules_dim: + dim = modules_dim[lora_name] + alpha = modules_alpha[lora_name] + elif is_unet and block_dims is not None: + # U-Netでblock_dims指定あり + block_idx = get_block_index(lora_name) + if is_linear or is_conv2d_1x1: + dim = block_dims[block_idx] + alpha = block_alphas[block_idx] + elif conv_block_dims is not None: + dim = conv_block_dims[block_idx] + alpha = conv_block_alphas[block_idx] + else: + # 通常、すべて対象とする + if is_linear or is_conv2d_1x1: + dim = self.lora_dim + alpha = self.alpha + elif self.conv_lora_dim is not None: + dim = self.conv_lora_dim + alpha = self.conv_alpha + else: + dim = None + alpha = None + + if dim is None or dim == 0: + # skipした情報を出力 + if is_linear or is_conv2d_1x1 or ( + self.conv_lora_dim is not None or conv_block_dims is not None): + skipped.append(lora_name) + continue + + lora = module_class( + lora_name, + module, + self.multiplier, + dim, + alpha, + dropout=dropout, + rank_dropout=rank_dropout, + module_dropout=module_dropout, + network=self, + parent=module, + use_bias=use_bias, + ) + loras.append(lora) + attached_modules.append(module) + elif module.__class__.__name__ in target_replace_modules: for child_name, child_module in module.named_modules(): is_linear = child_module.__class__.__name__ in LINEAR_MODULES is_conv2d = child_module.__class__.__name__ in CONV_MODULES diff --git a/toolkit/network_mixins.py b/toolkit/network_mixins.py index 37f9f83c..e92e835e 100644 --- a/toolkit/network_mixins.py +++ b/toolkit/network_mixins.py @@ -340,6 +340,7 @@ class ToolkitNetworkMixin: is_sdxl=False, is_v2=False, is_ssd=False, + is_vega=False, network_config: Optional[NetworkConfig] = None, is_lorm=False, **kwargs @@ -351,6 +352,7 @@ class ToolkitNetworkMixin: self.is_active: bool = False self.is_sdxl = is_sdxl self.is_ssd = is_ssd + self.is_vega = is_vega self.is_v2 = is_v2 self.is_merged_in = False self.is_lorm = is_lorm @@ -365,6 +367,9 @@ class ToolkitNetworkMixin: if self.is_ssd: keymap_tail = 'ssd' use_weight_mapping = True + elif self.is_vega: + keymap_tail = 'vega' + use_weight_mapping = True elif self.is_sdxl: keymap_tail = 'sdxl' elif self.is_v2: diff --git a/toolkit/sampler.py b/toolkit/sampler.py index 6ccc621b..2e9f1654 100644 --- a/toolkit/sampler.py +++ b/toolkit/sampler.py @@ -46,8 +46,11 @@ sdxl_sampler_config = { def get_sampler( sampler: str, + kwargs: dict = None, ): sched_init_args = {} + if kwargs is not None: + sched_init_args.update(kwargs) if sampler.startswith("k_"): sched_init_args["use_karras_sigmas"] = True diff --git a/toolkit/saving.py b/toolkit/saving.py index 5c7b37e1..9bedcd8d 100644 --- a/toolkit/saving.py +++ b/toolkit/saving.py @@ -97,7 +97,7 @@ def convert_state_dict_to_ldm_with_mapping( def get_ldm_state_dict_from_diffusers( state_dict: 'OrderedDict', - sd_version: Literal['1', '2', 'sdxl', 'ssd', 'sdxl_refiner'] = '2', + sd_version: Literal['1', '2', 'sdxl', 'ssd', 'vega', 'sdxl_refiner'] = '2', device='cpu', dtype=get_torch_dtype('fp32'), ): @@ -115,6 +115,10 @@ def get_ldm_state_dict_from_diffusers( # load our base base_path = 
os.path.join(KEYMAPS_ROOT, 'stable_diffusion_ssd_ldm_base.safetensors') mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_ssd.json') + elif sd_version == 'vega': + # load our base + base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_vega_ldm_base.safetensors') + mapping_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_vega.json') elif sd_version == 'sdxl_refiner': # load our base base_path = os.path.join(KEYMAPS_ROOT, 'stable_diffusion_refiner_ldm_base.safetensors') @@ -137,7 +141,7 @@ def save_ldm_model_from_diffusers( output_file: str, meta: 'OrderedDict', save_dtype=get_torch_dtype('fp16'), - sd_version: Literal['1', '2', 'sdxl', 'ssd'] = '2' + sd_version: Literal['1', '2', 'sdxl', 'ssd', 'vega'] = '2' ): converted_state_dict = get_ldm_state_dict_from_diffusers( sd.state_dict(), @@ -156,11 +160,11 @@ def save_lora_from_diffusers( output_file: str, meta: 'OrderedDict', save_dtype=get_torch_dtype('fp16'), - sd_version: Literal['1', '2', 'sdxl', 'ssd'] = '2' + sd_version: Literal['1', '2', 'sdxl', 'ssd', 'vega'] = '2' ): converted_state_dict = OrderedDict() # only handle sxdxl for now - if sd_version != 'sdxl' and sd_version != 'ssd': + if sd_version != 'sdxl' and sd_version != 'ssd' and sd_version != 'vega': raise ValueError(f"Invalid sd_version {sd_version}") for key, value in lora_state_dict.items(): # todo verify if this works with ssd diff --git a/toolkit/sd_device_states_presets.py b/toolkit/sd_device_states_presets.py index fb4663a0..7f82f37c 100644 --- a/toolkit/sd_device_states_presets.py +++ b/toolkit/sd_device_states_presets.py @@ -84,5 +84,8 @@ def get_train_sd_device_state_preset( preset['adapter']['training'] = True preset['adapter']['device'] = device preset['unet']['training'] = True + preset['unet']['requires_grad'] = False + preset['unet']['device'] = device + preset['text_encoder']['device'] = device return preset diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py index 2c7dd286..6bfe3e5e 100644 --- a/toolkit/stable_diffusion_model.py +++ b/toolkit/stable_diffusion_model.py @@ -137,6 +137,7 @@ class StableDiffusion: self.is_xl = model_config.is_xl self.is_v2 = model_config.is_v2 self.is_ssd = model_config.is_ssd + self.is_vega = model_config.is_vega self.use_text_encoder_1 = model_config.use_text_encoder_1 self.use_text_encoder_2 = model_config.use_text_encoder_2 @@ -149,7 +150,10 @@ class StableDiffusion: dtype = get_torch_dtype(self.dtype) # sch = KDPM2DiscreteScheduler if self.noise_scheduler is None: - scheduler = get_sampler('ddpm') + scheduler = get_sampler( + 'ddpm', { + "prediction_type": self.prediction_type, + }) self.noise_scheduler = scheduler # move the betas alphas and alphas_cumprod to device. 
Sometimed they get stuck on cpu, not sure why @@ -169,7 +173,7 @@ class StableDiffusion: if self.model_config.vae_path is not None: load_args['vae'] = load_vae(self.model_config.vae_path, dtype) - if self.model_config.is_xl or self.model_config.is_ssd: + if self.model_config.is_xl or self.model_config.is_ssd or self.model_config.is_vega: if self.custom_pipeline is not None: pipln = self.custom_pipeline else: @@ -358,9 +362,17 @@ class StableDiffusion: if sampler is not None: if sampler.startswith("sample_"): # sample_dpmpp_2m # using ksampler - noise_scheduler = get_sampler('lms') + noise_scheduler = get_sampler( + 'lms', { + "prediction_type": self.prediction_type, + }) else: - noise_scheduler = get_sampler(sampler) + noise_scheduler = get_sampler( + sampler, + { + "prediction_type": self.prediction_type, + } + ) try: noise_scheduler = noise_scheduler.to(self.device_torch, self.torch_dtype) @@ -674,7 +686,6 @@ class StableDiffusion: 'EulerDiscreteSchedulerOutput', ] - # todo handle if timestep is single value original_samples_chunks = torch.chunk(original_samples, original_samples.shape[0], dim=0) @@ -692,10 +703,12 @@ class StableDiffusion: noise_timesteps = timesteps_chunks[idx] if scheduler_class_name == 'DPMSolverMultistepScheduler': # Make sure sigmas and timesteps have the same device and dtype as original_samples - sigmas = self.noise_scheduler.sigmas.to(device=original_samples_chunks[idx].device, dtype=original_samples_chunks[idx].dtype) + sigmas = self.noise_scheduler.sigmas.to(device=original_samples_chunks[idx].device, + dtype=original_samples_chunks[idx].dtype) if original_samples_chunks[idx].device.type == "mps" and torch.is_floating_point(noise_timesteps): # mps does not support float64 - schedule_timesteps = self.noise_scheduler.timesteps.to(original_samples_chunks[idx].device, dtype=torch.float32) + schedule_timesteps = self.noise_scheduler.timesteps.to(original_samples_chunks[idx].device, + dtype=torch.float32) noise_timesteps = noise_timesteps.to(original_samples_chunks[idx].device, dtype=torch.float32) else: schedule_timesteps = self.noise_scheduler.timesteps.to(original_samples_chunks[idx].device) @@ -719,7 +732,8 @@ class StableDiffusion: noisy_samples = alpha_t * original_samples + sigma_t * noise_chunks[idx] noisy_latents = noisy_samples else: - noisy_latents = self.noise_scheduler.add_noise(original_samples_chunks[idx], noise_chunks[idx], noise_timesteps) + noisy_latents = self.noise_scheduler.add_noise(original_samples_chunks[idx], noise_chunks[idx], + noise_timesteps) noisy_latents_chunks.append(noisy_latents) noisy_latents = torch.cat(noisy_latents_chunks, dim=0) @@ -777,7 +791,6 @@ class StableDiffusion: else: timestep = timestep.repeat(latents.shape[0], 0) - def scale_model_input(model_input, timestep_tensor): if is_input_scaled: return model_input @@ -986,7 +999,6 @@ class StableDiffusion: ): timesteps_to_run = self.noise_scheduler.timesteps[start_timesteps:total_timesteps] - for timestep in tqdm(timesteps_to_run, leave=False): timestep = timestep.unsqueeze_(0) noise_pred = self.predict_noise( @@ -1290,7 +1302,6 @@ class StableDiffusion: output_config_path = f"{output_path_no_ext}.yaml" shutil.copyfile(self.config_file, output_config_path) - def save(self, output_file: str, meta: OrderedDict, save_dtype=get_torch_dtype('fp16'), logit_scale=None): version_string = '1' if self.is_v2: @@ -1300,6 +1311,8 @@ class StableDiffusion: if self.is_ssd: # overwrite sdxl because both wil be true here version_string = 'ssd' + if self.is_ssd and self.is_vega: + 
version_string = 'vega'
         # if output file does not end in .safetensors, then it is a directory and we are
         # saving in diffusers format
         if not output_file.endswith('.safetensors'):
diff --git a/toolkit/train_tools.py b/toolkit/train_tools.py
index 8f284091..cc18e0fa 100644
--- a/toolkit/train_tools.py
+++ b/toolkit/train_tools.py
@@ -776,15 +776,19 @@ def apply_snr_weight(
 ):
     # will get it from noise scheduler if exist or will calculate it if not
     all_snr = get_all_snr(noise_scheduler, loss.device)
-    step_indices = []
-    for t in timesteps:
-        for i, st in enumerate(noise_scheduler.timesteps):
-            if st == t:
-                step_indices.append(i)
-                break
+    # step_indices = []
+    # for t in timesteps:
+    #     for i, st in enumerate(noise_scheduler.timesteps):
+    #         if st == t:
+    #             step_indices.append(i)
+    #             break
     # this breaks on some schedulers
     # step_indices = [(noise_scheduler.timesteps == t).nonzero().item() for t in timesteps]
-    snr = torch.stack([all_snr[t] for t in step_indices])
+
+    offset = 0
+    if noise_scheduler.timesteps[0] == 1000:
+        offset = 1
+    snr = torch.stack([all_snr[t - offset] for t in timesteps])
     gamma_over_snr = torch.div(torch.ones_like(snr) * gamma, snr)
     if fixed:
         snr_weight = gamma_over_snr.float().to(loss.device)  # directly using gamma over snr
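
Note on the apply_snr_weight change above: all_snr holds one SNR value per training step and is indexed 0..999, but some schedulers expose timesteps running 1000..1, so indexing it directly with the raw timestep values would read one element past the end; the patch shifts the index by one in that case. The following is a minimal standalone sketch of that indexing idea only, using made-up stand-in tensors rather than the toolkit's real noise_scheduler or get_all_snr:

import torch

# Hypothetical stand-ins, not the toolkit's actual objects:
# one SNR value per training step, indexed 0..999.
all_snr = torch.linspace(20.0, 0.01, 1000)
# a schedule whose timesteps run 1000..1 instead of 999..0
scheduler_timesteps = torch.arange(1000, 0, -1)
batch_timesteps = torch.tensor([1000, 750, 1])

# Same idea as the patched code: if the schedule starts at 1000,
# shift by one so timestep 1000 maps to all_snr[999] instead of overflowing.
offset = 1 if int(scheduler_timesteps[0]) == 1000 else 0
snr = torch.stack([all_snr[int(t) - offset] for t in batch_timesteps])
print(snr)  # per-sample SNR values for the batch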