Properly load the NewBie diffusion model. (#11172)

One of the text encoders is still missing, and this change has not actually been tested.
This commit is contained in:
comfyanonymous
2025-12-07 04:44:55 -08:00
committed by GitHub
parent 329480da5a
commit 56fa7dbe38
3 changed files with 42 additions and 0 deletions

View File

@@ -423,6 +423,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["axes_lens"] = [300, 512, 512]
dit_config["rope_theta"] = 10000.0
dit_config["ffn_dim_multiplier"] = 4.0
ctd_weight = state_dict.get('{}clip_text_pooled_proj.0.weight'.format(key_prefix), None)
if ctd_weight is not None:
dit_config["clip_text_dim"] = ctd_weight.shape[0]
elif dit_config["dim"] == 3840: # Z image
dit_config["n_heads"] = 30
dit_config["n_kv_heads"] = 30