Properly load the newbie diffusion model. (#11172)

One of the text encoders is still missing, and I haven't actually tested this change.
2026-03-13 17:20:01 +00:00 · 2025-12-07 04:44:55 -08:00
parent 329480da5a
commit 56fa7dbe38
3 changed files with 42 additions and 0 deletions
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -423,6 +423,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["axes_lens"] = [300, 512, 512]
            dit_config["rope_theta"] = 10000.0
            dit_config["ffn_dim_multiplier"] = 4.0
+            ctd_weight = state_dict.get('{}clip_text_pooled_proj.0.weight'.format(key_prefix), None)
+            if ctd_weight is not None:
+                dit_config["clip_text_dim"] = ctd_weight.shape[0]
        elif dit_config["dim"] == 3840:  # Z image
            dit_config["n_heads"] = 30
            dit_config["n_kv_heads"] = 30