diff --git a/extensions_built_in/diffusion_models/hidream/src/hidream_o1/qwen3_vl_transformers.py b/extensions_built_in/diffusion_models/hidream/src/hidream_o1/qwen3_vl_transformers.py index 499a6de0..0ec7695b 100644 --- a/extensions_built_in/diffusion_models/hidream/src/hidream_o1/qwen3_vl_transformers.py +++ b/extensions_built_in/diffusion_models/hidream/src/hidream_o1/qwen3_vl_transformers.py @@ -2104,7 +2104,7 @@ class Qwen3VLCausalLMOutputWithPast(ModelOutput): class Qwen3VLForConditionalGeneration(Qwen3VLPreTrainedModel, GenerationMixin): _checkpoint_conversion_mapping = {} - _tied_weights_keys = ["lm_head.weight"] + _tied_weights_keys = {"lm_head.weight": "model.language_model.embed_tokens.weight"} # Reference: fix gemma3 grad acc #37208 accepts_loss_kwargs = False config: Qwen3VLConfig