Fixed various issues with LLM attention masking. Added block training to the LLM adapter.

This commit is contained in:
Jaret Burkett
2025-02-14 11:24:01 -07:00
parent 2be6926398
commit bd8d7dc081
3 changed files with 52 additions and 6 deletions

View File

@@ -212,6 +212,7 @@ class CustomAdapter(torch.nn.Module):
sd=self.sd_ref(),
llm=self.te,
tokenizer=self.tokenizer,
num_cloned_blocks=self.config.num_cloned_blocks,
)
self.llm_adapter.to(self.device, torch_dtype)
elif self.adapter_type == 'te_augmenter':