Fixed various issues with LLM attention masking. Added block training to the LLM adapter.

This commit is contained in:
Jaret Burkett
2025-02-14 11:24:01 -07:00
parent 2be6926398
commit bd8d7dc081
3 changed files with 52 additions and 6 deletions

View File

@@ -212,6 +212,7 @@ class CustomAdapter(torch.nn.Module):
sd=self.sd_ref(),
llm=self.te,
tokenizer=self.tokenizer,
num_cloned_blocks=self.config.num_cloned_blocks,
)
self.llm_adapter.to(self.device, torch_dtype)
elif self.adapter_type == 'te_augmenter':