Fixed various issues with LLM attention masking. Added block training on the LLM adapter.

Jaret Burkett
2025-02-14 11:24:01 -07:00
parent 2be6926398
commit bd8d7dc081
3 changed files with 52 additions and 6 deletions


@@ -216,6 +216,9 @@ class AdapterConfig:
         self.conv_pooling: bool = kwargs.get('conv_pooling', False)
         self.conv_pooling_stacks: int = kwargs.get('conv_pooling_stacks', 1)
         self.sparse_autoencoder_dim: Optional[int] = kwargs.get('sparse_autoencoder_dim', None)
+        # for llm adapter
+        self.num_cloned_blocks: int = kwargs.get('num_cloned_blocks', 0)

 class EmbeddingConfig:
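
For context, a minimal sketch of how the new option might be passed when building the adapter config. Only num_cloned_blocks (default 0) comes from this diff; the other keyword arguments and the value 2 are illustrative assumptions, and the import path of AdapterConfig is assumed.

# Minimal sketch, not part of this commit: AdapterConfig reads its options
# from **kwargs via kwargs.get(), so the new option is set like any other.
adapter_config = AdapterConfig(
    conv_pooling=False,            # existing option, shown for context
    conv_pooling_stacks=1,         # existing option, shown for context
    sparse_autoencoder_dim=None,   # existing option, shown for context
    num_cloned_blocks=2,           # assumed value; 0 (the default) disables block cloning
)
assert adapter_config.num_cloned_blocks == 2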