Small bug fixes

This commit is contained in:
Jaret Burkett
2024-07-18 10:39:55 -06:00
parent 11e426fdf1
commit a2301cf28c
2 changed files with 14 additions and 3 deletions

View File

@@ -268,6 +268,10 @@ class InstantLoRAModule(torch.nn.Module):
self.output_size = output_size
number_formatted_output_size = "{:,}".format(output_size)
print(f" ILORA output size: {number_formatted_output_size}")
# if not evenly divisible, error
if self.output_size % self.num_heads != 0:
raise ValueError("Output size must be divisible by the number of heads")
@@ -284,6 +288,7 @@ class InstantLoRAModule(torch.nn.Module):
embedding_dim=vision_hidden_size,
max_seq_len=vision_tokens,
output_dim=head_dim,
apply_pos_emb=True, # this is new
ff_mult=4
)

View File

@@ -382,6 +382,7 @@ class TEAdapter(torch.nn.Module):
def encode_text(self, text):
te: T5EncoderModel = self.te_ref()
tokenizer: T5Tokenizer = self.tokenizer_ref()
attn_mask_float = None
# input_ids = tokenizer(
# text,
@@ -424,13 +425,18 @@ class TEAdapter(torch.nn.Module):
attn_mask_float = attention_mask.to(embeds.device, dtype=embeds.dtype)
if self.text_projection is not None:
# pool the output of embeds ignoring 0 in the attention mask
pooled_output = embeds * attn_mask_float.unsqueeze(-1)
if attn_mask_float is not None:
pooled_output = embeds * attn_mask_float.unsqueeze(-1)
else:
pooled_output = embeds
# reduce along dim 1 while maintaining batch and dim 2
pooled_output_sum = pooled_output.sum(dim=1)
attn_mask_sum = attn_mask_float.sum(dim=1).unsqueeze(-1)
pooled_output = pooled_output_sum / attn_mask_sum
if attn_mask_float is not None:
attn_mask_sum = attn_mask_float.sum(dim=1).unsqueeze(-1)
pooled_output = pooled_output_sum / attn_mask_sum
pooled_embeds = self.text_projection(pooled_output)