Small bug fixes

This commit is contained in:
Jaret Burkett
2024-07-18 10:39:55 -06:00
parent 11e426fdf1
commit a2301cf28c
2 changed files with 14 additions and 3 deletions

View File

@@ -268,6 +268,10 @@ class InstantLoRAModule(torch.nn.Module):
self.output_size = output_size
number_formatted_output_size = "{:,}".format(output_size)
print(f" ILORA output size: {number_formatted_output_size}")
# if not evenly divisible, error
if self.output_size % self.num_heads != 0:
raise ValueError("Output size must be divisible by the number of heads")
@@ -284,6 +288,7 @@ class InstantLoRAModule(torch.nn.Module):
embedding_dim=vision_hidden_size,
max_seq_len=vision_tokens,
output_dim=head_dim,
apply_pos_emb=True, # this is new
ff_mult=4
)

View File

@@ -382,6 +382,7 @@ class TEAdapter(torch.nn.Module):
def encode_text(self, text):
te: T5EncoderModel = self.te_ref()
tokenizer: T5Tokenizer = self.tokenizer_ref()
attn_mask_float = None
# input_ids = tokenizer(
# text,
@@ -424,13 +425,18 @@ class TEAdapter(torch.nn.Module):
attn_mask_float = attention_mask.to(embeds.device, dtype=embeds.dtype)
if self.text_projection is not None:
# pool the output of embeds ignoring 0 in the attention mask
pooled_output = embeds * attn_mask_float.unsqueeze(-1)
if attn_mask_float is not None:
pooled_output = embeds * attn_mask_float.unsqueeze(-1)
else:
pooled_output = embeds
# reduce along dim 1 while maintaining batch and dim 2
pooled_output_sum = pooled_output.sum(dim=1)
attn_mask_sum = attn_mask_float.sum(dim=1).unsqueeze(-1)
pooled_output = pooled_output_sum / attn_mask_sum
if attn_mask_float is not None:
attn_mask_sum = attn_mask_float.sum(dim=1).unsqueeze(-1)
pooled_output = pooled_output_sum / attn_mask_sum
pooled_embeds = self.text_projection(pooled_output)