Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
turboderp
2024-07-06 08:06:25 +02:00
3 changed files with 41 additions and 2 deletions

View File

@@ -124,7 +124,10 @@ class ExLlamaV2Embedding(ExLlamaV2Module):
standard_mask_ = standard_mask[i]
input_ids_ = input_ids[i]
standard_ids_ = input_ids_[standard_mask_]
standard_embeddings_ = self.embedding(standard_ids_)
if loras is not None and loras[0].embed_tokens is not None:
standard_embeddings_ = loras[0].embed_tokens(standard_ids_)
else:
standard_embeddings_ = self.embedding(standard_ids_)
standard_embeddings_ = safe_move_tensor(standard_embeddings_, indexed_embeddings.device)
combined_embeddings[i][standard_mask_] = standard_embeddings_
@@ -144,7 +147,10 @@ class ExLlamaV2Embedding(ExLlamaV2Module):
# Call embedding module if no indexed embeddings
else:
hidden_states = self.embedding.forward(hidden_states)
if loras is not None and loras[0].embed_tokens is not None:
hidden_states = loras[0].embed_tokens(hidden_states)
else:
hidden_states = self.embedding(hidden_states)
if self.model.config.arch.normalize_embeddings:
hidden_states *= self.model.config.hidden_size ** 0.5

View File

@@ -241,6 +241,14 @@ class ExLlamaV2Linear(ExLlamaV2Module):
# Linear forward
if self.key == 'lm_head' and loras is not None and loras[0].lm_head is not None:
hidden_states_out = loras[0].lm_head(hidden_states)
if intermediates:
return {"hidden_states": hidden_states_out}
else:
return hidden_states_out
if self.q_handle is not None and not force_recons:
output_shape = hidden_states.shape[:-1] + (self.out_features,)

View File

@@ -53,6 +53,8 @@ class ExLlamaV2Lora:
self.target_modules = {}
self.bias_ignored = False
self.lora_scaling = lora_scaling
self.embed_tokens = None
self.lm_head = None
# Grab relevant items from LoRA config
@@ -77,6 +79,29 @@ class ExLlamaV2Lora:
tensor = f[key]
# Find target
if key.endswith(f'{self.config.arch.lm_head_key}.weight'):
if tensor.dtype == torch.bfloat16:
tensor = tensor.to(torch.float16)
elif tensor.dtype == torch.float32:
tensor = tensor.to(torch.float16)
target_module = self.model.modules_dict["lm_head"]
tensor = safe_move_tensor(tensor, target_module.device())
self.lm_head = torch.nn.Linear(target_module.in_features, tensor.shape[0], bias = False, device = "meta")
self.lm_head.weight = torch.nn.Parameter(tensor, requires_grad=False)
continue
elif key.endswith(f'embed_tokens.weight'):
if tensor.dtype == torch.bfloat16:
tensor = tensor.to(torch.float16)
elif tensor.dtype == torch.float32:
tensor = tensor.to(torch.float16)
target_module = self.model.modules_dict["model.embed_tokens"]
tensor = safe_move_tensor(tensor, target_module.device())
self.embed_tokens = torch.nn.Embedding(tensor.shape[0], self.config.hidden_size, self.config.pad_token_id, device = "meta")
weight = torch.nn.Parameter(tensor, requires_grad=False)
if self.model.config.scale_emb != 1:
weight *= self.model.config.scale_emb
self.embed_tokens.weight = weight
continue
i = key.find("model.layers.")
if i == -1: raise ValueError(f" ## Error: unsupported layer in {self.lora_path}: {key}")