Mirror of https://github.com/turboderp-org/exllamav2.git
Commit: Auto expand Q/K norm weight to match number of heads
@@ -48,16 +48,23 @@ class ExLlamaV2HeadNorm(ExLlamaV2Module):
         if isinstance(w, tuple):
             weight = w[0]
             bias = w[1]
+            if len(weight.shape) == 1 and weight.shape[0] == self.model.config.head_dim:
+                weight = nn.Parameter(weight.repeat(self.num_heads, 1))
+                bias = nn.Parameter(bias.repeat(self.num_heads, 1))
         else:
             weight = w
             bias = None
+            if len(weight.shape) == 1 and weight.shape[0] == self.model.config.head_dim:
+                weight = nn.Parameter(weight.repeat(self.num_heads, 1))

         assert isinstance(weight, nn.Parameter)
         assert bias is None or isinstance(bias, nn.Parameter)

-        self.layernorm = nn.LayerNorm(self.model.config.hidden_size,
-                                      elementwise_affine = True,
-                                      bias = bias is not None)
+        self.layernorm = nn.LayerNorm(
+            self.model.config.hidden_size,
+            elementwise_affine = True,
+            bias = bias is not None
+        )

         self.layernorm.weight = weight
         self.weight = weight
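The substantive change is the auto-expansion at the top of the hunk: when a checkpoint stores the Q/K norm weight (and bias, if present) as a single vector of shape (head_dim,), it is repeated once per attention head into the (num_heads, head_dim) layout the rest of the module expects. Below is a minimal standalone sketch of that expansion, not the library's code; the head_dim and num_heads values are hypothetical stand-ins for self.model.config.head_dim and self.num_heads.

import torch
import torch.nn as nn

# Hypothetical sizes standing in for self.model.config.head_dim and self.num_heads
head_dim = 4
num_heads = 3

# A checkpoint may store a single shared Q/K norm weight of shape (head_dim,)
w = nn.Parameter(torch.ones(head_dim))

# Same expansion as in the commit: repeat the vector once per head,
# producing a (num_heads, head_dim) matrix
if len(w.shape) == 1 and w.shape[0] == head_dim:
    w = nn.Parameter(w.repeat(num_heads, 1))

print(w.shape)  # torch.Size([3, 4])

Note that in the else branch of the diff (no bias tuple in the checkpoint) only the weight is expanded, since bias remains None there.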