convert.py: Fix overflow when mixing bitrates for expert-heavy models

2026-04-20 14:29:51 +00:00 · 2026-03-15 00:29:37 +01:00
parent cd94bf8f8f
commit 5f54aa5f57
1 changed file with 6 additions and 4 deletions
--- a/exllamav3/conversion/allocation.py
+++ b/exllamav3/conversion/allocation.py
@@ -134,10 +134,12 @@ def allocate_linear(

    numel = l.weights_numel()
    budget = int(bpw * numel) + surplus_bits + 1
-    bpw = budget / numel
-    bpw = max(int(math.floor(bpw)), 1)
-    used_budget = bpw * numel
+    base_bpw = int(math.floor(bpw))
+    new_bpw = int(math.floor(budget / numel))
+    new_bpw = max(new_bpw, 1)
+    new_bpw = min(new_bpw, base_bpw + 2, 8)
+    used_budget = new_bpw * numel

-    strategy = {l.key: bpw}
+    strategy = {l.key: new_bpw}
    surplus = budget - used_budget
    return strategy, surplus