compare_q.py: Fix GGUF VRAM computation when output.weight precedes token_embd.weight

2026-04-20 14:29:51 +00:00 · 2025-06-04 23:34:42 +02:00
parent 9a24e0ce52
commit ab875ba730
1 changed file with 2 additions and 3 deletions
--- a/eval/compare_q_llamacpp.py
+++ b/eval/compare_q_llamacpp.py
@@ -23,11 +23,10 @@ def get_storage_info(model_dir):
    head_numel = 0
    for tensor_info in tensors:
        name = tensor_info.name
-        if (name == "token_embd.weight" and head_bpw == 0) or \
-            name == "output.weight":
+        if (name == "token_embd.weight" and head_bpw == 0) or name == "output.weight":
            head_bpw = tensor_info.n_bytes * 8 / tensor_info.n_elements
            head_numel = tensor_info.n_elements
-        elif name.endswith(".weight"):
+        elif name.endswith(".weight") and name != "token_embd.weight" and not name.endswith("norm.weight"):
            sum_bits += tensor_info.n_bytes * 8
            sum_numel += tensor_info.n_elements
    vram_bits = head_numel * head_bpw + sum_bits