compare_q.py: Add more GPTQ layer types

turboderp
2025-05-18 00:19:19 +02:00
parent 2432c64e68
commit 475dfcca47
2 changed files with 21 additions and 0 deletions

compare_q.py

@@ -1,5 +1,7 @@
 import torch
 from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
+from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
+from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from aqlm import QuantizedLinear
 from awq.modules.linear import WQLinear_GEMM
@@ -85,6 +87,17 @@ def get_storage_info(model):
                 "scales": module.scales,
             })
             sum_numel += module.in_features * module.out_features
+        elif any(isinstance(module, x) for x in [TritonV2QuantLinear]):
+            sum_bits += get_tensors_size({
+                "g_idx": module.g_idx,
+                "qweight": module.qweight,
+                "qzeros": module.qzeros,
+                "scales": module.scales,
+            })
+            sum_numel += module.in_features * module.out_features
+        elif any(isinstance(module, x) for x in [ExllamaV2QuantLinear]):
+            sum_bits += get_tensors_size(module.q_tensors)
+            sum_numel += module.in_features * module.out_features

     vram_bits = head_numel * head_bpw + sum_bits
     return sum_bits / sum_numel, head_bpw, vram_bits
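
The accounting in this hunk yields effective bits per weight: for each recognized quantized layer type, the storage of its packed tensors is added to sum_bits, while sum_numel tracks the number of logical weights the layer represents. get_tensors_size itself is not part of this diff; below is a minimal sketch of what it plausibly does, assuming it just totals the storage of the tensors it is given (the dict handling and the example dimensions are illustrative assumptions, not code from compare_q.py):

import torch

def get_tensors_size(tensors) -> int:
    # Sketch only: total storage, in bits, of a dict of tensors.
    # The real helper in compare_q.py is not shown in this diff.
    if isinstance(tensors, dict):
        tensors = tensors.values()
    return sum(t.numel() * t.element_size() * 8 for t in tensors)

# Example (illustrative shapes): a 4-bit GPTQ layer packs eight 4-bit weights
# per int32 element, plus one fp16 scale per group of 128 weights.
qweight = torch.zeros(4096 // 8, 4096, dtype=torch.int32)
scales = torch.zeros(4096 // 128, 4096, dtype=torch.float16)
bits = get_tensors_size({"qweight": qweight, "scales": scales})
print(bits / (4096 * 4096))  # ~4.125 bits per weight, before qzeros/g_idx overhead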

(new file: model spec JSON)

@@ -0,0 +1,8 @@
+[
+    {
+        "model_dir": "/mnt/str/models/llama3.1-8b-instruct/autoround/4bit-asym",
+        "load_fn": "transformers",
+        "fwd_fn": "transformers",
+        "label": "AutoRound 4bit asym"
+    }
+]
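
The second file is a job spec for the comparison script: each entry points at a quantized model directory and names the loader ("load_fn") and forward-pass routine ("fwd_fn") to use, plus a label for reporting. A minimal sketch of how such a spec could be consumed, assuming a registry keyed on the "load_fn" string; the function names and the "spec.json" path are placeholders, not the script's actual API:

import json
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_transformers(model_dir):
    # Illustrative loader for entries with "load_fn": "transformers".
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto")
    return model, tokenizer

LOAD_FNS = {"transformers": load_transformers}

with open("spec.json") as f:  # placeholder path; the new file's name is not shown
    jobs = json.load(f)

for job in jobs:
    model, tokenizer = LOAD_FNS[job["load_fn"]](job["model_dir"])
    print(f'{job["label"]}: loaded {type(model).__name__} from {job["model_dir"]}')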