compare_q.py: Allow script to run without all backends installed

This commit is contained in:
turboderp
2025-06-05 00:00:14 +02:00
parent 162f99ab8b
commit db65151b07
4 changed files with 44 additions and 14 deletions

View File

@@ -1,7 +1,9 @@
import torch
from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
from transformers import AutoModelForCausalLM
try:
from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
except ModuleNotFoundError:
pass
def get_tensors_size(tensors):
    """Return the total size in *bits* of all tensors in the mapping.

    Entries whose value is None are ignored (absent/optional weights).
    The factor 8 converts the byte count to bits.
    """
    total_bytes = 0
    for tensor in tensors.values():
        if tensor is None:
            continue
        total_bytes += tensor.element_size() * tensor.numel()
    return 8 * total_bytes

View File

@@ -1,6 +1,9 @@
import torch
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
from exllamav2.model import ExLlamaV2Linear
try:
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
from exllamav2.model import ExLlamaV2Linear
except ModuleNotFoundError:
pass
def get_tensor_size(tensors):
    """Return the total size in *bits* of all tensors in the mapping.

    Skips entries whose value is None, matching the sibling
    get_tensors_size helpers used by the other backend comparison
    scripts; previously a None entry raised AttributeError.
    The factor 8 converts the byte count to bits.
    """
    return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)

View File

@@ -1,6 +1,9 @@
import torch
from exllamav3 import Config, Model, Tokenizer, Cache
from exllamav3.modules import Linear
try:
from exllamav3 import Config, Model, Tokenizer, Cache
from exllamav3.modules import Linear
except ModuleNotFoundError:
pass
def get_tensor_size(tensors):
    """Return the total size in *bits* of all tensors in the mapping.

    Skips entries whose value is None, matching the sibling
    get_tensors_size helpers used by the other backend comparison
    scripts; previously a None entry raised AttributeError.
    The factor 8 converts the byte count to bits.
    """
    return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)

View File

@@ -1,12 +1,34 @@
import torch
from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
from transformers import AutoTokenizer, AutoModelForCausalLM
from aqlm import QuantizedLinear
from awq.modules.linear import WQLinear_GEMM
from vptq import VQuantLinear
from bitsandbytes.nn import Linear4bit
try:
from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
except ModuleNotFoundError:
MarlinQuantLinear = None
TritonV2QuantLinear = None
ExllamaV2QuantLinear = None
try:
from aqlm import QuantizedLinear
except ModuleNotFoundError:
QuantizedLinear = None
try:
from awq.modules.linear import WQLinear_GEMM
except ModuleNotFoundError:
WQLinear_GEMM = None
try:
from vptq import VQuantLinear
except ModuleNotFoundError:
VQuantLinear = None
try:
from bitsandbytes.nn import Linear4bit
except ModuleNotFoundError:
Linear4bit = None
def get_tensors_size(tensors):
    """Return the total size in *bits* of all tensors in the mapping.

    None values (absent/optional weights) contribute nothing.
    The factor 8 converts the byte count to bits.
    """
    present = (v for v in tensors.values() if v is not None)
    byte_count = sum(v.element_size() * v.numel() for v in present)
    return byte_count * 8