diff --git a/eval/compare_q_anyprecision.py b/eval/compare_q_anyprecision.py
index f1c7c18..ecd1d3e 100644
--- a/eval/compare_q_anyprecision.py
+++ b/eval/compare_q_anyprecision.py
@@ -1,7 +1,9 @@
 import torch
-from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
-from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
-from transformers import AutoModelForCausalLM
+try:
+    from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
+    from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
+except ModuleNotFoundError:
+    pass
 
 def get_tensors_size(tensors):
     return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)
diff --git a/eval/compare_q_exllamav2.py b/eval/compare_q_exllamav2.py
index d5f46c3..b02856f 100644
--- a/eval/compare_q_exllamav2.py
+++ b/eval/compare_q_exllamav2.py
@@ -1,6 +1,9 @@
 import torch
-from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
-from exllamav2.model import ExLlamaV2Linear
+try:
+    from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
+    from exllamav2.model import ExLlamaV2Linear
+except ModuleNotFoundError:
+    pass
 
 def get_tensor_size(tensors):
     return 8 * sum(t.element_size() * t.numel() for t in tensors.values())
diff --git a/eval/compare_q_exllamav3.py b/eval/compare_q_exllamav3.py
index 3d38968..d7fded1 100644
--- a/eval/compare_q_exllamav3.py
+++ b/eval/compare_q_exllamav3.py
@@ -1,6 +1,9 @@
 import torch
-from exllamav3 import Config, Model, Tokenizer, Cache
-from exllamav3.modules import Linear
+try:
+    from exllamav3 import Config, Model, Tokenizer, Cache
+    from exllamav3.modules import Linear
+except ModuleNotFoundError:
+    pass
 
 def get_tensor_size(tensors):
     return 8 * sum(t.element_size() * t.numel() for t in tensors.values())
diff --git a/eval/compare_q_transformers.py b/eval/compare_q_transformers.py
index aefe546..0083dfd 100644
--- a/eval/compare_q_transformers.py
+++ b/eval/compare_q_transformers.py
@@ -1,12 +1,34 @@
 import torch
-from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
-from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
-from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from aqlm import QuantizedLinear
-from awq.modules.linear import WQLinear_GEMM
-from vptq import VQuantLinear
-from bitsandbytes.nn import Linear4bit
+
+try:
+    from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
+    from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
+    from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
+except ModuleNotFoundError:
+    MarlinQuantLinear = None
+    TritonV2QuantLinear = None
+    ExllamaV2QuantLinear = None
+
+try:
+    from aqlm import QuantizedLinear
+except ModuleNotFoundError:
+    QuantizedLinear = None
+
+try:
+    from awq.modules.linear import WQLinear_GEMM
+except ModuleNotFoundError:
+    WQLinear_GEMM = None
+
+try:
+    from vptq import VQuantLinear
+except ModuleNotFoundError:
+    VQuantLinear = None
+
+try:
+    from bitsandbytes.nn import Linear4bit
+except ModuleNotFoundError:
+    Linear4bit = None
 
 def get_tensors_size(tensors):
     return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)
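
Note that the two fallback styles in this patch behave differently when a backend is not installed: the bare `pass` in the first three scripts leaves the imported names undefined, so any later reference raises `NameError`, while the `None` sentinels in `compare_q_transformers.py` let call sites feature-detect before use. A minimal sketch of the sentinel pattern (the `optional_dep` module and `layer_kind` helper are hypothetical, for illustration only):

```python
try:
    from optional_dep import FancyLinear  # hypothetical optional backend
except ModuleNotFoundError:
    FancyLinear = None  # sentinel so call sites can feature-detect

def layer_kind(module):
    # With the None sentinel, the isinstance check stays safe when the
    # backend is missing; a bare `pass` in the except clause would instead
    # raise NameError the first time FancyLinear is referenced here.
    if FancyLinear is not None and isinstance(module, FancyLinear):
        return "FancyLinear"
    return type(module).__name__

print(layer_kind(object()))  # -> "object" when optional_dep is absent
```

The `pass` variant is adequate for the single-backend scripts, where nothing else in the file can run without the import anyway; the sentinel variant matters in `compare_q_transformers.py`, where several independent quantization backends are probed and the script should keep working with whichever subset is installed.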