Mirror of https://github.com/turboderp-org/exllamav3.git, synced 2026-03-15 00:07:24 +00:00.
compare_q.py: Allow script to run without all backends installed
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
import torch
|
||||
from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
|
||||
from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
|
||||
from transformers import AutoModelForCausalLM
|
||||
try:
|
||||
from any_precision.modules.AnyPrecisionForCausalLM import AnyPrecisionForCausalLM
|
||||
from any_precision.modules.AnyPrecisionLinear import AnyPrecisionLinear
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
def get_tensors_size(tensors):
    """Return the combined size in bits of every non-None tensor in *tensors*.

    *tensors* is a mapping of names to tensors (or None for absent entries);
    None values are skipped so partially-populated dicts are handled.
    """
    total_bytes = 0
    for tensor in tensors.values():
        if tensor is None:
            continue
        total_bytes += tensor.element_size() * tensor.numel()
    # element_size() is in bytes; report bits for comparison with bpw figures.
    return 8 * total_bytes
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import torch
|
||||
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
|
||||
from exllamav2.model import ExLlamaV2Linear
|
||||
try:
|
||||
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache
|
||||
from exllamav2.model import ExLlamaV2Linear
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
def get_tensor_size(tensors):
    """Return the combined size in bits of all tensors in *tensors*.

    *tensors* is a mapping of names to tensors. None entries are skipped
    (consistent with get_tensors_size elsewhere in this repo), so state
    dicts with absent/optional entries no longer raise AttributeError.
    """
    # element_size() is bytes per element; multiply by 8 to report bits.
    return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import torch
|
||||
from exllamav3 import Config, Model, Tokenizer, Cache
|
||||
from exllamav3.modules import Linear
|
||||
try:
|
||||
from exllamav3 import Config, Model, Tokenizer, Cache
|
||||
from exllamav3.modules import Linear
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
def get_tensor_size(tensors):
    """Return the combined size in bits of all tensors in *tensors*.

    *tensors* is a mapping of names to tensors. None entries are skipped
    (consistent with get_tensors_size elsewhere in this repo), so state
    dicts with absent/optional entries no longer raise AttributeError.
    """
    # element_size() is bytes per element; multiply by 8 to report bits.
    return 8 * sum(t.element_size() * t.numel() for t in tensors.values() if t is not None)
|
||||
|
||||
@@ -1,12 +1,34 @@
|
||||
import torch
|
||||
from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
|
||||
from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
|
||||
from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from aqlm import QuantizedLinear
|
||||
from awq.modules.linear import WQLinear_GEMM
|
||||
from vptq import VQuantLinear
|
||||
from bitsandbytes.nn import Linear4bit
|
||||
|
||||
try:
|
||||
from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear
|
||||
from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear
|
||||
from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear
|
||||
except ModuleNotFoundError:
|
||||
MarlinQuantLinear = None
|
||||
TritonV2QuantLinear = None
|
||||
ExllamaV2QuantLinear = None
|
||||
|
||||
try:
|
||||
from aqlm import QuantizedLinear
|
||||
except ModuleNotFoundError:
|
||||
QuantizedLinear = None
|
||||
|
||||
try:
|
||||
from awq.modules.linear import WQLinear_GEMM
|
||||
except ModuleNotFoundError:
|
||||
WQLinear_GEMM = None
|
||||
|
||||
try:
|
||||
from vptq import VQuantLinear
|
||||
except ModuleNotFoundError:
|
||||
VQuantLinear = None
|
||||
|
||||
try:
|
||||
from bitsandbytes.nn import Linear4bit
|
||||
except ModuleNotFoundError:
|
||||
Linear4bit = None
|
||||
|
||||
def get_tensors_size(tensors):
    """Return the combined size in bits of every non-None tensor in *tensors*.

    *tensors* is a mapping of names to tensors (or None for absent entries);
    None values are skipped so partially-populated dicts are handled.
    """
    present = (t for t in tensors.values() if t is not None)
    byte_count = sum(t.element_size() * t.numel() for t in present)
    # element_size() is in bytes; report bits for comparison with bpw figures.
    return byte_count * 8
|
||||
|
||||
Reference in New Issue
Block a user