diff --git a/backend/operations_gguf.py b/backend/operations_gguf.py
index 5a01d1ca..abd52a53 100644
--- a/backend/operations_gguf.py
+++ b/backend/operations_gguf.py
@@ -2,9 +2,46 @@ import gguf
 import torch
 
 
-# def functional_quantize_gguf(weight):
-#     gguf_cls = weight.gguf_cls
-#     gguf_cls.en
+quants_mapping = {
+    gguf.GGMLQuantizationType.Q4_0: gguf.Q4_0,
+    gguf.GGMLQuantizationType.Q5_0: gguf.Q5_0,
+    gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0,
+}
+
+
+class ParameterGGUF(torch.nn.Parameter):
+    def __init__(self, tensor=None, requires_grad=False, no_init=False):
+        super().__init__()
+        self.is_gguf = True
+
+        if no_init:
+            return
+
+        self.gguf_type = tensor.tensor_type
+        self.gguf_real_shape = torch.Size(reversed(list(tensor.shape)))
+        self.gguf_cls = quants_mapping.get(self.gguf_type, None)
+
+    @property
+    def shape(self):
+        return self.gguf_real_shape
+
+    def __new__(cls, tensor=None, requires_grad=False, no_init=False):
+        return super().__new__(cls, torch.tensor(tensor.data), requires_grad=requires_grad)
+
+    def to(self, *args, **kwargs):
+        new = ParameterGGUF(self.data.to(*args, **kwargs), no_init=True)
+        new.gguf_type = self.gguf_type
+        new.gguf_real_shape = self.gguf_real_shape
+        new.gguf_cls = self.gguf_cls
+        return new
+
+    @classmethod
+    def make(cls, data, gguf_type, gguf_cls, gguf_real_shape):
+        new = ParameterGGUF(data, no_init=True)
+        new.gguf_type = gguf_type
+        new.gguf_real_shape = gguf_real_shape
+        new.gguf_cls = gguf_cls
+        return new
 
 
 def functional_linear_gguf(x, weight, bias=None):
diff --git a/backend/utils.py b/backend/utils.py
index 7860eb08..d01863e0 100644
--- a/backend/utils.py
+++ b/backend/utils.py
@@ -4,48 +4,7 @@ import os
 import json
 import safetensors.torch
 import backend.misc.checkpoint_pickle
-
-
-quants_mapping = {
-    gguf.GGMLQuantizationType.Q4_0: gguf.Q4_0,
-    gguf.GGMLQuantizationType.Q5_0: gguf.Q5_0,
-    gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0,
-}
-
-
-class ParameterGGUF(torch.nn.Parameter):
-    def __init__(self, tensor=None, requires_grad=False, no_init=False):
-        super().__init__()
-        self.is_gguf = True
-
-        if no_init:
-            return
-
-        self.gguf_type = tensor.tensor_type
-        self.gguf_real_shape = torch.Size(reversed(list(tensor.shape)))
-        self.gguf_cls = quants_mapping.get(self.gguf_type, None)
-
-    @property
-    def shape(self):
-        return self.gguf_real_shape
-
-    def __new__(cls, tensor=None, requires_grad=False, no_init=False):
-        return super().__new__(cls, torch.tensor(tensor.data), requires_grad=requires_grad)
-
-    def to(self, *args, **kwargs):
-        new = ParameterGGUF(self.data.to(*args, **kwargs), no_init=True)
-        new.gguf_type = self.gguf_type
-        new.gguf_real_shape = self.gguf_real_shape
-        new.gguf_cls = self.gguf_cls
-        return new
-
-    @classmethod
-    def make(cls, data, gguf_type, gguf_cls, gguf_real_shape):
-        new = ParameterGGUF(data, no_init=True)
-        new.gguf_type = gguf_type
-        new.gguf_real_shape = gguf_real_shape
-        new.gguf_cls = gguf_cls
-        return new
+from backend.operations_gguf import ParameterGGUF
 
 
 def read_arbitrary_config(directory):
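
A minimal usage sketch of the relocated `ParameterGGUF` (assumptions: a `gguf.GGUFReader` whose `ReaderTensor` entries expose `.tensor_type`, `.shape`, and `.data`; the file path and variable names below are illustrative and not part of the diff):

```python
import gguf
import torch

from backend.operations_gguf import ParameterGGUF

# Hypothetical example: wrap a quantized tensor read from a GGUF file so it can
# be stored on a torch module while keeping its GGUF metadata around.
reader = gguf.GGUFReader("model-q4_0.gguf")  # illustrative path
raw = reader.tensors[0]                      # a gguf ReaderTensor

p = ParameterGGUF(raw)
print(p.is_gguf)    # True
print(p.shape)      # logical shape (GGUF dims reversed), not the packed data shape
print(p.gguf_type)  # e.g. gguf.GGMLQuantizationType.Q4_0

# The overridden .to() carries the GGUF metadata along with the moved data.
p_gpu = p.to("cuda") if torch.cuda.is_available() else p
```

Re-exporting `ParameterGGUF` from `backend.utils` (the new import line in the second hunk) keeps any existing `from backend.utils import ParameterGGUF` call sites working after the move.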