mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-27 09:41:31 +00:00
revise GGUF by precomputing some parameters
rather than computing them in each diffusion iteration
This commit is contained in:
@@ -162,6 +162,10 @@ def load_huggingface_component(guess, component_name, lib_name, cls_name, repo_p
|
||||
model.initial_device = initial_device
|
||||
model.offload_device = offload_device
|
||||
|
||||
if storage_dtype in ['gguf']:
|
||||
from backend.operations_gguf import bake_gguf_model
|
||||
model = bake_gguf_model(model)
|
||||
|
||||
return model
|
||||
|
||||
print(f'Skipped: {component_name} = {lib_name}.{cls_name}')
|
||||
|
||||
@@ -405,12 +405,24 @@ class ForgeOperationsGGUF(ForgeOperations):
|
||||
self.weight = state_dict[prefix + 'weight']
|
||||
if prefix + 'bias' in state_dict:
|
||||
self.bias = state_dict[prefix + 'bias']
|
||||
if self.weight is not None and hasattr(self.weight, 'parent'):
|
||||
self.weight.parent = self
|
||||
if self.bias is not None and hasattr(self.bias, 'parent'):
|
||||
self.bias.parent = self
|
||||
return
|
||||
|
||||
def _apply(self, fn, recurse=True):
|
||||
if self.weight is not None:
|
||||
self.weight = utils.tensor2parameter(fn(self.weight))
|
||||
if self.bias is not None:
|
||||
self.bias = utils.tensor2parameter(fn(self.bias))
|
||||
for i in range(5):
|
||||
quant_state_name = f'quant_state_{i}'
|
||||
quant_state = getattr(self, quant_state_name, None)
|
||||
if quant_state is not None:
|
||||
quant_state = fn(quant_state)
|
||||
quant_state = utils.tensor2parameter(quant_state)
|
||||
setattr(self, quant_state_name, quant_state)
|
||||
return self
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
@@ -27,6 +27,7 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
self.gguf_type = tensor.tensor_type
|
||||
self.gguf_real_shape = torch.Size(reversed(list(tensor.shape)))
|
||||
self.gguf_cls = quants_mapping.get(self.gguf_type, None)
|
||||
self.parent = None
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
@@ -43,6 +44,7 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
new.gguf_type = self.gguf_type
|
||||
new.gguf_real_shape = self.gguf_real_shape
|
||||
new.gguf_cls = self.gguf_cls
|
||||
new.parent = self.parent
|
||||
return new
|
||||
|
||||
def pin_memory(self, device=None):
|
||||
@@ -50,17 +52,38 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
new.gguf_type = self.gguf_type
|
||||
new.gguf_real_shape = self.gguf_real_shape
|
||||
new.gguf_cls = self.gguf_cls
|
||||
new.parent = self.parent
|
||||
return new
|
||||
|
||||
@classmethod
def make(cls, data, gguf_type, gguf_cls, gguf_real_shape, parent):
    """Alternate constructor: wrap *data* in a ParameterGGUF and attach
    already-known GGUF metadata directly.

    Used when the quantization metadata (quant type, dequantizer class,
    logical shape, owning module) has been precomputed elsewhere, so it
    does not have to be re-derived from a gguf tensor object.
    NOTE(review): reconstructed from a diff view — the old 4-argument
    signature was extended with ``parent`` in this commit; confirm
    against the post-commit file.
    """
    param = ParameterGGUF(data, no_init=True)
    param.gguf_type = gguf_type
    param.gguf_cls = gguf_cls
    param.gguf_real_shape = gguf_real_shape
    param.parent = parent
    return param
|
||||
|
||||
|
||||
def bake_gguf_model(model):
    """Precompute ("bake") dequantization parameters for every
    GGUF-quantized layer of *model*, in place.

    Walks all submodules; any module whose ``weight`` carries a non-None
    ``gguf_cls`` gets ``gguf_cls.bake_layer(module, weight, dtype)``
    applied once, using the model's ``computation_dtype``. This moves
    per-layer setup work out of the diffusion loop. Returns the same
    model object.
    """
    dtype = model.computation_dtype
    baked = 0

    for module in model.modules():
        # getattr-with-default collapses the original nested hasattr
        # checks: modules without a weight, or weights without a
        # gguf_cls, fall through to the None guard below.
        weight = getattr(module, 'weight', None)
        quant_cls = getattr(weight, 'gguf_cls', None)
        if quant_cls is None:
            continue
        baked += 1
        quant_cls.bake_layer(module, weight, dtype)

    if baked > 0:
        print(f'GGUF backed {baked} layers.')

    return model
|
||||
|
||||
|
||||
def dequantize_tensor(tensor):
|
||||
if tensor is None:
|
||||
return None
|
||||
@@ -68,7 +91,7 @@ def dequantize_tensor(tensor):
|
||||
if not hasattr(tensor, 'gguf_cls'):
|
||||
return tensor
|
||||
|
||||
data = torch.tensor(tensor.data)
|
||||
data = tensor
|
||||
gguf_cls = tensor.gguf_cls
|
||||
gguf_real_shape = tensor.gguf_real_shape
|
||||
|
||||
|
||||
@@ -425,7 +425,8 @@ class LoraLoader:
|
||||
data=weight,
|
||||
gguf_type=gguf_type,
|
||||
gguf_cls=gguf_cls,
|
||||
gguf_real_shape=gguf_real_shape
|
||||
gguf_real_shape=gguf_real_shape,
|
||||
parent=parent_layer
|
||||
))
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user