Revise GGUF loading by precomputing some parameters once at load time,

rather than recomputing them in each diffusion iteration
This commit is contained in:
layerdiffusion
2024-08-25 14:26:46 -07:00
parent ba01ad3711
commit 13d6f8ed90
5 changed files with 137 additions and 48 deletions

View File

@@ -162,6 +162,10 @@ def load_huggingface_component(guess, component_name, lib_name, cls_name, repo_p
model.initial_device = initial_device
model.offload_device = offload_device
if storage_dtype in ['gguf']:
from backend.operations_gguf import bake_gguf_model
model = bake_gguf_model(model)
return model
print(f'Skipped: {component_name} = {lib_name}.{cls_name}')

View File

@@ -405,12 +405,24 @@ class ForgeOperationsGGUF(ForgeOperations):
self.weight = state_dict[prefix + 'weight']
if prefix + 'bias' in state_dict:
self.bias = state_dict[prefix + 'bias']
if self.weight is not None and hasattr(self.weight, 'parent'):
self.weight.parent = self
if self.bias is not None and hasattr(self.bias, 'parent'):
self.bias.parent = self
return
def _apply(self, fn, recurse=True):
    """Apply `fn` (e.g. a device/dtype move) to this layer's tensors in place.

    Covers `weight`, `bias`, and the precomputed `quant_state_0` ..
    `quant_state_4` buffers baked for GGUF layers, re-wrapping each result
    via `utils.tensor2parameter` so they stay registered as parameters.
    Returns `self`, mirroring `torch.nn.Module._apply`.
    NOTE(review): the upper bound of 5 quant states is assumed fixed by the
    baking code — confirm against `bake_layer` implementations.
    """
    for attr in ('weight', 'bias'):
        value = getattr(self, attr)
        if value is not None:
            setattr(self, attr, utils.tensor2parameter(fn(value)))
    for idx in range(5):
        name = f'quant_state_{idx}'
        state = getattr(self, name, None)
        if state is not None:
            setattr(self, name, utils.tensor2parameter(fn(state)))
    return self
def forward(self, x):

View File

@@ -27,6 +27,7 @@ class ParameterGGUF(torch.nn.Parameter):
self.gguf_type = tensor.tensor_type
self.gguf_real_shape = torch.Size(reversed(list(tensor.shape)))
self.gguf_cls = quants_mapping.get(self.gguf_type, None)
self.parent = None
@property
def shape(self):
@@ -43,6 +44,7 @@ class ParameterGGUF(torch.nn.Parameter):
new.gguf_type = self.gguf_type
new.gguf_real_shape = self.gguf_real_shape
new.gguf_cls = self.gguf_cls
new.parent = self.parent
return new
def pin_memory(self, device=None):
@@ -50,17 +52,38 @@ class ParameterGGUF(torch.nn.Parameter):
new.gguf_type = self.gguf_type
new.gguf_real_shape = self.gguf_real_shape
new.gguf_cls = self.gguf_cls
new.parent = self.parent
return new
@classmethod
def make(cls, data, gguf_type, gguf_cls, gguf_real_shape, parent=None):
    """Alternate constructor: wrap `data` as a ParameterGGUF with metadata.

    Args:
        data: raw (quantized) tensor payload.
        gguf_type: GGUF tensor-type id for this payload.
        gguf_cls: quantization class handling dequantize/bake, or None.
        gguf_real_shape: the logical (dequantized) torch.Size.
        parent: owning layer, used for precomputed-state lookups; defaults
            to None for backward compatibility with pre-`parent` callers.
    """
    new = ParameterGGUF(data, no_init=True)
    new.gguf_type = gguf_type
    new.gguf_real_shape = gguf_real_shape
    new.gguf_cls = gguf_cls
    new.parent = parent
    return new
def bake_gguf_model(model):
    """Precompute ("bake") dequantization state for every GGUF layer in `model`.

    Walks all submodules; for each one whose `weight` carries a non-None
    `gguf_cls`, delegates to `gguf_cls.bake_layer(...)` so per-iteration
    diffusion steps can reuse the precomputed state instead of recomputing it.

    Args:
        model: a module exposing `.modules()` and `.computation_dtype`.

    Returns:
        The same `model`, modified in place.
    """
    computation_dtype = model.computation_dtype
    baked_layer_count = 0
    for module in model.modules():
        # getattr chain tolerates modules without `weight` and plain tensors
        # without `gguf_cls` (both skipped, as in the original hasattr checks).
        weight = getattr(module, 'weight', None)
        gguf_cls = getattr(weight, 'gguf_cls', None)
        if gguf_cls is not None:
            baked_layer_count += 1
            gguf_cls.bake_layer(module, weight, computation_dtype)
    if baked_layer_count > 0:
        # typo fix: "backed" -> "baked" (this function bakes layers)
        print(f'GGUF baked {baked_layer_count} layers.')
    return model
def dequantize_tensor(tensor):
if tensor is None:
return None
@@ -68,7 +91,7 @@ def dequantize_tensor(tensor):
if not hasattr(tensor, 'gguf_cls'):
return tensor
data = torch.tensor(tensor.data)
data = tensor
gguf_cls = tensor.gguf_cls
gguf_real_shape = tensor.gguf_real_shape

View File

@@ -425,7 +425,8 @@ class LoraLoader:
data=weight,
gguf_type=gguf_type,
gguf_cls=gguf_cls,
gguf_real_shape=gguf_real_shape
gguf_real_shape=gguf_real_shape,
parent=parent_layer
))
continue