mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-27 09:41:31 +00:00
revise GGUF by precomputing some parameters
rather than computing them in each diffusion iteration
This commit is contained in:
@@ -162,6 +162,10 @@ def load_huggingface_component(guess, component_name, lib_name, cls_name, repo_p
|
||||
model.initial_device = initial_device
|
||||
model.offload_device = offload_device
|
||||
|
||||
if storage_dtype in ['gguf']:
|
||||
from backend.operations_gguf import bake_gguf_model
|
||||
model = bake_gguf_model(model)
|
||||
|
||||
return model
|
||||
|
||||
print(f'Skipped: {component_name} = {lib_name}.{cls_name}')
|
||||
|
||||
@@ -405,12 +405,24 @@ class ForgeOperationsGGUF(ForgeOperations):
|
||||
self.weight = state_dict[prefix + 'weight']
|
||||
if prefix + 'bias' in state_dict:
|
||||
self.bias = state_dict[prefix + 'bias']
|
||||
if self.weight is not None and hasattr(self.weight, 'parent'):
|
||||
self.weight.parent = self
|
||||
if self.bias is not None and hasattr(self.bias, 'parent'):
|
||||
self.bias.parent = self
|
||||
return
|
||||
|
||||
def _apply(self, fn, recurse=True):
|
||||
if self.weight is not None:
|
||||
self.weight = utils.tensor2parameter(fn(self.weight))
|
||||
if self.bias is not None:
|
||||
self.bias = utils.tensor2parameter(fn(self.bias))
|
||||
for i in range(5):
|
||||
quant_state_name = f'quant_state_{i}'
|
||||
quant_state = getattr(self, quant_state_name, None)
|
||||
if quant_state is not None:
|
||||
quant_state = fn(quant_state)
|
||||
quant_state = utils.tensor2parameter(quant_state)
|
||||
setattr(self, quant_state_name, quant_state)
|
||||
return self
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
@@ -27,6 +27,7 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
self.gguf_type = tensor.tensor_type
|
||||
self.gguf_real_shape = torch.Size(reversed(list(tensor.shape)))
|
||||
self.gguf_cls = quants_mapping.get(self.gguf_type, None)
|
||||
self.parent = None
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
@@ -43,6 +44,7 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
new.gguf_type = self.gguf_type
|
||||
new.gguf_real_shape = self.gguf_real_shape
|
||||
new.gguf_cls = self.gguf_cls
|
||||
new.parent = self.parent
|
||||
return new
|
||||
|
||||
def pin_memory(self, device=None):
|
||||
@@ -50,17 +52,38 @@ class ParameterGGUF(torch.nn.Parameter):
|
||||
new.gguf_type = self.gguf_type
|
||||
new.gguf_real_shape = self.gguf_real_shape
|
||||
new.gguf_cls = self.gguf_cls
|
||||
new.parent = self.parent
|
||||
return new
|
||||
|
||||
@classmethod
def make(cls, data, gguf_type, gguf_cls, gguf_real_shape, parent):
    """Alternate constructor: wrap *data* in a ParameterGGUF and attach
    already-known GGUF metadata directly.

    Used when the quantization metadata (quant type, dequantizer class,
    logical shape, owning module) has been precomputed elsewhere, so it
    does not have to be re-derived from a gguf tensor object.
    NOTE(review): reconstructed from a diff view — the old 4-argument
    signature was extended with ``parent`` in this commit; confirm
    against the post-commit file.
    """
    param = ParameterGGUF(data, no_init=True)
    param.gguf_type = gguf_type
    param.gguf_cls = gguf_cls
    param.gguf_real_shape = gguf_real_shape
    param.parent = parent
    return param
|
||||
|
||||
|
||||
def bake_gguf_model(model):
    """Precompute ("bake") dequantization parameters for every
    GGUF-quantized layer of *model*, in place.

    Walks all submodules; any module whose ``weight`` carries a non-None
    ``gguf_cls`` gets ``gguf_cls.bake_layer(module, weight, dtype)``
    applied once, using the model's ``computation_dtype``. This moves
    per-layer setup work out of the diffusion loop. Returns the same
    model object.
    """
    dtype = model.computation_dtype
    baked = 0

    for module in model.modules():
        # getattr-with-default collapses the original nested hasattr
        # checks: modules without a weight, or weights without a
        # gguf_cls, fall through to the None guard below.
        weight = getattr(module, 'weight', None)
        quant_cls = getattr(weight, 'gguf_cls', None)
        if quant_cls is None:
            continue
        baked += 1
        quant_cls.bake_layer(module, weight, dtype)

    if baked > 0:
        print(f'GGUF backed {baked} layers.')

    return model
|
||||
|
||||
|
||||
def dequantize_tensor(tensor):
|
||||
if tensor is None:
|
||||
return None
|
||||
@@ -68,7 +91,7 @@ def dequantize_tensor(tensor):
|
||||
if not hasattr(tensor, 'gguf_cls'):
|
||||
return tensor
|
||||
|
||||
data = torch.tensor(tensor.data)
|
||||
data = tensor
|
||||
gguf_cls = tensor.gguf_cls
|
||||
gguf_real_shape = tensor.gguf_real_shape
|
||||
|
||||
|
||||
@@ -425,7 +425,8 @@ class LoraLoader:
|
||||
data=weight,
|
||||
gguf_type=gguf_type,
|
||||
gguf_cls=gguf_cls,
|
||||
gguf_real_shape=gguf_real_shape
|
||||
gguf_real_shape=gguf_real_shape,
|
||||
parent=parent_layer
|
||||
))
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user