mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-29 18:51:31 +00:00
Support LoRAs for Q8/Q5/Q4 GGUF Models
what a crazy night of math
This commit is contained in:
@@ -2,34 +2,27 @@ import gguf
|
||||
import torch
|
||||
|
||||
|
||||
# Maps a GGML quantization type tag to the gguf helper class that knows how
# to dequantize tensors stored in that format. Only Q4_0 / Q5_0 / Q8_0 are
# supported here; dequantize_tensor raises NotImplementedError for others.
quants_mapping = {
    gguf.GGMLQuantizationType.Q4_0: gguf.Q4_0,
    gguf.GGMLQuantizationType.Q5_0: gguf.Q5_0,
    gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0,
}
# Unfinished sketch of a re-quantization helper, kept for reference:
# def functional_quantize_gguf(weight):
#     gguf_cls = weight.gguf_cls
#     gguf_cls.en
def functional_linear_gguf(x, weight, bias=None):
    """Apply a linear layer whose weight/bias may be GGUF-quantized.

    The weight and bias are dequantized (if needed) to the dtype of the
    input activation `x` before calling ``torch.nn.functional.linear``,
    so mixed-precision inputs work without an extra cast at the call site.

    Args:
        x: input activation tensor; its dtype decides the compute dtype.
        weight: plain tensor or GGUF-quantized tensor (carrying
            ``gguf_type`` / ``gguf_cls`` / ``gguf_real_shape`` attributes —
            see dequantize_tensor).
        bias: optional tensor, same handling as ``weight``; may be None.

    Returns:
        The result of ``x @ weight.T + bias`` in ``x``'s dtype.
    """
    target_dtype = x.dtype
    # Dequantize directly into the target dtype in a single step, rather
    # than dequantizing to a default dtype and casting afterwards.
    weight = dequantize_tensor(weight, target_dtype)
    bias = dequantize_tensor(bias, target_dtype)
    return torch.nn.functional.linear(x, weight, bias)
||||
def dequantize_tensor(tensor, target_dtype=torch.float16):
    """Convert a (possibly GGUF-quantized) tensor to a plain torch tensor.

    Args:
        tensor: object exposing ``data`` plus GGUF metadata attributes
            ``gguf_type``, ``gguf_cls`` and ``gguf_real_shape``
            (the logical shape before quantization packing), or None.
        target_dtype: dtype of the returned tensor (default float16).

    Returns:
        A dequantized ``torch.Tensor`` in ``target_dtype``, the raw data
        unchanged when no quantization class is attached, or None when
        ``tensor`` is None.

    Raises:
        NotImplementedError: for quant types missing from quants_mapping.
    """
    # Bias-less layers pass bias=None straight through.
    if tensor is None:
        return None

    data = torch.tensor(tensor.data)
    gguf_type = tensor.gguf_type
    gguf_cls = tensor.gguf_cls
    gguf_real_shape = tensor.gguf_real_shape

    # Unquantized storage formats only need a dtype cast.
    if gguf_type in [gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16]:
        return data.to(target_dtype)

    # No quantization class attached: return the raw payload as-is.
    if gguf_cls is None:
        return data

    if gguf_type not in quants_mapping:
        raise NotImplementedError(f'Quant type {gguf_type} not implemented!')

    quant_cls = quants_mapping.get(gguf_type)

    # The quant class unpacks the packed blocks back to gguf_real_shape;
    # cast once at the end so the caller gets the requested dtype.
    return quant_cls.dequantize_pytorch(data, gguf_real_shape).to(target_dtype)
Reference in New Issue
Block a user