From 2f0555f7dc3f2d06b3a3cc238a4fa2b72e11e28d Mon Sep 17 00:00:00 2001
From: layerdiffusion <19834515+lllyasviel@users.noreply.github.com>
Date: Fri, 16 Aug 2024 08:45:17 -0700
Subject: [PATCH] GPU Shared Async Swap for all GGUF/BNB

---
Reviewer notes (this region is ignored by `git am`):

* backend/operations_bnb.py: adds `ForgeParams4bit.pin_memory(device=None)`.
  It wraps the result of `torch.Tensor.pin_memory(self, device=device)` in a
  new `ForgeParams4bit` and passes the existing `requires_grad`,
  `quant_state`, `blocksize`, `compress_statistics`, `quant_type`,
  `quant_storage` and `bnb_quantized` values through unchanged.
* backend/operations_gguf.py: adds `ParameterGGUF.pin_memory(device=None)`.
  It builds a new `ParameterGGUF(..., no_init=True)` around the pinned
  tensor and copies `gguf_type`, `gguf_real_shape` and `gguf_cls` onto it.
* NOTE(review): this patch was recovered from a copy whose line breaks and
  indentation had been collapsed. Hunk line counts match the hunk headers,
  but the leading whitespace of context lines is reconstructed -- verify
  against the target tree before applying.
* NOTE(review): the `device` argument of `torch.Tensor.pin_memory` appears
  to be deprecated in recent PyTorch releases -- confirm against the
  PyTorch version in use.

 backend/operations_bnb.py  | 12 ++++++++++++
 backend/operations_gguf.py |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/backend/operations_bnb.py b/backend/operations_bnb.py
index 7aefca2c..5a7089a9 100644
--- a/backend/operations_bnb.py
+++ b/backend/operations_bnb.py
@@ -65,6 +65,18 @@ class ForgeParams4bit(Params4bit):
                 bnb_quantized=self.bnb_quantized,
             )
 
+    def pin_memory(self, device=None):
+        return ForgeParams4bit(
+            torch.Tensor.pin_memory(self, device=device),
+            requires_grad=self.requires_grad,
+            quant_state=self.quant_state,
+            blocksize=self.blocksize,
+            compress_statistics=self.compress_statistics,
+            quant_type=self.quant_type,
+            quant_storage=self.quant_storage,
+            bnb_quantized=self.bnb_quantized,
+        )
+
 
 class ForgeLoader4Bit(torch.nn.Module):
     def __init__(self, *, device, dtype, quant_type, **kwargs):
diff --git a/backend/operations_gguf.py b/backend/operations_gguf.py
index fdd565ec..9c54e090 100644
--- a/backend/operations_gguf.py
+++ b/backend/operations_gguf.py
@@ -37,6 +37,13 @@ class ParameterGGUF(torch.nn.Parameter):
         new.gguf_cls = self.gguf_cls
         return new
 
+    def pin_memory(self, device=None):
+        new = ParameterGGUF(torch.Tensor.pin_memory(self, device=device), no_init=True)
+        new.gguf_type = self.gguf_type
+        new.gguf_real_shape = self.gguf_real_shape
+        new.gguf_cls = self.gguf_cls
+        return new
+
     @classmethod
     def make(cls, data, gguf_type, gguf_cls, gguf_real_shape):
         new = ParameterGGUF(data, no_init=True)