GPU Shared Async Swap for all GGUF/BNB

layerdiffusion committed 2024-08-16 08:45:17 -07:00
parent 04e7f05769
commit 2f0555f7dc
2 changed files with 19 additions and 0 deletions


@@ -65,6 +65,18 @@ class ForgeParams4bit(Params4bit):
             bnb_quantized=self.bnb_quantized,
         )
 
+    def pin_memory(self, device=None):
+        return ForgeParams4bit(
+            torch.Tensor.pin_memory(self, device=device),
+            requires_grad=self.requires_grad,
+            quant_state=self.quant_state,
+            blocksize=self.blocksize,
+            compress_statistics=self.compress_statistics,
+            quant_type=self.quant_type,
+            quant_storage=self.quant_storage,
+            bnb_quantized=self.bnb_quantized,
+        )
+
 
 class ForgeLoader4Bit(torch.nn.Module):
     def __init__(self, *, device, dtype, quant_type, **kwargs):
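
The override exists because torch.Tensor.pin_memory() allocates a fresh page-locked tensor and returns it as a plain torch.Tensor, which would silently drop the bitsandbytes quantization metadata; re-wrapping the pinned storage in ForgeParams4bit carries quant_state, blocksize, and the other fields across. A minimal sketch of the failure mode, assuming a CUDA build of PyTorch and using a plain uint8 tensor with a hypothetical quant_state attribute as a stand-in for real packed 4-bit weights:

import torch

# Stand-in for a packed 4-bit weight blob; quant_state here is a
# hypothetical Python attribute mimicking the metadata a Params4bit carries.
weight = torch.empty(1024, dtype=torch.uint8)
weight.quant_state = "metadata"

pinned = torch.Tensor.pin_memory(weight)  # fresh page-locked host tensor
assert pinned.is_pinned()
# Ad-hoc attributes do not survive the copy, hence the re-wrap above.
assert not hasattr(pinned, "quant_state")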


@@ -37,6 +37,13 @@ class ParameterGGUF(torch.nn.Parameter):
         new.gguf_cls = self.gguf_cls
         return new
 
+    def pin_memory(self, device=None):
+        new = ParameterGGUF(torch.Tensor.pin_memory(self, device=device), no_init=True)
+        new.gguf_type = self.gguf_type
+        new.gguf_real_shape = self.gguf_real_shape
+        new.gguf_cls = self.gguf_cls
+        return new
+
     @classmethod
     def make(cls, data, gguf_type, gguf_cls, gguf_real_shape):
         new = ParameterGGUF(data, no_init=True)
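
For the "async swap" half of the commit title: page-locked host memory is what lets CUDA copies run asynchronously, so a pinned weight can be moved to the GPU with non_blocking=True while compute proceeds. A rough sketch of that pattern, assuming a CUDA device (the stream handling below is illustrative, not Forge's actual swap code):

import torch

# Pinned host tensor standing in for an offloaded weight.
cpu_weight = torch.randn(4096, 4096).pin_memory()

copy_stream = torch.cuda.Stream()
with torch.cuda.stream(copy_stream):
    # non_blocking=True only overlaps with compute when the source is
    # pinned; from pageable memory the transfer degrades to a sync copy.
    gpu_weight = cpu_weight.to("cuda", non_blocking=True)

# Block the default stream until the transfer lands before using the weight.
torch.cuda.current_stream().wait_stream(copy_stream)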