mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-24 08:19:13 +00:00
speed up nf4 lora in offline patching mode
This commit is contained in:
@@ -405,7 +405,12 @@ class LoadedModel:
|
|||||||
mem_counter += module_mem
|
mem_counter += module_mem
|
||||||
else:
|
else:
|
||||||
memory_in_swap += module_mem
|
memory_in_swap += module_mem
|
||||||
|
|
||||||
|
if hasattr(m, 'weight') and hasattr(m.weight, 'bnb_quantized') and not m.weight.bnb_quantized and self.device.type == 'cuda':
|
||||||
|
m.to(self.device) # Quantize happens here
|
||||||
|
|
||||||
m.to(self.model.offload_device)
|
m.to(self.model.offload_device)
|
||||||
|
|
||||||
if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device):
|
if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device):
|
||||||
m._apply(lambda x: x.pin_memory())
|
m._apply(lambda x: x.pin_memory())
|
||||||
elif hasattr(m, "weight"):
|
elif hasattr(m, "weight"):
|
||||||
|
|||||||
Reference in New Issue
Block a user