mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-30 19:21:21 +00:00
Add two optimizations:
--pin-shared-memory and --cuda-malloc. See also the updates in the README for more details.
This commit is contained in:
@@ -49,9 +49,6 @@ parser.add_argument("--cache-path", type=str, default=None)
|
||||
parser.add_argument("--in-browser", action="store_true")
|
||||
parser.add_argument("--disable-in-browser", action="store_true")
|
||||
parser.add_argument("--gpu-device-id", type=int, default=None, metavar="DEVICE_ID")
|
||||
cm_group = parser.add_mutually_exclusive_group()
|
||||
cm_group.add_argument("--async-cuda-allocation", action="store_true")
|
||||
cm_group.add_argument("--disable-async-cuda-allocation", action="store_true")
|
||||
|
||||
parser.add_argument("--disable-attention-upcast", action="store_true")
|
||||
|
||||
@@ -118,6 +115,9 @@ parser.add_argument("--disable-server-info", action="store_true")
|
||||
|
||||
parser.add_argument("--multi-user", action="store_true")
|
||||
|
||||
parser.add_argument("--cuda-malloc", action="store_true")
|
||||
parser.add_argument("--pin-shared-memory", action="store_true")
|
||||
|
||||
if ldm_patched.modules.options.args_parsing:
|
||||
args = parser.parse_args([])
|
||||
else:
|
||||
|
||||
@@ -244,6 +244,12 @@ ALWAYS_VRAM_OFFLOAD = args.always_offload_from_vram
|
||||
if ALWAYS_VRAM_OFFLOAD:
|
||||
print("Always offload VRAM")
|
||||
|
||||
PIN_SHARED_MEMORY = args.pin_shared_memory
|
||||
|
||||
if PIN_SHARED_MEMORY:
|
||||
print("Always pin shared GPU memory")
|
||||
|
||||
|
||||
def get_torch_device_name(device):
|
||||
if hasattr(device, 'type'):
|
||||
if device.type == "cuda":
|
||||
@@ -328,8 +334,8 @@ class LoadedModel:
|
||||
else:
|
||||
real_async_memory += module_mem
|
||||
m.to(self.model.offload_device)
|
||||
# if is_device_cpu(self.model.offload_device):
|
||||
# m._apply(lambda x: x.pin_memory())
|
||||
if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device):
|
||||
m._apply(lambda x: x.pin_memory())
|
||||
elif hasattr(m, "weight"):
|
||||
m.to(self.device)
|
||||
mem_counter += module_size(m)
|
||||
|
||||
Reference in New Issue
Block a user