add two optimizations

--pin-shared-memory and --cuda-malloc

See also the updates in the README for more details.
lllyasviel
2024-02-23 18:39:32 -08:00
committed by GitHub
parent 54c89503eb
commit 88f395091b
5 changed files with 114 additions and 6 deletions
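
Background for the two flags: judging by the removed --async-cuda-allocation group below, --cuda-malloc is the new single opt-in for CUDA's asynchronous allocator, while --pin-shared-memory keeps weights offloaded to system RAM in page-locked memory so they can be streamed back to the GPU faster. The allocator plumbing itself is not in the hunks shown; as a rough sketch of the usual mechanism (illustrative, not this commit's exact code), the backend is selected through PYTORCH_CUDA_ALLOC_CONF before torch initializes:

# Sketch: how a --cuda-malloc style flag typically takes effect. PyTorch
# reads PYTORCH_CUDA_ALLOC_CONF when CUDA initializes, so the variable
# must be set before `import torch` runs anywhere in the process.
import os

conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF")
backend = "backend:cudaMallocAsync"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = backend if conf is None else f"{conf},{backend}"

import torch

# On a CUDA build this now reports "cudaMallocAsync" instead of "native".
print(torch.cuda.get_allocator_backend())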


@@ -49,9 +49,6 @@ parser.add_argument("--cache-path", type=str, default=None)
 parser.add_argument("--in-browser", action="store_true")
 parser.add_argument("--disable-in-browser", action="store_true")
 parser.add_argument("--gpu-device-id", type=int, default=None, metavar="DEVICE_ID")
-cm_group = parser.add_mutually_exclusive_group()
-cm_group.add_argument("--async-cuda-allocation", action="store_true")
-cm_group.add_argument("--disable-async-cuda-allocation", action="store_true")
 parser.add_argument("--disable-attention-upcast", action="store_true")
@@ -118,6 +115,9 @@ parser.add_argument("--disable-server-info", action="store_true")
 parser.add_argument("--multi-user", action="store_true")
+parser.add_argument("--cuda-malloc", action="store_true")
+parser.add_argument("--pin-shared-memory", action="store_true")
 if ldm_patched.modules.options.args_parsing:
     args = parser.parse_args([])
 else:
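
Both new options are plain store_true flags, so unlike the removed mutually exclusive group they can be combined freely. A hypothetical smoke test (the module path is assumed from the import above, not shown in this diff):

# Assumed module path for the parser defined in this file.
from ldm_patched.modules.args_parser import parser

args = parser.parse_args(["--cuda-malloc", "--pin-shared-memory"])
assert args.cuda_malloc and args.pin_shared_memory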


@@ -244,6 +244,12 @@ ALWAYS_VRAM_OFFLOAD = args.always_offload_from_vram
 if ALWAYS_VRAM_OFFLOAD:
     print("Always offload VRAM")
+PIN_SHARED_MEMORY = args.pin_shared_memory
+
+if PIN_SHARED_MEMORY:
+    print("Always pin shared GPU memory")
 def get_torch_device_name(device):
     if hasattr(device, 'type'):
         if device.type == "cuda":
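
PIN_SHARED_MEMORY gates page-locked ("pinned") host allocations: pinned pages cannot be swapped out, so the GPU's DMA engine can copy from them directly, and host-to-device transfers can overlap with CPU work. A self-contained illustration with stock PyTorch APIs (needs a CUDA device):

import torch

x = torch.randn(4096, 4096)   # ordinary pageable host tensor
xp = x.pin_memory()           # page-locked copy of the same data
assert xp.is_pinned()

# With a pinned source, non_blocking=True makes the host-to-device copy
# genuinely asynchronous with respect to the CPU.
y = xp.to("cuda", non_blocking=True)
torch.cuda.synchronize()      # wait until the async copy has finished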
@@ -328,8 +334,8 @@ class LoadedModel:
             else:
                 real_async_memory += module_mem
                 m.to(self.model.offload_device)
-                # if is_device_cpu(self.model.offload_device):
-                #     m._apply(lambda x: x.pin_memory())
+                if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device):
+                    m._apply(lambda x: x.pin_memory())
         elif hasattr(m, "weight"):
             m.to(self.device)
             mem_counter += module_size(m)
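
Here the previously commented-out experiment becomes real code behind the flag: Module._apply maps a function over every parameter and buffer tensor, so each weight offloaded to the CPU device is replaced by a pinned copy. The is_device_cpu guard matters because pinning only applies to host memory; weights kept on the GPU are left alone. A minimal sketch of the effect (_apply is a private PyTorch API; pin_memory needs CUDA):

import torch

m = torch.nn.Linear(16, 16)          # stand-in for an offloaded submodule
m._apply(lambda t: t.pin_memory())   # every param/buffer becomes a pinned copy

assert all(p.is_pinned() for p in m.parameters())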