add two optimizations

--pin-shared-memory and --cuda-malloc

See also the updates in the README for more details.
lllyasviel
2024-02-23 18:39:32 -08:00
committed by GitHub
parent 54c89503eb
commit 88f395091b
5 changed files with 114 additions and 6 deletions
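
Background for the two flags: judging by the removed --async-cuda-allocation group below, --cuda-malloc is the new single opt-in for CUDA's asynchronous allocator, while --pin-shared-memory keeps weights offloaded to system RAM in page-locked memory so they can be streamed back to the GPU faster. The allocator plumbing itself is not in the hunks shown; as a rough sketch of the usual mechanism (illustrative, not this commit's exact code), the backend is selected through PYTORCH_CUDA_ALLOC_CONF before torch initializes:

# Sketch: how a --cuda-malloc style flag typically takes effect. PyTorch
# reads PYTORCH_CUDA_ALLOC_CONF when CUDA initializes, so the variable
# must be set before `import torch` runs anywhere in the process.
import os

conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF")
backend = "backend:cudaMallocAsync"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = backend if conf is None else f"{conf},{backend}"

import torch

# On a CUDA build this now reports "cudaMallocAsync" instead of "native".
print(torch.cuda.get_allocator_backend())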


@@ -49,9 +49,6 @@ parser.add_argument("--cache-path", type=str, default=None)
 parser.add_argument("--in-browser", action="store_true")
 parser.add_argument("--disable-in-browser", action="store_true")
 parser.add_argument("--gpu-device-id", type=int, default=None, metavar="DEVICE_ID")
-cm_group = parser.add_mutually_exclusive_group()
-cm_group.add_argument("--async-cuda-allocation", action="store_true")
-cm_group.add_argument("--disable-async-cuda-allocation", action="store_true")
 parser.add_argument("--disable-attention-upcast", action="store_true")
@@ -118,6 +115,9 @@ parser.add_argument("--disable-server-info", action="store_true")
 parser.add_argument("--multi-user", action="store_true")
+parser.add_argument("--cuda-malloc", action="store_true")
+parser.add_argument("--pin-shared-memory", action="store_true")
 if ldm_patched.modules.options.args_parsing:
     args = parser.parse_args([])
 else:
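
Both new options are plain store_true flags, so unlike the removed mutually exclusive group they can be combined freely. A hypothetical smoke test (the module path is assumed from the import above, not shown in this diff):

# Assumed module path for the parser defined in this file.
from ldm_patched.modules.args_parser import parser

args = parser.parse_args(["--cuda-malloc", "--pin-shared-memory"])
assert args.cuda_malloc and args.pin_shared_memory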


@@ -244,6 +244,12 @@ ALWAYS_VRAM_OFFLOAD = args.always_offload_from_vram
 if ALWAYS_VRAM_OFFLOAD:
     print("Always offload VRAM")
+PIN_SHARED_MEMORY = args.pin_shared_memory
+
+if PIN_SHARED_MEMORY:
+    print("Always pin shared GPU memory")
 def get_torch_device_name(device):
     if hasattr(device, 'type'):
         if device.type == "cuda":
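
PIN_SHARED_MEMORY gates page-locked ("pinned") host allocations: pinned pages cannot be swapped out, so the GPU's DMA engine can copy from them directly, and host-to-device transfers can overlap with CPU work. A self-contained illustration with stock PyTorch APIs (needs a CUDA device):

import torch

x = torch.randn(4096, 4096)   # ordinary pageable host tensor
xp = x.pin_memory()           # page-locked copy of the same data
assert xp.is_pinned()

# With a pinned source, non_blocking=True makes the host-to-device copy
# genuinely asynchronous with respect to the CPU.
y = xp.to("cuda", non_blocking=True)
torch.cuda.synchronize()      # wait until the async copy has finished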
@@ -328,8 +334,8 @@ class LoadedModel:
             else:
                 real_async_memory += module_mem
                 m.to(self.model.offload_device)
-                # if is_device_cpu(self.model.offload_device):
-                #     m._apply(lambda x: x.pin_memory())
+                if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device):
+                    m._apply(lambda x: x.pin_memory())
         elif hasattr(m, "weight"):
             m.to(self.device)
             mem_counter += module_size(m)
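
Here the previously commented-out experiment becomes real code behind the flag: Module._apply maps a function over every parameter and buffer tensor, so each weight offloaded to the CPU device is replaced by a pinned copy. The is_device_cpu guard matters because pinning only applies to host memory; weights kept on the GPU are left alone. A minimal sketch of the effect (_apply is a private PyTorch API; pin_memory needs CUDA):

import torch

m = torch.nn.Linear(16, 16)          # stand-in for an offloaded submodule
m._apply(lambda t: t.pin_memory())   # every param/buffer becomes a pinned copy

assert all(p.is_pinned() for p in m.parameters())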