From f7a2b206d1d79745c7d620337d7b9b9d16a30cc5 Mon Sep 17 00:00:00 2001
From: lllyasviel <lyuminzhang@outlook.com>
Date: Wed, 24 Jan 2024 10:53:42 -0800
Subject: [PATCH] Update lowvram.py

---
 modules/lowvram.py | 132 ++-------------------------------------------
 1 file changed, 5 insertions(+), 127 deletions(-)

diff --git a/modules/lowvram.py b/modules/lowvram.py
index 45701046..908b5962 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -6,142 +6,20 @@ cpu = torch.device("cpu")
 
 
 def send_everything_to_cpu():
-    global module_in_gpu
-
-    if module_in_gpu is not None:
-        module_in_gpu.to(cpu)
-
-    module_in_gpu = None
+    return
 
 
 def is_needed(sd_model):
-    return shared.cmd_opts.lowvram or shared.cmd_opts.medvram or shared.cmd_opts.medvram_sdxl and hasattr(sd_model, 'conditioner')
+    return False
 
 
 def apply(sd_model):
-    enable = is_needed(sd_model)
-    shared.parallel_processing_allowed = not enable
-
-    if enable:
-        setup_for_low_vram(sd_model, not shared.cmd_opts.lowvram)
-    else:
-        sd_model.lowvram = False
+    return
 
 
 def setup_for_low_vram(sd_model, use_medvram):
-    if getattr(sd_model, 'lowvram', False):
-        return
-
-    sd_model.lowvram = True
-
-    parents = {}
-
-    def send_me_to_gpu(module, _):
-        """send this module to GPU; send whatever tracked module was previous in GPU to CPU;
-        we add this as forward_pre_hook to a lot of modules and this way all but one of them will
-        be in CPU
-        """
-        global module_in_gpu
-
-        module = parents.get(module, module)
-
-        if module_in_gpu == module:
-            return
-
-        if module_in_gpu is not None:
-            module_in_gpu.to(cpu)
-
-        module.to(devices.device)
-        module_in_gpu = module
-
-    # see below for register_forward_pre_hook;
-    # first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is
-    # useless here, and we just replace those methods
-
-    first_stage_model = sd_model.first_stage_model
-    first_stage_model_encode = sd_model.first_stage_model.encode
-    first_stage_model_decode = sd_model.first_stage_model.decode
-
-    def first_stage_model_encode_wrap(x):
-        send_me_to_gpu(first_stage_model, None)
-        return first_stage_model_encode(x)
-
-    def first_stage_model_decode_wrap(z):
-        send_me_to_gpu(first_stage_model, None)
-        return first_stage_model_decode(z)
-
-    to_remain_in_cpu = [
-        (sd_model, 'first_stage_model'),
-        (sd_model, 'depth_model'),
-        (sd_model, 'embedder'),
-        (sd_model, 'model'),
-        (sd_model, 'embedder'),
-    ]
-
-    is_sdxl = hasattr(sd_model, 'conditioner')
-    is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model')
-
-    if is_sdxl:
-        to_remain_in_cpu.append((sd_model, 'conditioner'))
-    elif is_sd2:
-        to_remain_in_cpu.append((sd_model.cond_stage_model, 'model'))
-    else:
-        to_remain_in_cpu.append((sd_model.cond_stage_model, 'transformer'))
-
-    # remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet from the model
-    stored = []
-    for obj, field in to_remain_in_cpu:
-        module = getattr(obj, field, None)
-        stored.append(module)
-        setattr(obj, field, None)
-
-    # send the model to GPU.
-    sd_model.to(devices.device)
-
-    # put modules back. the modules will be in CPU.
-    for (obj, field), module in zip(to_remain_in_cpu, stored):
-        setattr(obj, field, module)
-
-    # register hooks for those the first three models
-    if is_sdxl:
-        sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu)
-    elif is_sd2:
-        sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu)
-        sd_model.cond_stage_model.model.token_embedding.register_forward_pre_hook(send_me_to_gpu)
-        parents[sd_model.cond_stage_model.model] = sd_model.cond_stage_model
-        parents[sd_model.cond_stage_model.model.token_embedding] = sd_model.cond_stage_model
-    else:
-        sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
-        parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
-
-    sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
-    sd_model.first_stage_model.encode = first_stage_model_encode_wrap
-    sd_model.first_stage_model.decode = first_stage_model_decode_wrap
-    if sd_model.depth_model:
-        sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
-    if sd_model.embedder:
-        sd_model.embedder.register_forward_pre_hook(send_me_to_gpu)
-
-    if use_medvram:
-        sd_model.model.register_forward_pre_hook(send_me_to_gpu)
-    else:
-        diff_model = sd_model.model.diffusion_model
-
-        # the third remaining model is still too big for 4 GB, so we also do the same for its submodules
-        # so that only one of them is in GPU at a time
-        stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
-        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
-        sd_model.model.to(devices.device)
-        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored
-
-        # install hooks for bits of third model
-        diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu)
-        for block in diff_model.input_blocks:
-            block.register_forward_pre_hook(send_me_to_gpu)
-        diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu)
-        for block in diff_model.output_blocks:
-            block.register_forward_pre_hook(send_me_to_gpu)
+    return
 
 
 def is_enabled(sd_model):
-    return sd_model.lowvram
+    return False