From 5825d0b3d663bb4df5e29a19d8d2a88136c42116 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 27 Jan 2024 11:38:37 -0800 Subject: [PATCH] remove unnecessary flags --- README.md | 14 +++----------- modules_forge/initialization.py | 21 +-------------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index df1c4cb7..740cdf87 100644 --- a/README.md +++ b/README.md @@ -55,19 +55,11 @@ We do not change any UI. But you will see the version of Forge here ### Changes -Forge removes all WebUI's codes related to speed and memory optimization and reworked everything. +Forge removes all WebUI's codes related to speed and memory optimization and reworked everything. All previous cmd flags like medvram, lowvram, medvram-sdxl, precision full, no half, no half vae, attention_xxx, upcast unet, ... are all REMOVED. Adding these flags will not cause error but they will not do anything now. **We highly encourage Forge users to remove all cmd flags and let Forge to decide how to load models.** -All previous cmd flags like medvram, lowvram, medvram-sdxl, precision full, no half, no half vae, attention_xxx, upcast unet, ... are all REMOVED. Adding these flags will not cause error but they will not do anything now. **We highly encourage Forge users to remove all cmd flags and let Forge to decide how to load models.** +Without any cmd flag, Forge can run SDXL with 4GB vram and SD1.5 with 2GB vram. -Currently, the behaviors is: - -"When loading a model to GPU, Forge will decide whether to load the entire model, or to load separated parts of the model. Then, when loading another model, Forge will try best to unload the previous model." - -**The only one flag that you may still need** is `--disable-offload-from-vram`, to change the above behavior to - -"When loading a model to GPU, Forge will decide whether to load the entire model, or to load separated parts of the model. 
Then, when loading another model, Forge will try best to keep the previous model in GPU without unloading it." - -You should `--disable-offload-from-vram` when and only when you have more than 20GB GPU memory, or when you are on MAC MPS. +**The only one flag that you may still need** is `--always-offload-from-vram` (This flag will make things **slower**). This option will let Forge always unload models from VRAM. This can be useful if you use multiple software together and want Forge to use less VRAM and give some vram to other software, or when you are using some old extensions that will compete for vram with the main UI, or (very rarely) when you get OOM. If you really want to play with cmd flags, you can additionally control the GPU with: diff --git a/modules_forge/initialization.py b/modules_forge/initialization.py index 5d579ce1..5797435e 100644 --- a/modules_forge/initialization.py +++ b/modules_forge/initialization.py @@ -2,28 +2,9 @@ def initialize_forge(): import ldm_patched.modules.args_parser as args_parser - args_parser.parser.add_argument("--disable-offload-from-vram", action="store_true", - help="Force loading models to vram when the unload can be avoided. 
" - "Use this when you ara on MAC or have more than 20GB VRAM like RTX4096.") - - args_parser.args = args_parser.parser.parse_known_args()[0] + args_parser.args, _ = args_parser.parser.parse_known_args() import ldm_patched.modules.model_management as model_management - - if args_parser.args.disable_offload_from_vram: - print('User disabled VRAM offload.') - model_management.ALWAYS_VRAM_OFFLOAD = False - elif model_management.total_vram > 20 * 1024: - if args_parser.args.always_offload_from_vram: - print('User has more than 20GB VRAM, but forced offloading models from VRAM.') - model_management.ALWAYS_VRAM_OFFLOAD = True - else: - print('Automatically disable VRAM offload since user has more than 20GB VRAM.') - model_management.ALWAYS_VRAM_OFFLOAD = False - else: - print('Always offload models from VRAM.') - model_management.ALWAYS_VRAM_OFFLOAD = True - import torch device = model_management.get_torch_device()