diff --git a/modules/processing.py b/modules/processing.py
index 233da0c4..90311c07 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -32,6 +32,7 @@ from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
 from modules.sd_models import apply_token_merging, forge_model_reload
 from modules_forge.utils import apply_circular_forge
+from backend import memory_management


 # some of those options should not be changed at all because they would break the model, so I removed them from options.
@@ -773,7 +774,16 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
     return f"{prompt_text}{negative_prompt_text}\n{generation_params_text}".strip()


+need_global_unload = False
+
+
 def process_images(p: StableDiffusionProcessing) -> Processed:
+    global need_global_unload
+
+    if need_global_unload:
+        need_global_unload = False
+        memory_management.unload_all_models()
+
     p.sd_model = forge_model_reload()

     if p.scripts is not None:
diff --git a/modules_forge/main_entry.py b/modules_forge/main_entry.py
index 7503c0ea..3799ad9b 100644
--- a/modules_forge/main_entry.py
+++ b/modules_forge/main_entry.py
@@ -1,7 +1,7 @@
 import torch
 import gradio as gr

-from modules import shared_items, shared, ui_common, sd_models
+from modules import shared_items, shared, ui_common, sd_models, processing
 from modules import sd_vae as sd_vae_module
 from backend import memory_management, stream

@@ -94,6 +94,8 @@ def refresh_memory_management_settings(model_memory, async_loading, pin_shared_m
     print(f'Stream Used by CUDA: {stream.should_use_stream()}')
     print(f'Current Inference Memory: {memory_management.minimum_inference_memory() / (1024 * 1024):.2f} MB')
     print(f'PIN Shared Memory: {pin_shared_memory}')
+
+    processing.need_global_unload = True
     return