diff --git a/backend/operations.py b/backend/operations.py
index 473dcc5b..69324b68 100644
--- a/backend/operations.py
+++ b/backend/operations.py
@@ -500,3 +500,52 @@ def automatic_memory_management():
 
     print(f'Automatic Memory Management: {len(module_list)} Modules in {(end - start):.2f} seconds.')
     return
+
+
+class DynamicSwapInstaller:
+    @staticmethod
+    def _install_module(module: torch.nn.Module, target_device: torch.device):
+        original_class = module.__class__
+        module.__dict__['forge_backup_original_class'] = original_class
+
+        def hacked_get_attr(self, name: str):
+            if '_parameters' in self.__dict__:
+                _parameters = self.__dict__['_parameters']
+                if name in _parameters:
+                    p = _parameters[name]
+                    if p is None:
+                        return None
+                    if p.__class__ == torch.nn.Parameter:
+                        return torch.nn.Parameter(p.to(target_device), requires_grad=p.requires_grad)
+                    else:
+                        return p.to(target_device)
+            if '_buffers' in self.__dict__:
+                _buffers = self.__dict__['_buffers']
+                if name in _buffers:
+                    return _buffers[name].to(target_device)
+            return super(original_class, self).__getattr__(name)
+
+        module.__class__ = type('DynamicSwapInstance', (original_class,), {
+            '__getattr__': hacked_get_attr,
+        })
+
+        return
+
+    @staticmethod
+    def _uninstall_module(module: torch.nn.Module):
+        if 'forge_backup_original_class' in module.__dict__:
+            module.__class__ = module.__dict__.pop('forge_backup_original_class')
+        return
+
+    @staticmethod
+    def install_model(model: torch.nn.Module, target_device: torch.device):
+        for m in model.modules():
+            DynamicSwapInstaller._install_module(m, target_device)
+        return
+
+    @staticmethod
+    def uninstall_model(model: torch.nn.Module):
+        for m in model.modules():
+            DynamicSwapInstaller._uninstall_module(m)
+        return
+
diff --git a/extensions-builtin/forge_space_animagine_xl_31/space_meta.json b/extensions-builtin/forge_space_animagine_xl_31/space_meta.json
index 1add2f56..b0240ed7 100644
--- a/extensions-builtin/forge_space_animagine_xl_31/space_meta.json
+++ b/extensions-builtin/forge_space_animagine_xl_31/space_meta.json
@@ -1,6 +1,6 @@
 {
     "tag": "General Image Generation",
-    "title": "Animagine XL 3.1 Official User Interface (8GB VRAM)",
+    "title": "Animagine XL 3.1 Official User Interface",
     "repo_id": "cagliostrolab/animagine-xl-3.1",
     "revision": "f240016348c54945299cfb4163fbc514fba1c2ed"
 }
diff --git a/extensions-builtin/forge_space_photo_maker_v2/space_meta.json b/extensions-builtin/forge_space_photo_maker_v2/space_meta.json
index 181b3385..9a378258 100644
--- a/extensions-builtin/forge_space_photo_maker_v2/space_meta.json
+++ b/extensions-builtin/forge_space_photo_maker_v2/space_meta.json
@@ -1,6 +1,6 @@
 {
     "tag": "Face Swap, Human Identification, and Style Transfer",
-    "title": "PhotoMaker V2 (8GB VRAM required): Improved ID Fidelity and Better Controllability",
+    "title": "PhotoMaker V2: Improved ID Fidelity and Better Controllability",
     "repo_id": "TencentARC/PhotoMaker-V2",
     "revision": "abbf3252f4188b0373bb5384db31f429be40176c"
 }
diff --git a/spaces.py b/spaces.py
index bcaa9e88..01ccb9ba 100644
--- a/spaces.py
+++ b/spaces.py
@@ -10,6 +10,7 @@ import gradio.oauth
 import gradio.routes
 
 from backend import memory_management
+from backend.operations import DynamicSwapInstaller
 from diffusers.models import modeling_utils as diffusers_modeling_utils
 from transformers import modeling_utils as transformers_modeling_utils
 from backend.attention import AttentionProcessorForge
@@ -30,6 +31,8 @@ Request.__init__ = patched_init
 gradio.oauth.attach_oauth = lambda x: None
 gradio.routes.attach_oauth = lambda x: None
 
+ALWAYS_SWAP = False
+
 module_in_gpu: torch.nn.Module = None
 gpu = memory_management.get_torch_device()
 cpu = torch.device('cpu')
@@ -44,8 +47,10 @@ def unload_module():
     if module_in_gpu is None:
         return
 
-    print(f'Moved module to CPU: {type(module_in_gpu).__name__}')
+    DynamicSwapInstaller.uninstall_model(module_in_gpu)
     module_in_gpu.to(cpu)
+    print(f'Move module to CPU: {type(module_in_gpu).__name__}')
+
     module_in_gpu = None
     memory_management.soft_empty_cache()
     return
@@ -58,9 +63,25 @@ def load_module(m):
 
         return
 
     unload_module()
+
+    model_memory = memory_management.module_size(m)
+    current_free_mem = memory_management.get_free_memory(gpu)
+    inference_memory = 1.5 * 1024 * 1024 * 1024 # memory_management.minimum_inference_memory() # TODO: connect to main memory system
+    estimated_remaining_memory = current_free_mem - model_memory - inference_memory
+
+    print(f"[Memory Management] Current Free GPU Memory: {current_free_mem / (1024 * 1024):.2f} MB")
+    print(f"[Memory Management] Required Model Memory: {model_memory / (1024 * 1024):.2f} MB")
+    print(f"[Memory Management] Required Inference Memory: {inference_memory / (1024 * 1024):.2f} MB")
+    print(f"[Memory Management] Estimated Remaining GPU Memory: {estimated_remaining_memory / (1024 * 1024):.2f} MB")
+
+    if ALWAYS_SWAP or estimated_remaining_memory < 0:
+        print(f'Move module to SWAP: {type(m).__name__}')
+        DynamicSwapInstaller.install_model(m, target_device=gpu)
+    else:
+        print(f'Move module to GPU: {type(m).__name__}')
+        m.to(gpu)
+
     module_in_gpu = m
-    module_in_gpu.to(gpu)
-    print(f'Moved module to GPU: {type(module_in_gpu).__name__}')
     return
 