mirror of https://github.com/ostris/ai-toolkit.git (synced 2026-04-28 10:11:14 +00:00)
Offload the ARA with the layer if doing layer offloading. Add support for offloading the LoRA. Still needs optimizer support.
```diff
@@ -108,6 +108,14 @@ class MemoryManager:
                 LinearLayerMemoryManager.attach(
                     child_module, module._memory_manager
                 )
+                # attach to ARA as well
+                if hasattr(child_module, "ara_lora_ref"):
+                    ara = child_module.ara_lora_ref()
+                    if ara not in modules_processed:
+                        MemoryManager.attach(
+                            ara,
+                            device,
+                        )
                 modules_processed.append(child_module)
             elif (
                 child_module.__class__.__name__ in CONV_MODULES
@@ -125,6 +133,15 @@ class MemoryManager:
                 ConvLayerMemoryManager.attach(
                     child_module, module._memory_manager
                 )
+                # attach to ARA as well
+                if hasattr(child_module, "ara_lora_ref"):
+                    ara = child_module.ara_lora_ref()
+                    if ara not in modules_processed:
+                        MemoryManager.attach(
+                            ara,
+                            device,
+                        )
+                        modules_processed.append(ara)
                 modules_processed.append(child_module)
             elif child_module.__class__.__name__ in UNMANAGED_MODULES or any(
                 inc in child_module.__class__.__name__
```
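For context, the change relies on each managed layer exposing its adapter through a weak reference named `ara_lora_ref`: calling the ref yields the ARA/LoRA module, and the `modules_processed` list keeps a shared adapter from being attached twice. Below is a minimal runnable sketch of that pattern. Only `ara_lora_ref` and the dedup-list idea come from the commit; `ToyAdapter`, `attach_adapter`, and `offload_with_layers` are hypothetical stand-ins, and `.to(device)` here stands in for what `LinearLayerMemoryManager.attach` / `MemoryManager.attach` actually do.

```python
import weakref

import torch
import torch.nn as nn


class ToyAdapter(nn.Module):
    """Hypothetical stand-in for an ARA/LoRA module that augments a base layer."""

    def __init__(self, dim: int, rank: int = 4):
        super().__init__()
        self.down = nn.Linear(dim, rank, bias=False)
        self.up = nn.Linear(rank, dim, bias=False)


def attach_adapter(layer: nn.Module, adapter: nn.Module) -> None:
    # Mirror the `ara_lora_ref` attribute the diff probes with hasattr():
    # a weakref, so the base layer does not keep the adapter alive; calling
    # the ref yields the adapter (or None once it has been collected).
    layer.ara_lora_ref = weakref.ref(adapter)


def offload_with_layers(root: nn.Module, device: torch.device) -> list:
    """Move each managed layer to `device`, dragging its adapter along once.

    Reproduces only the dedup logic visible in the diff; the real
    MemoryManager swaps weights between devices rather than calling .to().
    """
    processed = []
    for child in root.modules():
        if not isinstance(child, nn.Linear):
            continue  # the real code also branches on conv and unmanaged types
        child.to(device)  # stand-in for LinearLayerMemoryManager.attach(...)
        if hasattr(child, "ara_lora_ref"):
            ara = child.ara_lora_ref()
            if ara is not None and ara not in processed:
                ara.to(device)  # stand-in for MemoryManager.attach(ara, device)
                processed.append(ara)
        processed.append(child)
    return processed


base = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 8))
shared = ToyAdapter(8)
attach_adapter(base[0], shared)
attach_adapter(base[1], shared)  # shared adapter should be offloaded only once
seen = offload_with_layers(base, torch.device("cpu"))
assert seen.count(shared) == 1
```

A weak reference keeps the layer-to-adapter link from forming a strong cycle (the adapter typically already references the layer it wraps), and the membership check on the processed list is what stops an adapter shared by several layers from being moved more than once.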