From d339600181252aafbe12dc06f1b4fa7f953fc6f3 Mon Sep 17 00:00:00 2001
From: layerdiffusion <19834515+lllyasviel@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:56:18 -0700
Subject: [PATCH] fix

---
 backend/memory_management.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/backend/memory_management.py b/backend/memory_management.py
index f8b57a40..a6cc9578 100644
--- a/backend/memory_management.py
+++ b/backend/memory_management.py
@@ -462,13 +462,13 @@ class LoadedModel:
             m.to(self.device)
             mem_counter += m.total_mem
 
-        for m in cpu_modules + gpu_modules_only_extras:
-            if hasattr(m, 'weight') and m.weight is not None and hasattr(m.weight, 'bnb_quantized') and not m.weight.bnb_quantized and self.device.type == 'cuda':
-                m.to(self.device)  # Quantize happens here
-
         for m in cpu_modules:
             m.prev_parameters_manual_cast = m.parameters_manual_cast
             m.parameters_manual_cast = True
+
+            if hasattr(m, 'weight') and m.weight is not None and hasattr(m.weight, 'bnb_quantized') and not m.weight.bnb_quantized and self.device.type == 'cuda':
+                m.to(self.device)  # Quantize happens here
+
             m.to(self.model.offload_device)
             if pin_memory:
                 m._apply(lambda x: x.pin_memory())
@@ -477,6 +477,10 @@ class LoadedModel:
         for m in gpu_modules_only_extras:
             m.prev_parameters_manual_cast = m.parameters_manual_cast
             m.parameters_manual_cast = True
+
+            if hasattr(m, 'weight') and m.weight is not None and hasattr(m.weight, 'bnb_quantized') and not m.weight.bnb_quantized and self.device.type == 'cuda':
+                m.to(self.device)  # Quantize happens here
+
             module_move(m, device=self.device, recursive=False, excluded_pattens=['weight'])
             if hasattr(m, 'weight') and m.weight is not None:
                 if pin_memory:
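
Note: for context, below is a minimal sketch of the bitsandbytes behaviour that the
`# Quantize happens here` comment relies on. Moving a 4-bit module to a CUDA device
is what performs the actual quantization, which is why the patch moves that step
inside each per-module loop: quantization then happens after `parameters_manual_cast`
is set, and before the module is sent back to the offload device (cpu_modules) or
partially moved with `module_move` (gpu_modules_only_extras). The layer type and
sizes in the sketch are illustrative assumptions, not code from this repository.

```python
# Minimal sketch, assuming bitsandbytes (a recent version exposing
# Params4bit.bnb_quantized) is installed and a CUDA device is available.
# The layer shape (64x64) is arbitrary and only for illustration.
import torch
import bitsandbytes as bnb

layer = bnb.nn.Linear4bit(64, 64, compute_dtype=torch.float16)

# A freshly constructed layer still holds un-quantized weights; this is
# the state the patch's `not m.weight.bnb_quantized` check detects.
print(layer.weight.bnb_quantized)  # expected: False

# Moving the module to a CUDA device triggers the actual 4-bit quantization,
# i.e. the ".to(self.device)  # Quantize happens here" step in the diff.
layer.to('cuda')
print(layer.weight.bnb_quantized)  # expected: True

# Once quantized, the module can be moved back to the CPU offload device,
# as the cpu_modules loop does with m.to(self.model.offload_device).
layer.to('cpu')
```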