From bd758ff203232b059aa4308a47568a1a062d0fc8 Mon Sep 17 00:00:00 2001
From: Jaret Burkett
Date: Tue, 29 Aug 2023 05:45:49 -0600
Subject: [PATCH] Cleanup and small bug fixes

---
 jobs/process/BaseSDTrainProcess.py | 8 ++++++++
 toolkit/lora_special.py            | 7 +++++--
 toolkit/optimizer.py               | 2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py
index a565dcac..df68c698 100644
--- a/jobs/process/BaseSDTrainProcess.py
+++ b/jobs/process/BaseSDTrainProcess.py
@@ -177,6 +177,14 @@ class BaseSDTrainProcess(BaseTrainProcess):
             )
             o_dict['ss_output_name'] = self.job.name
 
+        if self.trigger_word is not None:
+            # just so auto1111 will pick it up
+            o_dict['ss_tag_frequency'] = {
+                'actfig': {
+                    'actfig': 1
+                }
+            }
+
         self.add_meta(o_dict)
 
     def get_training_info(self):
diff --git a/toolkit/lora_special.py b/toolkit/lora_special.py
index 1dc4dbc4..3a5d86ee 100644
--- a/toolkit/lora_special.py
+++ b/toolkit/lora_special.py
@@ -213,9 +213,12 @@ class LoRAModule(torch.nn.Module):
             device = state_dict['lora_up.weight'].device
 
             # todo should we do this at fp32?
+            if isinstance(self.normalize_scaler, torch.Tensor):
+                scaler = self.normalize_scaler.clone().detach()
+            else:
+                scaler = torch.tensor(self.normalize_scaler).to(device, dtype=dtype)
 
-            total_module_scale = torch.tensor(self.normalize_scaler / target_normalize_scaler) \
-                .to(device, dtype=dtype)
+            total_module_scale = scaler / target_normalize_scaler
             num_modules_layers = 2  # up and down
             up_down_scale = torch.pow(total_module_scale, 1.0 / num_modules_layers) \
                 .to(device, dtype=dtype)
diff --git a/toolkit/optimizer.py b/toolkit/optimizer.py
index 96aabfca..b4449f73 100644
--- a/toolkit/optimizer.py
+++ b/toolkit/optimizer.py
@@ -35,6 +35,8 @@ def get_optimizer(
         if use_lr < 0.1:
             # dadaptation uses different lr that is values of 0.1 to 1.0. default to 1.0
             use_lr = 1.0
+
+        print(f"Using lr {use_lr}")
        # let net be the neural network you want to train
        # you can choose weight decay value based on your problem, 0 by default
        optimizer = Prodigy(params, lr=use_lr, **optimizer_params)
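
Below is a minimal standalone sketch of the normalize_scaler handling introduced in the lora_special.py hunk, assuming normalize_scaler may arrive as either a plain float or a torch.Tensor; the helper name compute_up_down_scale is illustrative only and is not a function in the repository.

import torch

def compute_up_down_scale(normalize_scaler, target_normalize_scaler, device, dtype):
    # Mirror the patched branch: reuse an existing tensor as-is,
    # otherwise wrap the plain number in a tensor on the target device.
    if isinstance(normalize_scaler, torch.Tensor):
        scaler = normalize_scaler.clone().detach()
    else:
        scaler = torch.tensor(normalize_scaler).to(device, dtype=dtype)

    total_module_scale = scaler / target_normalize_scaler
    num_modules_layers = 2  # up and down weights share the scale
    # Take the 1/2 power so applying it to both weights reproduces the total scale.
    return torch.pow(total_module_scale, 1.0 / num_modules_layers).to(device, dtype=dtype)

# Example: a float scaler of 2.0 against a target of 1.0 yields sqrt(2) per layer.
print(compute_up_down_scale(2.0, 1.0, torch.device("cpu"), torch.float32))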