From bd758ff203232b059aa4308a47568a1a062d0fc8 Mon Sep 17 00:00:00 2001
From: Jaret Burkett
Date: Tue, 29 Aug 2023 05:45:49 -0600
Subject: [PATCH] Cleanup and small bug fixes

---
 jobs/process/BaseSDTrainProcess.py | 8 ++++++++
 toolkit/lora_special.py            | 7 +++++--
 toolkit/optimizer.py               | 2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py
index a565dcac..df68c698 100644
--- a/jobs/process/BaseSDTrainProcess.py
+++ b/jobs/process/BaseSDTrainProcess.py
@@ -177,6 +177,14 @@ class BaseSDTrainProcess(BaseTrainProcess):
             )
             o_dict['ss_output_name'] = self.job.name
 
+        if self.trigger_word is not None:
+            # just so auto1111 will pick it up
+            o_dict['ss_tag_frequency'] = {
+                'actfig': {
+                    'actfig': 1
+                }
+            }
+
         self.add_meta(o_dict)
 
     def get_training_info(self):
diff --git a/toolkit/lora_special.py b/toolkit/lora_special.py
index 1dc4dbc4..3a5d86ee 100644
--- a/toolkit/lora_special.py
+++ b/toolkit/lora_special.py
@@ -213,9 +213,12 @@ class LoRAModule(torch.nn.Module):
             device = state_dict['lora_up.weight'].device
 
             # todo should we do this at fp32?
+            if isinstance(self.normalize_scaler, torch.Tensor):
+                scaler = self.normalize_scaler.clone().detach()
+            else:
+                scaler = torch.tensor(self.normalize_scaler).to(device, dtype=dtype)
 
-            total_module_scale = torch.tensor(self.normalize_scaler / target_normalize_scaler) \
-                .to(device, dtype=dtype)
+            total_module_scale = scaler / target_normalize_scaler
             num_modules_layers = 2  # up and down
             up_down_scale = torch.pow(total_module_scale, 1.0 / num_modules_layers) \
                 .to(device, dtype=dtype)
diff --git a/toolkit/optimizer.py b/toolkit/optimizer.py
index 96aabfca..b4449f73 100644
--- a/toolkit/optimizer.py
+++ b/toolkit/optimizer.py
@@ -35,6 +35,8 @@ def get_optimizer(
         if use_lr < 0.1:
             # dadaptation uses different lr that is values of 0.1 to 1.0. default to 1.0
             use_lr = 1.0
+
+        print(f"Using lr {use_lr}")
        # let net be the neural network you want to train
        # you can choose weight decay value based on your problem, 0 by default
        optimizer = Prodigy(params, lr=use_lr, **optimizer_params)
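
Below is a minimal standalone sketch of the normalize_scaler handling introduced in the lora_special.py hunk, assuming normalize_scaler may arrive as either a plain float or a torch.Tensor; the helper name compute_up_down_scale is illustrative only and is not a function in the repository.

import torch

def compute_up_down_scale(normalize_scaler, target_normalize_scaler, device, dtype):
    # Mirror the patched branch: reuse an existing tensor as-is,
    # otherwise wrap the plain number in a tensor on the target device.
    if isinstance(normalize_scaler, torch.Tensor):
        scaler = normalize_scaler.clone().detach()
    else:
        scaler = torch.tensor(normalize_scaler).to(device, dtype=dtype)

    total_module_scale = scaler / target_normalize_scaler
    num_modules_layers = 2  # up and down weights share the scale
    # Take the 1/2 power so applying it to both weights reproduces the total scale.
    return torch.pow(total_module_scale, 1.0 / num_modules_layers).to(device, dtype=dtype)

# Example: a float scaler of 2.0 against a target of 1.0 yields sqrt(2) per layer.
print(compute_up_down_scale(2.0, 1.0, torch.device("cpu"), torch.float32))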