From 40e60fa0214754551cf8635f529750a589bc68b6 Mon Sep 17 00:00:00 2001
From: Jaret Burkett <jaretburkett@gmail.com>
Date: Tue, 25 Jul 2023 15:28:20 -0600
Subject: [PATCH] Finally found that bug. ugh

---
 jobs/process/BaseSDTrainProcess.py | 11 ++++------
 jobs/process/TrainSliderProcess.py | 32 ++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/jobs/process/BaseSDTrainProcess.py b/jobs/process/BaseSDTrainProcess.py
index 5e9f9405..f32d3fb1 100644
--- a/jobs/process/BaseSDTrainProcess.py
+++ b/jobs/process/BaseSDTrainProcess.py
@@ -328,21 +328,17 @@ class BaseSDTrainProcess(BaseTrainProcess):
                 guidance_rescale=guidance_rescale
             )
 
-            # compute the previous noisy sample x_t -> x_t-1
-            latents = self.sd.noise_scheduler.step(noise_pred, timestep, latents).prev_sample
         else:
             noise_pred = train_util.predict_noise(
                 self.sd.unet,
                 self.sd.noise_scheduler,
                 timestep,
                 latents,
-                text_embeddings.text_embeds,
+                text_embeddings.text_embeds if hasattr(text_embeddings, 'text_embeds') else text_embeddings,
                 guidance_scale=guidance_scale
             )
 
-            # compute the previous noisy sample x_t -> x_t-1
-            latents = self.sd.noise_scheduler.step(noise_pred, timestep, latents).prev_sample
-        return latents
+        return noise_pred
 
     # ref: https://github.com/huggingface/diffusers/blob/0bab447670f47c28df60fbd2f6a0f833f75a16f5/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L746
     def diffuse_some_steps(
@@ -357,7 +353,7 @@ class BaseSDTrainProcess(BaseTrainProcess):
     ):
 
         for timestep in tqdm(self.sd.noise_scheduler.timesteps[start_timesteps:total_timesteps], leave=False):
-            latents = self.predict_noise(
+            noise_pred = self.predict_noise(
                 latents,
                 text_embeddings,
                 timestep,
@@ -365,6 +361,7 @@ class BaseSDTrainProcess(BaseTrainProcess):
                 add_time_ids=add_time_ids,
                 **kwargs,
             )
+            latents = self.sd.noise_scheduler.step(noise_pred, timestep, latents).prev_sample
 
         # return latents_steps
         return latents
diff --git a/jobs/process/TrainSliderProcess.py b/jobs/process/TrainSliderProcess.py
index fddcecb1..c7b89a2a 100644
--- a/jobs/process/TrainSliderProcess.py
+++ b/jobs/process/TrainSliderProcess.py
@@ -244,16 +244,16 @@ class TrainSliderProcess(BaseSDTrainProcess):
         lr_scheduler = self.lr_scheduler
         loss_function = torch.nn.MSELoss()
 
-        def get_noise_pred(p, n):
+        def get_noise_pred(p, n, gs, cts, dn):
             return self.predict_noise(
-                latents=denoised_latents,
+                latents=dn,
                 text_embeddings=train_tools.concat_prompt_embeddings(
                     p,  # unconditional
                     n,  # positive
                     self.train_config.batch_size,
                 ),
-                timestep=current_timestep,
-                guidance_scale=1,
+                timestep=cts,
+                guidance_scale=gs,
             )
 
         # set network multiplier
@@ -302,11 +302,17 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 int(timesteps_to * 1000 / self.train_config.max_denoising_steps)
             ]
 
-            positive_latents = get_noise_pred(positive, negative)
+            positive_latents = get_noise_pred(
+                positive, negative, 1, current_timestep, denoised_latents
+            ).to("cpu", dtype=torch.float32)
 
-            neutral_latents = get_noise_pred(positive, neutral)
+            neutral_latents = get_noise_pred(
+                positive, neutral, 1, current_timestep, denoised_latents
+            ).to("cpu", dtype=torch.float32)
 
-            unconditional_latents = get_noise_pred(positive, positive)
+            unconditional_latents = get_noise_pred(
+                positive, positive, 1, current_timestep, denoised_latents
+            ).to("cpu", dtype=torch.float32)
 
         anchor_loss = None
         if len(self.anchor_pairs) > 0:
@@ -315,19 +321,25 @@ class TrainSliderProcess(BaseSDTrainProcess):
                 torch.randint(0, len(self.anchor_pairs), (1,)).item()
             ]
             with torch.no_grad():
-                anchor_target_noise = get_noise_pred(anchor.prompt, anchor.neg_prompt)
+                anchor_target_noise = get_noise_pred(
+                    anchor.prompt, anchor.neg_prompt, 1, current_timestep, denoised_latents
+                ).to("cpu", dtype=torch.float32)
             with self.network:
                 # anchor whatever weight  prompt pair is using
                 pos_nem_mult = 1.0 if prompt_pair.multiplier > 0 else -1.0
                 self.network.multiplier = anchor.multiplier * pos_nem_mult
 
-                anchor_pred_noise = get_noise_pred(anchor.prompt, anchor.neg_prompt)
+                anchor_pred_noise = get_noise_pred(
+                    anchor.prompt, anchor.neg_prompt, 1, current_timestep, denoised_latents
+                ).to("cpu", dtype=torch.float32)
 
                 self.network.multiplier = prompt_pair.multiplier
 
         with self.network:
             self.network.multiplier = prompt_pair.multiplier
-            target_latents = get_noise_pred(positive, target_class)
+            target_latents = get_noise_pred(
+                positive, target_class, 1, current_timestep, denoised_latents
+            ).to("cpu", dtype=torch.float32)
 
             # if self.logging_config.verbose:
             #     self.print("target_latents:", target_latents[0, 0, :5, :5])