Allow short and long caption combinations like from the new captioning system. Merge the network into the model before inference and re-extract it when done. Doubles inference speed on locon models during inference. Allow splitting a batch into individual components and running them through alone. Basically gradient accumulation with a single batch size.

2026-04-28 02:01:29 +00:00 · 2023-10-24 16:02:07 -06:00
parent 73c8b50975
commit 002279cec3
9 changed files with 315 additions and 115 deletions
--- a/toolkit/stable_diffusion_model.py
+++ b/toolkit/stable_diffusion_model.py
@@ -62,6 +62,7 @@ class BlankNetwork:
        self.multiplier = 1.0
        self.is_active = True
        self.is_normalizing = False
+        self.is_merged_in = False

    def apply_stored_normalizer(self, target_normalize_scaler: float = 1.0):
        pass
@@ -267,10 +268,18 @@ class StableDiffusion:

    @torch.no_grad()
    def generate_images(self, image_configs: List[GenerateImageConfig], sampler=None):
+        merge_multiplier = 1.0
        # sample_folder = os.path.join(self.save_root, 'samples')
        if self.network is not None:
            self.network.eval()
            network = self.network
+            # check if we have the same network weight for all samples. If we do, we can merge in
+            # the network to drastically speed up inference
+            unique_network_weights = set([x.network_multiplier for x in image_configs])
+            if len(unique_network_weights) == 1:
+                can_merge_in = True
+                merge_multiplier = unique_network_weights.pop()
+                network.merge_in(merge_weight=merge_multiplier)
        else:
            network = BlankNetwork()

@@ -462,6 +471,9 @@ class StableDiffusion:
            self.network.train()
            self.network.multiplier = start_multiplier
            self.network.is_normalizing = was_network_normalizing
+
+        if network.is_merged_in:
+            network.merge_out(merge_multiplier)
        # self.tokenizer.to(original_device_dict['tokenizer'])

    def get_latent_noise(