Add --cuda-stream optimization

See the README for more details.
2026-04-30 19:21:21 +00:00 · 2024-02-24 14:00:48 -08:00
parent 0f09d98814
commit 434ca2169f
9 changed files with 63 additions and 73 deletions
--- a/ldm_patched/modules/controlnet.py
+++ b/ldm_patched/modules/controlnet.py
@@ -14,7 +14,7 @@ import ldm_patched.modules.ops
 import ldm_patched.controlnet.cldm
 import ldm_patched.t2ia.adapter

-from ldm_patched.modules.ops import main_thread_worker
+from ldm_patched.modules.ops import main_stream_worker


 def broadcast_image_to(tensor, target_batch_size, batched_number):
@@ -306,7 +306,7 @@ class ControlLoraOps:

        def forward(self, input):
            weight, bias, signal = ldm_patched.modules.ops.cast_bias_weight(self, input)
-            with main_thread_worker(weight, bias, signal):
+            with main_stream_worker(weight, bias, signal):
                if self.up is not None:
                    return torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
                else:
@@ -347,7 +347,7 @@ class ControlLoraOps:

        def forward(self, input):
            weight, bias, signal = ldm_patched.modules.ops.cast_bias_weight(self, input)
-            with main_thread_worker(weight, bias, signal):
+            with main_stream_worker(weight, bias, signal):
                if self.up is not None:
                    return torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
                else: