From 2cb1b65309256763814a006a07b683c0f1013a30 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sun, 28 Jan 2024 22:18:46 +0800
Subject: [PATCH 01/12] Bump safetensors' version to 0.4.2

---
 requirements_versions.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_versions.txt b/requirements_versions.txt
index 2a922f28..5e30b5ea 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -19,7 +19,7 @@ piexif==1.1.3
 psutil==5.9.5
 pytorch_lightning==1.9.4
 resize-right==0.0.2
-safetensors==0.3.1
+safetensors==0.4.2
 scikit-image==0.21.0
 spandrel==0.1.6
 tomesd==0.1.3

From baaf39b6f92f24275a1b264a634514bac571dfae Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 29 Jan 2024 10:20:27 +0300
Subject: [PATCH 02/12] fix the typo -- thanks Cyberbeing

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index ef237396..941dff4b 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -94,7 +94,7 @@ class CFGDenoiser(torch.nn.Module):
 
     def pad_cond_uncond(self, cond, uncond):
         empty = shared.sd_model.cond_stage_model_empty_prompt
-        num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1]
+        num_repeats = (cond.shape[1] - uncond.shape[1]) // empty.shape[1]
 
         if num_repeats < 0:
             cond = pad_cond(cond, -num_repeats, empty)

From ec124607f47371a6cfd61a795f86a7f1cbd44651 Mon Sep 17 00:00:00 2001
From: wangshuai09 <391746016@qq.com>
Date: Sat, 27 Jan 2024 17:21:32 +0800
Subject: [PATCH 03/12] Add NPU Support

---
 modules/devices.py                            |  9 +++--
 modules/initialize.py                         |  6 +++-
 modules/npu_specific.py                       | 34 +++++++++++++++++++
 .../textual_inversion/textual_inversion.py    |  4 +++
 requirements.txt                              |  4 +++
 requirements_versions.txt                     |  4 +++
 webui.sh                                      |  4 +++
 7 files changed, 62 insertions(+), 3 deletions(-)
 create mode 100644 modules/npu_specific.py

diff --git a/modules/devices.py b/modules/devices.py
index ea1f712f..f1e56501 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,7 +3,7 @@ import contextlib
 from functools import lru_cache
 
 import torch
-from modules import errors, shared
+from modules import errors, shared, npu_specific
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -40,6 +40,9 @@ def get_optimal_device_name():
     if has_xpu():
         return xpu_specific.get_xpu_device_string()
 
+    if npu_specific.has_npu:
+        return npu_specific.get_npu_device_string()
+
     return "cpu"
 
 
@@ -67,6 +70,9 @@ def torch_gc():
     if has_xpu():
         xpu_specific.torch_xpu_gc()
 
+    if npu_specific.has_npu:
+        npu_specific.torch_npu_gc()
+
 
 def enable_tf32():
     if torch.cuda.is_available():
@@ -164,4 +170,3 @@ def first_time_calculation():
     x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
     conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
     conv2d(x)
-
diff --git a/modules/initialize.py b/modules/initialize.py
index ac95fc6f..3285cc3c 100644
--- a/modules/initialize.py
+++ b/modules/initialize.py
@@ -143,13 +143,17 @@ def initialize_rest(*, reload_script_modules=False):
         its optimization may be None because the list of optimizaers has neet been filled
         by that time, so we apply optimization again.
         """
+        from modules import devices
+        # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
+        if devices.npu_specific.has_npu:
+            import torch
+            torch.npu.set_device(0)
 
         shared.sd_model  # noqa: B018
 
         if sd_hijack.current_optimizer is None:
             sd_hijack.apply_optimizations()
 
-        from modules import devices
         devices.first_time_calculation()
     if not shared.cmd_opts.skip_load_model_at_start:
         Thread(target=load_model).start()
diff --git a/modules/npu_specific.py b/modules/npu_specific.py
new file mode 100644
index 00000000..d8aebf9c
--- /dev/null
+++ b/modules/npu_specific.py
@@ -0,0 +1,34 @@
+import importlib
+import torch
+
+from modules import shared
+
+
+def check_for_npu():
+    if importlib.util.find_spec("torch_npu") is None:
+        return False
+    import torch_npu
+    torch_npu.npu.set_device(0)
+
+    try:
+        # Will raise a RuntimeError if no NPU is found
+        _ = torch.npu.device_count()
+        return torch.npu.is_available()
+    except RuntimeError:
+        return False
+
+
+def get_npu_device_string():
+    if shared.cmd_opts.device_id is not None:
+        return f"npu:{shared.cmd_opts.device_id}"
+    return "npu:0"
+
+
+def torch_npu_gc():
+    # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
+    torch.npu.set_device(0)
+    with torch.npu.device(get_npu_device_string()):
+        torch.npu.empty_cache()
+
+
+has_npu = check_for_npu()
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 04dda585..9c062503 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -151,6 +151,10 @@ class EmbeddingDatabase:
         return embedding
 
     def get_expected_shape(self):
+        # workaround
+        if devices.npu_specific.has_npu:
+            import torch
+            torch.npu.set_device(0)
         vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1)
         return vec.shape[1]
 
diff --git a/requirements.txt b/requirements.txt
index 80b43845..4537402b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,8 @@ accelerate
 basicsr
 blendmodes
 clean-fid
+cloudpickle
+decorator
 einops
 fastapi>=0.90.1
 gfpgan
@@ -26,9 +28,11 @@ resize-right
 
 safetensors
 scikit-image>=0.19
+synr==0.5.0
 timm
 tomesd
 torch
 torchdiffeq
 torchsde
+tornado
 transformers==4.30.2
diff --git a/requirements_versions.txt b/requirements_versions.txt
index cb7403a9..95515b55 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -4,6 +4,8 @@ accelerate==0.21.0
 basicsr==1.4.2
 blendmodes==2022
 clean-fid==0.1.35
+cloudpickle==3.0.0
+decorator==5.1.1
 einops==0.4.1
 fastapi==0.94.0
 gfpgan==1.3.8
@@ -23,10 +25,12 @@ realesrgan==0.3.0
 resize-right==0.0.2
 safetensors==0.3.1
 scikit-image==0.21.0
+synr==0.5.0
 timm==0.9.2
 tomesd==0.1.3
 torch
 torchdiffeq==0.2.3
 torchsde==0.2.6
+tornado==6.4
 transformers==4.30.2
 httpx==0.24.1
diff --git a/webui.sh b/webui.sh
index cff43327..3f6e87fd 100755
--- a/webui.sh
+++ b/webui.sh
@@ -159,6 +159,10 @@ then
     if echo "$gpu_info" | grep -q "AMD" && [[ -z "${TORCH_COMMAND}" ]]
     then
         export TORCH_COMMAND="pip install torch==2.0.1+rocm5.4.2 torchvision==0.15.2+rocm5.4.2 --index-url https://download.pytorch.org/whl/rocm5.4.2"
+    elif echo "$gpu_info" | grep -q "Huawei" && [[ -z "${TORCH_COMMAND}" ]]
+    then
+        export TORCH_COMMAND="pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; pip install torch_npu"
+    
     fi
 fi
 

From 750dd6014a45397979cad42a74634451d0861581 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Mon, 29 Jan 2024 22:27:53 +0800
Subject: [PATCH 04/12] Fix potential bugs

---
 modules/devices.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index dfffaf24..60f7d6d7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -141,7 +141,12 @@ def manual_cast_forward(target_dtype):
             args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
             kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
 
-        org_dtype = torch_utils.get_param(self).dtype
+        org_dtype = target_dtype
+        for param in self.parameters():
+            if param.dtype != target_dtype:
+                org_dtype = param.dtype
+                break
+
         if org_dtype != target_dtype:
             self.to(target_dtype)
         result = self.org_forward(*args, **kwargs)
@@ -170,7 +175,7 @@ def manual_cast(target_dtype):
             continue
         applied = True
         org_forward = module_type.forward
-        if module_type == torch.nn.MultiheadAttention and has_xpu():
+        if module_type == torch.nn.MultiheadAttention:
             module_type.forward = manual_cast_forward(torch.float32)
         else:
             module_type.forward = manual_cast_forward(target_dtype)

From 6e7f0860f7ae4a0ce59f9416fb9b2f3bcab44f1d Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Mon, 29 Jan 2024 22:46:43 +0800
Subject: [PATCH 05/12] linting

---
 modules/devices.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/devices.py b/modules/devices.py
index 60f7d6d7..8f49f7a4 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -4,7 +4,6 @@ from functools import lru_cache
 
 import torch
 from modules import errors, shared
-from modules import torch_utils
 
 if sys.platform == "darwin":
     from modules import mac_specific

From d243e24f539d717b221992e894a5db5a321bf3cd Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Mon, 29 Jan 2024 22:49:45 +0800
Subject: [PATCH 06/12] Try to reverse the dtype checking mechanism

---
 modules/devices.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index 8f49f7a4..f9648e9a 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -4,6 +4,7 @@ from functools import lru_cache
 
 import torch
 from modules import errors, shared
+from modules import torch_utils
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -140,11 +141,7 @@ def manual_cast_forward(target_dtype):
             args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
             kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
 
-        org_dtype = target_dtype
-        for param in self.parameters():
-            if param.dtype != target_dtype:
-                org_dtype = param.dtype
-                break
+        org_dtype = torch_utils.get_param(self).dtype
 
         if org_dtype != target_dtype:
             self.to(target_dtype)

From f9ba7e648ad5bf7dbdf2b95fa207936179bf784e Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Mon, 29 Jan 2024 22:54:12 +0800
Subject: [PATCH 07/12] Revert "Try to reverse the dtype checking mechanism"

This reverts commit d243e24f539d717b221992e894a5db5a321bf3cd.
---
 modules/devices.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index f9648e9a..8f49f7a4 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -4,7 +4,6 @@ from functools import lru_cache
 
 import torch
 from modules import errors, shared
-from modules import torch_utils
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -141,7 +140,11 @@ def manual_cast_forward(target_dtype):
             args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
             kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
 
-        org_dtype = torch_utils.get_param(self).dtype
+        org_dtype = target_dtype
+        for param in self.parameters():
+            if param.dtype != target_dtype:
+                org_dtype = param.dtype
+                break
 
         if org_dtype != target_dtype:
             self.to(target_dtype)

From c4255d12f7531725e591160e1cfe47d7a2fc0f02 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Wed, 31 Jan 2024 04:36:11 +0900
Subject: [PATCH 08/12] add tooltip create_submit_box

---
 modules/ui_toprow.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/ui_toprow.py b/modules/ui_toprow.py
index fbe705be..457fbf52 100644
--- a/modules/ui_toprow.py
+++ b/modules/ui_toprow.py
@@ -96,9 +96,9 @@ class Toprow:
         with gr.Row(elem_id=f"{self.id_part}_generate_box", elem_classes=["generate-box"] + (["generate-box-compact"] if self.is_compact else []), render=not self.is_compact) as submit_box:
             self.submit_box = submit_box
 
-            self.interrupt = gr.Button('Interrupt', elem_id=f"{self.id_part}_interrupt", elem_classes="generate-box-interrupt")
-            self.skip = gr.Button('Skip', elem_id=f"{self.id_part}_skip", elem_classes="generate-box-skip")
-            self.submit = gr.Button('Generate', elem_id=f"{self.id_part}_generate", variant='primary')
+            self.interrupt = gr.Button('Interrupt', elem_id=f"{self.id_part}_interrupt", elem_classes="generate-box-interrupt", tooltip="End generation immediately or after completing current batch")
+            self.skip = gr.Button('Skip', elem_id=f"{self.id_part}_skip", elem_classes="generate-box-skip", tooltip="Stop generation of current batch and continues onto next batch")
+            self.submit = gr.Button('Generate', elem_id=f"{self.id_part}_generate", variant='primary', tooltip="Right click generate forever menu")
 
             self.skip.click(
                 fn=lambda: shared.state.skip(),

From cc3f604310458eed7d26456c1b3934d582283ffe Mon Sep 17 00:00:00 2001
From: wangshuai09 <391746016@qq.com>
Date: Wed, 31 Jan 2024 10:46:53 +0800
Subject: [PATCH 09/12] Update

---
 modules/devices.py                             | 7 +++++++
 modules/initialize.py                          | 5 +----
 modules/launch_utils.py                        | 8 ++++++++
 modules/npu_specific.py                        | 5 +----
 modules/textual_inversion/textual_inversion.py | 5 +----
 requirements.txt                               | 4 ----
 requirements_npu.txt                           | 4 ++++
 requirements_versions.txt                      | 4 ----
 8 files changed, 22 insertions(+), 20 deletions(-)
 create mode 100644 requirements_npu.txt

diff --git a/modules/devices.py b/modules/devices.py
index c737162a..28c0c54d 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -88,9 +88,16 @@ def torch_gc():
         xpu_specific.torch_xpu_gc()
 
     if npu_specific.has_npu:
+        torch_npu_set_device()
         npu_specific.torch_npu_gc()
 
 
+def torch_npu_set_device():
+    # Workaround for a bug in torch_npu; revert once it is fixed upstream. See https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
+    if npu_specific.has_npu:
+        torch.npu.set_device(0)  # NOTE(review): hard-coded to device 0; does not consult shared.cmd_opts.device_id
+
+
 def enable_tf32():
     if torch.cuda.is_available():
 
diff --git a/modules/initialize.py b/modules/initialize.py
index cc34fd6f..f7313ff4 100644
--- a/modules/initialize.py
+++ b/modules/initialize.py
@@ -143,10 +143,7 @@ def initialize_rest(*, reload_script_modules=False):
         by that time, so we apply optimization again.
         """
         from modules import devices
-        # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
-        if devices.npu_specific.has_npu:
-            import torch
-            torch.npu.set_device(0)
+        devices.torch_npu_set_device()
 
         shared.sd_model  # noqa: B018
 
diff --git a/modules/launch_utils.py b/modules/launch_utils.py
index 3ff4576a..107c72b0 100644
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -338,6 +338,7 @@ def prepare_environment():
             torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/")
             torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.0a0 intel-extension-for-pytorch==2.0.110+gitba7f6c1 --extra-index-url {torch_index_url}")
     requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")
+    requirements_file_for_npu = os.environ.get('REQS_FILE_FOR_NPU', "requirements_npu.txt")
 
     xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.23.post1')
     clip_package = os.environ.get('CLIP_PACKAGE', "https://github.com/openai/CLIP/archive/d50d76daa670286dd6cacf3bcd80b5e4823fc8e1.zip")
@@ -421,6 +422,13 @@ def prepare_environment():
         run_pip(f"install -r \"{requirements_file}\"", "requirements")
         startup_timer.record("install requirements")
 
+    if not os.path.isfile(requirements_file_for_npu):
+        requirements_file_for_npu = os.path.join(script_path, requirements_file_for_npu)
+
+    if "torch_npu" in torch_command and not requirements_met(requirements_file_for_npu):
+        run_pip(f"install -r \"{requirements_file_for_npu}\"", "requirements_for_npu")
+        startup_timer.record("install requirements_for_npu")
+
     if not args.skip_install:
         run_extensions_installers(settings_file=args.ui_settings_file)
 
diff --git a/modules/npu_specific.py b/modules/npu_specific.py
index d8aebf9c..94100691 100644
--- a/modules/npu_specific.py
+++ b/modules/npu_specific.py
@@ -8,11 +8,10 @@ def check_for_npu():
     if importlib.util.find_spec("torch_npu") is None:
         return False
     import torch_npu
-    torch_npu.npu.set_device(0)
 
     try:
         # Will raise a RuntimeError if no NPU is found
-        _ = torch.npu.device_count()
+        _ = torch_npu.npu.device_count()
         return torch.npu.is_available()
     except RuntimeError:
         return False
@@ -25,8 +24,6 @@ def get_npu_device_string():
 
 
 def torch_npu_gc():
-    # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
-    torch.npu.set_device(0)
     with torch.npu.device(get_npu_device_string()):
         torch.npu.empty_cache()
 
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index d16e3b9a..6d815c0b 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -150,10 +150,7 @@ class EmbeddingDatabase:
         return embedding
 
     def get_expected_shape(self):
-        # workaround
-        if devices.npu_specific.has_npu:
-            import torch
-            torch.npu.set_device(0)
+        devices.torch_npu_set_device()
         vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1)
         return vec.shape[1]
 
diff --git a/requirements.txt b/requirements.txt
index d1e4ede9..731a1be7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,8 +4,6 @@ accelerate
 
 blendmodes
 clean-fid
-cloudpickle
-decorator
 einops
 facexlib
 fastapi>=0.90.1
@@ -26,10 +24,8 @@ resize-right
 
 safetensors
 scikit-image>=0.19
-synr==0.5.0
 tomesd
 torch
 torchdiffeq
 torchsde
-tornado
 transformers==4.30.2
diff --git a/requirements_npu.txt b/requirements_npu.txt
new file mode 100644
index 00000000..5e6a4364
--- /dev/null
+++ b/requirements_npu.txt
@@ -0,0 +1,4 @@
+cloudpickle
+decorator
+synr==0.5.0
+tornado
diff --git a/requirements_versions.txt b/requirements_versions.txt
index 1c66cd8c..5e30b5ea 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -3,8 +3,6 @@ Pillow==9.5.0
 accelerate==0.21.0
 blendmodes==2022
 clean-fid==0.1.35
-cloudpickle==3.0.0
-decorator==5.1.1
 einops==0.4.1
 facexlib==0.3.0
 fastapi==0.94.0
@@ -23,12 +21,10 @@ pytorch_lightning==1.9.4
 resize-right==0.0.2
 safetensors==0.4.2
 scikit-image==0.21.0
-synr==0.5.0
 spandrel==0.1.6
 tomesd==0.1.3
 torch
 torchdiffeq==0.2.3
 torchsde==0.2.6
-tornado==6.4
 transformers==4.30.2
 httpx==0.24.1

From 74b214a92a2959948dcd05a78b7380e046163871 Mon Sep 17 00:00:00 2001
From: Cyberbeing <cyber.spamage@gmail.com>
Date: Mon, 29 Jan 2024 02:06:50 -0800
Subject: [PATCH 10/12] Fix potential autocast NaNs in image upscale

---
 modules/upscaler_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index afed8b40..b5e5a80c 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -6,7 +6,7 @@ import torch
 import tqdm
 from PIL import Image
 
-from modules import images, shared, torch_utils
+from modules import devices, images, shared, torch_utils
 
 logger = logging.getLogger(__name__)
 
@@ -44,7 +44,8 @@ def upscale_pil_patch(model, img: Image.Image) -> Image.Image:
     with torch.no_grad():
         tensor = pil_image_to_torch_bgr(img).unsqueeze(0)  # add batch dimension
         tensor = tensor.to(device=param.device, dtype=param.dtype)
-        return torch_bgr_to_pil_image(model(tensor))
+        with devices.without_autocast():
+            return torch_bgr_to_pil_image(model(tensor))
 
 
 def upscale_with_model(

From 67c38f9294607e4062cf770012a471e6602378cc Mon Sep 17 00:00:00 2001
From: lllyasviel <lyuminzhang@outlook.com>
Date: Wed, 31 Jan 2024 13:16:41 -0800
Subject: [PATCH 11/12] b

---
 ldm_patched/contrib/external.py               | 21 +++++++++
 .../modules/diffusionmodules/openaimodel.py   | 38 ++++++++++++++--
 .../ldm/modules/diffusionmodules/util.py      | 14 ++----
 ldm_patched/modules/controlnet.py             |  7 +++
 ldm_patched/modules/model_management.py       | 22 +++++++--
 ldm_patched/modules/ops.py                    | 19 ++++++++
 ldm_patched/modules/samplers.py               | 36 +++++++++++++--
 ldm_patched/modules/sd.py                     |  2 +-
 ldm_patched/modules/sd1_clip.py               | 30 +++++++++----
 ldm_patched/modules/supported_models_base.py  |  1 +
 ldm_patched/utils/path_utils.py               | 45 +++----------------
 11 files changed, 167 insertions(+), 68 deletions(-)

diff --git a/ldm_patched/contrib/external.py b/ldm_patched/contrib/external.py
index 927cd3f3..35f698ff 100644
--- a/ldm_patched/contrib/external.py
+++ b/ldm_patched/contrib/external.py
@@ -186,6 +186,26 @@ class ConditioningSetAreaPercentage:
             c.append(n)
         return (c, )
 
+class ConditioningSetAreaStrength:  # node that tags every conditioning entry with a 'strength' value
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"conditioning": ("CONDITIONING", ),
+                              "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                             }}
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "append"  # matches the method name below; presumably the entry point the node framework calls
+
+    CATEGORY = "conditioning"
+
+    def append(self, conditioning, strength):
+        c = []
+        for t in conditioning:
+            n = [t[0], t[1].copy()]  # copy the second element so the caller's entry is not mutated by the assignment below
+            n[1]['strength'] = strength
+            c.append(n)
+        return (c, )  # 1-tuple, matching the single entry in RETURN_TYPES
+
+
 class ConditioningSetMask:
     @classmethod
     def INPUT_TYPES(s):
@@ -1756,6 +1776,7 @@ NODE_CLASS_MAPPINGS = {
     "ConditioningConcat": ConditioningConcat,
     "ConditioningSetArea": ConditioningSetArea,
     "ConditioningSetAreaPercentage": ConditioningSetAreaPercentage,
+    "ConditioningSetAreaStrength": ConditioningSetAreaStrength,
     "ConditioningSetMask": ConditioningSetMask,
     "KSamplerAdvanced": KSamplerAdvanced,
     "SetLatentNoiseMask": SetLatentNoiseMask,
diff --git a/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py
index 4b695f76..ffd168af 100644
--- a/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py
+++ b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py
@@ -825,6 +825,7 @@ class UNetModel(nn.Module):
         transformer_options["original_shape"] = list(x.shape)
         transformer_options["transformer_index"] = 0
         transformer_patches = transformer_options.get("patches", {})
+        block_modifiers = transformer_options.get("block_modifiers", [])
 
         num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames)
         image_only_indicator = kwargs.get("image_only_indicator", self.default_image_only_indicator)
@@ -844,8 +845,16 @@ class UNetModel(nn.Module):
         h = x
         for id, module in enumerate(self.input_blocks):
             transformer_options["block"] = ("input", id)
+
+            for block_modifier in block_modifiers:
+                h = block_modifier(h, 'before', transformer_options)
+
             h = forward_timestep_embed(module, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator)
             h = apply_control(h, control, 'input')
+
+            for block_modifier in block_modifiers:
+                h = block_modifier(h, 'after', transformer_options)
+
             if "input_block_patch" in transformer_patches:
                 patch = transformer_patches["input_block_patch"]
                 for p in patch:
@@ -858,9 +867,15 @@ class UNetModel(nn.Module):
                     h = p(h, transformer_options)
 
         transformer_options["block"] = ("middle", 0)
+
+        for block_modifier in block_modifiers:
+            h = block_modifier(h, 'before', transformer_options)
+
         h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator)
         h = apply_control(h, control, 'middle')
 
+        for block_modifier in block_modifiers:
+            h = block_modifier(h, 'after', transformer_options)
 
         for id, module in enumerate(self.output_blocks):
             transformer_options["block"] = ("output", id)
@@ -878,9 +893,26 @@ class UNetModel(nn.Module):
                 output_shape = hs[-1].shape
             else:
                 output_shape = None
+
+            for block_modifier in block_modifiers:
+                h = block_modifier(h, 'before', transformer_options)
+
             h = forward_timestep_embed(module, h, emb, context, transformer_options, output_shape, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator)
-        h = h.type(x.dtype)
+
+            for block_modifier in block_modifiers:
+                h = block_modifier(h, 'after', transformer_options)
+
+        transformer_options["block"] = ("last", 0)
+
+        for block_modifier in block_modifiers:
+            h = block_modifier(h, 'before', transformer_options)
+
         if self.predict_codebook_ids:
-            return self.id_predictor(h)
+            h = self.id_predictor(h)
         else:
-            return self.out(h)
+            h = self.out(h)
+
+        for block_modifier in block_modifiers:
+            h = block_modifier(h, 'after', transformer_options)
+
+        return h.type(x.dtype)
diff --git a/ldm_patched/ldm/modules/diffusionmodules/util.py b/ldm_patched/ldm/modules/diffusionmodules/util.py
index e261e06a..eeef837e 100644
--- a/ldm_patched/ldm/modules/diffusionmodules/util.py
+++ b/ldm_patched/ldm/modules/diffusionmodules/util.py
@@ -225,19 +225,13 @@ class CheckpointFunction(torch.autograd.Function):
 
 
 def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False):
-    """
-    Create sinusoidal timestep embeddings.
-    :param timesteps: a 1-D Tensor of N indices, one per batch element.
-                      These may be fractional.
-    :param dim: the dimension of the output.
-    :param max_period: controls the minimum frequency of the embeddings.
-    :return: an [N x dim] Tensor of positional embeddings.
-    """
+    # Consistent with Kohya to reduce differences between model training and inference.
+
     if not repeat_only:
         half = dim // 2
         freqs = torch.exp(
-            -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half
-        )
+            -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
+        ).to(device=timesteps.device)
         args = timesteps[:, None].float() * freqs[None]
         embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
         if dim % 2:
diff --git a/ldm_patched/modules/controlnet.py b/ldm_patched/modules/controlnet.py
index 7e11497f..67d9c9ee 100644
--- a/ldm_patched/modules/controlnet.py
+++ b/ldm_patched/modules/controlnet.py
@@ -11,6 +11,9 @@ import ldm_patched.controlnet.cldm
 import ldm_patched.t2ia.adapter
 
 
+compute_controlnet_weighting = None
+
+
 def broadcast_image_to(tensor, target_batch_size, batched_number):
     current_batch_size = tensor.shape[0]
     #print(current_batch_size, target_batch_size)
@@ -114,6 +117,10 @@ class ControlBase:
                         x = x.to(output_dtype)
 
                 out[key].append(x)
+
+        if compute_controlnet_weighting is not None:
+            out = compute_controlnet_weighting(out, self)
+
         if control_prev is not None:
             for x in ['input', 'middle', 'output']:
                 o = out[x]
diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py
index 6f88579d..c9d9f52f 100644
--- a/ldm_patched/modules/model_management.py
+++ b/ldm_patched/modules/model_management.py
@@ -1,3 +1,4 @@
+import time
 import psutil
 from enum import Enum
 from ldm_patched.modules.args_parser import args
@@ -42,8 +43,6 @@ if args.directml is not None:
     else:
         directml_device = torch_directml.device(device_index)
     print("Using directml with device:", torch_directml.device_name(device_index))
-    # torch_directml.disable_tiled_resources(True)
-    lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default.
 
 try:
     import intel_extension_for_pytorch as ipex
@@ -128,6 +127,9 @@ try:
 except:
     OOM_EXCEPTION = Exception
 
+if directml_enabled:
+    OOM_EXCEPTION = Exception
+
 XFORMERS_VERSION = ""
 XFORMERS_ENABLED_VAE = True
 if args.disable_xformers:
@@ -376,6 +378,8 @@ def free_memory(memory_required, device, keep_loaded=[]):
 def load_models_gpu(models, memory_required=0):
     global vram_state
 
+    execution_start_time = time.perf_counter()
+
     inference_memory = minimum_inference_memory()
     extra_mem = max(inference_memory, memory_required)
 
@@ -390,7 +394,7 @@ def load_models_gpu(models, memory_required=0):
             models_already_loaded.append(loaded_model)
         else:
             if hasattr(x, "model"):
-                print(f"Requested to load {x.model.__class__.__name__}")
+                print(f"To load target model {x.model.__class__.__name__}")
             models_to_load.append(loaded_model)
 
     if len(models_to_load) == 0:
@@ -398,9 +402,14 @@ def load_models_gpu(models, memory_required=0):
         for d in devs:
             if d != torch.device("cpu"):
                 free_memory(extra_mem, d, models_already_loaded)
+
+        moving_time = time.perf_counter() - execution_start_time
+        if moving_time > 0.1:
+            print(f'Moving model(s) skipped. Freeing memory has taken {moving_time:.2f} seconds')
+
         return
 
-    print(f"Loading {len(models_to_load)} new model{'s' if len(models_to_load) > 1 else ''}")
+    print(f"Begin to load {len(models_to_load)} model{'s' if len(models_to_load) > 1 else ''}")
 
     total_memory_required = {}
     for loaded_model in models_to_load:
@@ -433,6 +442,11 @@ def load_models_gpu(models, memory_required=0):
 
         cur_loaded_model = loaded_model.model_load(lowvram_model_memory)
         current_loaded_models.insert(0, loaded_model)
+
+    moving_time = time.perf_counter() - execution_start_time
+    if moving_time > 0.1:
+        print(f'Moving model(s) has taken {moving_time:.2f} seconds')
+
     return
 
 
diff --git a/ldm_patched/modules/ops.py b/ldm_patched/modules/ops.py
index 2d7fa377..c9926fd2 100644
--- a/ldm_patched/modules/ops.py
+++ b/ldm_patched/modules/ops.py
@@ -1,5 +1,24 @@
 import torch
 import ldm_patched.modules.model_management
+import contextlib
+
+
+@contextlib.contextmanager
+def use_patched_ops(operations):
+    """Temporarily rebind torch.nn.Linear/Conv2d/Conv3d/GroupNorm/LayerNorm to the same-named attributes of `operations`."""
+    layer_names = ('Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm')
+    originals = {name: getattr(torch.nn, name) for name in layer_names}
+
+    try:
+        for name in layer_names:
+            setattr(torch.nn, name, getattr(operations, name))
+
+        yield
+    finally:
+        # Put the saved torch.nn attributes back on every exit path, including exceptions.
+        for name, saved in originals.items():
+            setattr(torch.nn, name, saved)
+
 
 def cast_bias_weight(s, input):
     bias = None
diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py
index 1f69d2b1..d6f36b21 100644
--- a/ldm_patched/modules/samplers.py
+++ b/ldm_patched/modules/samplers.py
@@ -126,6 +126,29 @@ def cond_cat(c_list):
 
     return out
 
+def compute_cond_mark(cond_or_uncond, sigmas):
+    # Repeat each entry of `cond_or_uncond` sigmas.shape[0] times, keeping the original order.
+    repeats = int(sigmas.shape[0])
+
+    flags = []
+    for flag in cond_or_uncond:
+        flags.extend([flag] * repeats)
+
+    return torch.Tensor(flags).to(sigmas)
+
+def compute_cond_indices(cond_or_uncond, sigmas):
+    # Entry i of `cond_or_uncond` owns positions [i * chunk_len, (i + 1) * chunk_len);
+    # positions go to the cond list when the entry equals 0, otherwise to the uncond list.
+    chunk_len = int(sigmas.shape[0])
+
+    cond_indices, uncond_indices = [], []
+    for chunk_no, flag in enumerate(cond_or_uncond):
+        first = chunk_no * chunk_len
+        target = cond_indices if flag == 0 else uncond_indices
+        target.extend(range(first, first + chunk_len))
+
+    return cond_indices, uncond_indices
+
 def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
     out_cond = torch.zeros_like(x_in)
     out_count = torch.ones_like(x_in) * 1e-37
@@ -193,9 +216,6 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
         c = cond_cat(c)
         timestep_ = torch.cat([timestep] * batch_chunks)
 
-        if control is not None:
-            c['control'] = control.get_control(input_x, timestep_, c, len(cond_or_uncond))
-
         transformer_options = {}
         if 'transformer_options' in model_options:
             transformer_options = model_options['transformer_options'].copy()
@@ -214,8 +234,18 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
         transformer_options["cond_or_uncond"] = cond_or_uncond[:]
         transformer_options["sigmas"] = timestep
 
+        transformer_options["cond_mark"] = compute_cond_mark(cond_or_uncond=cond_or_uncond, sigmas=timestep)
+        transformer_options["cond_indices"], transformer_options["uncond_indices"] = compute_cond_indices(cond_or_uncond=cond_or_uncond, sigmas=timestep)
+
         c['transformer_options'] = transformer_options
 
+        if control is not None:
+            p = control
+            while p is not None:
+                p.transformer_options = transformer_options
+                p = p.previous_controlnet
+            c['control'] = control.get_control(input_x, timestep_, c, len(cond_or_uncond))
+
         if 'model_function_wrapper' in model_options:
             output = model_options['model_function_wrapper'](model.apply_model, {"input": input_x, "timestep": timestep_, "c": c, "cond_or_uncond": cond_or_uncond}).chunk(batch_chunks)
         else:
diff --git a/ldm_patched/modules/sd.py b/ldm_patched/modules/sd.py
index e197c39c..a8413307 100644
--- a/ldm_patched/modules/sd.py
+++ b/ldm_patched/modules/sd.py
@@ -462,7 +462,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
         model.load_model_weights(sd, "model.diffusion_model.")
 
     if output_vae:
-        vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True)
+        vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True)
         vae_sd = model_config.process_vae_state_dict(vae_sd)
         vae = VAE(sd=vae_sd)
 
diff --git a/ldm_patched/modules/sd1_clip.py b/ldm_patched/modules/sd1_clip.py
index 3727fb48..a1cdec2e 100644
--- a/ldm_patched/modules/sd1_clip.py
+++ b/ldm_patched/modules/sd1_clip.py
@@ -8,6 +8,7 @@ import zipfile
 from . import model_management
 import ldm_patched.modules.clip_model
 import json
+from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils
 
 def gen_empty_tokens(special_tokens, length):
     start_token = special_tokens.get("start", None)
@@ -74,11 +75,17 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         if textmodel_json_config is None:
             textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json")
 
-        with open(textmodel_json_config) as f:
-            config = json.load(f)
+        config = CLIPTextConfig.from_json_file(textmodel_json_config)
+        self.num_layers = config.num_hidden_layers
 
-        self.transformer = model_class(config, dtype, device, ldm_patched.modules.ops.manual_cast)
-        self.num_layers = self.transformer.num_layers
+        with ldm_patched.modules.ops.use_patched_ops(ldm_patched.modules.ops.manual_cast):
+            with modeling_utils.no_init_weights():
+                self.transformer = CLIPTextModel(config)
+
+        if dtype is not None:
+            self.transformer.to(dtype)
+
+        self.transformer.text_model.embeddings.to(torch.float32)
 
         self.max_length = max_length
         if freeze:
@@ -169,16 +176,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
                     if tokens[x, y] == max_token:
                         break
 
-        outputs = self.transformer(tokens, attention_mask, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)
+        outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask,
+                                   output_hidden_states=self.layer == "hidden")
         self.transformer.set_input_embeddings(backup_embeds)
 
         if self.layer == "last":
-            z = outputs[0]
+            z = outputs.last_hidden_state
+        elif self.layer == "pooled":
+            z = outputs.pooler_output[:, None, :]
         else:
-            z = outputs[1]
+            z = outputs.hidden_states[self.layer_idx]
+            if self.layer_norm_hidden_state:
+                z = self.transformer.text_model.final_layer_norm(z)
 
-        if outputs[2] is not None:
-            pooled_output = outputs[2].float()
+        if hasattr(outputs, "pooler_output"):
+            pooled_output = outputs.pooler_output.float()
         else:
             pooled_output = None
 
diff --git a/ldm_patched/modules/supported_models_base.py b/ldm_patched/modules/supported_models_base.py
index 5baf4bca..58535a9f 100644
--- a/ldm_patched/modules/supported_models_base.py
+++ b/ldm_patched/modules/supported_models_base.py
@@ -21,6 +21,7 @@ class BASE:
     noise_aug_config = None
     sampling_settings = {}
     latent_format = latent_formats.LatentFormat
+    vae_key_prefix = ["first_stage_model."]
 
     manual_cast_dtype = None
 
diff --git a/ldm_patched/utils/path_utils.py b/ldm_patched/utils/path_utils.py
index 6cae149b..af96b523 100644
--- a/ldm_patched/utils/path_utils.py
+++ b/ldm_patched/utils/path_utils.py
@@ -5,47 +5,16 @@ supported_pt_extensions = set(['.ckpt', '.pt', '.bin', '.pth', '.safetensors'])
 
 folder_names_and_paths = {}
 
-base_path = os.getcwd()
-models_dir = os.path.join(base_path, "models")
-folder_names_and_paths["checkpoints"] = ([os.path.join(models_dir, "checkpoints")], supported_pt_extensions)
-folder_names_and_paths["configs"] = ([os.path.join(models_dir, "configs")], [".yaml"])
-
-folder_names_and_paths["loras"] = ([os.path.join(models_dir, "loras")], supported_pt_extensions)
-folder_names_and_paths["vae"] = ([os.path.join(models_dir, "vae")], supported_pt_extensions)
-folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_pt_extensions)
-folder_names_and_paths["unet"] = ([os.path.join(models_dir, "unet")], supported_pt_extensions)
-folder_names_and_paths["clip_vision"] = ([os.path.join(models_dir, "clip_vision")], supported_pt_extensions)
-folder_names_and_paths["style_models"] = ([os.path.join(models_dir, "style_models")], supported_pt_extensions)
-folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")], supported_pt_extensions)
-folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"])
-folder_names_and_paths["vae_approx"] = ([os.path.join(models_dir, "vae_approx")], supported_pt_extensions)
-
-folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions)
-folder_names_and_paths["gligen"] = ([os.path.join(models_dir, "gligen")], supported_pt_extensions)
-
-folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions)
-
-folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], [])
-
-folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions)
-
-folder_names_and_paths["photomaker"] = ([os.path.join(models_dir, "photomaker")], supported_pt_extensions)
-
-folder_names_and_paths["classifiers"] = ([os.path.join(models_dir, "classifiers")], {""})
-
-output_directory = os.path.join(os.getcwd(), "output")
-temp_directory = os.path.join(os.getcwd(), "temp")
-input_directory = os.path.join(os.getcwd(), "input")
-user_directory = os.path.join(os.getcwd(), "user")
+# Will be assigned by modules.paths
+base_path = None
+models_dir = None
+output_directory = None
+temp_directory = None
+input_directory = None
+user_directory = None
 
 filename_list_cache = {}
 
-if not os.path.exists(input_directory):
-    try:
-        pass  # os.makedirs(input_directory)
-    except:
-        print("Failed to create input directory")
-
 def set_output_directory(output_dir):
     global output_directory
     output_directory = output_dir

From dbd002f930fcb97e4836b938afbc5033e9392573 Mon Sep 17 00:00:00 2001
From: lllyasviel <lyuminzhang@outlook.com>
Date: Wed, 31 Jan 2024 13:22:40 -0800
Subject: [PATCH 12/12] Update devices.py

---
 modules/devices.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index 62dc9f42..7c09d1f4 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -38,15 +38,9 @@ def get_device_for(task):
 def torch_gc():
     model_management.soft_empty_cache()
 
-    if npu_specific.has_npu:
-        torch_npu_set_device()
-        npu_specific.torch_npu_gc()
-
 
 def torch_npu_set_device():
-    # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue
-    if npu_specific.has_npu:
-        torch.npu.set_device(0)
+    return
 
 
 def enable_tf32():