From 2cb1b65309256763814a006a07b683c0f1013a30 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Sun, 28 Jan 2024 22:18:46 +0800 Subject: [PATCH 01/12] Bump safetensors' version to 0.4.2 --- requirements_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_versions.txt b/requirements_versions.txt index 2a922f28..5e30b5ea 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -19,7 +19,7 @@ piexif==1.1.3 psutil==5.9.5 pytorch_lightning==1.9.4 resize-right==0.0.2 -safetensors==0.3.1 +safetensors==0.4.2 scikit-image==0.21.0 spandrel==0.1.6 tomesd==0.1.3 From baaf39b6f92f24275a1b264a634514bac571dfae Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 29 Jan 2024 10:20:27 +0300 Subject: [PATCH 02/12] fix the typo -- thanks Cyberbeing --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index ef237396..941dff4b 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -94,7 +94,7 @@ class CFGDenoiser(torch.nn.Module): def pad_cond_uncond(self, cond, uncond): empty = shared.sd_model.cond_stage_model_empty_prompt - num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1] + num_repeats = (cond.shape[1] - uncond.shape[1]) // empty.shape[1] if num_repeats < 0: cond = pad_cond(cond, -num_repeats, empty) From ec124607f47371a6cfd61a795f86a7f1cbd44651 Mon Sep 17 00:00:00 2001 From: wangshuai09 <391746016@qq.com> Date: Sat, 27 Jan 2024 17:21:32 +0800 Subject: [PATCH 03/12] Add NPU Support --- modules/devices.py | 9 +++-- modules/initialize.py | 6 +++- modules/npu_specific.py | 34 +++++++++++++++++++ .../textual_inversion/textual_inversion.py | 4 +++ requirements.txt | 4 +++ requirements_versions.txt | 4 +++ webui.sh | 4 +++ 7 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 modules/npu_specific.py diff --git a/modules/devices.py b/modules/devices.py index ea1f712f..f1e56501 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,7 +3,7 @@ import contextlib from functools import lru_cache import torch -from modules import errors, shared +from modules import errors, shared, npu_specific if sys.platform == "darwin": from modules import mac_specific @@ -40,6 +40,9 @@ def get_optimal_device_name(): if has_xpu(): return xpu_specific.get_xpu_device_string() + if npu_specific.has_npu: + return npu_specific.get_npu_device_string() + return "cpu" @@ -67,6 +70,9 @@ def torch_gc(): if has_xpu(): xpu_specific.torch_xpu_gc() + if npu_specific.has_npu: + npu_specific.torch_npu_gc() + def enable_tf32(): if torch.cuda.is_available(): @@ -164,4 +170,3 @@ def first_time_calculation(): x = torch.zeros((1, 1, 3, 3)).to(device, dtype) conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype) conv2d(x) - diff --git a/modules/initialize.py b/modules/initialize.py index ac95fc6f..3285cc3c 100644 --- a/modules/initialize.py +++ b/modules/initialize.py @@ -143,13 +143,17 @@ def initialize_rest(*, reload_script_modules=False): its optimization may be None because the list of optimizaers has neet been filled by that time, so we apply optimization again. """ + from modules import devices + # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue + if devices.npu_specific.has_npu: + import torch + torch.npu.set_device(0) shared.sd_model # noqa: B018 if sd_hijack.current_optimizer is None: sd_hijack.apply_optimizations() - from modules import devices devices.first_time_calculation() if not shared.cmd_opts.skip_load_model_at_start: Thread(target=load_model).start() diff --git a/modules/npu_specific.py b/modules/npu_specific.py new file mode 100644 index 00000000..d8aebf9c --- /dev/null +++ b/modules/npu_specific.py @@ -0,0 +1,34 @@ +import importlib +import torch + +from modules import shared + + +def check_for_npu(): + if importlib.util.find_spec("torch_npu") is None: + return False + import torch_npu + torch_npu.npu.set_device(0) + + try: + # Will raise a RuntimeError if no NPU is found + _ = torch.npu.device_count() + return torch.npu.is_available() + except RuntimeError: + return False + + +def get_npu_device_string(): + if shared.cmd_opts.device_id is not None: + return f"npu:{shared.cmd_opts.device_id}" + return "npu:0" + + +def torch_npu_gc(): + # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue + torch.npu.set_device(0) + with torch.npu.device(get_npu_device_string()): + torch.npu.empty_cache() + + +has_npu = check_for_npu() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 04dda585..9c062503 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -151,6 +151,10 @@ class EmbeddingDatabase: return embedding def get_expected_shape(self): + # workaround + if devices.npu_specific.has_npu: + import torch + torch.npu.set_device(0) vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1) return vec.shape[1] diff --git a/requirements.txt b/requirements.txt index 80b43845..4537402b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,8 @@ accelerate basicsr blendmodes clean-fid +cloudpickle +decorator einops fastapi>=0.90.1 gfpgan @@ -26,9 +28,11 @@ resize-right safetensors scikit-image>=0.19 +synr==0.5.0 timm tomesd torch torchdiffeq torchsde +tornado transformers==4.30.2 diff --git a/requirements_versions.txt b/requirements_versions.txt index cb7403a9..95515b55 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -4,6 +4,8 @@ accelerate==0.21.0 basicsr==1.4.2 blendmodes==2022 clean-fid==0.1.35 +cloudpickle==3.0.0 +decorator==5.1.1 einops==0.4.1 fastapi==0.94.0 gfpgan==1.3.8 @@ -23,10 +25,12 @@ realesrgan==0.3.0 resize-right==0.0.2 safetensors==0.3.1 scikit-image==0.21.0 +synr==0.5.0 timm==0.9.2 tomesd==0.1.3 torch torchdiffeq==0.2.3 torchsde==0.2.6 +tornado==6.4 transformers==4.30.2 httpx==0.24.1 diff --git a/webui.sh b/webui.sh index cff43327..3f6e87fd 100755 --- a/webui.sh +++ b/webui.sh @@ -159,6 +159,10 @@ then if echo "$gpu_info" | grep -q "AMD" && [[ -z "${TORCH_COMMAND}" ]] then export TORCH_COMMAND="pip install torch==2.0.1+rocm5.4.2 torchvision==0.15.2+rocm5.4.2 --index-url https://download.pytorch.org/whl/rocm5.4.2" + elif echo "$gpu_info" | grep -q "Huawei" && [[ -z "${TORCH_COMMAND}" ]] + then + export TORCH_COMMAND="pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; pip install torch_npu" + fi fi From 750dd6014a45397979cad42a74634451d0861581 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:27:53 +0800 Subject: [PATCH 04/12] Fix potential bugs --- modules/devices.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index dfffaf24..60f7d6d7 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -141,7 +141,12 @@ def manual_cast_forward(target_dtype): args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args] kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()} - org_dtype = torch_utils.get_param(self).dtype + org_dtype = target_dtype + for param in self.parameters(): + if param.dtype != target_dtype: + org_dtype = param.dtype + break + if org_dtype != target_dtype: self.to(target_dtype) result = self.org_forward(*args, **kwargs) @@ -170,7 +175,7 @@ def manual_cast(target_dtype): continue applied = True org_forward = module_type.forward - if module_type == torch.nn.MultiheadAttention and has_xpu(): + if module_type == torch.nn.MultiheadAttention: module_type.forward = manual_cast_forward(torch.float32) else: module_type.forward = manual_cast_forward(target_dtype) From 6e7f0860f7ae4a0ce59f9416fb9b2f3bcab44f1d Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:46:43 +0800 Subject: [PATCH 05/12] linting --- modules/devices.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/devices.py b/modules/devices.py index 60f7d6d7..8f49f7a4 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -4,7 +4,6 @@ from functools import lru_cache import torch from modules import errors, shared -from modules import torch_utils if sys.platform == "darwin": from modules import mac_specific From d243e24f539d717b221992e894a5db5a321bf3cd Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:49:45 +0800 Subject: [PATCH 06/12] Try to reverse the dtype checking mechanism --- modules/devices.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index 8f49f7a4..f9648e9a 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -4,6 +4,7 @@ from functools import lru_cache import torch from modules import errors, shared +from modules import torch_utils if sys.platform == "darwin": from modules import mac_specific @@ -140,11 +141,7 @@ def manual_cast_forward(target_dtype): args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args] kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()} - org_dtype = target_dtype - for param in self.parameters(): - if param.dtype != target_dtype: - org_dtype = param.dtype - break + org_dtype = torch_utils.get_param(self).dtype if org_dtype != target_dtype: self.to(target_dtype) From f9ba7e648ad5bf7dbdf2b95fa207936179bf784e Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:54:12 +0800 Subject: [PATCH 07/12] Revert "Try to reverse the dtype checking mechanism" This reverts commit d243e24f539d717b221992e894a5db5a321bf3cd. --- modules/devices.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index f9648e9a..8f49f7a4 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -4,7 +4,6 @@ from functools import lru_cache import torch from modules import errors, shared -from modules import torch_utils if sys.platform == "darwin": from modules import mac_specific @@ -141,7 +140,11 @@ def manual_cast_forward(target_dtype): args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args] kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()} - org_dtype = torch_utils.get_param(self).dtype + org_dtype = target_dtype + for param in self.parameters(): + if param.dtype != target_dtype: + org_dtype = param.dtype + break if org_dtype != target_dtype: self.to(target_dtype) From c4255d12f7531725e591160e1cfe47d7a2fc0f02 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 31 Jan 2024 04:36:11 +0900 Subject: [PATCH 08/12] add tooltip create_submit_box --- modules/ui_toprow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui_toprow.py b/modules/ui_toprow.py index fbe705be..457fbf52 100644 --- a/modules/ui_toprow.py +++ b/modules/ui_toprow.py @@ -96,9 +96,9 @@ class Toprow: with gr.Row(elem_id=f"{self.id_part}_generate_box", elem_classes=["generate-box"] + (["generate-box-compact"] if self.is_compact else []), render=not self.is_compact) as submit_box: self.submit_box = submit_box - self.interrupt = gr.Button('Interrupt', elem_id=f"{self.id_part}_interrupt", elem_classes="generate-box-interrupt") - self.skip = gr.Button('Skip', elem_id=f"{self.id_part}_skip", elem_classes="generate-box-skip") - self.submit = gr.Button('Generate', elem_id=f"{self.id_part}_generate", variant='primary') + self.interrupt = gr.Button('Interrupt', elem_id=f"{self.id_part}_interrupt", elem_classes="generate-box-interrupt", tooltip="End generation immediately or after completing current batch") + self.skip = gr.Button('Skip', elem_id=f"{self.id_part}_skip", elem_classes="generate-box-skip", tooltip="Stop generation of current batch and continues onto next batch") + self.submit = gr.Button('Generate', elem_id=f"{self.id_part}_generate", variant='primary', tooltip="Right click generate forever menu") self.skip.click( fn=lambda: shared.state.skip(), From cc3f604310458eed7d26456c1b3934d582283ffe Mon Sep 17 00:00:00 2001 From: wangshuai09 <391746016@qq.com> Date: Wed, 31 Jan 2024 10:46:53 +0800 Subject: [PATCH 09/12] Update --- modules/devices.py | 7 +++++++ modules/initialize.py | 5 +---- modules/launch_utils.py | 8 ++++++++ modules/npu_specific.py | 5 +---- modules/textual_inversion/textual_inversion.py | 5 +---- requirements.txt | 4 ---- requirements_npu.txt | 4 ++++ requirements_versions.txt | 4 ---- 8 files changed, 22 insertions(+), 20 deletions(-) create mode 100644 requirements_npu.txt diff --git a/modules/devices.py b/modules/devices.py index c737162a..28c0c54d 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -88,9 +88,16 @@ def torch_gc(): xpu_specific.torch_xpu_gc() if npu_specific.has_npu: + torch_npu_set_device() npu_specific.torch_npu_gc() +def torch_npu_set_device(): + # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue + if npu_specific.has_npu: + torch.npu.set_device(0) + + def enable_tf32(): if torch.cuda.is_available(): diff --git a/modules/initialize.py b/modules/initialize.py index cc34fd6f..f7313ff4 100644 --- a/modules/initialize.py +++ b/modules/initialize.py @@ -143,10 +143,7 @@ def initialize_rest(*, reload_script_modules=False): by that time, so we apply optimization again. """ from modules import devices - # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue - if devices.npu_specific.has_npu: - import torch - torch.npu.set_device(0) + devices.torch_npu_set_device() shared.sd_model # noqa: B018 diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 3ff4576a..107c72b0 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -338,6 +338,7 @@ def prepare_environment(): torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/") torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.0a0 intel-extension-for-pytorch==2.0.110+gitba7f6c1 --extra-index-url {torch_index_url}") requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt") + requirements_file_for_npu = os.environ.get('REQS_FILE_FOR_NPU', "requirements_npu.txt") xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.23.post1') clip_package = os.environ.get('CLIP_PACKAGE', "https://github.com/openai/CLIP/archive/d50d76daa670286dd6cacf3bcd80b5e4823fc8e1.zip") @@ -421,6 +422,13 @@ def prepare_environment(): run_pip(f"install -r \"{requirements_file}\"", "requirements") startup_timer.record("install requirements") + if not os.path.isfile(requirements_file_for_npu): + requirements_file_for_npu = os.path.join(script_path, requirements_file_for_npu) + + if "torch_npu" in torch_command and not requirements_met(requirements_file_for_npu): + run_pip(f"install -r \"{requirements_file_for_npu}\"", "requirements_for_npu") + startup_timer.record("install requirements_for_npu") + if not args.skip_install: run_extensions_installers(settings_file=args.ui_settings_file) diff --git a/modules/npu_specific.py b/modules/npu_specific.py index d8aebf9c..94100691 100644 --- a/modules/npu_specific.py +++ b/modules/npu_specific.py @@ -8,11 +8,10 @@ def check_for_npu(): if importlib.util.find_spec("torch_npu") is None: return False import torch_npu - torch_npu.npu.set_device(0) try: # Will raise a RuntimeError if no NPU is found - _ = torch.npu.device_count() + _ = torch_npu.npu.device_count() return torch.npu.is_available() except RuntimeError: return False @@ -25,8 +24,6 @@ def get_npu_device_string(): def torch_npu_gc(): - # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue - torch.npu.set_device(0) with torch.npu.device(get_npu_device_string()): torch.npu.empty_cache() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d16e3b9a..6d815c0b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -150,10 +150,7 @@ class EmbeddingDatabase: return embedding def get_expected_shape(self): - # workaround - if devices.npu_specific.has_npu: - import torch - torch.npu.set_device(0) + devices.torch_npu_set_device() vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1) return vec.shape[1] diff --git a/requirements.txt b/requirements.txt index d1e4ede9..731a1be7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,6 @@ accelerate blendmodes clean-fid -cloudpickle -decorator einops facexlib fastapi>=0.90.1 @@ -26,10 +24,8 @@ resize-right safetensors scikit-image>=0.19 -synr==0.5.0 tomesd torch torchdiffeq torchsde -tornado transformers==4.30.2 diff --git a/requirements_npu.txt b/requirements_npu.txt new file mode 100644 index 00000000..5e6a4364 --- /dev/null +++ b/requirements_npu.txt @@ -0,0 +1,4 @@ +cloudpickle +decorator +synr==0.5.0 +tornado diff --git a/requirements_versions.txt b/requirements_versions.txt index 1c66cd8c..5e30b5ea 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -3,8 +3,6 @@ Pillow==9.5.0 accelerate==0.21.0 blendmodes==2022 clean-fid==0.1.35 -cloudpickle==3.0.0 -decorator==5.1.1 einops==0.4.1 facexlib==0.3.0 fastapi==0.94.0 @@ -23,12 +21,10 @@ pytorch_lightning==1.9.4 resize-right==0.0.2 safetensors==0.4.2 scikit-image==0.21.0 -synr==0.5.0 spandrel==0.1.6 tomesd==0.1.3 torch torchdiffeq==0.2.3 torchsde==0.2.6 -tornado==6.4 transformers==4.30.2 httpx==0.24.1 From 74b214a92a2959948dcd05a78b7380e046163871 Mon Sep 17 00:00:00 2001 From: Cyberbeing Date: Mon, 29 Jan 2024 02:06:50 -0800 Subject: [PATCH 10/12] Fix potential autocast NaNs in image upscale --- modules/upscaler_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py index afed8b40..b5e5a80c 100644 --- a/modules/upscaler_utils.py +++ b/modules/upscaler_utils.py @@ -6,7 +6,7 @@ import torch import tqdm from PIL import Image -from modules import images, shared, torch_utils +from modules import devices, images, shared, torch_utils logger = logging.getLogger(__name__) @@ -44,7 +44,8 @@ def upscale_pil_patch(model, img: Image.Image) -> Image.Image: with torch.no_grad(): tensor = pil_image_to_torch_bgr(img).unsqueeze(0) # add batch dimension tensor = tensor.to(device=param.device, dtype=param.dtype) - return torch_bgr_to_pil_image(model(tensor)) + with devices.without_autocast(): + return torch_bgr_to_pil_image(model(tensor)) def upscale_with_model( From 67c38f9294607e4062cf770012a471e6602378cc Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Wed, 31 Jan 2024 13:16:41 -0800 Subject: [PATCH 11/12] b --- ldm_patched/contrib/external.py | 21 +++++++++ .../modules/diffusionmodules/openaimodel.py | 38 ++++++++++++++-- .../ldm/modules/diffusionmodules/util.py | 14 ++---- ldm_patched/modules/controlnet.py | 7 +++ ldm_patched/modules/model_management.py | 22 +++++++-- ldm_patched/modules/ops.py | 19 ++++++++ ldm_patched/modules/samplers.py | 36 +++++++++++++-- ldm_patched/modules/sd.py | 2 +- ldm_patched/modules/sd1_clip.py | 30 +++++++++---- ldm_patched/modules/supported_models_base.py | 1 + ldm_patched/utils/path_utils.py | 45 +++---------------- 11 files changed, 167 insertions(+), 68 deletions(-) diff --git a/ldm_patched/contrib/external.py b/ldm_patched/contrib/external.py index 927cd3f3..35f698ff 100644 --- a/ldm_patched/contrib/external.py +++ b/ldm_patched/contrib/external.py @@ -186,6 +186,26 @@ class ConditioningSetAreaPercentage: c.append(n) return (c, ) +class ConditioningSetAreaStrength: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, strength): + c = [] + for t in conditioning: + n = [t[0], t[1].copy()] + n[1]['strength'] = strength + c.append(n) + return (c, ) + + class ConditioningSetMask: @classmethod def INPUT_TYPES(s): @@ -1756,6 +1776,7 @@ NODE_CLASS_MAPPINGS = { "ConditioningConcat": ConditioningConcat, "ConditioningSetArea": ConditioningSetArea, "ConditioningSetAreaPercentage": ConditioningSetAreaPercentage, + "ConditioningSetAreaStrength": ConditioningSetAreaStrength, "ConditioningSetMask": ConditioningSetMask, "KSamplerAdvanced": KSamplerAdvanced, "SetLatentNoiseMask": SetLatentNoiseMask, diff --git a/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py index 4b695f76..ffd168af 100644 --- a/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py +++ b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py @@ -825,6 +825,7 @@ class UNetModel(nn.Module): transformer_options["original_shape"] = list(x.shape) transformer_options["transformer_index"] = 0 transformer_patches = transformer_options.get("patches", {}) + block_modifiers = transformer_options.get("block_modifiers", []) num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames) image_only_indicator = kwargs.get("image_only_indicator", self.default_image_only_indicator) @@ -844,8 +845,16 @@ class UNetModel(nn.Module): h = x for id, module in enumerate(self.input_blocks): transformer_options["block"] = ("input", id) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + h = forward_timestep_embed(module, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) h = apply_control(h, control, 'input') + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + if "input_block_patch" in transformer_patches: patch = transformer_patches["input_block_patch"] for p in patch: @@ -858,9 +867,15 @@ class UNetModel(nn.Module): h = p(h, transformer_options) transformer_options["block"] = ("middle", 0) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) h = apply_control(h, control, 'middle') + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) for id, module in enumerate(self.output_blocks): transformer_options["block"] = ("output", id) @@ -878,9 +893,26 @@ class UNetModel(nn.Module): output_shape = hs[-1].shape else: output_shape = None + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + h = forward_timestep_embed(module, h, emb, context, transformer_options, output_shape, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) - h = h.type(x.dtype) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + transformer_options["block"] = ("last", 0) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + if self.predict_codebook_ids: - return self.id_predictor(h) + h = self.id_predictor(h) else: - return self.out(h) + h = self.out(h) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + return h.type(x.dtype) diff --git a/ldm_patched/ldm/modules/diffusionmodules/util.py b/ldm_patched/ldm/modules/diffusionmodules/util.py index e261e06a..eeef837e 100644 --- a/ldm_patched/ldm/modules/diffusionmodules/util.py +++ b/ldm_patched/ldm/modules/diffusionmodules/util.py @@ -225,19 +225,13 @@ class CheckpointFunction(torch.autograd.Function): def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): - """ - Create sinusoidal timestep embeddings. - :param timesteps: a 1-D Tensor of N indices, one per batch element. - These may be fractional. - :param dim: the dimension of the output. - :param max_period: controls the minimum frequency of the embeddings. - :return: an [N x dim] Tensor of positional embeddings. - """ + # Consistent with Kohya to reduce differences between model training and inference. + if not repeat_only: half = dim // 2 freqs = torch.exp( - -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half - ) + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half + ).to(device=timesteps.device) args = timesteps[:, None].float() * freqs[None] embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) if dim % 2: diff --git a/ldm_patched/modules/controlnet.py b/ldm_patched/modules/controlnet.py index 7e11497f..67d9c9ee 100644 --- a/ldm_patched/modules/controlnet.py +++ b/ldm_patched/modules/controlnet.py @@ -11,6 +11,9 @@ import ldm_patched.controlnet.cldm import ldm_patched.t2ia.adapter +compute_controlnet_weighting = None + + def broadcast_image_to(tensor, target_batch_size, batched_number): current_batch_size = tensor.shape[0] #print(current_batch_size, target_batch_size) @@ -114,6 +117,10 @@ class ControlBase: x = x.to(output_dtype) out[key].append(x) + + if compute_controlnet_weighting is not None: + out = compute_controlnet_weighting(out, self) + if control_prev is not None: for x in ['input', 'middle', 'output']: o = out[x] diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py index 6f88579d..c9d9f52f 100644 --- a/ldm_patched/modules/model_management.py +++ b/ldm_patched/modules/model_management.py @@ -1,3 +1,4 @@ +import time import psutil from enum import Enum from ldm_patched.modules.args_parser import args @@ -42,8 +43,6 @@ if args.directml is not None: else: directml_device = torch_directml.device(device_index) print("Using directml with device:", torch_directml.device_name(device_index)) - # torch_directml.disable_tiled_resources(True) - lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default. try: import intel_extension_for_pytorch as ipex @@ -128,6 +127,9 @@ try: except: OOM_EXCEPTION = Exception +if directml_enabled: + OOM_EXCEPTION = Exception + XFORMERS_VERSION = "" XFORMERS_ENABLED_VAE = True if args.disable_xformers: @@ -376,6 +378,8 @@ def free_memory(memory_required, device, keep_loaded=[]): def load_models_gpu(models, memory_required=0): global vram_state + execution_start_time = time.perf_counter() + inference_memory = minimum_inference_memory() extra_mem = max(inference_memory, memory_required) @@ -390,7 +394,7 @@ def load_models_gpu(models, memory_required=0): models_already_loaded.append(loaded_model) else: if hasattr(x, "model"): - print(f"Requested to load {x.model.__class__.__name__}") + print(f"To load target model {x.model.__class__.__name__}") models_to_load.append(loaded_model) if len(models_to_load) == 0: @@ -398,9 +402,14 @@ def load_models_gpu(models, memory_required=0): for d in devs: if d != torch.device("cpu"): free_memory(extra_mem, d, models_already_loaded) + + moving_time = time.perf_counter() - execution_start_time + if moving_time > 0.1: + print(f'Moving model(s) skipped. Freeing memory has taken {moving_time:.2f} seconds') + return - print(f"Loading {len(models_to_load)} new model{'s' if len(models_to_load) > 1 else ''}") + print(f"Begin to load {len(models_to_load)} model{'s' if len(models_to_load) > 1 else ''}") total_memory_required = {} for loaded_model in models_to_load: @@ -433,6 +442,11 @@ def load_models_gpu(models, memory_required=0): cur_loaded_model = loaded_model.model_load(lowvram_model_memory) current_loaded_models.insert(0, loaded_model) + + moving_time = time.perf_counter() - execution_start_time + if moving_time > 0.1: + print(f'Moving model(s) has taken {moving_time:.2f} seconds') + return diff --git a/ldm_patched/modules/ops.py b/ldm_patched/modules/ops.py index 2d7fa377..c9926fd2 100644 --- a/ldm_patched/modules/ops.py +++ b/ldm_patched/modules/ops.py @@ -1,5 +1,24 @@ import torch import ldm_patched.modules.model_management +import contextlib + + +@contextlib.contextmanager +def use_patched_ops(operations): + op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] + backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} + + try: + for op_name in op_names: + setattr(torch.nn, op_name, getattr(operations, op_name)) + + yield + + finally: + for op_name in op_names: + setattr(torch.nn, op_name, backups[op_name]) + return + def cast_bias_weight(s, input): bias = None diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py index 1f69d2b1..d6f36b21 100644 --- a/ldm_patched/modules/samplers.py +++ b/ldm_patched/modules/samplers.py @@ -126,6 +126,29 @@ def cond_cat(c_list): return out +def compute_cond_mark(cond_or_uncond, sigmas): + cond_or_uncond_size = int(sigmas.shape[0]) + + cond_mark = [] + for cx in cond_or_uncond: + cond_mark += [cx] * cond_or_uncond_size + + cond_mark = torch.Tensor(cond_mark).to(sigmas) + return cond_mark + +def compute_cond_indices(cond_or_uncond, sigmas): + cl = int(sigmas.shape[0]) + + cond_indices = [] + uncond_indices = [] + for i, cx in enumerate(cond_or_uncond): + if cx == 0: + cond_indices += list(range(i * cl, (i + 1) * cl)) + else: + uncond_indices += list(range(i * cl, (i + 1) * cl)) + + return cond_indices, uncond_indices + def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options): out_cond = torch.zeros_like(x_in) out_count = torch.ones_like(x_in) * 1e-37 @@ -193,9 +216,6 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options): c = cond_cat(c) timestep_ = torch.cat([timestep] * batch_chunks) - if control is not None: - c['control'] = control.get_control(input_x, timestep_, c, len(cond_or_uncond)) - transformer_options = {} if 'transformer_options' in model_options: transformer_options = model_options['transformer_options'].copy() @@ -214,8 +234,18 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options): transformer_options["cond_or_uncond"] = cond_or_uncond[:] transformer_options["sigmas"] = timestep + transformer_options["cond_mark"] = compute_cond_mark(cond_or_uncond=cond_or_uncond, sigmas=timestep) + transformer_options["cond_indices"], transformer_options["uncond_indices"] = compute_cond_indices(cond_or_uncond=cond_or_uncond, sigmas=timestep) + c['transformer_options'] = transformer_options + if control is not None: + p = control + while p is not None: + p.transformer_options = transformer_options + p = p.previous_controlnet + c['control'] = control.get_control(input_x, timestep_, c, len(cond_or_uncond)) + if 'model_function_wrapper' in model_options: output = model_options['model_function_wrapper'](model.apply_model, {"input": input_x, "timestep": timestep_, "c": c, "cond_or_uncond": cond_or_uncond}).chunk(batch_chunks) else: diff --git a/ldm_patched/modules/sd.py b/ldm_patched/modules/sd.py index e197c39c..a8413307 100644 --- a/ldm_patched/modules/sd.py +++ b/ldm_patched/modules/sd.py @@ -462,7 +462,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o model.load_model_weights(sd, "model.diffusion_model.") if output_vae: - vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True) + vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True) vae_sd = model_config.process_vae_state_dict(vae_sd) vae = VAE(sd=vae_sd) diff --git a/ldm_patched/modules/sd1_clip.py b/ldm_patched/modules/sd1_clip.py index 3727fb48..a1cdec2e 100644 --- a/ldm_patched/modules/sd1_clip.py +++ b/ldm_patched/modules/sd1_clip.py @@ -8,6 +8,7 @@ import zipfile from . import model_management import ldm_patched.modules.clip_model import json +from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils def gen_empty_tokens(special_tokens, length): start_token = special_tokens.get("start", None) @@ -74,11 +75,17 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): if textmodel_json_config is None: textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json") - with open(textmodel_json_config) as f: - config = json.load(f) + config = CLIPTextConfig.from_json_file(textmodel_json_config) + self.num_layers = config.num_hidden_layers - self.transformer = model_class(config, dtype, device, ldm_patched.modules.ops.manual_cast) - self.num_layers = self.transformer.num_layers + with ldm_patched.modules.ops.use_patched_ops(ldm_patched.modules.ops.manual_cast): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + + if dtype is not None: + self.transformer.to(dtype) + + self.transformer.text_model.embeddings.to(torch.float32) self.max_length = max_length if freeze: @@ -169,16 +176,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): if tokens[x, y] == max_token: break - outputs = self.transformer(tokens, attention_mask, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state) + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") self.transformer.set_input_embeddings(backup_embeds) if self.layer == "last": - z = outputs[0] + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] else: - z = outputs[1] + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) - if outputs[2] is not None: - pooled_output = outputs[2].float() + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() else: pooled_output = None diff --git a/ldm_patched/modules/supported_models_base.py b/ldm_patched/modules/supported_models_base.py index 5baf4bca..58535a9f 100644 --- a/ldm_patched/modules/supported_models_base.py +++ b/ldm_patched/modules/supported_models_base.py @@ -21,6 +21,7 @@ class BASE: noise_aug_config = None sampling_settings = {} latent_format = latent_formats.LatentFormat + vae_key_prefix = ["first_stage_model."] manual_cast_dtype = None diff --git a/ldm_patched/utils/path_utils.py b/ldm_patched/utils/path_utils.py index 6cae149b..af96b523 100644 --- a/ldm_patched/utils/path_utils.py +++ b/ldm_patched/utils/path_utils.py @@ -5,47 +5,16 @@ supported_pt_extensions = set(['.ckpt', '.pt', '.bin', '.pth', '.safetensors']) folder_names_and_paths = {} -base_path = os.getcwd() -models_dir = os.path.join(base_path, "models") -folder_names_and_paths["checkpoints"] = ([os.path.join(models_dir, "checkpoints")], supported_pt_extensions) -folder_names_and_paths["configs"] = ([os.path.join(models_dir, "configs")], [".yaml"]) - -folder_names_and_paths["loras"] = ([os.path.join(models_dir, "loras")], supported_pt_extensions) -folder_names_and_paths["vae"] = ([os.path.join(models_dir, "vae")], supported_pt_extensions) -folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_pt_extensions) -folder_names_and_paths["unet"] = ([os.path.join(models_dir, "unet")], supported_pt_extensions) -folder_names_and_paths["clip_vision"] = ([os.path.join(models_dir, "clip_vision")], supported_pt_extensions) -folder_names_and_paths["style_models"] = ([os.path.join(models_dir, "style_models")], supported_pt_extensions) -folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")], supported_pt_extensions) -folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"]) -folder_names_and_paths["vae_approx"] = ([os.path.join(models_dir, "vae_approx")], supported_pt_extensions) - -folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions) -folder_names_and_paths["gligen"] = ([os.path.join(models_dir, "gligen")], supported_pt_extensions) - -folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions) - -folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], []) - -folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions) - -folder_names_and_paths["photomaker"] = ([os.path.join(models_dir, "photomaker")], supported_pt_extensions) - -folder_names_and_paths["classifiers"] = ([os.path.join(models_dir, "classifiers")], {""}) - -output_directory = os.path.join(os.getcwd(), "output") -temp_directory = os.path.join(os.getcwd(), "temp") -input_directory = os.path.join(os.getcwd(), "input") -user_directory = os.path.join(os.getcwd(), "user") +# Will be assigned by modules.paths +base_path = None +models_dir = None +output_directory = None +temp_directory = None +input_directory = None +user_directory = None filename_list_cache = {} -if not os.path.exists(input_directory): - try: - pass # os.makedirs(input_directory) - except: - print("Failed to create input directory") - def set_output_directory(output_dir): global output_directory output_directory = output_dir From dbd002f930fcb97e4836b938afbc5033e9392573 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Wed, 31 Jan 2024 13:22:40 -0800 Subject: [PATCH 12/12] Update devices.py --- modules/devices.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index 62dc9f42..7c09d1f4 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -38,15 +38,9 @@ def get_device_for(task): def torch_gc(): model_management.soft_empty_cache() - if npu_specific.has_npu: - torch_npu_set_device() - npu_specific.torch_npu_gc() - def torch_npu_set_device(): - # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue - if npu_specific.has_npu: - torch.npu.set_device(0) + return def enable_tf32():