Merge branch 'main' into dev

Author: lllyasviel
Date: 2024-01-27 15:48:30 -08:00
Committed by: GitHub

66 changed files with 2746 additions and 2892 deletions

View File

@@ -2,8 +2,9 @@ import argparse
import json
import os
from modules.paths_internal import models_path, script_path, data_path, extensions_dir, extensions_builtin_dir, sd_default_config, sd_model_file # noqa: F401
from ldm_patched.modules import args_parser
parser = argparse.ArgumentParser()
parser = args_parser.parser
parser.add_argument("-f", action='store_true', help=argparse.SUPPRESS) # allows running as root; implemented outside of webui
parser.add_argument("--update-all-extensions", action='store_true', help="launch.py argument: download updates for all extensions when starting the program")

View File

@@ -2,7 +2,7 @@ import os
from modules import modelloader, errors
from modules.shared import cmd_opts, opts
from modules.upscaler import Upscaler, UpscalerData
from modules.upscaler import Upscaler, UpscalerData, prepare_free_memory
from modules.upscaler_utils import upscale_with_model
@@ -23,6 +23,7 @@ class UpscalerDAT(Upscaler):
self.scalers.append(model)
def do_upscale(self, img, path):
prepare_free_memory()
try:
info = self.load_model(path)
except Exception:
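
The same prepare_free_memory() guard is added to every upscaler's do_upscale() in this commit (DAT, ESRGAN, HAT, RealESRGAN). The real helper lives in modules/upscaler.py; the following is only a hedged sketch of what such a guard might look like, assuming it defers to ldm_patched's model management:

```python
from ldm_patched.modules import model_management

def prepare_free_memory_sketch(aggressive=False):
    """Hedged stand-in for modules.upscaler.prepare_free_memory (an assumption,
    not the actual implementation): make room on the compute device before an
    upscale model is loaded."""
    if aggressive:
        model_management.unload_all_models()   # drop every managed model
        return
    model_management.free_memory(
        3 * 1024 ** 3,                         # illustrative 3 GiB budget
        model_management.get_torch_device(),   # the active compute device
    )
```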

View File

@@ -4,7 +4,10 @@ import re
import torch
import numpy as np
from modules import modelloader, paths, deepbooru_model, devices, images, shared
from modules import modelloader, paths, deepbooru_model, images, shared
from ldm_patched.modules import model_management
from ldm_patched.modules.model_patcher import ModelPatcher
re_special = re.compile(r'([\\()])')
@@ -12,6 +15,14 @@ re_special = re.compile(r'([\\()])')
class DeepDanbooru:
def __init__(self):
self.model = None
self.load_device = model_management.text_encoder_device()
self.offload_device = model_management.text_encoder_offload_device()
self.dtype = torch.float32
if model_management.should_use_fp16(device=self.load_device):
self.dtype = torch.float16
self.patcher = None
def load(self):
if self.model is not None:
@@ -28,16 +39,16 @@ class DeepDanbooru:
self.model.load_state_dict(torch.load(files[0], map_location="cpu"))
self.model.eval()
self.model.to(devices.cpu, devices.dtype)
self.model.to(self.offload_device, self.dtype)
self.patcher = ModelPatcher(self.model, load_device=self.load_device, offload_device=self.offload_device)
def start(self):
self.load()
self.model.to(devices.device)
model_management.load_models_gpu([self.patcher])
def stop(self):
if not shared.opts.interrogate_keep_models_in_memory:
self.model.to(devices.cpu)
devices.torch_gc()
pass
def tag(self, pil_image):
self.start()
@@ -56,8 +67,8 @@ class DeepDanbooru:
pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512)
a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255
with torch.no_grad(), devices.autocast():
x = torch.from_numpy(a).to(devices.device)
with torch.no_grad():
x = torch.from_numpy(a).to(self.load_device, self.dtype)
y = self.model(x)[0].detach().cpu().numpy()
probability_dict = {}
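
The tagger now follows the ldm_patched memory-management pattern: keep the model parked on an offload device, wrap it in a ModelPatcher, and let load_models_gpu move it to the compute device only when it is actually used. A small sketch of that pattern with a stand-in module (the Linear layer is purely illustrative):

```python
import torch
from ldm_patched.modules import model_management
from ldm_patched.modules.model_patcher import ModelPatcher

load_device = model_management.text_encoder_device()
offload_device = model_management.text_encoder_offload_device()
dtype = torch.float16 if model_management.should_use_fp16(device=load_device) else torch.float32

model = torch.nn.Linear(8, 8)              # stand-in for the DeepDanbooru network
model.eval()
model.to(offload_device, dtype)            # park it on the offload device
patcher = ModelPatcher(model, load_device=load_device, offload_device=offload_device)

model_management.load_models_gpu([patcher])   # moved to the compute device on demand
with torch.no_grad():
    y = model(torch.zeros(1, 8, device=load_device, dtype=dtype))
print(y.shape)  # torch.Size([1, 8])
```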

View File

@@ -1,211 +1,89 @@
import sys
import contextlib
from functools import lru_cache
import torch
from modules import errors, shared
from modules import torch_utils
if sys.platform == "darwin":
from modules import mac_specific
if shared.cmd_opts.use_ipex:
from modules import xpu_specific
import ldm_patched.modules.model_management as model_management
def has_xpu() -> bool:
return shared.cmd_opts.use_ipex and xpu_specific.has_xpu
return model_management.xpu_available
def has_mps() -> bool:
if sys.platform != "darwin":
return False
else:
return mac_specific.has_mps
return model_management.mps_mode()
def cuda_no_autocast(device_id=None) -> bool:
if device_id is None:
device_id = get_cuda_device_id()
return (
torch.cuda.get_device_capability(device_id) == (7, 5)
and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16")
)
return False
def get_cuda_device_id():
return (
int(shared.cmd_opts.device_id)
if shared.cmd_opts.device_id is not None and shared.cmd_opts.device_id.isdigit()
else 0
) or torch.cuda.current_device()
return model_management.get_torch_device().index
def get_cuda_device_string():
if shared.cmd_opts.device_id is not None:
return f"cuda:{shared.cmd_opts.device_id}"
return "cuda"
return str(model_management.get_torch_device())
def get_optimal_device_name():
if torch.cuda.is_available():
return get_cuda_device_string()
if has_mps():
return "mps"
if has_xpu():
return xpu_specific.get_xpu_device_string()
return "cpu"
return model_management.get_torch_device().type
def get_optimal_device():
return torch.device(get_optimal_device_name())
return model_management.get_torch_device()
def get_device_for(task):
if task in shared.cmd_opts.use_cpu or "all" in shared.cmd_opts.use_cpu:
return cpu
return get_optimal_device()
def torch_gc():
if torch.cuda.is_available():
with torch.cuda.device(get_cuda_device_string()):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
if has_mps():
mac_specific.torch_mps_gc()
if has_xpu():
xpu_specific.torch_xpu_gc()
model_management.soft_empty_cache()
def enable_tf32():
if torch.cuda.is_available():
return
# enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
# see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
if cuda_no_autocast():
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
errors.run(enable_tf32, "Enabling TF32")
cpu: torch.device = torch.device("cpu")
fp8: bool = False
device: torch.device = None
device_interrogate: torch.device = None
device_gfpgan: torch.device = None
device_esrgan: torch.device = None
device_codeformer: torch.device = None
dtype: torch.dtype = torch.float16
dtype_vae: torch.dtype = torch.float16
dtype_unet: torch.dtype = torch.float16
dtype_inference: torch.dtype = torch.float16
device: torch.device = model_management.get_torch_device()
device_interrogate: torch.device = cpu # not used
device_gfpgan: torch.device = cpu
device_esrgan: torch.device = model_management.get_torch_device() # will be managed in special way
device_codeformer: torch.device = cpu
dtype: torch.dtype = model_management.unet_dtype()
dtype_vae: torch.dtype = model_management.vae_dtype()
dtype_unet: torch.dtype = model_management.unet_dtype()
dtype_inference: torch.dtype = model_management.unet_dtype()
unet_needs_upcast = False
def cond_cast_unet(input):
return input.to(dtype_unet) if unet_needs_upcast else input
return input
def cond_cast_float(input):
return input.float() if unet_needs_upcast else input
return input
nv_rng = None
patch_module_list = [
torch.nn.Linear,
torch.nn.Conv2d,
torch.nn.MultiheadAttention,
torch.nn.GroupNorm,
torch.nn.LayerNorm,
]
patch_module_list = []
def manual_cast_forward(target_dtype):
def forward_wrapper(self, *args, **kwargs):
if any(
isinstance(arg, torch.Tensor) and arg.dtype != target_dtype
for arg in args
):
args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
org_dtype = torch_utils.get_param(self).dtype
if org_dtype != target_dtype:
self.to(target_dtype)
result = self.org_forward(*args, **kwargs)
if org_dtype != target_dtype:
self.to(org_dtype)
if target_dtype != dtype_inference:
if isinstance(result, tuple):
result = tuple(
i.to(dtype_inference)
if isinstance(i, torch.Tensor)
else i
for i in result
)
elif isinstance(result, torch.Tensor):
result = result.to(dtype_inference)
return result
return forward_wrapper
return
@contextlib.contextmanager
def manual_cast(target_dtype):
applied = False
for module_type in patch_module_list:
if hasattr(module_type, "org_forward"):
continue
applied = True
org_forward = module_type.forward
if module_type == torch.nn.MultiheadAttention and has_xpu():
module_type.forward = manual_cast_forward(torch.float32)
else:
module_type.forward = manual_cast_forward(target_dtype)
module_type.org_forward = org_forward
try:
yield None
finally:
if applied:
for module_type in patch_module_list:
if hasattr(module_type, "org_forward"):
module_type.forward = module_type.org_forward
delattr(module_type, "org_forward")
return
def autocast(disable=False):
if disable:
return contextlib.nullcontext()
if fp8 and device==cpu:
return torch.autocast("cpu", dtype=torch.bfloat16, enabled=True)
if fp8 and dtype_inference == torch.float32:
return manual_cast(dtype)
if dtype == torch.float32 or dtype_inference == torch.float32:
return contextlib.nullcontext()
if has_xpu() or has_mps() or cuda_no_autocast():
return manual_cast(dtype)
return torch.autocast("cuda")
return contextlib.nullcontext()
def without_autocast(disable=False):
return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
return contextlib.nullcontext()
class NansException(Exception):
@@ -213,43 +91,9 @@ class NansException(Exception):
def test_for_nans(x, where):
if shared.cmd_opts.disable_nan_check:
return
if not torch.all(torch.isnan(x)).item():
return
if where == "unet":
message = "A tensor with all NaNs was produced in Unet."
if not shared.cmd_opts.no_half:
message += " This could be either because there's not enough precision to represent the picture, or because your video card does not support half type. Try setting the \"Upcast cross attention layer to float32\" option in Settings > Stable Diffusion or using the --no-half commandline argument to fix this."
elif where == "vae":
message = "A tensor with all NaNs was produced in VAE."
if not shared.cmd_opts.no_half and not shared.cmd_opts.no_half_vae:
message += " This could be because there's not enough precision to represent the picture. Try adding --no-half-vae commandline argument to fix this."
else:
message = "A tensor with all NaNs was produced."
message += " Use --disable-nan-check commandline argument to disable this check."
raise NansException(message)
return
@lru_cache
def first_time_calculation():
"""
just do any calculation with pytorch layers - the first time this is done it allocates about 700MB of memory and
spends about 2.7 seconds doing that, at least with NVidia.
"""
x = torch.zeros((1, 1)).to(device, dtype)
linear = torch.nn.Linear(1, 1).to(device, dtype)
linear(x)
x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
conv2d(x)
return
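
The rewritten devices module is now a thin facade over ldm_patched.modules.model_management: device choice, dtypes, garbage collection, and autocast all defer to the backend. A short usage sketch showing how existing call sites continue to work:

```python
from modules import devices

print(devices.get_optimal_device())   # same object as model_management.get_torch_device()
print(devices.dtype, devices.dtype_vae, devices.dtype_unet)

devices.torch_gc()                    # now forwards to model_management.soft_empty_cache()

with devices.autocast():              # a nullcontext unless manual casting is required
    pass
```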

View File

@@ -1,6 +1,6 @@
from modules import modelloader, devices, errors
from modules.shared import opts
from modules.upscaler import Upscaler, UpscalerData
from modules.upscaler import Upscaler, UpscalerData, prepare_free_memory
from modules.upscaler_utils import upscale_with_model
@@ -27,6 +27,7 @@ class UpscalerESRGAN(Upscaler):
self.scalers.append(scaler_data)
def do_upscale(self, img, selected_model):
prepare_free_memory()
try:
model = self.load_model(selected_model)
except Exception:

View File

@@ -3,7 +3,7 @@ import sys
from modules import modelloader, devices
from modules.shared import opts
from modules.upscaler import Upscaler, UpscalerData
from modules.upscaler import Upscaler, UpscalerData, prepare_free_memory
from modules.upscaler_utils import upscale_with_model
@@ -20,6 +20,7 @@ class UpscalerHAT(Upscaler):
self.scalers.append(scaler_data)
def do_upscale(self, img, selected_model):
prepare_free_memory()
try:
model = self.load_model(selected_model)
except Exception as e:

View File

@@ -12,6 +12,10 @@ def imports():
logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh...
logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())
from modules_forge.initialization import initialize_forge
initialize_forge()
startup_timer.record("initialize forge")
import torch # noqa: F401
startup_timer.record("import torch")
import pytorch_lightning # noqa: F401

View File

@@ -10,7 +10,10 @@ import torch.hub
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from modules import devices, paths, shared, lowvram, modelloader, errors, torch_utils
from modules import devices, paths, shared, modelloader, errors
from ldm_patched.modules import model_management
from ldm_patched.modules.model_patcher import ModelPatcher
blip_image_eval_size = 384
clip_model_name = 'ViT-L/14'
@@ -53,7 +56,16 @@ class InterrogateModels:
self.loaded_categories = None
self.skip_categories = []
self.content_dir = content_dir
self.running_on_cpu = devices.device_interrogate == torch.device("cpu")
self.load_device = model_management.text_encoder_device()
self.offload_device = model_management.text_encoder_offload_device()
self.dtype = torch.float32
if model_management.should_use_fp16(device=self.load_device):
self.dtype = torch.float16
self.blip_patcher = None
self.clip_patcher = None
def categories(self):
if not os.path.exists(self.content_dir):
@@ -105,49 +117,37 @@ class InterrogateModels:
def load_clip_model(self):
import clip
import clip.model
if self.running_on_cpu:
model, preprocess = clip.load(clip_model_name, device="cpu", download_root=shared.cmd_opts.clip_models_path)
else:
model, preprocess = clip.load(clip_model_name, download_root=shared.cmd_opts.clip_models_path)
clip.model.LayerNorm = torch.nn.LayerNorm
model, preprocess = clip.load(clip_model_name, device="cpu", download_root=shared.cmd_opts.clip_models_path)
model.eval()
model = model.to(devices.device_interrogate)
return model, preprocess
def load(self):
if self.blip_model is None:
self.blip_model = self.load_blip_model()
if not shared.cmd_opts.no_half and not self.running_on_cpu:
self.blip_model = self.blip_model.half()
self.blip_model = self.blip_model.to(devices.device_interrogate)
self.blip_model = self.blip_model.to(device=self.offload_device, dtype=self.dtype)
self.blip_patcher = ModelPatcher(self.blip_model, load_device=self.load_device, offload_device=self.offload_device)
if self.clip_model is None:
self.clip_model, self.clip_preprocess = self.load_clip_model()
if not shared.cmd_opts.no_half and not self.running_on_cpu:
self.clip_model = self.clip_model.half()
self.clip_model = self.clip_model.to(device=self.offload_device, dtype=self.dtype)
self.clip_patcher = ModelPatcher(self.clip_model, load_device=self.load_device, offload_device=self.offload_device)
self.clip_model = self.clip_model.to(devices.device_interrogate)
self.dtype = torch_utils.get_param(self.clip_model).dtype
model_management.load_models_gpu([self.blip_patcher, self.clip_patcher])
return
def send_clip_to_ram(self):
if not shared.opts.interrogate_keep_models_in_memory:
if self.clip_model is not None:
self.clip_model = self.clip_model.to(devices.cpu)
pass
def send_blip_to_ram(self):
if not shared.opts.interrogate_keep_models_in_memory:
if self.blip_model is not None:
self.blip_model = self.blip_model.to(devices.cpu)
pass
def unload(self):
self.send_clip_to_ram()
self.send_blip_to_ram()
devices.torch_gc()
pass
def rank(self, image_features, text_array, top_count=1):
import clip
@@ -158,11 +158,11 @@ class InterrogateModels:
text_array = text_array[0:int(shared.opts.interrogate_clip_dict_limit)]
top_count = min(top_count, len(text_array))
text_tokens = clip.tokenize(list(text_array), truncate=True).to(devices.device_interrogate)
text_tokens = clip.tokenize(list(text_array), truncate=True).to(self.load_device)
text_features = self.clip_model.encode_text(text_tokens).type(self.dtype)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = torch.zeros((1, len(text_array))).to(devices.device_interrogate)
similarity = torch.zeros((1, len(text_array))).to(self.load_device)
for i in range(image_features.shape[0]):
similarity += (100.0 * image_features[i].unsqueeze(0) @ text_features.T).softmax(dim=-1)
similarity /= image_features.shape[0]
@@ -175,7 +175,7 @@ class InterrogateModels:
transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC),
transforms.ToTensor(),
transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
])(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)
])(pil_image).unsqueeze(0).type(self.dtype).to(self.load_device)
with torch.no_grad():
caption = self.blip_model.generate(gpu_image, sample=False, num_beams=shared.opts.interrogate_clip_num_beams, min_length=shared.opts.interrogate_clip_min_length, max_length=shared.opts.interrogate_clip_max_length)
@@ -186,9 +186,6 @@ class InterrogateModels:
res = ""
shared.state.begin(job="interrogate")
try:
lowvram.send_everything_to_cpu()
devices.torch_gc()
self.load()
caption = self.generate_caption(pil_image)
@@ -197,7 +194,7 @@ class InterrogateModels:
res = caption
clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)
clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(self.load_device)
with torch.no_grad(), devices.autocast():
image_features = self.clip_model.encode_image(clip_image).type(self.dtype)

View File

@@ -15,6 +15,7 @@ from modules import cmd_args, errors
from modules.paths_internal import script_path, extensions_dir
from modules.timer import startup_timer
from modules import logging_config
from modules_forge import forge_version
args, _ = cmd_args.parser.parse_known_args()
logging_config.setup_logging(args.loglevel)
@@ -70,7 +71,7 @@ def commit_hash():
@lru_cache()
def git_tag():
def git_tag_a1111():
try:
return subprocess.check_output([git, "-C", script_path, "describe", "--tags"], shell=False, encoding='utf8').strip()
except Exception:
@@ -85,6 +86,10 @@ def git_tag():
return "<none>"
def git_tag():
return 'f' + forge_version.version + '-' + git_tag_a1111()
def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str:
if desc is not None:
print(desc)
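
git_tag() now reports the Forge version prefixed with 'f', joined to the upstream tag from git_tag_a1111(). A tiny illustration with assumed values (both strings are hypothetical):

```python
forge_version = "0.0.1"   # assumed value of modules_forge.forge_version.version
a1111_tag = "v1.7.0"      # assumed result of git_tag_a1111()

print('f' + forge_version + '-' + a1111_tag)   # -> f0.0.1-v1.7.0
```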

View File

@@ -6,142 +6,20 @@ cpu = torch.device("cpu")
def send_everything_to_cpu():
global module_in_gpu
if module_in_gpu is not None:
module_in_gpu.to(cpu)
module_in_gpu = None
return
def is_needed(sd_model):
return shared.cmd_opts.lowvram or shared.cmd_opts.medvram or shared.cmd_opts.medvram_sdxl and hasattr(sd_model, 'conditioner')
return False
def apply(sd_model):
enable = is_needed(sd_model)
shared.parallel_processing_allowed = not enable
if enable:
setup_for_low_vram(sd_model, not shared.cmd_opts.lowvram)
else:
sd_model.lowvram = False
return
def setup_for_low_vram(sd_model, use_medvram):
if getattr(sd_model, 'lowvram', False):
return
sd_model.lowvram = True
parents = {}
def send_me_to_gpu(module, _):
"""send this module to GPU; send whatever tracked module was previous in GPU to CPU;
we add this as forward_pre_hook to a lot of modules and this way all but one of them will
be in CPU
"""
global module_in_gpu
module = parents.get(module, module)
if module_in_gpu == module:
return
if module_in_gpu is not None:
module_in_gpu.to(cpu)
module.to(devices.device)
module_in_gpu = module
# see below for register_forward_pre_hook;
# first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is
# useless here, and we just replace those methods
first_stage_model = sd_model.first_stage_model
first_stage_model_encode = sd_model.first_stage_model.encode
first_stage_model_decode = sd_model.first_stage_model.decode
def first_stage_model_encode_wrap(x):
send_me_to_gpu(first_stage_model, None)
return first_stage_model_encode(x)
def first_stage_model_decode_wrap(z):
send_me_to_gpu(first_stage_model, None)
return first_stage_model_decode(z)
to_remain_in_cpu = [
(sd_model, 'first_stage_model'),
(sd_model, 'depth_model'),
(sd_model, 'embedder'),
(sd_model, 'model'),
(sd_model, 'embedder'),
]
is_sdxl = hasattr(sd_model, 'conditioner')
is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model')
if is_sdxl:
to_remain_in_cpu.append((sd_model, 'conditioner'))
elif is_sd2:
to_remain_in_cpu.append((sd_model.cond_stage_model, 'model'))
else:
to_remain_in_cpu.append((sd_model.cond_stage_model, 'transformer'))
# remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet from the model
stored = []
for obj, field in to_remain_in_cpu:
module = getattr(obj, field, None)
stored.append(module)
setattr(obj, field, None)
# send the model to GPU.
sd_model.to(devices.device)
# put modules back. the modules will be in CPU.
for (obj, field), module in zip(to_remain_in_cpu, stored):
setattr(obj, field, module)
# register hooks for the first three models
if is_sdxl:
sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu)
elif is_sd2:
sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu)
sd_model.cond_stage_model.model.token_embedding.register_forward_pre_hook(send_me_to_gpu)
parents[sd_model.cond_stage_model.model] = sd_model.cond_stage_model
parents[sd_model.cond_stage_model.model.token_embedding] = sd_model.cond_stage_model
else:
sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
sd_model.first_stage_model.encode = first_stage_model_encode_wrap
sd_model.first_stage_model.decode = first_stage_model_decode_wrap
if sd_model.depth_model:
sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
if sd_model.embedder:
sd_model.embedder.register_forward_pre_hook(send_me_to_gpu)
if use_medvram:
sd_model.model.register_forward_pre_hook(send_me_to_gpu)
else:
diff_model = sd_model.model.diffusion_model
# the third remaining model is still too big for 4 GB, so we also do the same for its submodules
# so that only one of them is in GPU at a time
stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
sd_model.model.to(devices.device)
diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored
# install hooks for bits of third model
diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu)
for block in diff_model.input_blocks:
block.register_forward_pre_hook(send_me_to_gpu)
diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu)
for block in diff_model.output_blocks:
block.register_forward_pre_hook(send_me_to_gpu)
return
def is_enabled(sd_model):
return sd_model.lowvram
return False
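
After this change modules/lowvram.py is effectively a set of stubs: module-by-module offloading is handled by the ldm_patched model manager, and the old entry points remain only so callers keep working. The resulting surface, as implied by the diff above:

```python
# Effective behaviour of the slimmed-down module (inferred from the diff, not copied verbatim).
def send_everything_to_cpu():
    return              # offloading is owned by ldm_patched model management now

def is_needed(sd_model):
    return False        # lowvram/medvram routing no longer happens here

def apply(sd_model):
    return              # no forward_pre_hook juggling anymore

def is_enabled(sd_model):
    return False        # callers that gate on lowvram take the normal path
```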

View File

@@ -627,44 +627,7 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
for i in range(batch.shape[0]):
sample = decode_first_stage(model, batch[i:i + 1])[0]
if check_for_nans:
try:
devices.test_for_nans(sample, "vae")
except devices.NansException as e:
if shared.opts.auto_vae_precision_bfloat16:
autofix_dtype = torch.bfloat16
autofix_dtype_text = "bfloat16"
autofix_dtype_setting = "Automatically convert VAE to bfloat16"
autofix_dtype_comment = ""
elif shared.opts.auto_vae_precision:
autofix_dtype = torch.float32
autofix_dtype_text = "32-bit float"
autofix_dtype_setting = "Automatically revert VAE to 32-bit floats"
autofix_dtype_comment = "\nTo always start with 32-bit VAE, use --no-half-vae commandline flag."
else:
raise e
if devices.dtype_vae == autofix_dtype:
raise e
errors.print_error_explanation(
"A tensor with all NaNs was produced in VAE.\n"
f"Web UI will now convert VAE into {autofix_dtype_text} and retry.\n"
f"To disable this behavior, disable the '{autofix_dtype_setting}' setting.{autofix_dtype_comment}"
)
devices.dtype_vae = autofix_dtype
model.first_stage_model.to(devices.dtype_vae)
batch = batch.to(devices.dtype_vae)
sample = decode_first_stage(model, batch[i:i + 1])[0]
if target_device is not None:
sample = sample.to(target_device)
samples.append(sample)
samples.append(sample.to(target_device))
return samples
@@ -847,7 +810,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
infotexts = []
output_images = []
with torch.no_grad(), p.sd_model.ema_scope():
with torch.no_grad():
with devices.autocast():
p.init(p.all_prompts, p.all_seeds, p.all_subseeds)
@@ -871,6 +834,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
sd_models.reload_model_weights() # model can be changed for example by refiner
p.sd_model.forge_objects = p.sd_model.forge_objects_original.shallow_copy()
p.prompts = p.all_prompts[n * p.batch_size:(n + 1) * p.batch_size]
p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n + 1) * p.batch_size]
p.seeds = p.all_seeds[n * p.batch_size:(n + 1) * p.batch_size]
@@ -887,8 +851,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.parse_extra_network_prompts()
if not p.disable_extra_networks:
with devices.autocast():
extra_networks.activate(p, p.extra_network_data)
extra_networks.activate(p, p.extra_network_data)
p.sd_model.forge_objects = p.sd_model.forge_objects_after_applying_lora.shallow_copy()
if p.scripts is not None:
p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
@@ -940,8 +905,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
if p.scripts is not None:
ps = scripts.PostSampleArgs(samples_ddim)
@@ -960,9 +924,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
del samples_ddim
if lowvram.is_enabled(shared.sd_model):
lowvram.send_everything_to_cpu()
devices.torch_gc()
state.nextjob()
@@ -1269,7 +1230,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
image = np.array(self.firstpass_image).astype(np.float32) / 255.0
image = np.moveaxis(image, 2, 0)
image = torch.from_numpy(np.expand_dims(image, axis=0))
image = image.to(shared.device, dtype=devices.dtype_vae)
image = image.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
@@ -1353,7 +1314,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
batch_images.append(image)
decoded_samples = torch.from_numpy(np.array(batch_images))
decoded_samples = decoded_samples.to(shared.device, dtype=devices.dtype_vae)
decoded_samples = decoded_samples.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
@@ -1458,7 +1419,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
if shared.opts.hires_fix_use_firstpass_conds:
self.calculate_hr_conds()
elif lowvram.is_enabled(shared.sd_model) and shared.sd_model.sd_checkpoint_info == sd_models.select_checkpoint(): # if in lowvram mode, we need to calculate conds right away, before the cond NN is unloaded
elif shared.sd_model.sd_checkpoint_info == sd_models.select_checkpoint(): # if in lowvram mode, we need to calculate conds right away, before the cond NN is unloaded
with devices.autocast():
extra_networks.activate(self, self.hr_extra_network_data)
@@ -1645,7 +1606,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
image = torch.from_numpy(batch_images)
image = image.to(shared.device, dtype=devices.dtype_vae)
image = image.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
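
Images handed to the VAE encoder are now kept in float32 (previously devices.dtype_vae); the backend casts later as needed. A small runnable sketch of the tensor preparation used in the hires/img2img paths:

```python
import numpy as np
import torch

image = np.zeros((512, 512, 3), dtype=np.float32) / 255.0   # HWC image in [0, 1]
image = np.moveaxis(image, 2, 0)                             # HWC -> CHW
tensor = torch.from_numpy(np.expand_dims(image, axis=0))     # add batch dimension
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor = tensor.to(device, dtype=torch.float32)              # float32, not dtype_vae
print(tensor.shape, tensor.dtype)  # torch.Size([1, 3, 512, 512]) torch.float32
```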

View File

@@ -276,6 +276,12 @@ class DictWithShape(dict):
def shape(self):
return self["crossattn"].shape
def to(self, *args, **kwargs):
for k in self.keys():
if isinstance(self[k], torch.Tensor):
self[k] = self[k].to(*args, **kwargs)
return self
def reconstruct_cond_batch(c: list[list[ScheduledPromptConditioning]], current_step):
param = c[0][0].cond
@@ -317,15 +323,32 @@ def stack_conds(tensors):
return torch.stack(tensors)
def stack_conds_alter(tensors, weights):
token_count = max([x.shape[0] for x in tensors])
for i in range(len(tensors)):
if tensors[i].shape[0] != token_count:
last_vector = tensors[i][-1:]
last_vector_repeated = last_vector.repeat([token_count - tensors[i].shape[0], 1])
tensors[i] = torch.vstack([tensors[i], last_vector_repeated])
result = 0
full_weights = 0
for x, w in zip(tensors, weights):
result = result + x * float(w)
full_weights = full_weights + float(w)
result = result / full_weights
return result
def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):
param = c.batch[0][0].schedules[0].cond
tensors = []
conds_list = []
results = []
for composable_prompts in c.batch:
conds_for_batch = []
tensors = []
weights = []
for composable_prompt in composable_prompts:
target_index = 0
@@ -334,19 +357,24 @@ def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):
target_index = current
break
conds_for_batch.append((len(tensors), composable_prompt.weight))
weights.append(composable_prompt.weight)
tensors.append(composable_prompt.schedules[target_index].cond)
conds_list.append(conds_for_batch)
if isinstance(tensors[0], dict):
weighted = {k: stack_conds_alter([x[k] for x in tensors], weights) for k in tensors[0].keys()}
else:
weighted = stack_conds_alter(tensors, weights)
if isinstance(tensors[0], dict):
keys = list(tensors[0].keys())
stacked = {k: stack_conds([x[k] for x in tensors]) for k in keys}
stacked = DictWithShape(stacked, stacked['crossattn'].shape)
results.append(weighted)
if isinstance(results[0], dict):
results = {k: torch.stack([x[k] for x in results])
for k in results[0].keys()}
results = DictWithShape(results, results['crossattn'].shape)
else:
stacked = stack_conds(tensors).to(device=param.device, dtype=param.dtype)
results = torch.stack(results).to(device=param.device, dtype=param.dtype)
return conds_list, stacked
return results
re_attention = re.compile(r"""
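
reconstruct_multicond_batch now pre-combines AND-composed prompts into a single weighted conditioning per image via stack_conds_alter, instead of returning a conds_list for the sampler to assemble. A usage sketch of the new helper (assuming it is imported from a Forge checkout; the tensor shapes are illustrative):

```python
import torch
from modules.prompt_parser import stack_conds_alter

cond_a = torch.randn(77, 768)    # 77-token prompt embedding
cond_b = torch.randn(154, 768)   # 154-token prompt embedding

# The shorter tensor is padded by repeating its last token row, then the
# prompts are averaged with their AND weights (normalised by the weight sum).
weighted = stack_conds_alter([cond_a, cond_b], [1.0, 0.5])
print(weighted.shape)  # torch.Size([154, 768])
```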

View File

@@ -2,7 +2,7 @@ import os
from modules import modelloader, errors
from modules.shared import cmd_opts, opts
from modules.upscaler import Upscaler, UpscalerData
from modules.upscaler import Upscaler, UpscalerData, prepare_free_memory
from modules.upscaler_utils import upscale_with_model
@@ -27,6 +27,8 @@ class UpscalerRealESRGAN(Upscaler):
self.scalers.append(scaler)
def do_upscale(self, img, path):
prepare_free_memory()
if not self.enable:
return img

View File

@@ -57,57 +57,11 @@ def list_optimizers():
def apply_optimizations(option=None):
global current_optimizer
undo_optimizations()
if len(optimizers) == 0:
# a script can access the model very early, and optimizations would not be filled by then
current_optimizer = None
return ''
ldm.modules.diffusionmodules.model.nonlinearity = silu
ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th
sgm.modules.diffusionmodules.model.nonlinearity = silu
sgm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th
if current_optimizer is not None:
current_optimizer.undo()
current_optimizer = None
selection = option or shared.opts.cross_attention_optimization
if selection == "Automatic" and len(optimizers) > 0:
matching_optimizer = next(iter([x for x in optimizers if x.cmd_opt and getattr(shared.cmd_opts, x.cmd_opt, False)]), optimizers[0])
else:
matching_optimizer = next(iter([x for x in optimizers if x.title() == selection]), None)
if selection == "None":
matching_optimizer = None
elif selection == "Automatic" and shared.cmd_opts.disable_opt_split_attention:
matching_optimizer = None
elif matching_optimizer is None:
matching_optimizer = optimizers[0]
if matching_optimizer is not None:
print(f"Applying attention optimization: {matching_optimizer.name}... ", end='')
matching_optimizer.apply()
print("done.")
current_optimizer = matching_optimizer
return current_optimizer.name
else:
print("Disabling attention optimization")
return ''
return
def undo_optimizations():
ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity
ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward
sgm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity
sgm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
sgm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward
return
def fix_checkpoint():
@@ -182,131 +136,16 @@ class StableDiffusionModelHijack:
self.embedding_db.add_embedding_dir(cmd_opts.embeddings_dir)
def apply_optimizations(self, option=None):
try:
self.optimization_method = apply_optimizations(option)
except Exception as e:
errors.display(e, "applying cross attention optimization")
undo_optimizations()
pass
def convert_sdxl_to_ssd(self, m):
"""Converts an SDXL model to a Segmind Stable Diffusion model (see https://huggingface.co/segmind/SSD-1B)"""
delattr(m.model.diffusion_model.middle_block, '1')
delattr(m.model.diffusion_model.middle_block, '2')
for i in ['9', '8', '7', '6', '5', '4']:
delattr(m.model.diffusion_model.input_blocks[7][1].transformer_blocks, i)
delattr(m.model.diffusion_model.input_blocks[8][1].transformer_blocks, i)
delattr(m.model.diffusion_model.output_blocks[0][1].transformer_blocks, i)
delattr(m.model.diffusion_model.output_blocks[1][1].transformer_blocks, i)
delattr(m.model.diffusion_model.output_blocks[4][1].transformer_blocks, '1')
delattr(m.model.diffusion_model.output_blocks[5][1].transformer_blocks, '1')
devices.torch_gc()
pass
def hijack(self, m):
conditioner = getattr(m, 'conditioner', None)
if conditioner:
text_cond_models = []
for i in range(len(conditioner.embedders)):
embedder = conditioner.embedders[i]
typename = type(embedder).__name__
if typename == 'FrozenOpenCLIPEmbedder':
embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self)
conditioner.embedders[i] = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(embedder, self)
text_cond_models.append(conditioner.embedders[i])
if typename == 'FrozenCLIPEmbedder':
model_embeddings = embedder.transformer.text_model.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
conditioner.embedders[i] = sd_hijack_clip.FrozenCLIPEmbedderForSDXLWithCustomWords(embedder, self)
text_cond_models.append(conditioner.embedders[i])
if typename == 'FrozenOpenCLIPEmbedder2':
embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self, textual_inversion_key='clip_g')
conditioner.embedders[i] = sd_hijack_open_clip.FrozenOpenCLIPEmbedder2WithCustomWords(embedder, self)
text_cond_models.append(conditioner.embedders[i])
if len(text_cond_models) == 1:
m.cond_stage_model = text_cond_models[0]
else:
m.cond_stage_model = conditioner
if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation or type(m.cond_stage_model) == xlmr_m18.BertSeriesModelWithTransformation:
model_embeddings = m.cond_stage_model.roberta.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
m.cond_stage_model = sd_hijack_xlmr.FrozenXLMREmbedderWithCustomWords(m.cond_stage_model, self)
elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenCLIPEmbedder:
model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder:
m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
apply_weighted_forward(m)
if m.cond_stage_key == "edit":
sd_hijack_unet.hijack_ddpm_edit()
self.apply_optimizations()
self.clip = m.cond_stage_model
def flatten(el):
flattened = [flatten(children) for children in el.children()]
res = [el]
for c in flattened:
res += c
return res
self.layers = flatten(m)
import modules.models.diffusion.ddpm_edit
if isinstance(m, ldm.models.diffusion.ddpm.LatentDiffusion):
sd_unet.original_forward = ldm_original_forward
elif isinstance(m, modules.models.diffusion.ddpm_edit.LatentDiffusion):
sd_unet.original_forward = ldm_original_forward
elif isinstance(m, sgm.models.diffusion.DiffusionEngine):
sd_unet.original_forward = sgm_original_forward
else:
sd_unet.original_forward = None
pass
def undo_hijack(self, m):
conditioner = getattr(m, 'conditioner', None)
if conditioner:
for i in range(len(conditioner.embedders)):
embedder = conditioner.embedders[i]
if isinstance(embedder, (sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords, sd_hijack_open_clip.FrozenOpenCLIPEmbedder2WithCustomWords)):
embedder.wrapped.model.token_embedding = embedder.wrapped.model.token_embedding.wrapped
conditioner.embedders[i] = embedder.wrapped
if isinstance(embedder, sd_hijack_clip.FrozenCLIPEmbedderForSDXLWithCustomWords):
embedder.wrapped.transformer.text_model.embeddings.token_embedding = embedder.wrapped.transformer.text_model.embeddings.token_embedding.wrapped
conditioner.embedders[i] = embedder.wrapped
if hasattr(m, 'cond_stage_model'):
delattr(m, 'cond_stage_model')
elif type(m.cond_stage_model) == sd_hijack_xlmr.FrozenXLMREmbedderWithCustomWords:
m.cond_stage_model = m.cond_stage_model.wrapped
elif type(m.cond_stage_model) == sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords:
m.cond_stage_model = m.cond_stage_model.wrapped
model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
if type(model_embeddings.token_embedding) == EmbeddingsWithFixes:
model_embeddings.token_embedding = model_embeddings.token_embedding.wrapped
elif type(m.cond_stage_model) == sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords:
m.cond_stage_model.wrapped.model.token_embedding = m.cond_stage_model.wrapped.model.token_embedding.wrapped
m.cond_stage_model = m.cond_stage_model.wrapped
undo_optimizations()
undo_weighted_forward(m)
self.apply_circular(False)
self.layers = None
self.clip = None
pass
def apply_circular(self, enable):
if self.circular_enabled == enable:
@@ -321,17 +160,12 @@ class StableDiffusionModelHijack:
self.comments = []
self.extra_generation_params = {}
def get_prompt_lengths(self, text):
if self.clip is None:
return "-", "-"
_, token_count = self.clip.process_texts([text])
return token_count, self.clip.get_target_prompt_token_count(token_count)
def get_prompt_lengths(self, text, cond_stage_model):
_, token_count = cond_stage_model.process_texts([text])
return token_count, cond_stage_model.get_target_prompt_token_count(token_count)
def redo_hijack(self, m):
self.undo_hijack(m)
self.hijack(m)
pass
class EmbeddingsWithFixes(torch.nn.Module):

View File

@@ -10,6 +10,7 @@ from omegaconf import OmegaConf, ListConfig
from os import mkdir
from urllib import request
import ldm.modules.midas as midas
import gc
from ldm.util import instantiate_from_config
@@ -17,6 +18,12 @@ from modules import paths, shared, modelloader, devices, script_callbacks, sd_va
from modules.timer import Timer
import tomesd
import numpy as np
from modules_forge import forge_loader
import modules_forge.ops as forge_ops
from ldm_patched.modules.ops import manual_cast
from ldm_patched.modules import model_management as model_management
import ldm_patched.modules.model_patcher
model_dir = "Stable-diffusion"
model_path = os.path.abspath(os.path.join(paths.models_path, model_dir))
@@ -366,26 +373,12 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
sd_model_hash = checkpoint_info.calculate_shorthash()
timer.record("calculate hash")
if devices.fp8:
# prevent model to load state dict in fp8
model.half()
if not SkipWritingToConfig.skip:
shared.opts.data["sd_model_checkpoint"] = checkpoint_info.title
if state_dict is None:
state_dict = get_checkpoint_state_dict(checkpoint_info, timer)
model.is_sdxl = hasattr(model, 'conditioner')
model.is_sd2 = not model.is_sdxl and hasattr(model.cond_stage_model, 'model')
model.is_sd1 = not model.is_sdxl and not model.is_sd2
model.is_ssd = model.is_sdxl and 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys()
if model.is_sdxl:
sd_models_xl.extend_sdxl(model)
if model.is_ssd:
sd_hijack.model_hijack.convert_sdxl_to_ssd(model)
if shared.opts.sd_checkpoint_cache > 0:
# cache newly loaded model
checkpoints_loaded[checkpoint_info] = state_dict.copy()
@@ -395,65 +388,6 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
del state_dict
if shared.cmd_opts.opt_channelslast:
model.to(memory_format=torch.channels_last)
timer.record("apply channels_last")
if shared.cmd_opts.no_half:
model.float()
model.alphas_cumprod_original = model.alphas_cumprod
devices.dtype_unet = torch.float32
timer.record("apply float()")
else:
vae = model.first_stage_model
depth_model = getattr(model, 'depth_model', None)
# with --no-half-vae, remove VAE from model when doing half() to prevent its weights from being converted to float16
if shared.cmd_opts.no_half_vae:
model.first_stage_model = None
# with --upcast-sampling, don't convert the depth model weights to float16
if shared.cmd_opts.upcast_sampling and depth_model:
model.depth_model = None
alphas_cumprod = model.alphas_cumprod
model.alphas_cumprod = None
model.half()
model.alphas_cumprod = alphas_cumprod
model.alphas_cumprod_original = alphas_cumprod
model.first_stage_model = vae
if depth_model:
model.depth_model = depth_model
devices.dtype_unet = torch.float16
timer.record("apply half()")
for module in model.modules():
if hasattr(module, 'fp16_weight'):
del module.fp16_weight
if hasattr(module, 'fp16_bias'):
del module.fp16_bias
if check_fp8(model):
devices.fp8 = True
first_stage = model.first_stage_model
model.first_stage_model = None
for module in model.modules():
if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
if shared.opts.cache_fp16_weight:
module.fp16_weight = module.weight.data.clone().cpu().half()
if module.bias is not None:
module.fp16_bias = module.bias.data.clone().cpu().half()
module.to(torch.float8_e4m3fn)
model.first_stage_model = first_stage
timer.record("apply fp8")
else:
devices.fp8 = False
devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
model.first_stage_model.to(devices.dtype_vae)
timer.record("apply dtype to VAE")
# clean up cache if limit is reached
while len(checkpoints_loaded) > shared.opts.sd_checkpoint_cache:
checkpoints_loaded.popitem(last=False)
@@ -590,14 +524,6 @@ class SdModelData:
sd_vae.loaded_vae_file = getattr(v, "loaded_vae_file", None)
sd_vae.checkpoint_info = v.sd_checkpoint_info
try:
self.loaded_sd_models.remove(v)
except ValueError:
pass
if v is not None:
self.loaded_sd_models.insert(0, v)
model_data = SdModelData()
@@ -615,31 +541,19 @@ def get_empty_cond(sd_model):
def send_model_to_cpu(m):
if m.lowvram:
lowvram.send_everything_to_cpu()
else:
m.to(devices.cpu)
devices.torch_gc()
pass
def model_target_device(m):
if lowvram.is_needed(m):
return devices.cpu
else:
return devices.device
return devices.device
def send_model_to_device(m):
lowvram.apply(m)
if not m.lowvram:
m.to(shared.device)
pass
def send_model_to_trash(m):
m.to(device="meta")
devices.torch_gc()
pass
def load_model(checkpoint_info=None, already_loaded_state_dict=None):
@@ -649,9 +563,14 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
timer = Timer()
if model_data.sd_model:
send_model_to_trash(model_data.sd_model)
if model_data.sd_model.filename == checkpoint_info.filename:
return model_data.sd_model
model_data.sd_model = None
devices.torch_gc()
model_data.loaded_sd_models = []
model_management.unload_all_models()
model_management.soft_empty_cache()
gc.collect()
timer.record("unload existing model")
@@ -660,58 +579,27 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
else:
state_dict = get_checkpoint_state_dict(checkpoint_info, timer)
checkpoint_config = sd_models_config.find_checkpoint_config(state_dict, checkpoint_info)
clip_is_included_into_sd = any(x for x in [sd1_clip_weight, sd2_clip_weight, sdxl_clip_weight, sdxl_refiner_clip_weight] if x in state_dict)
if shared.opts.sd_checkpoint_cache > 0:
# cache newly loaded model
checkpoints_loaded[checkpoint_info] = state_dict.copy()
timer.record("find config")
sd_model = forge_loader.load_model_for_a1111(timer=timer, checkpoint_info=checkpoint_info, state_dict=state_dict)
sd_model.filename = checkpoint_info.filename
sd_config = OmegaConf.load(checkpoint_config)
repair_config(sd_config)
del state_dict
timer.record("load config")
# clean up cache if limit is reached
while len(checkpoints_loaded) > shared.opts.sd_checkpoint_cache:
checkpoints_loaded.popitem(last=False)
print(f"Creating model from config: {checkpoint_config}")
shared.opts.data["sd_checkpoint_hash"] = checkpoint_info.sha256
sd_model = None
try:
with sd_disable_initialization.DisableInitialization(disable_clip=clip_is_included_into_sd or shared.cmd_opts.do_not_download_clip):
with sd_disable_initialization.InitializeOnMeta():
sd_model = instantiate_from_config(sd_config.model)
sd_vae.delete_base_vae()
sd_vae.clear_loaded_vae()
vae_file, vae_source = sd_vae.resolve_vae(checkpoint_info.filename).tuple()
sd_vae.load_vae(sd_model, vae_file, vae_source)
timer.record("load VAE")
except Exception as e:
errors.display(e, "creating model quickly", full_traceback=True)
if sd_model is None:
print('Failed to create model quickly; will retry using slow method.', file=sys.stderr)
with sd_disable_initialization.InitializeOnMeta():
sd_model = instantiate_from_config(sd_config.model)
sd_model.used_config = checkpoint_config
timer.record("create model")
if shared.cmd_opts.no_half:
weight_dtype_conversion = None
else:
weight_dtype_conversion = {
'first_stage_model': None,
'alphas_cumprod': None,
'': torch.float16,
}
with sd_disable_initialization.LoadStateDictOnMeta(state_dict, device=model_target_device(sd_model), weight_dtype_conversion=weight_dtype_conversion):
load_model_weights(sd_model, checkpoint_info, state_dict, timer)
timer.record("load weights from state dict")
send_model_to_device(sd_model)
timer.record("move model to device")
sd_hijack.model_hijack.hijack(sd_model)
timer.record("hijack")
sd_model.eval()
model_data.set_sd_model(sd_model)
model_data.was_loaded_at_least_once = True
@@ -723,7 +611,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
timer.record("scripts callbacks")
with devices.autocast(), torch.no_grad():
with torch.no_grad():
sd_model.cond_stage_model_empty_prompt = get_empty_cond(sd_model)
timer.record("calculate empty prompt")
@@ -734,132 +622,14 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
def reuse_model_from_already_loaded(sd_model, checkpoint_info, timer):
"""
Checks if the desired checkpoint from checkpoint_info is not already loaded in model_data.loaded_sd_models.
If it is loaded, returns that (moving it to GPU if necessary, and moving the currently loaded model to CPU if necessary).
If not, returns the model that can be used to load weights from checkpoint_info's file.
If no such model exists, returns None.
Additionally deletes loaded models that are over the limit set in settings (sd_checkpoints_limit).
"""
already_loaded = None
for i in reversed(range(len(model_data.loaded_sd_models))):
loaded_model = model_data.loaded_sd_models[i]
if loaded_model.sd_checkpoint_info.filename == checkpoint_info.filename:
already_loaded = loaded_model
continue
if len(model_data.loaded_sd_models) > shared.opts.sd_checkpoints_limit > 0:
print(f"Unloading model {len(model_data.loaded_sd_models)} over the limit of {shared.opts.sd_checkpoints_limit}: {loaded_model.sd_checkpoint_info.title}")
model_data.loaded_sd_models.pop()
send_model_to_trash(loaded_model)
timer.record("send model to trash")
if shared.opts.sd_checkpoints_keep_in_cpu:
send_model_to_cpu(sd_model)
timer.record("send model to cpu")
if already_loaded is not None:
send_model_to_device(already_loaded)
timer.record("send model to device")
model_data.set_sd_model(already_loaded, already_loaded=True)
if not SkipWritingToConfig.skip:
shared.opts.data["sd_model_checkpoint"] = already_loaded.sd_checkpoint_info.title
shared.opts.data["sd_checkpoint_hash"] = already_loaded.sd_checkpoint_info.sha256
print(f"Using already loaded model {already_loaded.sd_checkpoint_info.title}: done in {timer.summary()}")
sd_vae.reload_vae_weights(already_loaded)
return model_data.sd_model
elif shared.opts.sd_checkpoints_limit > 1 and len(model_data.loaded_sd_models) < shared.opts.sd_checkpoints_limit:
print(f"Loading model {checkpoint_info.title} ({len(model_data.loaded_sd_models) + 1} out of {shared.opts.sd_checkpoints_limit})")
model_data.sd_model = None
load_model(checkpoint_info)
return model_data.sd_model
elif len(model_data.loaded_sd_models) > 0:
sd_model = model_data.loaded_sd_models.pop()
model_data.sd_model = sd_model
sd_vae.base_vae = getattr(sd_model, "base_vae", None)
sd_vae.loaded_vae_file = getattr(sd_model, "loaded_vae_file", None)
sd_vae.checkpoint_info = sd_model.sd_checkpoint_info
print(f"Reusing loaded model {sd_model.sd_checkpoint_info.title} to load {checkpoint_info.title}")
return sd_model
else:
return None
pass
def reload_model_weights(sd_model=None, info=None, forced_reload=False):
checkpoint_info = info or select_checkpoint()
timer = Timer()
if not sd_model:
sd_model = model_data.sd_model
if sd_model is None: # previous model load failed
current_checkpoint_info = None
else:
current_checkpoint_info = sd_model.sd_checkpoint_info
if check_fp8(sd_model) != devices.fp8:
# load from state dict again to prevent extra numerical errors
forced_reload = True
elif sd_model.sd_model_checkpoint == checkpoint_info.filename and not forced_reload:
return sd_model
sd_model = reuse_model_from_already_loaded(sd_model, checkpoint_info, timer)
if not forced_reload and sd_model is not None and sd_model.sd_checkpoint_info.filename == checkpoint_info.filename:
return sd_model
if sd_model is not None:
sd_unet.apply_unet("None")
send_model_to_cpu(sd_model)
sd_hijack.model_hijack.undo_hijack(sd_model)
state_dict = get_checkpoint_state_dict(checkpoint_info, timer)
checkpoint_config = sd_models_config.find_checkpoint_config(state_dict, checkpoint_info)
timer.record("find config")
if sd_model is None or checkpoint_config != sd_model.used_config:
if sd_model is not None:
send_model_to_trash(sd_model)
load_model(checkpoint_info, already_loaded_state_dict=state_dict)
return model_data.sd_model
try:
load_model_weights(sd_model, checkpoint_info, state_dict, timer)
except Exception:
print("Failed to load checkpoint, restoring previous")
load_model_weights(sd_model, current_checkpoint_info, None, timer)
raise
finally:
sd_hijack.model_hijack.hijack(sd_model)
timer.record("hijack")
if not sd_model.lowvram:
sd_model.to(devices.device)
timer.record("move model to device")
script_callbacks.model_loaded_callback(sd_model)
timer.record("script callbacks")
print(f"Weights loaded in {timer.summary()}.")
model_data.set_sd_model(sd_model)
sd_unet.apply_unet()
return sd_model
return load_model(info)
def unload_model_weights(sd_model=None, info=None):
send_model_to_cpu(sd_model or shared.sd_model)
return sd_model
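
Before a new checkpoint is built, the previous one is now discarded through the backend rather than by juggling loaded_sd_models; multi-checkpoint caching is gone and reload_model_weights simply delegates to load_model. The unload sequence used by load_model, isolated from the diff:

```python
import gc
from ldm_patched.modules import model_management

model_management.unload_all_models()   # drop every managed model from the devices
model_management.soft_empty_cache()    # release cached allocator blocks
gc.collect()                           # then let Python reclaim whatever is left
```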

View File

@@ -8,8 +8,12 @@ import sgm.modules.diffusionmodules.discretizer
from modules import devices, shared, prompt_parser
from modules import torch_utils
import ldm_patched.modules.model_management as model_management
def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning | list[str]):
model_management.load_model_gpu(self.forge_objects.clip.patcher)
for embedder in self.conditioner.embedders:
embedder.ucg_rate = 0.0
@@ -18,7 +22,7 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch:
is_negative_prompt = getattr(batch, 'is_negative_prompt', False)
aesthetic_score = shared.opts.sdxl_refiner_low_aesthetic_score if is_negative_prompt else shared.opts.sdxl_refiner_high_aesthetic_score
devices_args = dict(device=devices.device, dtype=devices.dtype)
devices_args = dict(device=self.forge_objects.clip.patcher.current_device, dtype=model_management.text_encoder_dtype())
sdxl_conds = {
"txt": batch,
@@ -34,14 +38,11 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch:
return c
def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond):
sd = self.model.state_dict()
diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
if diffusion_model_input is not None:
if diffusion_model_input.shape[1] == 9:
x = torch.cat([x] + cond['c_concat'], dim=1)
def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond, *args, **kwargs):
if self.model.diffusion_model.in_channels == 9:
x = torch.cat([x] + cond['c_concat'], dim=1)
return self.model(x, t, cond)
return self.model(x, t, cond, *args, **kwargs)
def get_first_stage_encoding(self, x): # SDXL's encode_first_stage does everything so get_first_stage_encoding is just there for compatibility
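
apply_model now checks the UNet's input channel count directly to decide whether the inpainting condition must be concatenated, and forwards any extra args straight through. A small illustration of the 9-channel rule with a hypothetical stand-in for the diffusion model (the 4+4+1 channel breakdown is standard for SD inpainting UNets):

```python
import torch

class FakeUNet:
    in_channels = 9   # inpainting UNets: 4 latent + 4 masked-latent + 1 mask channels

x = torch.randn(1, 4, 64, 64)
cond = {"c_concat": [torch.randn(1, 5, 64, 64)]}   # masked-latent + mask channels

if FakeUNet.in_channels == 9:
    x = torch.cat([x] + cond["c_concat"], dim=1)
print(x.shape)  # torch.Size([1, 9, 64, 64])
```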

View File

@@ -6,6 +6,7 @@ import modules.shared as shared
from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback
from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback
from modules_forge import forge_sampler
def catenate_conds(conds):
@@ -66,7 +67,7 @@ class CFGDenoiser(torch.nn.Module):
def inner_model(self):
raise NotImplementedError()
def combine_denoised(self, x_out, conds_list, uncond, cond_scale):
def combine_denoised(self, x_out, conds_list, uncond, cond_scale, timestep, x_in, cond):
denoised_uncond = x_out[-uncond.shape[0]:]
denoised = torch.clone(denoised_uncond)
@@ -152,19 +153,13 @@ class CFGDenoiser(torch.nn.Module):
if state.interrupted or state.skipped:
raise sd_samplers_common.InterruptedException
if sd_samplers_common.apply_refiner(self):
if sd_samplers_common.apply_refiner(self, x):
cond = self.sampler.sampler_extra_args['cond']
uncond = self.sampler.sampler_extra_args['uncond']
# at self.image_cfg_scale == 1.0, results produced for the edit model are the same as with normal sampling,
# so is_edit_model is set to False to support AND composition.
is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0
conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
cond = prompt_parser.reconstruct_multicond_batch(cond, self.step)
uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)
assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
# If we use masks, blending between the denoised and original latent images occurs here.
def apply_blend(current_latent):
blended_latent = current_latent * self.nmask + self.init_latent * self.mask
@@ -181,113 +176,12 @@ class CFGDenoiser(torch.nn.Module):
if self.mask_before_denoising and self.mask is not None:
x = apply_blend(x)
batch_size = len(conds_list)
repeats = [len(conds_list[i]) for i in range(batch_size)]
if shared.sd_model.model.conditioning_key == "crossattn-adm":
image_uncond = torch.zeros_like(image_cond)
make_condition_dict = lambda c_crossattn, c_adm: {"c_crossattn": [c_crossattn], "c_adm": c_adm}
else:
image_uncond = image_cond
if isinstance(uncond, dict):
make_condition_dict = lambda c_crossattn, c_concat: {**c_crossattn, "c_concat": [c_concat]}
else:
make_condition_dict = lambda c_crossattn, c_concat: {"c_crossattn": [c_crossattn], "c_concat": [c_concat]}
if not is_edit_model:
x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond])
else:
x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x])
sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma])
image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)])
denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self)
denoiser_params = CFGDenoiserParams(x, image_cond, sigma, state.sampling_step, state.sampling_steps, cond, uncond, self)
cfg_denoiser_callback(denoiser_params)
x_in = denoiser_params.x
image_cond_in = denoiser_params.image_cond
sigma_in = denoiser_params.sigma
tensor = denoiser_params.text_cond
uncond = denoiser_params.text_uncond
skip_uncond = False
# alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
skip_uncond = True
x_in = x_in[:-batch_size]
sigma_in = sigma_in[:-batch_size]
self.padded_cond_uncond = False
self.padded_cond_uncond_v0 = False
if shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]:
tensor, uncond = self.pad_cond_uncond(tensor, uncond)
elif shared.opts.pad_cond_uncond_v0 and tensor.shape[1] != uncond.shape[1]:
tensor, uncond = self.pad_cond_uncond_v0(tensor, uncond)
if tensor.shape[1] == uncond.shape[1] or skip_uncond:
if is_edit_model:
cond_in = catenate_conds([tensor, uncond, uncond])
elif skip_uncond:
cond_in = tensor
else:
cond_in = catenate_conds([tensor, uncond])
if shared.opts.batch_cond_uncond:
x_out = self.inner_model(x_in, sigma_in, cond=make_condition_dict(cond_in, image_cond_in))
else:
x_out = torch.zeros_like(x_in)
for batch_offset in range(0, x_out.shape[0], batch_size):
a = batch_offset
b = a + batch_size
x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(subscript_cond(cond_in, a, b), image_cond_in[a:b]))
else:
x_out = torch.zeros_like(x_in)
batch_size = batch_size*2 if shared.opts.batch_cond_uncond else batch_size
for batch_offset in range(0, tensor.shape[0], batch_size):
a = batch_offset
b = min(a + batch_size, tensor.shape[0])
if not is_edit_model:
c_crossattn = subscript_cond(tensor, a, b)
else:
c_crossattn = torch.cat([tensor[a:b]], uncond)
x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(c_crossattn, image_cond_in[a:b]))
if not skip_uncond:
x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=make_condition_dict(uncond, image_cond_in[-uncond.shape[0]:]))
denoised_image_indexes = [x[0][0] for x in conds_list]
if skip_uncond:
fake_uncond = torch.cat([x_out[i:i+1] for i in denoised_image_indexes])
x_out = torch.cat([x_out, fake_uncond]) # we skipped uncond denoising, so we put the cond-denoised image where the uncond-denoised image should be
denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model)
cfg_denoised_callback(denoised_params)
devices.test_for_nans(x_out, "unet")
if is_edit_model:
denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)
elif skip_uncond:
denoised = self.combine_denoised(x_out, conds_list, uncond, 1.0)
else:
denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
# Blend in the original latents (after)
if not self.mask_before_denoising and self.mask is not None:
denoised = apply_blend(denoised)
self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
if opts.live_preview_content == "Prompt":
preview = self.sampler.last_latent
elif opts.live_preview_content == "Negative prompt":
preview = self.get_pred_x0(x_in[-uncond.shape[0]:], x_out[-uncond.shape[0]:], sigma)
else:
preview = self.get_pred_x0(torch.cat([x_in[i:i+1] for i in denoised_image_indexes]), torch.cat([denoised[i:i+1] for i in denoised_image_indexes]), sigma)
denoised = forge_sampler.forge_sample(self, denoiser_params=denoiser_params, cond_scale=cond_scale)
preview = self.sampler.last_latent = denoised
sd_samplers_common.store_latent(preview)
after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps)
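
Most of the removed block above (manual cond/uncond batching, padding, and the skip-uncond path) is now delegated to forge_sampler.forge_sample, which receives the full CFGDenoiserParams and the cond_scale. For reference, the guidance step that combine_denoised performs (its old four-argument form is visible above) is the standard classifier-free guidance update; a self-contained sketch with hypothetical tensors:

import torch

def combine_denoised_sketch(x_out, conds_list, uncond, cond_scale):
    # x_out stacks the cond-denoised images first and the uncond-denoised
    # images last, mirroring how x_in was assembled in the removed code.
    denoised_uncond = x_out[-uncond.shape[0]:]
    denoised = torch.clone(denoised_uncond)
    for i, conds in enumerate(conds_list):
        for cond_index, weight in conds:
            denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)
    return denoised

# One image, one prompt of weight 1.0 (made-up data):
x_out = torch.randn(2, 4, 8, 8)      # [cond result, uncond result]
uncond = torch.zeros(1, 77, 768)     # only its batch size matters here
print(combine_denoised_sketch(x_out, [[(0, 1.0)]], uncond, cond_scale=7.0).shape)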

View File

@@ -5,6 +5,7 @@ import torch
from PIL import Image
from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared, sd_models
from modules.shared import opts, state
from ldm_patched.modules import model_management
import k_diffusion.sampling
@@ -39,9 +40,7 @@ def samples_to_images_tensor(sample, approximation=None, model=None):
if approximation is None or (shared.state.interrupted and opts.live_preview_fast_interrupt):
approximation = approximation_indexes.get(opts.show_progress_type, 0)
from modules import lowvram
if approximation == 0 and lowvram.is_enabled(shared.sd_model) and not shared.opts.live_preview_allow_lowvram_full:
if approximation == 0:
approximation = 1
if approximation == 2:
@@ -54,8 +53,7 @@ def samples_to_images_tensor(sample, approximation=None, model=None):
else:
if model is None:
model = shared.sd_model
with devices.without_autocast(): # fixes an issue with unstable VAEs that are flaky even in fp32
x_sample = model.decode_first_stage(sample.to(model.first_stage_model.dtype))
x_sample = model.decode_first_stage(sample)
return x_sample
@@ -71,7 +69,6 @@ def single_sample_to_image(sample, approximation=None):
def decode_first_stage(model, x):
x = x.to(devices.dtype_vae)
approx_index = approximation_indexes.get(opts.sd_vae_decode_method, 0)
return samples_to_images_tensor(x, approx_index, model)
@@ -95,7 +92,6 @@ def images_tensor_to_samples(image, approximation=None, model=None):
else:
if model is None:
model = shared.sd_model
model.first_stage_model.to(devices.dtype_vae)
image = image.to(shared.device, dtype=devices.dtype_vae)
image = image * 2 - 1
@@ -155,7 +151,7 @@ def replace_torchsde_browinan():
replace_torchsde_browinan()
def apply_refiner(cfg_denoiser):
def apply_refiner(cfg_denoiser, x):
completed_ratio = cfg_denoiser.step / cfg_denoiser.total_steps
refiner_switch_at = cfg_denoiser.p.refiner_switch_at
refiner_checkpoint_info = cfg_denoiser.p.refiner_checkpoint_info
@@ -184,10 +180,17 @@ def apply_refiner(cfg_denoiser):
with sd_models.SkipWritingToConfig():
sd_models.reload_model_weights(info=refiner_checkpoint_info)
refiner = sd_models.model_data.get_sd_model()
devices.torch_gc()
cfg_denoiser.p.setup_conds()
cfg_denoiser.update_inner_model()
inference_memory = refiner.current_controlnet_required_memory
unet_patcher = refiner.forge_objects.unet
model_management.load_models_gpu(
[unet_patcher],
unet_patcher.memory_required([x.shape[0]] + list(x.shape[1:])) + inference_memory)
return True
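
The memory handling added here is the same pattern the samplers below use: estimate the VRAM the UNet forward pass needs for the incoming latent shape via unet_patcher.memory_required, add the memory the active ControlNets require, and hand both to model_management.load_models_gpu so enough room is freed before inference. A schematic sketch of that call, with a helper name and signature that are not part of this codebase:

from ldm_patched.modules import model_management

def ensure_unet_loaded(unet_patcher, x, inference_memory, batch_multiplier=1):
    # batch_multiplier is 2 when cond and uncond are denoised in a single batch,
    # as in the sampler hunks below; the refiner switch above uses 1.
    shape = [x.shape[0] * batch_multiplier] + list(x.shape[1:])
    required = unet_patcher.memory_required(shape) + inference_memory
    model_management.load_models_gpu([unet_patcher], required)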

View File

@@ -7,6 +7,8 @@ from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback
from modules.shared import opts
import modules.shared as shared
import ldm_patched.modules.model_management
samplers_k_diffusion = [
('DPM++ 2M Karras', 'sample_dpmpp_2m', ['k_dpmpp_2m_ka'], {'scheduler': 'karras'}),
@@ -139,11 +141,21 @@ class KDiffusionSampler(sd_samplers_common.Sampler):
return sigmas
def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
inference_memory = self.model_wrap.inner_model.current_controlnet_required_memory
unet_patcher = self.model_wrap.inner_model.forge_objects.unet
ldm_patched.modules.model_management.load_models_gpu(
[unet_patcher],
unet_patcher.memory_required([x.shape[0] * 2] + list(x.shape[1:])) + inference_memory)
self.model_wrap.log_sigmas = self.model_wrap.log_sigmas.to(unet_patcher.current_device)
self.model_wrap.sigmas = self.model_wrap.sigmas.to(unet_patcher.current_device)
steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)
sigmas = self.get_sigmas(p, steps)
sigma_sched = sigmas[steps - t_enc - 1:]
x = x.to(noise)
xi = x + noise * sigma_sched[0]
if opts.img2img_extra_noise > 0:
@@ -192,6 +204,15 @@ class KDiffusionSampler(sd_samplers_common.Sampler):
return samples
def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
inference_memory = self.model_wrap.inner_model.current_controlnet_required_memory
unet_patcher = self.model_wrap.inner_model.forge_objects.unet
ldm_patched.modules.model_management.load_models_gpu(
[unet_patcher],
unet_patcher.memory_required([x.shape[0] * 2] + list(x.shape[1:])) + inference_memory)
self.model_wrap.log_sigmas = self.model_wrap.log_sigmas.to(unet_patcher.current_device)
self.model_wrap.sigmas = self.model_wrap.sigmas.to(unet_patcher.current_device)
steps = steps or p.steps
sigmas = self.get_sigmas(p, steps)
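
Besides preloading the UNet, sample_img2img also moves log_sigmas and sigmas onto the patcher's current device so the schedule lives where the model does. The starting point itself is the k-diffusion img2img construction shown above: truncate the sigma schedule to the remaining steps and add noise scaled by its first sigma. A toy illustration with made-up sigmas in place of the real Karras schedule:

import torch

steps, t_enc = 20, 15                            # hypothetical img2img step split
sigmas = torch.linspace(10.0, 0.0, steps + 1)    # stand-in for get_sigmas(p, steps)
sigma_sched = sigmas[steps - t_enc - 1:]         # keep only the tail of the schedule

x = torch.randn(1, 4, 64, 64)                    # encoded init-image latent
noise = torch.randn_like(x)
xi = x + noise * sigma_sched[0]                  # noisy starting point for sampling
print(sigma_sched.shape, xi.shape)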

View File

@@ -34,7 +34,7 @@ class LCMCompVisDenoiser(DiscreteEpsDDPMDenoiser):
def sigma_to_t(self, sigma, quantize=None):
log_sigma = sigma.log()
dists = log_sigma - self.log_sigmas[:, None]
dists = log_sigma - self.log_sigmas.to(sigma)[:, None]
return dists.abs().argmin(dim=0).view(sigma.shape) * self.skip_steps + (self.skip_steps - 1)
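
The one-line fix moves log_sigmas onto sigma's device and dtype before the distance computation. The lookup itself maps a continuous sigma to the index of the nearest discrete log-sigma and then rescales that index by the LCM skip factor; a self-contained sketch with toy values:

import torch

log_sigmas = torch.linspace(-4.0, 2.0, 1000)     # toy discrete noise schedule
skip_steps = 1000 // 50                          # hypothetical LCM skip factor

def sigma_to_t(sigma, log_sigmas, skip_steps):
    log_sigma = sigma.log()
    # distance from every discrete log-sigma to each query sigma
    dists = log_sigma - log_sigmas.to(sigma)[:, None]
    return dists.abs().argmin(dim=0).view(sigma.shape) * skip_steps + (skip_steps - 1)

print(sigma_to_t(torch.tensor([0.5, 3.0]), log_sigmas, skip_steps))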

View File

@@ -7,6 +7,8 @@ from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback
from modules.shared import opts
import modules.shared as shared
import ldm_patched.modules.model_management
samplers_timesteps = [
('DDIM', sd_samplers_timesteps_impl.ddim, ['ddim'], {}),
@@ -54,6 +56,7 @@ class CFGDenoiserTimesteps(CFGDenoiser):
def get_pred_x0(self, x_in, x_out, sigma):
ts = sigma.to(dtype=int)
self.alphas = self.alphas.to(ts.device)
a_t = self.alphas[ts][:, None, None, None]
sqrt_one_minus_at = (1 - a_t).sqrt()
@@ -95,6 +98,14 @@ class CompVisSampler(sd_samplers_common.Sampler):
return timesteps
def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
inference_memory = self.model_wrap.inner_model.current_controlnet_required_memory
unet_patcher = self.model_wrap.inner_model.forge_objects.unet
ldm_patched.modules.model_management.load_models_gpu(
[unet_patcher],
unet_patcher.memory_required([x.shape[0] * 2] + list(x.shape[1:])) + inference_memory)
self.model_wrap.inner_model.alphas_cumprod = self.model_wrap.inner_model.alphas_cumprod.to(unet_patcher.current_device)
steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)
timesteps = self.get_timesteps(p, steps)
@@ -104,7 +115,7 @@ class CompVisSampler(sd_samplers_common.Sampler):
sqrt_alpha_cumprod = torch.sqrt(alphas_cumprod[timesteps[t_enc]])
sqrt_one_minus_alpha_cumprod = torch.sqrt(1 - alphas_cumprod[timesteps[t_enc]])
xi = x * sqrt_alpha_cumprod + noise * sqrt_one_minus_alpha_cumprod
xi = x.to(noise) * sqrt_alpha_cumprod + noise * sqrt_one_minus_alpha_cumprod
if opts.img2img_extra_noise > 0:
p.extra_generation_params["Extra noise"] = opts.img2img_extra_noise
@@ -138,6 +149,14 @@ class CompVisSampler(sd_samplers_common.Sampler):
return samples
def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
inference_memory = self.model_wrap.inner_model.current_controlnet_required_memory
unet_patcher = self.model_wrap.inner_model.forge_objects.unet
ldm_patched.modules.model_management.load_models_gpu(
[unet_patcher],
unet_patcher.memory_required([x.shape[0] * 2] + list(x.shape[1:])) + inference_memory)
self.model_wrap.inner_model.alphas_cumprod = self.model_wrap.inner_model.alphas_cumprod.to(unet_patcher.current_device)
steps = steps or p.steps
timesteps = self.get_timesteps(p, steps)
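
In the timestep-based samplers the img2img starting point is the standard DDPM forward process: the init latent is scaled by sqrt(alphas_cumprod[t]) and mixed with noise scaled by sqrt(1 - alphas_cumprod[t]), with x first cast onto the noise tensor's device as in the hunk above. A toy sketch with a made-up alphas_cumprod schedule:

import torch

alphas_cumprod = torch.linspace(0.999, 0.01, 1000)   # stand-in schedule
t = 600                                               # hypothetical encode timestep

x = torch.randn(1, 4, 64, 64)                         # init-image latent
noise = torch.randn_like(x)
sqrt_alpha_cumprod = torch.sqrt(alphas_cumprod[t])
sqrt_one_minus_alpha_cumprod = torch.sqrt(1 - alphas_cumprod[t])
xi = x.to(noise) * sqrt_alpha_cumprod + noise * sqrt_one_minus_alpha_cumprod
print(xi.shape)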

View File

@@ -237,7 +237,6 @@ def load_vae(model, vae_file=None, vae_source="from unknown source"):
# don't call this from outside
def _load_vae_dict(model, vae_dict_1):
model.first_stage_model.load_state_dict(vae_dict_1)
model.first_stage_model.to(devices.dtype_vae)
def clear_loaded_vae():
@@ -263,20 +262,12 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified):
if loaded_vae_file == vae_file:
return
if sd_model.lowvram:
lowvram.send_everything_to_cpu()
else:
sd_model.to(devices.cpu)
sd_hijack.model_hijack.undo_hijack(sd_model)
load_vae(sd_model, vae_file, vae_source)
sd_hijack.model_hijack.hijack(sd_model)
if not sd_model.lowvram:
sd_model.to(devices.device)
script_callbacks.model_loaded_callback(sd_model)
print("VAE weights loaded.")

View File

@@ -24,13 +24,6 @@ def initialize():
pass
from modules import devices
devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = \
(devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'esrgan', 'codeformer'])
devices.dtype = torch.float32 if cmd_opts.no_half else torch.float16
devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16
devices.dtype_inference = torch.float32 if cmd_opts.precision == 'full' else devices.dtype
shared.device = devices.device
shared.weight_load_location = None if cmd_opts.lowram else "cpu"

View File

@@ -299,7 +299,7 @@ options_templates.update(options_section(('ui_alternatives', "UI alternatives",
options_templates.update(options_section(('ui', "User interface", "ui"), {
"localization": OptionInfo("None", "Localization", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)).needs_reload_ui(),
"quicksettings_list": OptionInfo(["sd_model_checkpoint"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_reload_ui(),
"quicksettings_list": OptionInfo(["sd_model_checkpoint", "sd_vae", "CLIP_stop_at_last_layers"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_reload_ui(),
"ui_tab_order": OptionInfo([], "UI tab order", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
"hidden_tabs": OptionInfo([], "Hidden UI tabs", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
"ui_reorder_list": OptionInfo([], "UI item order for txt2img/img2img tabs", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_reload_ui(),

View File

@@ -167,9 +167,15 @@ def update_token_counter(text, steps, *, is_positive=True):
# messages related to it in console
prompt_schedules = [[[steps, text]]]
try:
cond_stage_model = sd_models.model_data.sd_model.cond_stage_model
assert cond_stage_model is not None
except Exception:
return f"<span class='gr-box gr-text-input'>?/?</span>"
flat_prompts = reduce(lambda list1, list2: list1+list2, prompt_schedules)
prompts = [prompt_text for step, prompt_text in flat_prompts]
token_count, max_length = max([model_hijack.get_prompt_lengths(prompt) for prompt in prompts], key=lambda args: args[0])
token_count, max_length = max([model_hijack.get_prompt_lengths(prompt, cond_stage_model) for prompt in prompts], key=lambda args: args[0])
return f"<span class='gr-box gr-text-input'>{token_count}/{max_length}</span>"

View File

@@ -294,7 +294,6 @@ class UiSettings:
for _i, k, _item in self.quicksettings_list:
component = self.component_dict[k]
info = opts.data_labels[k]
if isinstance(component, gr.Textbox):
methods = [component.submit, component.blur]
@@ -308,7 +307,7 @@ class UiSettings:
fn=lambda value, k=k: self.run_settings_single(value, key=k),
inputs=[component],
outputs=[component, self.text_settings],
show_progress=info.refresh is not None,
show_progress=False,
)
button_set_checkpoint = gr.Button('Change checkpoint', elem_id='change_checkpoint', visible=False)

View File

@@ -6,6 +6,13 @@ from PIL import Image
import modules.shared
from modules import modelloader, shared
from ldm_patched.modules import model_management
def prepare_free_memory():
model_management.free_memory(memory_required=1024*1024*3, device=model_management.get_torch_device())
print('Upscale script freed memory successfully.')
LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
NEAREST = (Image.Resampling.NEAREST if hasattr(Image, 'Resampling') else Image.NEAREST)
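
prepare_free_memory asks the ldm_patched model manager to make sure a small amount of VRAM (3 MiB here) is free on the active torch device, unloading idle cached models if necessary, before an upscale model runs. A hedged usage sketch with a hypothetical upscaler subclass (the class name and body are illustrative, not part of this diff):

from modules.upscaler import Upscaler, prepare_free_memory

class UpscalerExample(Upscaler):
    def do_upscale(self, img, selected_model):
        prepare_free_memory()   # evict idle models before allocating the upscaler
        # ... load the upscale model and run it on `img` here ...
        return img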