UNet from Scratch

The backend rewrite is now about 50% finished.
Estimated completion is in about 72 hours.
After that, many newer features will land.
layerdiffusion
2024-08-01 21:19:41 -07:00
parent e3522c8919
commit bc9977a305
20 changed files with 1393 additions and 56 deletions


@@ -6,8 +6,10 @@ from diffusers import DiffusionPipeline
from transformers import modeling_utils
from backend.state_dict import try_filter_state_dict, transformers_convert, load_state_dict, state_dict_key_replace
from backend.operations import using_forge_operations
from backend.nn.autoencoder_kl import IntegratedAutoencoderKL
from backend.nn.vae import IntegratedAutoencoderKL
from backend.nn.clip import IntegratedCLIP, CLIPTextConfig
from backend.nn.unet import IntegratedUNet2DConditionModel
dir_path = os.path.dirname(__file__)
@@ -54,6 +56,15 @@ def load_component(component_name, lib_name, cls_name, repo_path, state_dict):
        load_state_dict(model, sd, ignore_errors=['text_projection', 'logit_scale',
                                                  'transformer.text_model.embeddings.position_ids'])
        return model

    if cls_name == 'UNet2DConditionModel':
        # New in this commit: build the integrated UNet from its config under Forge's
        # patched operations, then load the 'model.diffusion_model.' weights into it.
        sd = try_filter_state_dict(state_dict, ['model.diffusion_model.'])
        config = IntegratedUNet2DConditionModel.load_config(config_path)

        with using_forge_operations():
            model = IntegratedUNet2DConditionModel.from_config(config)

        load_state_dict(model, sd)
        return model

    print(f'Skipped: {component_name} = {lib_name}.{cls_name}')
    return None


@@ -0,0 +1,2 @@
# will rework soon
from ldm_patched.modules.model_management import *


@@ -0,0 +1,54 @@
import torch

from backend import memory_management
from backend.modules.k_prediction import k_prediction_from_diffusers_scheduler


class KModel(torch.nn.Module):
    def __init__(self, huggingface_components, storage_dtype, computation_dtype):
        super().__init__()

        self.storage_dtype = storage_dtype
        self.computation_dtype = computation_dtype

        self.diffusion_model = huggingface_components['unet']
        self.prediction = k_prediction_from_diffusers_scheduler(huggingface_components['scheduler'])

    def apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
        # The sampler passes sigma in place of a discrete timestep; scale the input,
        # run the UNet in the computation dtype, then convert the raw output back to
        # a denoised latent according to the prediction type.
        sigma = t
        xc = self.prediction.calculate_input(sigma, x)

        if c_concat is not None:
            xc = torch.cat([xc] + [c_concat], dim=1)

        context = c_crossattn
        dtype = self.computation_dtype

        xc = xc.to(dtype)
        t = self.prediction.timestep(t).float()
        context = context.to(dtype)

        extra_conds = {}
        for o in kwargs:
            extra = kwargs[o]
            if hasattr(extra, "dtype"):
                if extra.dtype != torch.int and extra.dtype != torch.long:
                    extra = extra.to(dtype)
            extra_conds[o] = extra

        model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds).float()
        return self.prediction.calculate_denoised(sigma, model_output, x)

    def memory_required(self, input_shape):
        # Rough upper bound on memory for one forward pass, proportional to
        # batch * height * width of the latent and the bytes per element.
        area = input_shape[0] * input_shape[2] * input_shape[3]
        dtype_size = memory_management.dtype_size(self.computation_dtype)

        scaler = 1.28

        # TODO: Consider these again
        # if ldm_patched.modules.model_management.xformers_enabled() or ldm_patched.modules.model_management.pytorch_attention_flash_attention():
        #     scaler = 1.28
        # else:
        #     scaler = 1.65
        # if ldm_patched.ldm.modules.attention._ATTN_PRECISION == "fp32":
        #     dtype_size = 4

        return scaler * area * dtype_size * 16384
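For a rough sense of scale (illustrative numbers only, not part of this commit): for a single 1024x1024 SDXL-sized image the latent input is (1, 4, 128, 128), so the estimate above works out to roughly 0.64 GiB in fp16.

# Worked example of memory_required(), assuming an fp16 computation dtype.
input_shape = (1, 4, 128, 128)                           # (B, C, H, W) latent for a 1024x1024 image
area = input_shape[0] * input_shape[2] * input_shape[3]  # 16384
dtype_size = 2                                           # bytes per fp16 element
print(1.28 * area * dtype_size * 16384)                  # ~6.9e8 bytes, roughly 0.64 GiB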


@@ -0,0 +1,266 @@
import math
import torch
import numpy as np


def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return np.array(betas)


def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
    if schedule == "linear":
        betas = (
            torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
        )
    elif schedule == "cosine":
        timesteps = (
            torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
        )
        alphas = timesteps / (1 + cosine_s) * np.pi / 2
        alphas = torch.cos(alphas).pow(2)
        alphas = alphas / alphas[0]
        betas = 1 - alphas[1:] / alphas[:-1]
        betas = torch.clamp(betas, min=0, max=0.999)
    elif schedule == "sqrt_linear":
        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
    elif schedule == "sqrt":
        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
    else:
        raise ValueError(f"schedule '{schedule}' unknown.")
    return betas


def time_snr_shift(alpha, t):
    if alpha == 1.0:
        return t
    return alpha * t / (1 + (alpha - 1) * t)


def flux_time_shift(mu, sigma, t):
    return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
class AbstractPrediction(torch.nn.Module):
    def __init__(self, sigma_data=1.0, prediction_type='epsilon'):
        super().__init__()
        self.sigma_data = sigma_data
        self.prediction_type = prediction_type
        assert self.prediction_type in ['epsilon', 'const', 'v_prediction', 'edm']

    def calculate_input(self, sigma, noise):
        # Scale the noised latent before it is fed to the UNet.
        if self.prediction_type == 'const':
            return noise
        else:
            sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
            return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5

    def calculate_denoised(self, sigma, model_output, model_input):
        # Turn the raw UNet output into a denoised latent, depending on what the model predicts.
        sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
        if self.prediction_type == 'v_prediction':
            return model_input * self.sigma_data ** 2 / (
                    sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (
                    sigma ** 2 + self.sigma_data ** 2) ** 0.5
        elif self.prediction_type == 'edm':
            return model_input * self.sigma_data ** 2 / (
                    sigma ** 2 + self.sigma_data ** 2) + model_output * sigma * self.sigma_data / (
                    sigma ** 2 + self.sigma_data ** 2) ** 0.5
        else:
            # epsilon / const: denoised = input - sigma * model_output
            return model_input - model_output * sigma

    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
        if self.prediction_type == 'const':
            return sigma * noise + (1.0 - sigma) * latent_image
        else:
            if max_denoise:
                noise = noise * torch.sqrt(1.0 + sigma ** 2.0)
            else:
                noise = noise * sigma
            noise += latent_image
            return noise

    def inverse_noise_scaling(self, sigma, latent):
        if self.prediction_type == 'const':
            return latent / (1.0 - sigma)
        else:
            return latent
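As a minimal sketch of the epsilon branch above (illustrative values, assuming the class exactly as defined here): calculate_denoised simply inverts the forward noising x = x0 + sigma * eps, so a perfect epsilon estimate recovers the clean latent exactly.

import torch

pred = AbstractPrediction(sigma_data=1.0, prediction_type='epsilon')

x0 = torch.randn(1, 4, 64, 64)                       # clean latent
eps = torch.randn_like(x0)                           # noise the model is supposed to predict
sigma = torch.tensor([5.0])

x = x0 + sigma.view(-1, 1, 1, 1) * eps               # noised latent at this sigma
xc = pred.calculate_input(sigma, x)                  # x / sqrt(sigma^2 + sigma_data^2), what the UNet sees
denoised = pred.calculate_denoised(sigma, eps, x)    # x - sigma * eps
assert torch.allclose(denoised, x0, atol=1e-5)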
class Prediction(AbstractPrediction):
    def __init__(self, sigma_data=1.0, prediction_type='eps', beta_schedule='linear', linear_start=0.00085,
                 linear_end=0.012, timesteps=1000):
        super().__init__(sigma_data=sigma_data, prediction_type=prediction_type)
        self.register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=timesteps,
                               linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3)

    def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
        if given_betas is not None:
            betas = given_betas
        else:
            betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end,
                                       cosine_s=cosine_s)

        alphas = 1. - betas
        alphas_cumprod = torch.cumprod(alphas, dim=0)

        # Discrete DDPM schedule expressed as k-diffusion sigmas.
        sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
        self.set_sigmas(sigmas)

    def set_sigmas(self, sigmas):
        self.register_buffer('sigmas', sigmas.float())
        self.register_buffer('log_sigmas', sigmas.log().float())

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        # Nearest discrete timestep for a given sigma.
        log_sigma = sigma.log()
        dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None]
        return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device)

    def sigma(self, timestep):
        # Interpolate in log-sigma between neighbouring discrete timesteps.
        t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1))
        low_idx = t.floor().long()
        high_idx = t.ceil().long()
        w = t.frac()
        log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx]
        return log_sigma.exp().to(timestep.device)

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 999999999.9
        if percent >= 1.0:
            return 0.0
        percent = 1.0 - percent
        return self.sigma(torch.tensor(percent * 999.0)).item()
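A short usage sketch of the discrete schedule (SD1.5-style settings, shown for illustration; in practice the factory at the end of this file always passes the scheduler's own values): sigma and timestep convert back and forth, and percent_to_sigma is what the patchers elsewhere in this commit use to turn a 0-1 start/end percent into a sigma threshold.

import torch

pred = Prediction(prediction_type='epsilon', beta_schedule='linear',
                  linear_start=0.00085, linear_end=0.012, timesteps=1000)

print(float(pred.sigma_min), float(pred.sigma_max))   # ~0.03 and ~14.6 for these settings

sigma = pred.sigma(torch.tensor([999.0]))             # sigma at the last trained timestep
print(pred.timestep(sigma))                           # tensor([999]), the nearest discrete index

print(pred.percent_to_sigma(0.0))                     # huge sentinel: active from the very first step
print(pred.percent_to_sigma(0.5))                     # sigma at the middle of the schedule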
class PredictionEDM(Prediction):
    def timestep(self, sigma):
        return 0.25 * sigma.log()

    def sigma(self, timestep):
        return (timestep / 0.25).exp()


class PredictionContinuousEDM(AbstractPrediction):
    def __init__(self, sigma_data=1.0, prediction_type='eps', sigma_min=0.002, sigma_max=120.0):
        super().__init__(sigma_data=sigma_data, prediction_type=prediction_type)
        self.set_parameters(sigma_min, sigma_max, sigma_data)

    def set_parameters(self, sigma_min, sigma_max, sigma_data):
        self.sigma_data = sigma_data
        sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp()
        self.register_buffer('sigmas', sigmas)
        self.register_buffer('log_sigmas', sigmas.log())

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        return 0.25 * sigma.log()

    def sigma(self, timestep):
        return (timestep / 0.25).exp()

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 999999999.9
        if percent >= 1.0:
            return 0.0
        percent = 1.0 - percent

        log_sigma_min = math.log(self.sigma_min)
        return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min)


class PredictionContinuousV(PredictionContinuousEDM):
    def timestep(self, sigma):
        return sigma.atan() / math.pi * 2

    def sigma(self, timestep):
        return (timestep * math.pi / 2).tan()


class PredictionFlow(AbstractPrediction):
    def __init__(self, sigma_data=1.0, prediction_type='eps', shift=1.0, multiplier=1000, timesteps=1000):
        super().__init__(sigma_data=sigma_data, prediction_type=prediction_type)
        self.shift = shift
        self.multiplier = multiplier
        ts = self.sigma((torch.arange(1, timesteps + 1, 1) / timesteps) * multiplier)
        self.register_buffer('sigmas', ts)

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        return sigma * self.multiplier

    def sigma(self, timestep):
        return time_snr_shift(self.shift, timestep / self.multiplier)

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 1.0
        if percent >= 1.0:
            return 0.0
        return 1.0 - percent


class PredictionFlux(AbstractPrediction):
    def __init__(self, sigma_data=1.0, prediction_type='eps', shift=1.0, timesteps=10000):
        super().__init__(sigma_data=sigma_data, prediction_type=prediction_type)
        self.shift = shift
        ts = self.sigma((torch.arange(1, timesteps + 1, 1) / timesteps))
        self.register_buffer('sigmas', ts)

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        return sigma

    def sigma(self, timestep):
        return flux_time_shift(self.shift, 1.0, timestep)

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 1.0
        if percent >= 1.0:
            return 0.0
        return 1.0 - percent


def k_prediction_from_diffusers_scheduler(scheduler):
    if hasattr(scheduler.config, 'prediction_type') and scheduler.config.prediction_type in ["epsilon", "v_prediction"]:
        if scheduler.config.beta_schedule == "scaled_linear":
            return Prediction(sigma_data=1.0, prediction_type=scheduler.config.prediction_type, beta_schedule='linear',
                              linear_start=scheduler.config.beta_start, linear_end=scheduler.config.beta_end,
                              timesteps=scheduler.config.num_train_timesteps)

    raise NotImplementedError(f'Failed to recognize {scheduler}')
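Usage sketch for the factory (hedged: exactly which scheduler object Forge hands in depends on the checkpoint's bundled configuration, but a standard SD1.5-style diffusers scheduler satisfies the recognized branch):

from diffusers import DDPMScheduler

scheduler = DDPMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    prediction_type="epsilon",
)

prediction = k_prediction_from_diffusers_scheduler(scheduler)
print(type(prediction).__name__, float(prediction.sigma_max))   # Prediction ~14.6

Anything the factory does not recognize (for example flow-matching schedulers) currently raises NotImplementedError, which is consistent with the commit message saying the rewrite is only about half finished.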

backend/nn/unet.py (new file, 1008 lines): file diff suppressed because it is too large.


@@ -381,7 +381,7 @@ class IntegratedAutoencoderKL(nn.Module, ConfigMixin):
        norm_num_groups: int = 32,
        sample_size: int = 32,
        scaling_factor: float = 0.18215,
        shift_factor: Optional[float] = None,
        shift_factor: Optional[float] = 0.0,
        latents_mean: Optional[Tuple[float]] = None,
        latents_std: Optional[Tuple[float]] = None,
        force_upcast: float = True,
@@ -403,6 +403,9 @@ class IntegratedAutoencoderKL(nn.Module, ConfigMixin):
        self.scaling_factor = scaling_factor
        self.shift_factor = shift_factor

        if not isinstance(self.shift_factor, float):
            self.shift_factor = 0.0

    def encode(self, x, regulation=None):
        z = self.encoder(x)
        z = self.quant_conv(z)
@@ -416,3 +419,9 @@ class IntegratedAutoencoderKL(nn.Module, ConfigMixin):
        z = self.post_quant_conv(z)
        x = self.decoder(z)
        return x

    def process_in(self, latent):
        return (latent - self.shift_factor) * self.scaling_factor

    def process_out(self, latent):
        return (latent / self.scaling_factor) + self.shift_factor
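For a concrete sense of the new process_in/process_out helpers (values illustrative: 0.18215 is the familiar SD1.x scaling factor, and as above the shift defaults to 0.0), the two are exact inverses, so a latent survives the round trip unchanged:

import torch

scaling_factor, shift_factor = 0.18215, 0.0           # SD1.x-style VAE; SDXL uses 0.13025

z = torch.randn(1, 4, 64, 64)                          # raw encoder output
z_in = (z - shift_factor) * scaling_factor             # process_in: the latent space the UNet is trained on
z_out = (z_in / scaling_factor) + shift_factor         # process_out: back to VAE space for decoding
assert torch.allclose(z, z_out, atol=1e-6)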


@@ -46,7 +46,7 @@ class PreprocessorInpaintOnly(PreprocessorInpaint):
# This is a powerful VAE with integrated memory management, bf16, and tiled fallback.
latent_image = vae.encode(self.image.movedim(1, -1))
latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image)
latent_image = process.sd_model.forge_objects.vae.first_stage_model.process_in(latent_image)
B, C, H, W = latent_image.shape
@@ -154,7 +154,7 @@ class PreprocessorInpaintLama(PreprocessorInpaintOnly):
def process_before_every_sampling(self, process, cond, mask, *args, **kwargs):
cond, mask = super().process_before_every_sampling(process, cond, mask, *args, **kwargs)
sigma_max = process.sd_model.forge_objects.unet.model.model_sampling.sigma_max
sigma_max = process.sd_model.forge_objects.unet.model.prediction.sigma_max
original_noise = kwargs['noise']
process.modified_noise = original_noise + self.latent.to(original_noise) / sigma_max.to(original_noise)
return cond, mask


@@ -61,14 +61,14 @@ class PreprocessorReference(Preprocessor):
# This is a powerful VAE with integrated memory management, bf16, and tiled fallback.
latent_image = vae.encode(cond.movedim(1, -1))
latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image)
latent_image = process.sd_model.forge_objects.vae.first_stage_model.process_in(latent_image)
gen_seed = process.seeds[0] + 1
gen_cpu = torch.Generator().manual_seed(gen_seed)
unet = process.sd_model.forge_objects.unet.clone()
sigma_max = unet.model.model_sampling.percent_to_sigma(start_percent)
sigma_min = unet.model.model_sampling.percent_to_sigma(end_percent)
sigma_max = unet.model.prediction.percent_to_sigma(start_percent)
sigma_min = unet.model.prediction.percent_to_sigma(end_percent)
self.recorded_attn1 = {}
self.recorded_h = {}


@@ -24,7 +24,7 @@ class PreprocessorTile(Preprocessor):
# This is a powerful VAE with integrated memory management, bf16, and tiled fallback.
latent_image = vae.encode(cond.movedim(1, -1))
latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image)
latent_image = process.sd_model.forge_objects.vae.first_stage_model.process_in(latent_image)
self.latent = latent_image
return self.latent
@@ -43,7 +43,7 @@ class PreprocessorTileColorFix(PreprocessorTile):
latent = self.register_latent(process, cond)
unet = process.sd_model.forge_objects.unet.clone()
sigma_data = process.sd_model.forge_objects.unet.model.model_sampling.sigma_data
sigma_data = process.sd_model.forge_objects.unet.model.prediction.sigma_data
if getattr(process, 'is_hr_pass', False):
k = int(self.variation * 2)


@@ -38,7 +38,7 @@ class DynamicThresholdingNode:
cond = input - args["cond"]
uncond = input - args["uncond"]
cond_scale = args["cond_scale"]
time_step = model.model.model_sampling.timestep(args["sigma"])
time_step = model.model.prediction.timestep(args["sigma"])
time_step = time_step[0].item()
dynamic_thresh.step = 999 - time_step


@@ -76,7 +76,7 @@ class FooocusInpaintPatcher(ControlModelPatcher):
vae = process.sd_model.forge_objects.vae
latent_image = vae.encode(cond_original.movedim(1, -1))
latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image)
latent_image = process.sd_model.forge_objects.vae.first_stage_model.process_in(latent_image)
latent_mask = torch.nn.functional.max_pool2d(mask_original, (8, 8)).round().to(cond)
feed = torch.cat([
latent_mask.to(device=torch.device('cpu'), dtype=torch.float32),
@@ -102,8 +102,8 @@ class FooocusInpaintPatcher(ControlModelPatcher):
if not_patched_count > 0:
print(f"[Fooocus Patch Loader] Failed to load {not_patched_count} keys")
sigma_start = unet.model.model_sampling.percent_to_sigma(self.start_percent)
sigma_end = unet.model.model_sampling.percent_to_sigma(self.end_percent)
sigma_start = unet.model.prediction.percent_to_sigma(self.start_percent)
sigma_end = unet.model.prediction.percent_to_sigma(self.end_percent)
def conditioning_modifier(model, x, timestep, uncond, cond, cond_scale, model_options, seed):
if timestep > sigma_start or timestep < sigma_end:


@@ -760,8 +760,8 @@ class IPAdapterApply:
if attn_mask is not None:
attn_mask = attn_mask.to(self.device)
sigma_start = model.model.model_sampling.percent_to_sigma(start_at)
sigma_end = model.model.model_sampling.percent_to_sigma(end_at)
sigma_start = model.model.prediction.percent_to_sigma(start_at)
sigma_end = model.model.prediction.percent_to_sigma(end_at)
patch_kwargs = {
"number": 0,


@@ -919,10 +919,10 @@ class ModelSamplerLatentMegaModifier:
cond = args["cond"]
uncond = args["uncond"]
cond_scale = args["cond_scale"]
timestep = model.model.model_sampling.timestep(args["timestep"])
timestep = model.model.prediction.timestep(args["timestep"])
sigma = args["sigma"]
sigma = sigma.view(sigma.shape[:1] + (1,) * (cond.ndim - 1))
#print(model.model.model_sampling.timestep(timestep))
#print(model.model.prediction.timestep(timestep))
x = x_input / (sigma * sigma + 1.0)
cond = ((x - (x_input - cond)) * (sigma ** 2 + 1.0) ** 0.5) / (sigma)


@@ -285,7 +285,7 @@ class ControlNet(ControlBase):
def pre_run(self, model, percent_to_timestep_function):
super().pre_run(model, percent_to_timestep_function)
self.model_sampling_current = model.model_sampling
self.model_sampling_current = model.prediction
def cleanup(self):
self.model_sampling_current = None


@@ -97,7 +97,7 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip, filen
return model, clip
from backend.clip import JointCLIP, JointTokenizer
from backend.modules.clip import JointCLIP, JointTokenizer
class CLIP:


@@ -10,7 +10,7 @@ sd_xl_repo_configs_path = os.path.join(paths.paths['Stable Diffusion XL'], "conf
config_default = shared.sd_default_config
config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
# config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml")
config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml")
config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml")
@@ -95,10 +95,10 @@ def guess_model_config_from_state_dict(sd, filename):
if sd2_cond_proj_weight is not None and sd2_cond_proj_weight.shape[1] == 1024:
if diffusion_model_input.shape[1] == 9:
return config_sd2_inpainting
elif is_using_v_parameterization_for_sd2(sd):
return config_sd2v
# elif is_using_v_parameterization_for_sd2(sd):
# return config_sd2v
else:
return config_sd2
return config_sd2v
if diffusion_model_input is not None:
if diffusion_model_input.shape[1] == 9:


@@ -8,6 +8,7 @@ from ldm_patched.modules.sd import VAE, CLIP, load_model_weights
import ldm_patched.modules.model_patcher
import ldm_patched.modules.utils
import ldm_patched.modules.clip_vision
import backend.nn.unet
from omegaconf import OmegaConf
from modules.sd_models_config import find_checkpoint_config
@@ -19,6 +20,7 @@ from modules_forge import forge_clip
from modules_forge.unet_patcher import UnetPatcher
from ldm_patched.modules.model_base import model_sampling, ModelType
from backend.loader import load_huggingface_components
from backend.modules.k_model import KModel
import open_clip
from transformers import CLIPTextModel, CLIPTokenizer
@@ -85,27 +87,20 @@ def load_checkpoint_guess_config(sd, output_vae=True, output_clip=True, output_c
    unet_dtype = model_management.unet_dtype(model_params=parameters)
    load_device = model_management.get_torch_device()
    manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device)
    manual_cast_dtype = unet_dtype if manual_cast_dtype is None else manual_cast_dtype

    class WeightsLoader(torch.nn.Module):
        pass

    model_config = model_detection.model_config_from_unet(sd, "model.diffusion_model.", unet_dtype)
    model_config.set_manual_cast(manual_cast_dtype)

    if model_config is None:
        raise RuntimeError("ERROR: Could not detect model type")

    if model_config.clip_vision_prefix is not None:
        if output_clipvision:
            clipvision = ldm_patched.modules.clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True)

    initial_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
    backend.nn.unet.unet_initial_device = initial_load_device
    backend.nn.unet.unet_initial_dtype = unet_dtype

    huggingface_components = load_huggingface_components(sd)

    if output_model:
        inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
        offload_device = model_management.unet_offload_device()
        model = model_config.get_model(sd, "model.diffusion_model.", device=inital_load_device)
        model.load_model_weights(sd, "model.diffusion_model.")
        k_model = KModel(huggingface_components, storage_dtype=unet_dtype, computation_dtype=manual_cast_dtype)
        k_model.to(device=initial_load_device, dtype=unet_dtype)
        model_patcher = UnetPatcher(k_model, load_device=load_device,
                                    offload_device=model_management.unet_offload_device(),
                                    current_device=initial_load_device)

    if output_vae:
        vae = huggingface_components['vae']
@@ -118,12 +113,6 @@ def load_checkpoint_guess_config(sd, output_vae=True, output_clip=True, output_c
    if len(left_over) > 0:
        print("left over keys:", left_over)

    if output_model:
        model_patcher = UnetPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
        if inital_load_device != torch.device("cpu"):
            print("loaded straight to GPU")
            model_management.load_model_gpu(model_patcher)

    return ForgeSD(model_patcher, clip, vae, clipvision)
@@ -161,7 +150,7 @@ def load_model_for_a1111(timer, checkpoint_info=None, state_dict=None):
timer.record("forge load real models")
sd_model.first_stage_model = forge_objects.vae.first_stage_model
sd_model.model.diffusion_model = forge_objects.unet.model.diffusion_model
sd_model.model.diffusion_model = forge_objects.unet.model
conditioner = getattr(sd_model, 'conditioner', None)
if conditioner:
@@ -202,8 +191,8 @@ def load_model_for_a1111(timer, checkpoint_info=None, state_dict=None):
model_embeddings.token_embedding, sd_hijack.model_hijack)
sd_model.cond_stage_model = forge_clip.CLIP_SD_15_L(sd_model.cond_stage_model, sd_hijack.model_hijack)
elif type(sd_model.cond_stage_model).__name__ == 'FrozenOpenCLIPEmbedder': # SD21 Clip
sd_model.cond_stage_model.tokenizer = forge_objects.clip.tokenizer.clip_h
sd_model.cond_stage_model.transformer = forge_objects.clip.cond_stage_model.clip_h.transformer
sd_model.cond_stage_model.tokenizer = forge_objects.clip.tokenizer.clip_l
sd_model.cond_stage_model.transformer = forge_objects.clip.cond_stage_model.clip_l.transformer
model_embeddings = sd_model.cond_stage_model.transformer.text_model.embeddings
model_embeddings.token_embedding = sd_hijack.EmbeddingsWithFixes(
model_embeddings.token_embedding, sd_hijack.model_hijack)
@@ -216,9 +205,6 @@ def load_model_for_a1111(timer, checkpoint_info=None, state_dict=None):
sd_model_hash = checkpoint_info.calculate_shorthash()
timer.record("calculate hash")
if getattr(sd_model, 'parameterization', None) == 'v':
sd_model.forge_objects.unet.model.model_sampling = model_sampling(sd_model.forge_objects.unet.model.model_config, ModelType.V_PREDICTION)
sd_model.is_sd3 = False
sd_model.latent_channels = 4
sd_model.is_sdxl = conditioner is not None
@@ -234,14 +220,14 @@ def load_model_for_a1111(timer, checkpoint_info=None, state_dict=None):
    @torch.inference_mode()
    def patched_decode_first_stage(x):
        sample = sd_model.forge_objects.unet.model.model_config.latent_format.process_out(x)
        sample = sd_model.forge_objects.vae.first_stage_model.process_out(x)
        sample = sd_model.forge_objects.vae.decode(sample).movedim(-1, 1) * 2.0 - 1.0
        return sample.to(x)

    @torch.inference_mode()
    def patched_encode_first_stage(x):
        sample = sd_model.forge_objects.vae.encode(x.movedim(1, -1) * 0.5 + 0.5)
        sample = sd_model.forge_objects.unet.model.model_config.latent_format.process_in(sample)
        sample = sd_model.forge_objects.vae.first_stage_model.process_in(sample)
        return sample.to(x)

    sd_model.ema_scope = lambda *args, **kwargs: contextlib.nullcontext()
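To summarize the conventions these two wrappers translate between (a sketch of the data flow, not code from this commit): A1111 hands over images in [-1, 1] channel-first, the Forge VAE works on [0, 1] channel-last tensors, and the UNet works on latents scaled by process_in/process_out.

import torch

img = torch.rand(1, 3, 512, 512) * 2.0 - 1.0     # A1111-style image in [-1, 1], NCHW
vae_in = img.movedim(1, -1) * 0.5 + 0.5          # [0, 1], NHWC, as the Forge VAE expects
# latent = first_stage_model.process_in(vae.encode(vae_in))                                # scaled latent the UNet sees
# decoded = vae.decode(first_stage_model.process_out(latent)).movedim(-1, 1) * 2.0 - 1.0   # back to [-1, 1]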


@@ -108,7 +108,7 @@ def sampling_prepare(unet, x):
real_model = unet.model
percent_to_timestep_function = lambda p: real_model.model_sampling.percent_to_sigma(p)
percent_to_timestep_function = lambda p: real_model.prediction.percent_to_sigma(p)
for cnet in unet.list_controlnets():
cnet.pre_run(real_model, percent_to_timestep_function)


@@ -4,11 +4,12 @@ import torch
from ldm_patched.modules.model_patcher import ModelPatcher
from ldm_patched.modules.sample import convert_cond
from ldm_patched.modules.samplers import encode_model_conds
from ldm_patched.modules import model_management
class UnetPatcher(ModelPatcher):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    def __init__(self, model, *args, **kwargs):
        super().__init__(model, *args, **kwargs)
        self.controlnet_linked_list = None
        self.extra_preserved_memory_during_sampling = 0
        self.extra_model_patchers_during_sampling = []