mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-22 07:19:21 +00:00
integrate k-diffusion
This commit is contained in:
@@ -393,14 +393,14 @@ def prepare_environment():
|
||||
assets_repo = os.environ.get('ASSETS_REPO', "https://github.com/AUTOMATIC1111/stable-diffusion-webui-assets.git")
|
||||
# stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/Stability-AI/stablediffusion.git")
|
||||
# stable_diffusion_xl_repo = os.environ.get('STABLE_DIFFUSION_XL_REPO', "https://github.com/Stability-AI/generative-models.git")
|
||||
k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
|
||||
# k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
|
||||
huggingface_guess_repo = os.environ.get('HUGGINGFACE_GUESS_REPO', 'https://github.com/lllyasviel/huggingface_guess.git')
|
||||
blip_repo = os.environ.get('BLIP_REPO', 'https://github.com/salesforce/BLIP.git')
|
||||
|
||||
assets_commit_hash = os.environ.get('ASSETS_COMMIT_HASH', "6f7db241d2f8ba7457bac5ca9753331f0c266917")
|
||||
# stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf")
|
||||
# stable_diffusion_xl_commit_hash = os.environ.get('STABLE_DIFFUSION_XL_COMMIT_HASH', "45c443b316737a4ab6e40413d7794a7f5657c19f")
|
||||
k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "ab527a9a6d347f364e3d185ba6d714e22d80cb3c")
|
||||
# k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "ab527a9a6d347f364e3d185ba6d714e22d80cb3c")
|
||||
huggingface_guess_commit_hash = os.environ.get('HUGGINGFACE_GUESS_HASH', "3f96b28763515dbe609792135df3615a440c66dc")
|
||||
blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
|
||||
|
||||
@@ -458,7 +458,7 @@ def prepare_environment():
|
||||
git_clone(assets_repo, repo_dir('stable-diffusion-webui-assets'), "assets", assets_commit_hash)
|
||||
# git_clone(stable_diffusion_repo, repo_dir('stable-diffusion-stability-ai'), "Stable Diffusion", stable_diffusion_commit_hash)
|
||||
# git_clone(stable_diffusion_xl_repo, repo_dir('generative-models'), "Stable Diffusion XL", stable_diffusion_xl_commit_hash)
|
||||
git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
|
||||
# git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
|
||||
git_clone(huggingface_guess_repo, repo_dir('huggingface_guess'), "huggingface_guess", huggingface_guess_commit_hash)
|
||||
git_clone(blip_repo, repo_dir('BLIP'), "BLIP", blip_commit_hash)
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ sd_path = os.path.dirname(__file__)
|
||||
|
||||
path_dirs = [
|
||||
(os.path.join(sd_path, '../repositories/BLIP'), 'models/blip.py', 'BLIP', []),
|
||||
(os.path.join(sd_path, '../repositories/k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
|
||||
# (os.path.join(sd_path, '../repositories/k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
|
||||
(os.path.join(sd_path, '../repositories/huggingface_guess'), 'huggingface_guess/detection.py', 'huggingface_guess', []),
|
||||
]
|
||||
|
||||
|
||||
@@ -59,6 +59,9 @@ class CFGDenoiser(torch.nn.Module):
|
||||
self.model_wrap = None
|
||||
self.p = None
|
||||
|
||||
self.need_last_noise_uncond = False
|
||||
self.last_noise_uncond = None
|
||||
|
||||
# Backward Compatibility
|
||||
self.mask_before_denoising = False
|
||||
|
||||
@@ -179,7 +182,10 @@ class CFGDenoiser(torch.nn.Module):
|
||||
denoiser_params = CFGDenoiserParams(x, image_cond, sigma, state.sampling_step, state.sampling_steps, cond, uncond, self)
|
||||
cfg_denoiser_callback(denoiser_params)
|
||||
|
||||
denoised = sampling_function(self, denoiser_params=denoiser_params, cond_scale=cond_scale, cond_composition=cond_composition)
|
||||
denoised, cond_pred, uncond_pred = sampling_function(self, denoiser_params=denoiser_params, cond_scale=cond_scale, cond_composition=cond_composition)
|
||||
|
||||
if self.need_last_noise_uncond:
|
||||
self.last_noise_uncond = (x - uncond_pred) / sigma[:, None, None, None]
|
||||
|
||||
if self.mask is not None:
|
||||
blended_latent = denoised * self.nmask + self.init_latent * self.mask
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import torch
|
||||
import inspect
|
||||
import k_diffusion.sampling
|
||||
import k_diffusion.external
|
||||
from modules import sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser, sd_schedulers, devices
|
||||
from modules.sd_samplers_cfg_denoiser import CFGDenoiser # noqa: F401
|
||||
from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback
|
||||
@@ -55,13 +56,11 @@ class CFGDenoiserKDiffusion(sd_samplers_cfg_denoiser.CFGDenoiser):
|
||||
@property
|
||||
def inner_model(self):
|
||||
if self.model_wrap is None:
|
||||
denoiser_constructor = getattr(shared.sd_model, 'create_denoiser', None)
|
||||
|
||||
if denoiser_constructor is not None:
|
||||
self.model_wrap = denoiser_constructor()
|
||||
else:
|
||||
denoiser = k_diffusion.external.CompVisVDenoiser if shared.sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
|
||||
self.model_wrap = denoiser(shared.sd_model, quantize=shared.opts.enable_quantization)
|
||||
self.model_wrap = k_diffusion.external.DiscreteSchedule(
|
||||
sigmas=shared.sd_model.forge_objects.unet.model.predictor.sigmas,
|
||||
quantize=shared.opts.enable_quantization
|
||||
)
|
||||
self.model_wrap.inner_model = shared.sd_model
|
||||
|
||||
return self.model_wrap
|
||||
|
||||
|
||||
@@ -13,9 +13,10 @@ class LCMCompVisDenoiser(DiscreteEpsDDPMDenoiser):
|
||||
original_timesteps = 50 # LCM Original Timesteps (default=50, for current version of LCM)
|
||||
self.skip_steps = timesteps // original_timesteps
|
||||
|
||||
alphas_cumprod_valid = torch.zeros((original_timesteps), dtype=torch.float32)
|
||||
alphas_cumprod = 1.0 / (model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)
|
||||
alphas_cumprod_valid = torch.zeros(original_timesteps, dtype=torch.float32)
|
||||
for x in range(original_timesteps):
|
||||
alphas_cumprod_valid[original_timesteps - 1 - x] = model.alphas_cumprod[timesteps - 1 - x * self.skip_steps]
|
||||
alphas_cumprod_valid[original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps]
|
||||
|
||||
super().__init__(model, alphas_cumprod_valid, quantize=None)
|
||||
|
||||
|
||||
@@ -28,31 +28,18 @@ class CompVisTimestepsDenoiser(torch.nn.Module):
|
||||
def __init__(self, model, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.inner_model = model
|
||||
self.inner_model.alphas_cumprod = 1.0 / (self.inner_model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)
|
||||
|
||||
def forward(self, input, timesteps, **kwargs):
|
||||
return self.inner_model.apply_model(input, timesteps, **kwargs)
|
||||
|
||||
|
||||
class CompVisTimestepsVDenoiser(torch.nn.Module):
|
||||
def __init__(self, model, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.inner_model = model
|
||||
|
||||
def predict_eps_from_z_and_v(self, x_t, t, v):
|
||||
return torch.sqrt(self.inner_model.alphas_cumprod)[t.to(torch.int), None, None, None] * v + torch.sqrt(1 - self.inner_model.alphas_cumprod)[t.to(torch.int), None, None, None] * x_t
|
||||
|
||||
def forward(self, input, timesteps, **kwargs):
|
||||
model_output = self.inner_model.apply_model(input, timesteps, **kwargs)
|
||||
e_t = self.predict_eps_from_z_and_v(input, timesteps, model_output)
|
||||
return e_t
|
||||
|
||||
|
||||
class CFGDenoiserTimesteps(CFGDenoiser):
|
||||
|
||||
def __init__(self, sampler):
|
||||
super().__init__(sampler)
|
||||
|
||||
self.alphas = shared.sd_model.alphas_cumprod
|
||||
self.alphas = 1.0 / (shared.sd_model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)
|
||||
self.classic_ddim_eps_estimation = True
|
||||
|
||||
def get_pred_x0(self, x_in, x_out, sigma):
|
||||
@@ -69,8 +56,7 @@ class CFGDenoiserTimesteps(CFGDenoiser):
|
||||
@property
|
||||
def inner_model(self):
|
||||
if self.model_wrap is None:
|
||||
denoiser = CompVisTimestepsVDenoiser if shared.sd_model.parameterization == "v" else CompVisTimestepsDenoiser
|
||||
self.model_wrap = denoiser(shared.sd_model)
|
||||
self.model_wrap = CompVisTimestepsDenoiser(shared.sd_model)
|
||||
|
||||
return self.model_wrap
|
||||
|
||||
|
||||
Reference in New Issue
Block a user