integrate k-diffusion

2026-04-22 07:19:21 +00:00 · 2024-08-07 15:05:42 -07:00
parent 14a759b5ca
commit a07c758658
14 changed files with 1366 additions and 42 deletions
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -393,14 +393,14 @@ def prepare_environment():
    assets_repo = os.environ.get('ASSETS_REPO', "https://github.com/AUTOMATIC1111/stable-diffusion-webui-assets.git")
    # stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/Stability-AI/stablediffusion.git")
    # stable_diffusion_xl_repo = os.environ.get('STABLE_DIFFUSION_XL_REPO', "https://github.com/Stability-AI/generative-models.git")
-    k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
+    # k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
    huggingface_guess_repo = os.environ.get('HUGGINGFACE_GUESS_REPO', 'https://github.com/lllyasviel/huggingface_guess.git')
    blip_repo = os.environ.get('BLIP_REPO', 'https://github.com/salesforce/BLIP.git')

    assets_commit_hash = os.environ.get('ASSETS_COMMIT_HASH', "6f7db241d2f8ba7457bac5ca9753331f0c266917")
    # stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf")
    # stable_diffusion_xl_commit_hash = os.environ.get('STABLE_DIFFUSION_XL_COMMIT_HASH', "45c443b316737a4ab6e40413d7794a7f5657c19f")
-    k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "ab527a9a6d347f364e3d185ba6d714e22d80cb3c")
+    # k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "ab527a9a6d347f364e3d185ba6d714e22d80cb3c")
    huggingface_guess_commit_hash = os.environ.get('HUGGINGFACE_GUESS_HASH', "3f96b28763515dbe609792135df3615a440c66dc")
    blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")

@@ -458,7 +458,7 @@ def prepare_environment():
    git_clone(assets_repo, repo_dir('stable-diffusion-webui-assets'), "assets", assets_commit_hash)
    # git_clone(stable_diffusion_repo, repo_dir('stable-diffusion-stability-ai'), "Stable Diffusion", stable_diffusion_commit_hash)
    # git_clone(stable_diffusion_xl_repo, repo_dir('generative-models'), "Stable Diffusion XL", stable_diffusion_xl_commit_hash)
-    git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
+    # git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
    git_clone(huggingface_guess_repo, repo_dir('huggingface_guess'), "huggingface_guess", huggingface_guess_commit_hash)
    git_clone(blip_repo, repo_dir('BLIP'), "BLIP", blip_commit_hash)

--- a/modules/paths.py
+++ b/modules/paths.py
@@ -9,7 +9,7 @@ sd_path = os.path.dirname(__file__)

 path_dirs = [
    (os.path.join(sd_path, '../repositories/BLIP'), 'models/blip.py', 'BLIP', []),
-    (os.path.join(sd_path, '../repositories/k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
+    # (os.path.join(sd_path, '../repositories/k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
    (os.path.join(sd_path, '../repositories/huggingface_guess'), 'huggingface_guess/detection.py', 'huggingface_guess', []),
 ]

--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -59,6 +59,9 @@ class CFGDenoiser(torch.nn.Module):
        self.model_wrap = None
        self.p = None

+        self.need_last_noise_uncond = False
+        self.last_noise_uncond = None
+
        # Backward Compatibility
        self.mask_before_denoising = False

@@ -179,7 +182,10 @@ class CFGDenoiser(torch.nn.Module):
        denoiser_params = CFGDenoiserParams(x, image_cond, sigma, state.sampling_step, state.sampling_steps, cond, uncond, self)
        cfg_denoiser_callback(denoiser_params)

-        denoised = sampling_function(self, denoiser_params=denoiser_params, cond_scale=cond_scale, cond_composition=cond_composition)
+        denoised, cond_pred, uncond_pred = sampling_function(self, denoiser_params=denoiser_params, cond_scale=cond_scale, cond_composition=cond_composition)
+
+        if self.need_last_noise_uncond:
+            self.last_noise_uncond = (x - uncond_pred) / sigma[:, None, None, None]

        if self.mask is not None:
            blended_latent = denoised * self.nmask + self.init_latent * self.mask
--- a/modules/sd_samplers_kdiffusion.py
+++ b/modules/sd_samplers_kdiffusion.py
@@ -1,6 +1,7 @@
 import torch
 import inspect
 import k_diffusion.sampling
+import k_diffusion.external
 from modules import sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser, sd_schedulers, devices
 from modules.sd_samplers_cfg_denoiser import CFGDenoiser  # noqa: F401
 from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback
@@ -55,13 +56,11 @@ class CFGDenoiserKDiffusion(sd_samplers_cfg_denoiser.CFGDenoiser):
    @property
    def inner_model(self):
        if self.model_wrap is None:
-            denoiser_constructor = getattr(shared.sd_model, 'create_denoiser', None)
-
-            if denoiser_constructor is not None:
-                self.model_wrap = denoiser_constructor()
-            else:
-                denoiser = k_diffusion.external.CompVisVDenoiser if shared.sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
-                self.model_wrap = denoiser(shared.sd_model, quantize=shared.opts.enable_quantization)
+            self.model_wrap = k_diffusion.external.DiscreteSchedule(
+                sigmas=shared.sd_model.forge_objects.unet.model.predictor.sigmas,
+                quantize=shared.opts.enable_quantization
+            )
+            self.model_wrap.inner_model = shared.sd_model

        return self.model_wrap

--- a/modules/sd_samplers_lcm.py
+++ b/modules/sd_samplers_lcm.py
@@ -13,9 +13,10 @@ class LCMCompVisDenoiser(DiscreteEpsDDPMDenoiser):
        original_timesteps = 50     # LCM Original Timesteps (default=50, for current version of LCM)
        self.skip_steps = timesteps // original_timesteps

-        alphas_cumprod_valid = torch.zeros((original_timesteps), dtype=torch.float32)
+        alphas_cumprod = 1.0 / (model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)
+        alphas_cumprod_valid = torch.zeros(original_timesteps, dtype=torch.float32)
        for x in range(original_timesteps):
-            alphas_cumprod_valid[original_timesteps - 1 - x] = model.alphas_cumprod[timesteps - 1 - x * self.skip_steps]
+            alphas_cumprod_valid[original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps]

        super().__init__(model, alphas_cumprod_valid, quantize=None)

--- a/modules/sd_samplers_timesteps.py
+++ b/modules/sd_samplers_timesteps.py
@@ -28,31 +28,18 @@ class CompVisTimestepsDenoiser(torch.nn.Module):
    def __init__(self, model, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.inner_model = model
+        self.inner_model.alphas_cumprod = 1.0 / (self.inner_model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)

    def forward(self, input, timesteps, **kwargs):
        return self.inner_model.apply_model(input, timesteps, **kwargs)


-class CompVisTimestepsVDenoiser(torch.nn.Module):
-    def __init__(self, model, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.inner_model = model
-
-    def predict_eps_from_z_and_v(self, x_t, t, v):
-        return torch.sqrt(self.inner_model.alphas_cumprod)[t.to(torch.int), None, None, None] * v + torch.sqrt(1 - self.inner_model.alphas_cumprod)[t.to(torch.int), None, None, None] * x_t
-
-    def forward(self, input, timesteps, **kwargs):
-        model_output = self.inner_model.apply_model(input, timesteps, **kwargs)
-        e_t = self.predict_eps_from_z_and_v(input, timesteps, model_output)
-        return e_t
-
-
 class CFGDenoiserTimesteps(CFGDenoiser):

    def __init__(self, sampler):
        super().__init__(sampler)

-        self.alphas = shared.sd_model.alphas_cumprod
+        self.alphas = 1.0 / (shared.sd_model.forge_objects.unet.model.predictor.sigmas ** 2.0 + 1.0)
        self.classic_ddim_eps_estimation = True

    def get_pred_x0(self, x_in, x_out, sigma):
@@ -69,8 +56,7 @@ class CFGDenoiserTimesteps(CFGDenoiser):
    @property
    def inner_model(self):
        if self.model_wrap is None:
-            denoiser = CompVisTimestepsVDenoiser if shared.sd_model.parameterization == "v" else CompVisTimestepsDenoiser
-            self.model_wrap = denoiser(shared.sd_model)
+            self.model_wrap = CompVisTimestepsDenoiser(shared.sd_model)

        return self.model_wrap