mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-26 09:18:59 +00:00
Merge pull request #14208 from CodeHatchling/soft-inpainting
Soft Inpainting
This commit is contained in:
@@ -791,3 +791,4 @@ def flatten(img, bgcolor):
|
||||
img = background
|
||||
|
||||
return img.convert('RGB')
|
||||
|
||||
|
||||
@@ -62,18 +62,22 @@ def apply_color_correction(correction, original_image):
|
||||
return image.convert('RGB')
|
||||
|
||||
|
||||
def apply_overlay(image, paste_loc, index, overlays):
|
||||
if overlays is None or index >= len(overlays):
|
||||
def uncrop(image, dest_size, paste_loc):
|
||||
x, y, w, h = paste_loc
|
||||
base_image = Image.new('RGBA', dest_size)
|
||||
image = images.resize_image(1, image, w, h)
|
||||
base_image.paste(image, (x, y))
|
||||
image = base_image
|
||||
|
||||
return image
|
||||
|
||||
|
||||
def apply_overlay(image, paste_loc, overlay):
|
||||
if overlay is None:
|
||||
return image
|
||||
|
||||
overlay = overlays[index]
|
||||
|
||||
if paste_loc is not None:
|
||||
x, y, w, h = paste_loc
|
||||
base_image = Image.new('RGBA', (overlay.width, overlay.height))
|
||||
image = images.resize_image(1, image, w, h)
|
||||
base_image.paste(image, (x, y))
|
||||
image = base_image
|
||||
image = uncrop(image, (overlay.width, overlay.height), paste_loc)
|
||||
|
||||
image = image.convert('RGBA')
|
||||
image.alpha_composite(overlay)
|
||||
@@ -81,9 +85,12 @@ def apply_overlay(image, paste_loc, index, overlays):
|
||||
|
||||
return image
|
||||
|
||||
def create_binary_mask(image):
|
||||
def create_binary_mask(image, round=True):
|
||||
if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255):
|
||||
image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
|
||||
if round:
|
||||
image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
|
||||
else:
|
||||
image = image.split()[-1].convert("L")
|
||||
else:
|
||||
image = image.convert('L')
|
||||
return image
|
||||
@@ -308,7 +315,7 @@ class StableDiffusionProcessing:
|
||||
c_adm = torch.cat((c_adm, noise_level_emb), 1)
|
||||
return c_adm
|
||||
|
||||
def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
|
||||
def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
|
||||
self.is_using_inpainting_conditioning = True
|
||||
|
||||
# Handle the different mask inputs
|
||||
@@ -320,8 +327,10 @@ class StableDiffusionProcessing:
|
||||
conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
|
||||
conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
|
||||
|
||||
# Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
|
||||
conditioning_mask = torch.round(conditioning_mask)
|
||||
if round_image_mask:
|
||||
# Caller is requesting a discretized mask as input, so we round to either 1.0 or 0.0
|
||||
conditioning_mask = torch.round(conditioning_mask)
|
||||
|
||||
else:
|
||||
conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])
|
||||
|
||||
@@ -345,7 +354,7 @@ class StableDiffusionProcessing:
|
||||
|
||||
return image_conditioning
|
||||
|
||||
def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
|
||||
def img2img_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
|
||||
source_image = devices.cond_cast_float(source_image)
|
||||
|
||||
# HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
|
||||
@@ -357,7 +366,7 @@ class StableDiffusionProcessing:
|
||||
return self.edit_image_conditioning(source_image)
|
||||
|
||||
if self.sampler.conditioning_key in {'hybrid', 'concat'}:
|
||||
return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
|
||||
return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask, round_image_mask=round_image_mask)
|
||||
|
||||
if self.sampler.conditioning_key == "crossattn-adm":
|
||||
return self.unclip_image_conditioning(source_image)
|
||||
@@ -867,6 +876,11 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
||||
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
|
||||
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
|
||||
|
||||
if p.scripts is not None:
|
||||
ps = scripts.PostSampleArgs(samples_ddim)
|
||||
p.scripts.post_sample(p, ps)
|
||||
samples_ddim = ps.samples
|
||||
|
||||
if getattr(samples_ddim, 'already_decoded', False):
|
||||
x_samples_ddim = samples_ddim
|
||||
else:
|
||||
@@ -922,13 +936,31 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
||||
pp = scripts.PostprocessImageArgs(image)
|
||||
p.scripts.postprocess_image(p, pp)
|
||||
image = pp.image
|
||||
|
||||
mask_for_overlay = getattr(p, "mask_for_overlay", None)
|
||||
overlay_image = p.overlay_images[i] if getattr(p, "overlay_images", None) is not None and i < len(p.overlay_images) else None
|
||||
|
||||
if p.scripts is not None:
|
||||
ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
|
||||
p.scripts.postprocess_maskoverlay(p, ppmo)
|
||||
mask_for_overlay, overlay_image = ppmo.mask_for_overlay, ppmo.overlay_image
|
||||
|
||||
if p.color_corrections is not None and i < len(p.color_corrections):
|
||||
if save_samples and opts.save_images_before_color_correction:
|
||||
image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images)
|
||||
image_without_cc = apply_overlay(image, p.paste_to, overlay_image)
|
||||
images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction")
|
||||
image = apply_color_correction(p.color_corrections[i], image)
|
||||
|
||||
image = apply_overlay(image, p.paste_to, i, p.overlay_images)
|
||||
# If the intention is to show the output from the model
|
||||
# that is being composited over the original image,
|
||||
# we need to keep the original image around
|
||||
# and use it in the composite step.
|
||||
original_denoised_image = image.copy()
|
||||
|
||||
if p.paste_to is not None:
|
||||
original_denoised_image = uncrop(original_denoised_image, (overlay_image.width, overlay_image.height), p.paste_to)
|
||||
|
||||
image = apply_overlay(image, p.paste_to, overlay_image)
|
||||
|
||||
if save_samples:
|
||||
images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
|
||||
@@ -938,16 +970,17 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
||||
if opts.enable_pnginfo:
|
||||
image.info["parameters"] = text
|
||||
output_images.append(image)
|
||||
if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
|
||||
|
||||
if mask_for_overlay is not None:
|
||||
if opts.return_mask or opts.save_mask:
|
||||
image_mask = p.mask_for_overlay.convert('RGB')
|
||||
image_mask = mask_for_overlay.convert('RGB')
|
||||
if save_samples and opts.save_mask:
|
||||
images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
|
||||
if opts.return_mask:
|
||||
output_images.append(image_mask)
|
||||
|
||||
if opts.return_mask_composite or opts.save_mask_composite:
|
||||
image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
|
||||
image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
|
||||
if save_samples and opts.save_mask_composite:
|
||||
images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
|
||||
if opts.return_mask_composite:
|
||||
@@ -1351,6 +1384,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
|
||||
mask_blur_x: int = 4
|
||||
mask_blur_y: int = 4
|
||||
mask_blur: int = None
|
||||
mask_round: bool = True
|
||||
inpainting_fill: int = 0
|
||||
inpaint_full_res: bool = True
|
||||
inpaint_full_res_padding: int = 0
|
||||
@@ -1396,7 +1430,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
|
||||
if image_mask is not None:
|
||||
# image_mask is passed in as RGBA by Gradio to support alpha masks,
|
||||
# but we still want to support binary masks.
|
||||
image_mask = create_binary_mask(image_mask)
|
||||
image_mask = create_binary_mask(image_mask, round=self.mask_round)
|
||||
|
||||
if self.inpainting_mask_invert:
|
||||
image_mask = ImageOps.invert(image_mask)
|
||||
@@ -1503,7 +1537,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
|
||||
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
|
||||
latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
|
||||
latmask = latmask[0]
|
||||
latmask = np.around(latmask)
|
||||
if self.mask_round:
|
||||
latmask = np.around(latmask)
|
||||
latmask = np.tile(latmask[None], (4, 1, 1))
|
||||
|
||||
self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
|
||||
@@ -1515,7 +1550,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
|
||||
elif self.inpainting_fill == 3:
|
||||
self.init_latent = self.init_latent * self.mask
|
||||
|
||||
self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask)
|
||||
self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.mask_round)
|
||||
|
||||
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
|
||||
x = self.rng.next()
|
||||
@@ -1527,7 +1562,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
|
||||
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
|
||||
|
||||
if self.mask is not None:
|
||||
samples = samples * self.nmask + self.init_latent * self.mask
|
||||
blended_samples = samples * self.nmask + self.init_latent * self.mask
|
||||
|
||||
if self.scripts is not None:
|
||||
mba = scripts.MaskBlendArgs(samples, self.nmask, self.init_latent, self.mask, blended_samples)
|
||||
self.scripts.on_mask_blend(self, mba)
|
||||
blended_samples = mba.blended_latent
|
||||
|
||||
samples = blended_samples
|
||||
|
||||
del x
|
||||
devices.torch_gc()
|
||||
|
||||
@@ -11,11 +11,31 @@ from modules import shared, paths, script_callbacks, extensions, script_loading,
|
||||
|
||||
AlwaysVisible = object()
|
||||
|
||||
class MaskBlendArgs:
|
||||
def __init__(self, current_latent, nmask, init_latent, mask, blended_latent, denoiser=None, sigma=None):
|
||||
self.current_latent = current_latent
|
||||
self.nmask = nmask
|
||||
self.init_latent = init_latent
|
||||
self.mask = mask
|
||||
self.blended_latent = blended_latent
|
||||
|
||||
self.denoiser = denoiser
|
||||
self.is_final_blend = denoiser is None
|
||||
self.sigma = sigma
|
||||
|
||||
class PostSampleArgs:
|
||||
def __init__(self, samples):
|
||||
self.samples = samples
|
||||
|
||||
class PostprocessImageArgs:
|
||||
def __init__(self, image):
|
||||
self.image = image
|
||||
|
||||
class PostProcessMaskOverlayArgs:
|
||||
def __init__(self, index, mask_for_overlay, overlay_image):
|
||||
self.index = index
|
||||
self.mask_for_overlay = mask_for_overlay
|
||||
self.overlay_image = overlay_image
|
||||
|
||||
class PostprocessBatchListArgs:
|
||||
def __init__(self, images):
|
||||
@@ -206,6 +226,25 @@ class Script:
|
||||
|
||||
pass
|
||||
|
||||
def on_mask_blend(self, p, mba: MaskBlendArgs, *args):
|
||||
"""
|
||||
Called in inpainting mode when the original content is blended with the inpainted content.
|
||||
This is called at every step in the denoising process and once at the end.
|
||||
If is_final_blend is true, this is called for the final blending stage.
|
||||
Otherwise, denoiser and sigma are defined and may be used to inform the procedure.
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
def post_sample(self, p, ps: PostSampleArgs, *args):
|
||||
"""
|
||||
Called after the samples have been generated,
|
||||
but before they have been decoded by the VAE, if applicable.
|
||||
Check getattr(samples, 'already_decoded', False) to test if the images are decoded.
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
def postprocess_image(self, p, pp: PostprocessImageArgs, *args):
|
||||
"""
|
||||
Called for every image after it has been generated.
|
||||
@@ -213,6 +252,13 @@ class Script:
|
||||
|
||||
pass
|
||||
|
||||
def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs, *args):
|
||||
"""
|
||||
Called for every image after it has been generated.
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
def postprocess(self, p, processed, *args):
|
||||
"""
|
||||
This function is called after processing ends for AlwaysVisible scripts.
|
||||
@@ -767,6 +813,22 @@ class ScriptRunner:
|
||||
except Exception:
|
||||
errors.report(f"Error running postprocess_batch_list: {script.filename}", exc_info=True)
|
||||
|
||||
def post_sample(self, p, ps: PostSampleArgs):
|
||||
for script in self.alwayson_scripts:
|
||||
try:
|
||||
script_args = p.script_args[script.args_from:script.args_to]
|
||||
script.post_sample(p, ps, *script_args)
|
||||
except Exception:
|
||||
errors.report(f"Error running post_sample: {script.filename}", exc_info=True)
|
||||
|
||||
def on_mask_blend(self, p, mba: MaskBlendArgs):
|
||||
for script in self.alwayson_scripts:
|
||||
try:
|
||||
script_args = p.script_args[script.args_from:script.args_to]
|
||||
script.on_mask_blend(p, mba, *script_args)
|
||||
except Exception:
|
||||
errors.report(f"Error running post_sample: {script.filename}", exc_info=True)
|
||||
|
||||
def postprocess_image(self, p, pp: PostprocessImageArgs):
|
||||
for script in self.alwayson_scripts:
|
||||
try:
|
||||
@@ -775,6 +837,14 @@ class ScriptRunner:
|
||||
except Exception:
|
||||
errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True)
|
||||
|
||||
def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs):
|
||||
for script in self.alwayson_scripts:
|
||||
try:
|
||||
script_args = p.script_args[script.args_from:script.args_to]
|
||||
script.postprocess_maskoverlay(p, ppmo, *script_args)
|
||||
except Exception:
|
||||
errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True)
|
||||
|
||||
def before_component(self, component, **kwargs):
|
||||
for callback, script in self.on_before_component_elem_id.get(kwargs.get("elem_id"), []):
|
||||
try:
|
||||
|
||||
@@ -56,6 +56,9 @@ class CFGDenoiser(torch.nn.Module):
|
||||
self.sampler = sampler
|
||||
self.model_wrap = None
|
||||
self.p = None
|
||||
|
||||
# NOTE: masking before denoising can cause the original latents to be oversmoothed
|
||||
# as the original latents do not have noise
|
||||
self.mask_before_denoising = False
|
||||
|
||||
@property
|
||||
@@ -105,8 +108,21 @@ class CFGDenoiser(torch.nn.Module):
|
||||
|
||||
assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
|
||||
|
||||
# If we use masks, blending between the denoised and original latent images occurs here.
|
||||
def apply_blend(current_latent):
|
||||
blended_latent = current_latent * self.nmask + self.init_latent * self.mask
|
||||
|
||||
if self.p.scripts is not None:
|
||||
from modules import scripts
|
||||
mba = scripts.MaskBlendArgs(current_latent, self.nmask, self.init_latent, self.mask, blended_latent, denoiser=self, sigma=sigma)
|
||||
self.p.scripts.on_mask_blend(self.p, mba)
|
||||
blended_latent = mba.blended_latent
|
||||
|
||||
return blended_latent
|
||||
|
||||
# Blend in the original latents (before)
|
||||
if self.mask_before_denoising and self.mask is not None:
|
||||
x = self.init_latent * self.mask + self.nmask * x
|
||||
x = apply_blend(x)
|
||||
|
||||
batch_size = len(conds_list)
|
||||
repeats = [len(conds_list[i]) for i in range(batch_size)]
|
||||
@@ -207,8 +223,9 @@ class CFGDenoiser(torch.nn.Module):
|
||||
else:
|
||||
denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
|
||||
|
||||
# Blend in the original latents (after)
|
||||
if not self.mask_before_denoising and self.mask is not None:
|
||||
denoised = self.init_latent * self.mask + self.nmask * denoised
|
||||
denoised = apply_blend(denoised)
|
||||
|
||||
self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user