Merge branch 'main' into upt

This commit is contained in:
lllyasviel
2024-02-11 16:42:36 -08:00
committed by GitHub
1005 changed files with 154843 additions and 3044 deletions

View File

@@ -256,6 +256,9 @@ class StableDiffusionProcessing:
self.cached_uc = StableDiffusionProcessing.cached_uc
self.cached_c = StableDiffusionProcessing.cached_c
self.extra_result_images = []
self.modified_noise = None
@property
def sd_model(self):
return shared.sd_model
@@ -515,8 +518,9 @@ class StableDiffusionProcessing:
class Processed:
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments="", extra_images_list=[]):
self.images = images_list
self.extra_images = extra_images_list
self.prompt = p.prompt
self.negative_prompt = p.negative_prompt
self.seed = seed
@@ -628,44 +632,7 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
for i in range(batch.shape[0]):
sample = decode_first_stage(model, batch[i:i + 1])[0]
if check_for_nans:
try:
devices.test_for_nans(sample, "vae")
except devices.NansException as e:
if shared.opts.auto_vae_precision_bfloat16:
autofix_dtype = torch.bfloat16
autofix_dtype_text = "bfloat16"
autofix_dtype_setting = "Automatically convert VAE to bfloat16"
autofix_dtype_comment = ""
elif shared.opts.auto_vae_precision:
autofix_dtype = torch.float32
autofix_dtype_text = "32-bit float"
autofix_dtype_setting = "Automatically revert VAE to 32-bit floats"
autofix_dtype_comment = "\nTo always start with 32-bit VAE, use --no-half-vae commandline flag."
else:
raise e
if devices.dtype_vae == autofix_dtype:
raise e
errors.print_error_explanation(
"A tensor with all NaNs was produced in VAE.\n"
f"Web UI will now convert VAE into {autofix_dtype_text} and retry.\n"
f"To disable this behavior, disable the '{autofix_dtype_setting}' setting.{autofix_dtype_comment}"
)
devices.dtype_vae = autofix_dtype
model.first_stage_model.to(devices.dtype_vae)
batch = batch.to(devices.dtype_vae)
sample = decode_first_stage(model, batch[i:i + 1])[0]
if target_device is not None:
sample = sample.to(target_device)
samples.append(sample)
samples.append(sample.to(target_device))
return samples
@@ -848,7 +815,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
infotexts = []
output_images = []
with torch.no_grad(), p.sd_model.ema_scope():
with torch.inference_mode():
with devices.autocast():
p.init(p.all_prompts, p.all_seeds, p.all_subseeds)
@@ -872,6 +839,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
sd_models.reload_model_weights() # model can be changed for example by refiner
p.sd_model.forge_objects = p.sd_model.forge_objects_original.shallow_copy()
p.prompts = p.all_prompts[n * p.batch_size:(n + 1) * p.batch_size]
p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n + 1) * p.batch_size]
p.seeds = p.all_seeds[n * p.batch_size:(n + 1) * p.batch_size]
@@ -888,8 +856,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.parse_extra_network_prompts()
if not p.disable_extra_networks:
with devices.autocast():
extra_networks.activate(p, p.extra_network_data)
extra_networks.activate(p, p.extra_network_data)
p.sd_model.forge_objects = p.sd_model.forge_objects_after_applying_lora.shallow_copy()
if p.scripts is not None:
p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
@@ -941,8 +910,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
alphas_cumprod_modifiers = p.sd_model.forge_objects.unet.model_options.get('alphas_cumprod_modifiers', [])
alphas_cumprod_backup = None
if len(alphas_cumprod_modifiers) > 0:
alphas_cumprod_backup = p.sd_model.alphas_cumprod
for modifier in alphas_cumprod_modifiers:
p.sd_model.alphas_cumprod = modifier(p.sd_model.alphas_cumprod)
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
if alphas_cumprod_backup is not None:
p.sd_model.alphas_cumprod = alphas_cumprod_backup
if p.scripts is not None:
ps = scripts.PostSampleArgs(samples_ddim)
@@ -961,9 +940,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
del samples_ddim
if lowvram.is_enabled(shared.sd_model):
lowvram.send_everything_to_cpu()
devices.torch_gc()
state.nextjob()
@@ -1102,6 +1078,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
subseed=p.all_subseeds[0],
index_of_first_image=index_of_first_image,
infotexts=infotexts,
extra_images_list=p.extra_result_images,
)
if p.scripts is not None:
@@ -1270,7 +1247,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
image = np.array(self.firstpass_image).astype(np.float32) / 255.0
image = np.moveaxis(image, 2, 0)
image = torch.from_numpy(np.expand_dims(image, axis=0))
image = image.to(shared.device, dtype=devices.dtype_vae)
image = image.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
@@ -1283,6 +1260,19 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
# here we generate an image normally
x = self.rng.next()
self.sd_model.forge_objects = self.sd_model.forge_objects_after_applying_lora.shallow_copy()
if self.scripts is not None:
self.scripts.process_before_every_sampling(self,
x=x,
noise=x,
c=conditioning,
uc=unconditional_conditioning)
if self.modified_noise is not None:
x = self.modified_noise
self.modified_noise = None
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
del x
@@ -1354,7 +1344,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
batch_images.append(image)
decoded_samples = torch.from_numpy(np.array(batch_images))
decoded_samples = decoded_samples.to(shared.device, dtype=devices.dtype_vae)
decoded_samples = decoded_samples.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
@@ -1384,6 +1374,18 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
if self.scripts is not None:
self.scripts.before_hr(self)
self.sd_model.forge_objects = self.sd_model.forge_objects_after_applying_lora.shallow_copy()
if self.scripts is not None:
self.scripts.process_before_every_sampling(self,
x=samples,
noise=noise,
c=self.hr_c,
uc=self.hr_uc)
if self.modified_noise is not None:
noise = self.modified_noise
self.modified_noise = None
samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
@@ -1459,7 +1461,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
if shared.opts.hires_fix_use_firstpass_conds:
self.calculate_hr_conds()
elif lowvram.is_enabled(shared.sd_model) and shared.sd_model.sd_checkpoint_info == sd_models.select_checkpoint(): # if in lowvram mode, we need to calculate conds right away, before the cond NN is unloaded
elif shared.sd_model.sd_checkpoint_info == sd_models.select_checkpoint(): # if in lowvram mode, we need to calculate conds right away, before the cond NN is unloaded
with devices.autocast():
extra_networks.activate(self, self.hr_extra_network_data)
@@ -1646,7 +1648,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
image = torch.from_numpy(batch_images)
image = image.to(shared.device, dtype=devices.dtype_vae)
image = image.to(shared.device, dtype=torch.float32)
if opts.sd_vae_encode_method != 'Full':
self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
@@ -1687,6 +1689,18 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
x *= self.initial_noise_multiplier
self.sd_model.forge_objects = self.sd_model.forge_objects_after_applying_lora.shallow_copy()
if self.scripts is not None:
self.scripts.process_before_every_sampling(self,
x=self.init_latent,
noise=x,
c=conditioning,
uc=unconditional_conditioning)
if self.modified_noise is not None:
x = self.modified_noise
self.modified_noise = None
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
if self.mask is not None: