From 19b41b94389522110291fb6e9a92325e8f1caccc Mon Sep 17 00:00:00 2001 From: layerdiffusion <19834515+lllyasviel@users.noreply.github.com> Date: Sun, 11 Aug 2024 17:02:50 -0700 Subject: [PATCH] Add option to experiment with results from other impl Setting -> Compatibility -> Try to reproduce the results from external software --- backend/nn/flux.py | 10 +++++++++- modules/processing.py | 6 ++++-- modules/rng.py | 30 +++++++++++++++++++----------- modules/shared_options.py | 1 + 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/backend/nn/flux.py b/backend/nn/flux.py index d12bd30c..942d9c33 100644 --- a/backend/nn/flux.py +++ b/backend/nn/flux.py @@ -48,7 +48,15 @@ def apply_rope(xq, xk, freqs_cis): def timestep_embedding(t, dim, max_period=10000, time_factor=1000.0): t = time_factor * t half = dim // 2 - freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(t.device) + + # TODO: Once a trainer for Flux gets popular, make timestep_embedding consistent with that trainer + + # Does not block the CUDA stream, but has about 1e-4 differences from Flux official codes: + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half) + + # Blocks the CUDA stream, but consistent with official codes: + # freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(t.device) + args = t[:, None].float() * freqs[None] del freqs embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) diff --git a/modules/processing.py b/modules/processing.py index 3571f575..15c70693 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -17,7 +17,7 @@ from typing import Any import modules.sd_hijack from modules import devices, prompt_parser, masking, sd_samplers, lowvram, infotext_utils, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng, profiling -from modules.rng import slerp # noqa: F401 
+from modules.rng import slerp, get_noise_source_type # noqa: F401 from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes from modules.shared import opts, cmd_opts, state import modules.shared as shared @@ -729,6 +729,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter if p.sd_model.use_distilled_cfg_scale: generation_params['Distilled CFG Scale'] = p.distilled_cfg_scale + noise_source_type = get_noise_source_type() + generation_params.update({ "Image CFG scale": getattr(p, 'image_cfg_scale', None), "Seed": p.all_seeds[0] if use_main_prompt else all_seeds[index], @@ -750,7 +752,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio, "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, "Init image hash": getattr(p, 'init_img_hash', None), - "RNG": opts.randn_source if opts.randn_source != "GPU" else None, + "RNG": noise_source_type if noise_source_type != "GPU" else None, "Tiling": "True" if p.tiling else None, **p.extra_generation_params, "Version": program_version() if opts.add_version_to_infotext else None, diff --git a/modules/rng.py b/modules/rng.py index f3afb4de..174881c5 100644 --- a/modules/rng.py +++ b/modules/rng.py @@ -3,12 +3,20 @@ import torch from modules import devices, rng_philox, shared +def get_noise_source_type(): + if shared.opts.forge_try_reproduce in ['ComfyUI', 'DrawThings']: + return "CPU" + + return shared.opts.randn_source + + def randn(seed, shape, generator=None): """Generate a tensor with random numbers from a normal distribution using seed. 
Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed.""" if generator is not None: + # Forge Note: # If generator is not none, we must use another seed to # avoid global torch.rand to get same noise again. # Note: removing this will make DDPM sampler broken. @@ -16,10 +24,10 @@ def randn(seed, shape, generator=None): else: manual_seed(seed) - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": return torch.asarray((generator or nv_rng).randn(shape), device=devices.device) - if shared.opts.randn_source == "CPU" or devices.device.type == 'mps': + if get_noise_source_type() == "CPU" or devices.device.type == 'mps': return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device) return torch.randn(shape, device=devices.device, generator=generator) @@ -30,11 +38,11 @@ def randn_local(seed, shape): Does not change the global random number generator. You can only generate the seed's first tensor using this function.""" - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": rng = rng_philox.Generator(seed) return torch.asarray(rng.randn(shape), device=devices.device) - local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device + local_device = devices.cpu if get_noise_source_type() == "CPU" or devices.device.type == 'mps' else devices.device local_generator = torch.Generator(local_device).manual_seed(int(seed)) return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device) @@ -44,10 +52,10 @@ def randn_like(x): Use either randn() or manual_seed() to initialize the generator.""" - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype) - if shared.opts.randn_source == "CPU" or x.device.type == 'mps': + if get_noise_source_type() == "CPU" or x.device.type == 
'mps': return torch.randn_like(x, device=devices.cpu).to(x.device) return torch.randn_like(x) @@ -58,10 +66,10 @@ def randn_without_seed(shape, generator=None): Use either randn() or manual_seed() to initialize the generator.""" - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": return torch.asarray((generator or nv_rng).randn(shape), device=devices.device) - if shared.opts.randn_source == "CPU" or devices.device.type == 'mps': + if get_noise_source_type() == "CPU" or devices.device.type == 'mps': return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device) return torch.randn(shape, device=devices.device, generator=generator) @@ -70,7 +78,7 @@ def randn_without_seed(shape, generator=None): def manual_seed(seed): """Set up a global random number generator using the specified seed.""" - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": global nv_rng nv_rng = rng_philox.Generator(seed) return @@ -79,10 +87,10 @@ def manual_seed(seed): def create_generator(seed): - if shared.opts.randn_source == "NV": + if get_noise_source_type() == "NV": return rng_philox.Generator(seed) - device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device + device = devices.cpu if get_noise_source_type() == "CPU" or devices.device.type == 'mps' else devices.device generator = torch.Generator(device).manual_seed(int(seed)) return generator diff --git a/modules/shared_options.py b/modules/shared_options.py index 03b334c1..5f3c86dc 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -246,6 +246,7 @@ options_templates.update(options_section(('optimizations', "Optimizations", "sd" })) options_templates.update(options_section(('compatibility', "Compatibility", "sd"), { + "forge_try_reproduce": OptionInfo('None', "Try to reproduce the results from external software", gr.Radio, lambda: {"choices": ['None', 'Diffusers', 'ComfyUI', 'WebUI 1.5', 'InvokeAI', 
'EasyDiffusion', 'DrawThings']}), "auto_backcompat": OptionInfo(True, "Automatic backward compatibility").info("automatically enable options for backwards compatibility when importing generation parameters from infotext that has program version."), "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."), "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."),