Added ability to split up Flux across GPUs (experimental). Changed the way timestep scheduling works to prepare for more specific schedules.

Jaret Burkett
2024-12-31 07:06:55 -07:00
parent 8ef07a9c36
commit 4723f23c0d
5 changed files with 182 additions and 7 deletions


@@ -386,7 +386,7 @@ class TrainConfig:
         # adds an additional loss to the network to encourage it output a normalized standard deviation
         self.target_norm_std = kwargs.get('target_norm_std', None)
         self.target_norm_std_value = kwargs.get('target_norm_std_value', 1.0)
-        self.timestep_type = kwargs.get('timestep_type', 'sigmoid')  # sigmoid, linear
+        self.timestep_type = kwargs.get('timestep_type', 'sigmoid')  # sigmoid, linear, lognorm_blend
         self.linear_timesteps = kwargs.get('linear_timesteps', False)
         self.linear_timesteps2 = kwargs.get('linear_timesteps2', False)
         self.disable_sampling = kwargs.get('disable_sampling', False)
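The default remains 'sigmoid'; 'lognorm_blend' is the new option. The sampler itself is not part of this diff, so the following is only a hypothetical sketch of what a logit-normal/linear blend could look like; the function name and blend ratio are assumptions, not the repository's implementation.

```python
import torch

def sample_timesteps_lognorm_blend(batch_size: int, num_timesteps: int = 1000,
                                    blend: float = 0.75) -> torch.Tensor:
    # Hypothetical sketch only, not the code added in this commit.
    # Mixes logit-normal sampling (concentrates training on mid-range
    # timesteps, as in rectified-flow training) with plain uniform sampling.
    lognorm_t = torch.sigmoid(torch.randn(batch_size))   # logit-normal values in (0, 1)
    uniform_t = torch.rand(batch_size)                    # uniform values in [0, 1)
    use_lognorm = torch.rand(batch_size) < blend          # pick a source per sample
    t = torch.where(use_lognorm, lognorm_t, uniform_t)
    return (t * (num_timesteps - 1)).long()
```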
@@ -470,6 +470,11 @@ class ModelConfig:
         if self.ignore_if_contains is not None or self.only_if_contains is not None:
             if not self.is_flux:
                 raise ValueError("ignore_if_contains and only_if_contains are only supported with flux models currently")
+
+        # splits the model over the available gpus WIP
+        self.split_model_over_gpus = kwargs.get("split_model_over_gpus", False)
+        if self.split_model_over_gpus and not self.is_flux:
+            raise ValueError("split_model_over_gpus is only supported with flux models currently")


 class EMAConfig:
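For reference, a hypothetical way the new flag could be passed when constructing the config; ModelConfig, is_flux, and split_model_over_gpus come from the diff above, while the keyword values and model path are illustrative assumptions rather than the toolkit's actual config loader.

```python
# Illustrative only: setting the experimental flag on ModelConfig.
# The model path and surrounding setup are assumptions, not part of this commit.
model_config = ModelConfig(
    name_or_path="black-forest-labs/FLUX.1-dev",  # assumed example Flux checkpoint
    is_flux=True,
    split_model_over_gpus=True,  # experimental: shard the Flux model across available GPUs
)
```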