mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2026-04-26 09:18:59 +00:00
Implement many kernels from scratch
This commit is contained in:
@@ -7,23 +7,29 @@ from backend import stream
|
||||
stash = {}
|
||||
|
||||
|
||||
def weights_manual_cast(layer, x, skip_dtype=False):
    """Copy ``layer``'s weight and bias to the device (and dtype) of ``x``.

    Args:
        layer: Object exposing ``weight`` and ``bias`` attributes; ``bias``
            may be ``None``.
        x: Tensor whose ``device`` and ``dtype`` define the cast target.
        skip_dtype: When true, only the device is changed; ``dtype=None`` is
            passed to ``.to()`` so the parameters keep their own dtype.

    Returns:
        A ``(weight, bias, signal)`` tuple. ``bias`` is ``None`` when the
        layer has no bias. ``signal`` is the event recorded on
        ``stream.mover_stream`` when ``stream.using_stream`` is set,
        otherwise ``None``.
    """
    weight, bias, signal = None, None, None

    # Non-blocking copies are disabled when the input lives on an MPS device.
    non_blocking = True
    if getattr(x.device, 'type', None) == 'mps':
        non_blocking = False

    target_device = x.device
    target_dtype = None if skip_dtype else x.dtype

    if stream.using_stream:
        # Issue the copies on the mover stream and record an event that is
        # returned to the caller as ``signal``.
        with stream.stream_context()(stream.mover_stream):
            if layer.bias is not None:
                bias = layer.bias.to(device=target_device, dtype=target_dtype, non_blocking=non_blocking)
            weight = layer.weight.to(device=target_device, dtype=target_dtype, non_blocking=non_blocking)
            signal = stream.mover_stream.record_event()
    else:
        if layer.bias is not None:
            bias = layer.bias.to(device=target_device, dtype=target_dtype, non_blocking=non_blocking)
        weight = layer.weight.to(device=target_device, dtype=target_dtype, non_blocking=non_blocking)

    return weight, bias, signal
|
||||
|
||||
@@ -60,9 +66,19 @@ def cleanup_cache():
|
||||
return
|
||||
|
||||
|
||||
# Module-level construction defaults: using_forge_operations() assigns all
# three, and the layer constructors below read them.
# Value forced into each layer constructor's ``device`` keyword argument.
current_device = None
|
||||
# Value forced into the layer constructors' ``dtype`` keyword argument.
current_dtype = None
|
||||
# Value copied onto each new layer's ``parameters_manual_cast`` attribute.
current_manual_cast_enabled = False
|
||||
|
||||
|
||||
class ForgeOperations:
|
||||
class Linear(torch.nn.Linear):
|
||||
parameters_manual_cast = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['device'] = current_device
|
||||
kwargs['dtype'] = current_dtype
|
||||
super().__init__(*args, **kwargs)
|
||||
self.parameters_manual_cast = current_manual_cast_enabled
|
||||
|
||||
def reset_parameters(self):
|
||||
return None
|
||||
@@ -76,7 +92,12 @@ class ForgeOperations:
|
||||
return super().forward(x)
|
||||
|
||||
class Conv2d(torch.nn.Conv2d):
|
||||
parameters_manual_cast = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['device'] = current_device
|
||||
kwargs['dtype'] = current_dtype
|
||||
super().__init__(*args, **kwargs)
|
||||
self.parameters_manual_cast = current_manual_cast_enabled
|
||||
|
||||
def reset_parameters(self):
|
||||
return None
|
||||
@@ -90,7 +111,12 @@ class ForgeOperations:
|
||||
return super().forward(x)
|
||||
|
||||
class Conv3d(torch.nn.Conv3d):
|
||||
parameters_manual_cast = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['device'] = current_device
|
||||
kwargs['dtype'] = current_dtype
|
||||
super().__init__(*args, **kwargs)
|
||||
self.parameters_manual_cast = current_manual_cast_enabled
|
||||
|
||||
def reset_parameters(self):
|
||||
return None
|
||||
@@ -103,8 +129,98 @@ class ForgeOperations:
|
||||
else:
|
||||
return super().forward(x)
|
||||
|
||||
class Conv1d(torch.nn.Conv1d):
    """``torch.nn.Conv1d`` whose device, dtype and manual-cast flag come from module state."""

    def __init__(self, *args, **kwargs):
        # Caller-supplied device/dtype are overridden by the module-level values.
        kwargs.update(device=current_device, dtype=current_dtype)
        super().__init__(*args, **kwargs)
        self.parameters_manual_cast = current_manual_cast_enabled

    def reset_parameters(self):
        """No-op override: returns ``None`` without touching any parameter."""
        return None

    def forward(self, x):
        """Run the convolution, casting weight/bias to match ``x`` when manual cast is on."""
        if not self.parameters_manual_cast:
            return super().forward(x)

        cast_weight, cast_bias, ready = weights_manual_cast(self, x)
        # main_stream_worker is defined elsewhere; it receives the cast
        # tensors together with the event returned by weights_manual_cast.
        with main_stream_worker(cast_weight, cast_bias, ready):
            return self._conv_forward(x, cast_weight, cast_bias)
|
||||
|
||||
class ConvTranspose2d(torch.nn.ConvTranspose2d):
    """``torch.nn.ConvTranspose2d`` whose device, dtype and manual-cast flag come from module state."""

    def __init__(self, *args, **kwargs):
        # Caller-supplied device/dtype are overridden by the module-level values.
        kwargs.update(device=current_device, dtype=current_dtype)
        super().__init__(*args, **kwargs)
        self.parameters_manual_cast = current_manual_cast_enabled

    def reset_parameters(self):
        """No-op override: returns ``None`` without touching any parameter."""
        return None

    def forward(self, x, output_size=None):
        """Run the transposed convolution, casting weight/bias to match ``x`` when manual cast is on."""
        if not self.parameters_manual_cast:
            return super().forward(x, output_size)

        # Two spatial dimensions for the 2-D variant.
        extra_padding = self._output_padding(
            x, output_size, self.stride, self.padding, self.kernel_size, 2, self.dilation
        )

        cast_weight, cast_bias, ready = weights_manual_cast(self, x)
        with main_stream_worker(cast_weight, cast_bias, ready):
            return torch.nn.functional.conv_transpose2d(
                x, cast_weight, cast_bias, self.stride, self.padding,
                extra_padding, self.groups, self.dilation,
            )
|
||||
|
||||
class ConvTranspose1d(torch.nn.ConvTranspose1d):
    """``torch.nn.ConvTranspose1d`` whose device, dtype and manual-cast flag come from module state."""

    def __init__(self, *args, **kwargs):
        # Caller-supplied device/dtype are overridden by the module-level values.
        kwargs.update(device=current_device, dtype=current_dtype)
        super().__init__(*args, **kwargs)
        self.parameters_manual_cast = current_manual_cast_enabled

    def reset_parameters(self):
        """No-op override: returns ``None`` without touching any parameter."""
        return None

    def forward(self, x, output_size=None):
        """Run the transposed convolution, casting weight/bias to match ``x`` when manual cast is on."""
        if not self.parameters_manual_cast:
            return super().forward(x, output_size)

        # One spatial dimension for the 1-D variant.
        extra_padding = self._output_padding(
            x, output_size, self.stride, self.padding, self.kernel_size, 1, self.dilation
        )

        cast_weight, cast_bias, ready = weights_manual_cast(self, x)
        with main_stream_worker(cast_weight, cast_bias, ready):
            return torch.nn.functional.conv_transpose1d(
                x, cast_weight, cast_bias, self.stride, self.padding,
                extra_padding, self.groups, self.dilation,
            )
|
||||
|
||||
class ConvTranspose3d(torch.nn.ConvTranspose3d):
    """``torch.nn.ConvTranspose3d`` whose device, dtype and manual-cast flag come from module state."""

    def __init__(self, *args, **kwargs):
        # Caller-supplied device/dtype are overridden by the module-level values.
        kwargs.update(device=current_device, dtype=current_dtype)
        super().__init__(*args, **kwargs)
        self.parameters_manual_cast = current_manual_cast_enabled

    def reset_parameters(self):
        """No-op override: returns ``None`` without touching any parameter."""
        return None

    def forward(self, x, output_size=None):
        """Run the transposed convolution, casting weight/bias to match ``x`` when manual cast is on."""
        if not self.parameters_manual_cast:
            return super().forward(x, output_size)

        # Three spatial dimensions for the 3-D variant.
        extra_padding = self._output_padding(
            x, output_size, self.stride, self.padding, self.kernel_size, 3, self.dilation
        )

        cast_weight, cast_bias, ready = weights_manual_cast(self, x)
        with main_stream_worker(cast_weight, cast_bias, ready):
            return torch.nn.functional.conv_transpose3d(
                x, cast_weight, cast_bias, self.stride, self.padding,
                extra_padding, self.groups, self.dilation,
            )
|
||||
|
||||
class GroupNorm(torch.nn.GroupNorm):
|
||||
parameters_manual_cast = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['device'] = current_device
|
||||
kwargs['dtype'] = current_dtype
|
||||
super().__init__(*args, **kwargs)
|
||||
self.parameters_manual_cast = current_manual_cast_enabled
|
||||
|
||||
def reset_parameters(self):
|
||||
return None
|
||||
@@ -118,7 +234,12 @@ class ForgeOperations:
|
||||
return super().forward(x)
|
||||
|
||||
class LayerNorm(torch.nn.LayerNorm):
|
||||
parameters_manual_cast = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['device'] = current_device
|
||||
kwargs['dtype'] = current_dtype
|
||||
super().__init__(*args, **kwargs)
|
||||
self.parameters_manual_cast = current_manual_cast_enabled
|
||||
|
||||
def reset_parameters(self):
|
||||
return None
|
||||
@@ -131,34 +252,37 @@ class ForgeOperations:
|
||||
else:
|
||||
return super().forward(x)
|
||||
|
||||
class Embedding(torch.nn.Embedding):
    """``torch.nn.Embedding`` whose device and manual-cast flag come from module state.

    NOTE(review): reconstructed from a diff view in which removed
    ``ForgeOperationsWithManualCast`` lines were interleaved with this class;
    confirm against the committed file.
    """

    def __init__(self, *args, **kwargs):
        # Only the device is forced here; no dtype is injected for this layer.
        kwargs['device'] = current_device
        super().__init__(*args, **kwargs)
        self.parameters_manual_cast = current_manual_cast_enabled
        # weights_manual_cast reads ``layer.bias``, so the attribute is set to
        # ``None`` for this layer.
        self.bias = None

    def reset_parameters(self):
        """Set ``bias`` to ``None`` and return ``None``; the weight is left untouched."""
        self.bias = None
        return None

    def forward(self, x):
        """Look up embeddings, moving the weight to ``x``'s device when manual cast is on."""
        if self.parameters_manual_cast:
            # skip_dtype=True keeps the weight's own dtype -- presumably because
            # ``x`` holds integer indices; TODO confirm.
            weight, bias, signal = weights_manual_cast(self, x, skip_dtype=True)
            with main_stream_worker(weight, bias, signal):
                return torch.nn.functional.embedding(x, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse)
        else:
            return super().forward(x)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def using_forge_operations(parameters_manual_cast=False, operations=None):
|
||||
def using_forge_operations(operations=None, device=None, dtype=None, manual_cast_enabled=False):
|
||||
global current_device, current_dtype, current_manual_cast_enabled
|
||||
|
||||
current_device, current_dtype, current_manual_cast_enabled = device, dtype, manual_cast_enabled
|
||||
|
||||
if operations is None:
|
||||
operations = ForgeOperations
|
||||
|
||||
if parameters_manual_cast:
|
||||
operations = ForgeOperationsWithManualCast
|
||||
|
||||
op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm']
|
||||
op_names = ['Linear', 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d', 'GroupNorm', 'LayerNorm', 'Embedding']
|
||||
backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names}
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user