Mirror of https://github.com/lllyasviel/stable-diffusion-webui-forge.git, synced 2026-02-02 14:27:27 +00:00
less aggressive clip skip
to make CivitAI Pony image metadata work better
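CivitAI Pony (SDXL) image metadata typically records "Clip skip: 2", but SDXL text encoders already read the penultimate CLIP layer by default, so the old code skipped one layer too many when that value was restored. A minimal sketch of the difference (hypothetical helper names, not Forge code), assuming layer == "hidden" and layer_idx == -2 as SDXL configures its CLIP towers:

# Sketch of which hidden state each clip-skip value selects for an SDXL
# text encoder, before and after this commit. Assumes layer == "hidden"
# and layer_idx == -2 (SDXL's penultimate-layer default).

LAYER_IDX = -2          # SDXL already stops at the penultimate layer
MINIMAL_CLIP_SKIP = 2   # threshold this commit introduces for SDXL classes

def old_layer(clip_skip):
    if clip_skip > 1:
        # old helper: hidden_states[last_layer + 1 - skip]
        return LAYER_IDX + 1 - clip_skip
    return LAYER_IDX

def new_layer(clip_skip):
    if clip_skip > MINIMAL_CLIP_SKIP:
        return -clip_skip
    return LAYER_IDX

for skip in (1, 2, 3):
    print(f"clip skip {skip}: old -> hidden_states[{old_layer(skip)}], "
          f"new -> hidden_states[{new_layer(skip)}]")
# clip skip 1: old -> hidden_states[-2], new -> hidden_states[-2]
# clip skip 2: old -> hidden_states[-3], new -> hidden_states[-2]
# clip skip 3: old -> hidden_states[-4], new -> hidden_states[-3]

For CLIP_SD_15_L nothing changes numerically (-1 + 1 - skip == -skip); the behavioral change is in the "hidden"-layer models, whose threshold moves from 1 to minimal_clip_skip = 2.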
@@ -13,18 +13,18 @@ def move_clip_to_gpu():
     return
 
 
-def apply_clip_skip_to_transformer_outputs(x, last_layer, skip):
-    return x.hidden_states[last_layer + 1 - skip]
-
-
 class CLIP_SD_15_L(FrozenCLIPEmbedderWithCustomWords):
+    def __init__(self, wrapped, hijack):
+        super().__init__(wrapped, hijack)
+        self.minimal_clip_skip = 1
+
     def encode_with_transformers(self, tokens):
         move_clip_to_gpu()
         self.wrapped.transformer.text_model.embeddings.to(tokens.device)
         outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
 
-        if opts.CLIP_stop_at_last_layers > 1:
-            z = apply_clip_skip_to_transformer_outputs(outputs, last_layer=-1, skip=opts.CLIP_stop_at_last_layers)
+        if opts.CLIP_stop_at_last_layers > self.minimal_clip_skip:
+            z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
             z = self.wrapped.transformer.text_model.final_layer_norm(z)
         else:
             z = outputs.last_hidden_state
@@ -43,14 +43,15 @@ class CLIP_SD_21_H(FrozenCLIPEmbedderWithCustomWords):
         self.id_start = 49406
         self.id_end = 49407
         self.id_pad = 0
+        self.minimal_clip_skip = 2
 
     def encode_with_transformers(self, tokens):
         move_clip_to_gpu()
         self.wrapped.transformer.text_model.embeddings.to(tokens.device)
         outputs = self.wrapped.transformer(tokens, output_hidden_states=self.wrapped.layer == "hidden")
 
-        if opts.CLIP_stop_at_last_layers > 1:
-            z = apply_clip_skip_to_transformer_outputs(outputs, last_layer=self.wrapped.layer_idx, skip=opts.CLIP_stop_at_last_layers)
+        if opts.CLIP_stop_at_last_layers > self.minimal_clip_skip:
+            z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
             z = self.wrapped.transformer.text_model.final_layer_norm(z)
         elif self.wrapped.layer == "last":
             z = outputs.last_hidden_state
@@ -64,13 +65,14 @@ class CLIP_SD_21_H(FrozenCLIPEmbedderWithCustomWords):
 class CLIP_SD_XL_L(FrozenCLIPEmbedderWithCustomWords):
     def __init__(self, wrapped, hijack):
         super().__init__(wrapped, hijack)
+        self.minimal_clip_skip = 2
 
     def encode_with_transformers(self, tokens):
         self.wrapped.transformer.text_model.embeddings.to(tokens.device)
         outputs = self.wrapped.transformer(tokens, output_hidden_states=self.wrapped.layer == "hidden")
 
-        if opts.CLIP_stop_at_last_layers > 1:
-            z = apply_clip_skip_to_transformer_outputs(outputs, last_layer=self.wrapped.layer_idx, skip=opts.CLIP_stop_at_last_layers)
+        if opts.CLIP_stop_at_last_layers > self.minimal_clip_skip:
+            z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
         elif self.wrapped.layer == "last":
             z = outputs.last_hidden_state
         else:
@@ -90,13 +92,14 @@ class CLIP_SD_XL_G(FrozenCLIPEmbedderWithCustomWords):
         self.id_start = 49406
         self.id_end = 49407
         self.id_pad = 0
+        self.minimal_clip_skip = 2
 
     def encode_with_transformers(self, tokens):
         self.wrapped.transformer.text_model.embeddings.to(tokens.device)
         outputs = self.wrapped.transformer(tokens, output_hidden_states=self.wrapped.layer == "hidden")
 
-        if opts.CLIP_stop_at_last_layers > 1:
-            z = apply_clip_skip_to_transformer_outputs(outputs, last_layer=self.wrapped.layer_idx, skip=opts.CLIP_stop_at_last_layers)
+        if opts.CLIP_stop_at_last_layers > self.minimal_clip_skip:
+            z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
         elif self.wrapped.layer == "last":
             z = outputs.last_hidden_state
         else:
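Taken together, the four hijack classes now share one selection pattern. A hedged distillation (hypothetical helper, not code from this commit; the else bodies are cut off at the hunk boundaries above, so the hidden_states[layer_idx] fallback is an assumption):

# Hypothetical summary of the branch logic the four classes now share.
# `layer` / `layer_idx` come from the wrapped ldm CLIP config;
# `clip_skip` is opts.CLIP_stop_at_last_layers.
def select_clip_output(outputs, layer, layer_idx, clip_skip, minimal_clip_skip):
    if clip_skip > minimal_clip_skip:
        # user asked for more skipping than the model's built-in default
        return outputs.hidden_states[-clip_skip]
    if layer == "last":
        return outputs.last_hidden_state
    # assumed fallback: the model's own configured hidden layer
    return outputs.hidden_states[layer_idx]

With this in place, restoring "Clip skip: 2" from CivitAI Pony metadata reproduces the SDXL default penultimate-layer conditioning instead of skipping an extra layer, while values of 3 and above still reach deeper layers.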