Various bug fixes

2026-03-13 14:39:50 +00:00 · 2024-07-10 15:20:04 -06:00
parent c008405480
commit c062b7716c
9 changed files with 294 additions and 58 deletions
--- a/testing/merge_in_text_encoder_adapter.py
+++ b/testing/merge_in_text_encoder_adapter.py
@@ -11,10 +11,10 @@ import json
 # te_path = "google/flan-t5-xl"
 # te_aug_path = "/mnt/Train/out/ip_adapter/t5xx_sd15_v1/t5xx_sd15_v1_000032000.safetensors"
 # output_path = "/home/jaret/Dev/models/hf/kl-f16-d42_sd15_t5xl_raw"
-model_path = "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-1024-MS"
-te_path = "google/flan-t5-base"
-te_aug_path = "/home/jaret/Dev/models/tmp/pixart_sigma_t5base_000227500.safetensors"
-output_path = "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5base_raw"
+model_path = "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS"
+te_path = "google/flan-t5-large"
+te_aug_path = "/home/jaret/Dev/models/tmp/pixart_sigma_t5l_000034000.safetensors"
+output_path = "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw"


 print("Loading te adapter")
--- a/testing/shrink_pixart_sm.py
+++ b/testing/shrink_pixart_sm.py
@@ -2,62 +2,83 @@ import torch
 from safetensors.torch import load_file, save_file
 from collections import OrderedDict

-model_path = "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_tiny/transformer/diffusion_pytorch_model.orig.safetensors"
-output_path = "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_tiny/transformer/diffusion_pytorch_model.safetensors"
-
-state_dict = load_file(model_path)
-
 meta = OrderedDict()
-meta["format"] = "pt"
+meta['format'] = "pt"

+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def reduce_weight(weight, target_size):  # shrink the last dim of `weight` to target_size via SVD; not called in the visible hunk
+    weight = weight.to(device, torch.float32)
+    original_shape = weight.shape
+    flattened = weight.view(-1, original_shape[-1])  # collapse all leading dims -> (rows, last_dim)
+
+    if flattened.shape[1] <= target_size:
+        return weight  # last dim already fits; returned as float32 on `device`, not padded
+
+    U, S, V = torch.svd(flattened)  # V is not used below
+    reduced = torch.mm(U[:, :target_size], torch.diag(S[:target_size]))  # leading columns of U scaled by their singular values
+
+    if reduced.shape[1] < target_size:  # SVD produced fewer than target_size columns
+        padding = torch.zeros(reduced.shape[0], target_size - reduced.shape[1], device=device)
+        reduced = torch.cat((reduced, padding), dim=1)  # zero-fill the missing columns
+
+    return reduced.view(original_shape[:-1] + (target_size,))  # restore leading dims with the new last dim
+
+
+def reduce_bias(bias, target_size):  # resize a 1D bias to target_size; not called in the visible hunk
+    bias = bias.to(device, torch.float32)
+    original_size = bias.shape[0]
+
+    if original_size <= target_size:
+        return torch.nn.functional.pad(bias, (0, target_size - original_size))  # right-pad with zeros
+    else:  # NOTE(review): the view below needs original_size divisible by (original_size // target_size) - confirm
+        return bias.view(-1, original_size // target_size).mean(dim=1)[:target_size]  # average fixed-size groups, then truncate
+
+
+# Load the original (un-shrunk) transformer state dict
+state_dict = load_file(
+    "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.orig.safetensors")
+
+# Create a new state dict for the reduced model
 new_state_dict = {}

-# Move non-blocks over
+source_hidden_size = 1152  # dimension size searched for in each tensor
+target_hidden_size = 1024  # size kept after truncation
+
 for key, value in state_dict.items():
-    if not key.startswith("transformer_blocks."):
-        new_state_dict[key] = value
+    value = value.to(device, torch.float32)
+    if 'weight' in key or 'scale_shift_table' in key:
+        if value.shape[0] == source_hidden_size:  # dim 0: keep the leading target-sized slice
+            value = value[:target_hidden_size]
+        elif value.shape[0] == source_hidden_size * 4:  # 4x-wide dim 0 shrinks to 4x target
+            value = value[:target_hidden_size * 4]
+        elif value.shape[0] == source_hidden_size * 6:  # 6x-wide dim 0 shrinks to 6x target
+            value = value[:target_hidden_size * 6]

-block_names = ['transformer_blocks.{idx}.attn1.to_k.bias', 'transformer_blocks.{idx}.attn1.to_k.weight',
-               'transformer_blocks.{idx}.attn1.to_out.0.bias', 'transformer_blocks.{idx}.attn1.to_out.0.weight',
-               'transformer_blocks.{idx}.attn1.to_q.bias', 'transformer_blocks.{idx}.attn1.to_q.weight',
-               'transformer_blocks.{idx}.attn1.to_v.bias', 'transformer_blocks.{idx}.attn1.to_v.weight',
-               'transformer_blocks.{idx}.attn2.to_k.bias', 'transformer_blocks.{idx}.attn2.to_k.weight',
-               'transformer_blocks.{idx}.attn2.to_out.0.bias', 'transformer_blocks.{idx}.attn2.to_out.0.weight',
-               'transformer_blocks.{idx}.attn2.to_q.bias', 'transformer_blocks.{idx}.attn2.to_q.weight',
-               'transformer_blocks.{idx}.attn2.to_v.bias', 'transformer_blocks.{idx}.attn2.to_v.weight',
-               'transformer_blocks.{idx}.ff.net.0.proj.bias', 'transformer_blocks.{idx}.ff.net.0.proj.weight',
-               'transformer_blocks.{idx}.ff.net.2.bias', 'transformer_blocks.{idx}.ff.net.2.weight',
-               'transformer_blocks.{idx}.scale_shift_table']
+        if len(value.shape) > 1 and value.shape[
+            1] == source_hidden_size and 'attn2.to_k.weight' not in key and 'attn2.to_v.weight' not in key:  # attn2 k/v keep dim 1 (presumably text-encoder width - confirm)
+            value = value[:, :target_hidden_size]  # dim 1: keep the leading target-sized slice
+        elif len(value.shape) > 1 and value.shape[1] == source_hidden_size * 4:
+            value = value[:, :target_hidden_size * 4]

-# New block idx 0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 27
+    elif 'bias' in key:  # biases are 1D here as far as this code assumes: only dim 0 is checked
+        if value.shape[0] == source_hidden_size:
+            value = value[:target_hidden_size]
+        elif value.shape[0] == source_hidden_size * 4:
+            value = value[:target_hidden_size * 4]
+        elif value.shape[0] == source_hidden_size * 6:
+            value = value[:target_hidden_size * 6]

-current_idx = 0
-for i in range(28):
-    if i not in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
-        # todo merge in with previous block
-        for name in block_names:
-            continue
-            # try:
-            #     new_state_dict_key = name.format(idx=current_idx - 1)
-            #     old_state_dict_key = name.format(idx=i)
-            #     new_state_dict[new_state_dict_key] = (new_state_dict[new_state_dict_key] * 0.5) + (state_dict[old_state_dict_key] * 0.5)
-            # except KeyError:
-            #     raise KeyError(f"KeyError: {name.format(idx=current_idx)}")
-    else:
-        for name in block_names:
-            new_state_dict[name.format(idx=current_idx)] = state_dict[name.format(idx=i)]
-        current_idx += 1
+    new_state_dict[key] = value  # tensors matching no size above pass through (as float32 on `device`)

-
-# make sure they are all fp16 and on cpu
+# Move every tensor to CPU and convert it to float16 before saving
 for key, value in new_state_dict.items():
-    new_state_dict[key] = value.to(torch.float16).cpu()
+    new_state_dict[key] = value.cpu().to(torch.float16)

-# save the new state dict
-save_file(new_state_dict, output_path, metadata=meta)
+# Save the new state dict next to the .orig file it was derived from
+save_file(new_state_dict,
+          "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.safetensors",
+          metadata=meta)

-new_param_count = sum([v.numel() for v in new_state_dict.values()])
-old_param_count = sum([v.numel() for v in state_dict.values()])
-
-print(f"Old param count: {old_param_count:,}")
-print(f"New param count: {new_param_count:,}")
+print("Done!")
--- a/testing/shrink_pixart_sm2.py
+++ b/testing/shrink_pixart_sm2.py
@@ -0,0 +1,110 @@
+import torch
+from safetensors.torch import load_file, save_file
+from collections import OrderedDict
+
+meta = OrderedDict()
+meta['format'] = "pt"
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def reduce_weight(weight, target_size):
+    """Keep the first ``target_size`` entries along dim 0 of ``weight``.
+
+    The tensor is first moved to ``device`` and cast to float32. Tensors
+    whose dim 0 is already <= ``target_size`` are returned whole; larger
+    ones lose their trailing rows while all other dims stay unchanged.
+
+    This replaces a ``torch.svd`` round trip that computed
+    ``U[:target_size] @ diag(S) @ V.t()``. No singular values were dropped,
+    so that product is just the first ``target_size`` rows of the input; the
+    decomposition only added cost and rounding error, and ``torch.svd`` is
+    deprecated. Slicing returns those rows directly.
+    """
+    weight = weight.to(device, torch.float32)
+
+    if weight.shape[0] <= target_size:
+        # Nothing to drop; return the float32 tensor unchanged.
+        return weight
+
+    # Slicing dim 0 handles 1D and higher-rank tensors alike and keeps the
+    # trailing dims. contiguous() is a no-op for already-contiguous input
+    # and keeps the result safe to .view() and to save.
+    return weight[:target_size].contiguous()
+
+
+def reduce_bias(bias, target_size):  # truncate a bias to its first target_size entries
+    bias = bias.to(device, torch.float32)
+    return bias[:target_size]  # slicing past the end simply returns the whole tensor
+
+
+# Load the original (un-shrunk) transformer state dict
+state_dict = load_file(
+    "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.orig.safetensors")
+
+# Create a new state dict for the reduced model
+new_state_dict = {}
+
+for key, value in state_dict.items():
+    value = value.to(device, torch.float32)
+
+    if 'weight' in key or 'scale_shift_table' in key:
+        if value.shape[0] == 1152:  # dim 0 of 1152 shrinks to 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (512, orig_shape[1], orig_shape[2], orig_shape[3])  # target shape once dim 0 is shrunk
+                # flatten to (1152, -1) so reduce_weight receives a 2D matrix
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 512)
+                value = value.view(output_shape)  # restore the trailing three dims
+            else:
+                # value = reduce_weight(value.t(), 576).t().contiguous()
+                value = reduce_weight(value, 512)
+                pass
+        elif value.shape[0] == 4608:  # 4 * 1152 shrinks to 4 * 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (2048, orig_shape[1], orig_shape[2], orig_shape[3])
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 2048)
+                value = value.view(output_shape)
+            else:
+                value = reduce_weight(value, 2048)
+        elif value.shape[0] == 6912:  # 6 * 1152 shrinks to 6 * 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (3072, orig_shape[1], orig_shape[2], orig_shape[3])
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 3072)
+                value = value.view(output_shape)
+            else:
+                value = reduce_weight(value, 3072)
+
+        if len(value.shape) > 1 and value.shape[
+            1] == 1152 and 'attn2.to_k.weight' not in key and 'attn2.to_v.weight' not in key:  # attn2 k/v keep dim 1 (presumably text-encoder width - confirm)
+            value = reduce_weight(value.t(), 512).t().contiguous()  # transpose so dim 1 becomes dim 0, reduce, transpose back
+            pass
+        elif len(value.shape) > 1 and value.shape[1] == 4608:
+            value = reduce_weight(value.t(), 2048).t().contiguous()   # same transpose trick for the 4x-wide dim 1
+            pass
+
+    elif 'bias' in key:  # only dim 0 is inspected for biases
+        if value.shape[0] == 1152:
+            value = reduce_bias(value, 512)
+        elif value.shape[0] == 4608:
+            value = reduce_bias(value, 2048)
+        elif value.shape[0] == 6912:
+            value = reduce_bias(value, 3072)
+
+    new_state_dict[key] = value  # tensors matching no size above pass through as float32
+
+# Move every tensor to CPU and convert it to float16 before saving
+for key, value in new_state_dict.items():
+    new_state_dict[key] = value.cpu().to(torch.float16)
+
+# Save the new state dict next to the .orig file it was derived from
+save_file(new_state_dict,
+          "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.safetensors",
+          metadata=meta)
+
+print("Done!")
--- a/testing/shrink_pixart_sm3.py
+++ b/testing/shrink_pixart_sm3.py
@@ -0,0 +1,100 @@
+import torch
+from safetensors.torch import load_file, save_file
+from collections import OrderedDict
+
+meta = OrderedDict()
+meta['format'] = "pt"
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def reduce_weight(weight, target_size):  # resample a 2D weight along dim 0 to target_size rows
+    weight = weight.to(device, torch.float32)
+    # add two leading singleton dims so interpolate can treat the matrix as a 4D input
+    tmp_weight = weight.view(1, 1, weight.shape[0], weight.shape[1])  # indexes shape[1], so input must be 2D
+
+    # bicubic resample to (target_size, weight.shape[1]); the requested column count equals the input's
+    new_weight = torch.nn.functional.interpolate(tmp_weight, size=(target_size, weight.shape[1]), mode='bicubic', align_corners=True)
+
+    # drop the two leading singleton dims again
+    return new_weight.view(target_size, weight.shape[1])
+
+
+def reduce_bias(bias, target_size):  # resample a 1D bias to target_size entries; unlike reduce_weight, no device/dtype cast here
+    bias = bias.view(1, 1, bias.shape[0], 1)  # (1, 1, N, 1): a one-column 4D input for interpolate
+
+    new_bias = torch.nn.functional.interpolate(bias, size=(target_size, 1), mode='bicubic', align_corners=True)
+
+    return new_bias.view(target_size)  # back to 1D
+
+
+# Load the original (un-shrunk) transformer state dict
+state_dict = load_file(
+    "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.orig.safetensors")
+
+# Create a new state dict for the reduced model
+new_state_dict = {}
+
+for key, value in state_dict.items():
+    value = value.to(device, torch.float32)
+
+    if 'weight' in key or 'scale_shift_table' in key:
+        if value.shape[0] == 1152:  # dim 0 of 1152 shrinks to 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (512, orig_shape[1], orig_shape[2], orig_shape[3])  # target shape once dim 0 is shrunk
+                # flatten to (1152, -1) so reduce_weight receives a 2D matrix
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 512)
+                value = value.view(output_shape)  # restore the trailing three dims
+            else:
+                # value = reduce_weight(value.t(), 576).t().contiguous()
+                value = reduce_weight(value, 512)  # NOTE(review): reduce_weight indexes shape[1]; a 1D tensor here would raise - confirm none exist
+                pass
+        elif value.shape[0] == 4608:  # 4 * 1152 shrinks to 4 * 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (2048, orig_shape[1], orig_shape[2], orig_shape[3])
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 2048)
+                value = value.view(output_shape)
+            else:
+                value = reduce_weight(value, 2048)
+        elif value.shape[0] == 6912:  # 6 * 1152 shrinks to 6 * 512
+            if len(value.shape) == 4:
+                orig_shape = value.shape
+                output_shape = (3072, orig_shape[1], orig_shape[2], orig_shape[3])
+                value = value.view(value.shape[0], -1)
+                value = reduce_weight(value, 3072)
+                value = value.view(output_shape)
+            else:
+                value = reduce_weight(value, 3072)
+
+        if len(value.shape) > 1 and value.shape[
+            1] == 1152 and 'attn2.to_k.weight' not in key and 'attn2.to_v.weight' not in key:  # attn2 k/v keep dim 1 (presumably text-encoder width - confirm)
+            value = reduce_weight(value.t(), 512).t().contiguous()  # transpose so dim 1 becomes dim 0, reduce, transpose back
+            pass
+        elif len(value.shape) > 1 and value.shape[1] == 4608:
+            value = reduce_weight(value.t(), 2048).t().contiguous()   # same transpose trick for the 4x-wide dim 1
+            pass
+
+    elif 'bias' in key:  # only dim 0 is inspected for biases
+        if value.shape[0] == 1152:
+            value = reduce_bias(value, 512)
+        elif value.shape[0] == 4608:
+            value = reduce_bias(value, 2048)
+        elif value.shape[0] == 6912:
+            value = reduce_bias(value, 3072)
+
+    new_state_dict[key] = value  # tensors matching no size above pass through as float32
+
+# Move every tensor to CPU and convert it to float16 before saving
+for key, value in new_state_dict.items():
+    new_state_dict[key] = value.cpu().to(torch.float16)
+
+# Save the new state dict next to the .orig file it was derived from
+save_file(new_state_dict,
+          "/home/jaret/Dev/models/hf/PixArt-Sigma-XL-2-512_MS_t5large_raw/transformer/diffusion_pytorch_model.safetensors",
+          metadata=meta)
+
+print("Done!")