Added ability to quantize with torchao

2026-04-27 17:51:41 +00:00 · 2025-03-20 16:28:54 -06:00
parent 3a6b24f4c8
commit f5aa4232fa
7 changed files with 57 additions and 26 deletions
--- a/toolkit/models/base_model.py
+++ b/toolkit/models/base_model.py
@@ -23,27 +23,22 @@ from toolkit.models.decorator import Decorator
 from toolkit.paths import KEYMAPS_ROOT
 from toolkit.prompt_utils import inject_trigger_into_prompt, PromptEmbeds, concat_prompt_embeds
 from toolkit.reference_adapter import ReferenceAdapter
-from toolkit.saving import save_ldm_model_from_diffusers
 from toolkit.sd_device_states_presets import empty_preset
 from toolkit.train_tools import get_torch_dtype, apply_noise_offset
 import torch
 from toolkit.pipelines import CustomStableDiffusionXLPipeline
 from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, T2IAdapter, DDPMScheduler, \
-    LCMScheduler, Transformer2DModel, AutoencoderTiny, ControlNetModel, \
-    FluxTransformer2DModel
-from toolkit.models.lumina2 import Lumina2Transformer2DModel
+    LCMScheduler, Transformer2DModel, AutoencoderTiny, ControlNetModel
 import diffusers
 from diffusers import \
    AutoencoderKL, \
    UNet2DConditionModel
 from diffusers import PixArtAlphaPipeline
-from transformers import T5EncoderModel, UMT5EncoderModel
 from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection

 from toolkit.accelerator import get_accelerator, unwrap_model
 from typing import TYPE_CHECKING
 from toolkit.print import print_acc
-from transformers import Gemma2Model, Qwen2Model, LlamaModel

 if TYPE_CHECKING:
    from toolkit.lora_special import LoRASpecialNetwork
--- a/toolkit/models/cogview4.py
+++ b/toolkit/models/cogview4.py
@@ -19,7 +19,7 @@ import torch
 import diffusers
 from diffusers import AutoencoderKL, CogView4Transformer2DModel, CogView4Pipeline
 from optimum.quanto import freeze, qfloat8, QTensor, qint4
-from toolkit.util.quantize import quantize
+from toolkit.util.quantize import quantize, get_qtype
 from transformers import GlmModel, AutoTokenizer
 from diffusers import FlowMatchEulerDiscreteScheduler
 from typing import TYPE_CHECKING
@@ -114,7 +114,7 @@ class CogView4(BaseModel):

        if self.model_config.quantize_te:
            self.print_and_status_update("Quantizing GlmModel")
-            quantize(text_encoder, weights=qfloat8)
+            quantize(text_encoder, weights=get_qtype(self.model_config.qtype))
            freeze(text_encoder)
            flush()

@@ -166,7 +166,7 @@ class CogView4(BaseModel):

            # patch the state dict method
            patch_dequantization_on_save(transformer)
-            quantization_type = qfloat8
+            quantization_type = get_qtype(self.model_config.qtype)
            self.print_and_status_update("Quantizing transformer")
            quantize(transformer, weights=quantization_type, **quantization_args)
            freeze(transformer)
--- a/toolkit/models/wan21/wan21.py
+++ b/toolkit/models/wan21/wan21.py
@@ -29,7 +29,7 @@ import copy
 from toolkit.config_modules import ModelConfig, GenerateImageConfig, ModelArch
 import torch
 from optimum.quanto import freeze, qfloat8, QTensor, qint4
-from toolkit.util.quantize import quantize
+from toolkit.util.quantize import quantize, get_qtype
 from diffusers import FlowMatchEulerDiscreteScheduler, UniPCMultistepScheduler
 from typing import TYPE_CHECKING, List
 from toolkit.accelerator import unwrap_model
@@ -377,7 +377,7 @@ class Wan21(BaseModel):
                quantization_args['exclude'] = []
            # patch the state dict method
            patch_dequantization_on_save(transformer)
-            quantization_type = qfloat8
+            quantization_type = get_qtype(self.model_config.qtype)
            self.print_and_status_update("Quantizing transformer")
            if self.model_config.low_vram:
                print("Quantizing blocks")
@@ -425,7 +425,7 @@ class Wan21(BaseModel):

        if self.model_config.quantize_te:
            self.print_and_status_update("Quantizing UMT5EncoderModel")
-            quantize(text_encoder, weights=qfloat8)
+            quantize(text_encoder, weights=get_qtype(self.model_config.qtype))
            freeze(text_encoder)
            flush()