Quantize blocks sequentially without an ARA

This commit is contained in:
Jaret Burkett
2025-08-14 09:59:58 -06:00
parent 3ff4430e84
commit e12bb21780

View File

@@ -283,9 +283,9 @@ def quantize_model(
all_blocks: List[torch.nn.Module] = []
transformer_block_names = base_model.get_transformer_block_names()
for name in transformer_block_names:
block = getattr(model_to_quantize, name, None)
if block is not None:
all_blocks.append(block)
block_list = getattr(model_to_quantize, name, None)
if block_list is not None:
all_blocks += list(block_list)
base_model.print_and_status_update(
f" - quantizing {len(all_blocks)} transformer blocks"
)