restrict baking to 16bits

layerdiffusion
2024-08-26 06:16:13 -07:00
parent 7cd94babdd
commit f22b80ef94

@@ -70,6 +70,11 @@ class ParameterGGUF(torch.nn.Parameter):
def bake_gguf_model(model):
    computation_dtype = model.computation_dtype

    if computation_dtype not in [torch.float16, torch.bfloat16]:
        # Baking only supports 16-bit dtypes; anything else is super slow
        computation_dtype = torch.float16

    backed_layer_counter = 0

    for m in model.modules():
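
For context, a minimal standalone sketch of the dtype clamp this hunk adds. The helper name resolve_bake_dtype is hypothetical (in the real code the check is inline in bake_gguf_model); the assumption, per the inline comment, is that baking in anything other than float16/bfloat16 is prohibitively slow, so the code falls back to float16 rather than raising an error.

    import torch

    def resolve_bake_dtype(computation_dtype: torch.dtype) -> torch.dtype:
        # Hypothetical standalone version of the inline check above:
        # clamp any non-16-bit computation dtype (e.g. float32) to
        # float16, since baking is only supported in 16-bit dtypes.
        if computation_dtype not in (torch.float16, torch.bfloat16):
            return torch.float16
        return computation_dtype

    # torch dtypes are singletons, so identity checks are safe here.
    assert resolve_bake_dtype(torch.float32) is torch.float16    # clamped
    assert resolve_bake_dtype(torch.bfloat16) is torch.bfloat16  # preserved
    assert resolve_bake_dtype(torch.float16) is torch.float16    # preserved

Falling back silently to float16 instead of raising keeps baking usable for models configured with a float32 computation dtype, at the cost of possible precision loss in the baked weights.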