Experimental LoRA support for NF4 models

The method may change later depending on result quality.
2026-04-29 10:41:25 +00:00 · 2024-08-14 19:52:19 -07:00
parent 70a5acd8ad
commit cb889470ba
2 changed files with 38 additions and 1 deletions
--- a/backend/patcher/base.py
+++ b/backend/patcher/base.py
@@ -255,9 +255,23 @@ class ModelPatcher:
            if key not in self.backup:
                self.backup[key] = weight.to(device=self.offload_device)

+            bnb_layer = None
+
            if operations.bnb_avaliable:
                if hasattr(weight, 'bnb_quantized'):
-                    raise NotImplementedError('LoRAs for NF4/FP4 models are under construction and not available now.\nSorry for the inconvenience!')
+                    assert weight.module is not None, 'BNB bad weight without parent layer!'
+                    bnb_layer = weight.module
+                    if weight.bnb_quantized:
+                        if device_to is not None:
+                            assert device_to.type == 'cuda', 'BNB Must use CUDA!'
+                            weight = weight.to(device_to)
+                        else:
+                            weight = weight.cuda()
+
+                        from backend.operations_bnb import functional_dequantize_4bit
+                        weight = functional_dequantize_4bit(weight)
+                    else:
+                        weight = weight.data

            to_args = dict(dtype=torch.float32)

@@ -269,6 +283,10 @@ class ModelPatcher:

            out_weight = merge_lora_to_model_weight(current_patches, temp_weight, key).to(weight.dtype)

+            if bnb_layer is not None:
+                bnb_layer.reload_weight(out_weight)
+                continue
+
            utils.set_attr_raw(self.model, key, torch.nn.Parameter(out_weight, requires_grad=False))

        if device_to is not None: