Fix issue where the text encoder could use the wrong quantization and fail when using the memory manager

This commit is contained in:
Jaret Burkett
2025-10-15 11:01:30 -06:00
parent 7abf5e20be
commit 1f81bc4060

View File

@@ -632,7 +632,7 @@ class ModelConfig:
self.layer_offloading = kwargs.get("layer_offloading", self.auto_memory )
if self.layer_offloading and self.qtype == "qfloat8":
self.qtype = "float8"
if self.layer_offloading and not self.qtype_te == "qfloat8":
if self.layer_offloading and self.qtype_te == "qfloat8":
self.qtype_te = "float8"
# 0 is off and 1.0 is 100% of the layers