From 8b46282aefb881b5651b65bcbb67b09e51d62250 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 11 Mar 2024 23:42:52 -0400 Subject: [PATCH] Model: Fix state flag sets on unload The load state should be false only if the models are unloaded. Signed-off-by: kingbri --- backends/exllamav2/model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 920c0df..6e36ee2 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -475,11 +475,13 @@ class ExllamaV2Container: self.tokenizer = None self.generator = None + # Set all model state variables to False + self.model_is_loading = False + self.model_loaded = False + gc.collect() torch.cuda.empty_cache() - # Update model load state - self.model_loaded = False logger.info("Loras unloaded." if loras_only else "Model unloaded.") def encode_tokens(self, text: str, **kwargs):