From 37aea9de83e15c2a0dd80d1ad8ec38dc120c7341 Mon Sep 17 00:00:00 2001 From: mefich Date: Thu, 30 Oct 2025 12:30:23 +0500 Subject: [PATCH] Update exl3 backend model.py: fix for unloading vision models This change ensures that when a VLM is unloaded, its vision model is unloaded as well. --- backends/exllamav3/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index d385de7..44298c9 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -563,6 +563,10 @@ class ExllamaV3Container(BaseModelContainer): self.draft_config = None self.draft_cache = None + if self.use_vision: + self.vision_model.unload() + self.vision_model = None + # Cleanup the generator from any pending jobs if self.generator is not None: await self.generator.close()