Update exl3 backend model.py: fix for unloading vision models

This change ensures that when a vision-language model (VLM) is unloaded, its vision component is unloaded as well.
This commit is contained in:
mefich
2025-10-30 12:30:23 +05:00
committed by GitHub
parent 996bc8dbe1
commit 37aea9de83

View File

@@ -563,6 +563,10 @@ class ExllamaV3Container(BaseModelContainer):
self.draft_config = None
self.draft_cache = None
if self.use_vision:
self.vision_model.unload()
self.vision_model = None
# Cleanup the generator from any pending jobs
if self.generator is not None:
await self.generator.close()