mirror of
https://github.com/turboderp-org/exllamav3.git
synced 2026-04-20 14:29:51 +00:00
generator: Free recurrent state after job completed (prevent memory leak with large job queue)
This commit is contained in:
@@ -572,6 +572,7 @@ class Generator:
|
||||
num_jobs = self.num_remaining_jobs()
|
||||
for job in completed_jobs + requeuing_jobs:
|
||||
job.deallocate_pages()
|
||||
job.free_recurrent_state()
|
||||
self.active_jobs.remove(job)
|
||||
|
||||
# Requeue jobs
|
||||
|
||||
@@ -1096,3 +1096,6 @@ class Job:
|
||||
page = seq.allocated_pages[last_page]
|
||||
assert page.kv_position == PAGE_SIZE
|
||||
cache.stash(page.phash, self.recurrent_state)
|
||||
|
||||
def free_recurrent_state(self) -> None:
    """Drop this job's reference to its recurrent state.

    Sets ``self.recurrent_state`` to ``None`` so the job object no longer
    keeps the state alive. Per the accompanying change description, this is
    called once a job has completed so that finished jobs lingering in a
    large queue do not pin their recurrent state in memory.

    Calling it more than once is harmless: the attribute simply stays
    ``None``.
    """
    # Rebinding (rather than `del`) keeps the attribute present, so later
    # reads of `self.recurrent_state` see None instead of raising.
    self.recurrent_state = None
|
||||
|
||||
Reference in New Issue
Block a user