mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-24 08:19:19 +00:00
Model: Create generator on load
This commit is contained in:
@@ -345,6 +345,9 @@ class ExllamaV3Container(BaseModelContainer):
|
|||||||
async for value in iterate_in_threadpool(generator):
|
async for value in iterate_in_threadpool(generator):
|
||||||
yield value
|
yield value
|
||||||
|
|
||||||
|
# Create async generator
|
||||||
|
await self.create_generator()
|
||||||
|
|
||||||
# Clean up any extra vram usage from torch and cuda
|
# Clean up any extra vram usage from torch and cuda
|
||||||
# (Helps reduce VRAM bottlenecking on Windows)
|
# (Helps reduce VRAM bottlenecking on Windows)
|
||||||
gc.collect()
|
gc.collect()
|
||||||
@@ -774,12 +777,6 @@ class ExllamaV3Container(BaseModelContainer):
|
|||||||
f"max_seq_len {self.max_seq_len}"
|
f"max_seq_len {self.max_seq_len}"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.generator = AsyncGenerator(
|
|
||||||
model=self.model,
|
|
||||||
cache=self.cache,
|
|
||||||
tokenizer=self.tokenizer,
|
|
||||||
)
|
|
||||||
|
|
||||||
generation = {}
|
generation = {}
|
||||||
job = AsyncJob(
|
job = AsyncJob(
|
||||||
self.generator,
|
self.generator,
|
||||||
|
|||||||
Reference in New Issue
Block a user