mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-20 06:19:15 +00:00
Model: Add torch.inference_mode() to generator function
Provides a speedup to the model's forward pass.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -648,6 +648,7 @@ class ExllamaV2Container:
|
||||
async for value in iterate_in_threadpool(sync_generator):
|
||||
yield value
|
||||
|
||||
@torch.inference_mode()
|
||||
def generate_gen_sync(
|
||||
self, prompt: str, abort_event: Optional[threading.Event] = None, **kwargs
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user