Model: Add torch.inference_mode() to generator function

Speeds up the model's forward pass by running generation with autograd tracking disabled.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-03-30 10:42:43 -04:00
parent e8b6a02aa8
commit b11aac51e2

View File

@@ -648,6 +648,7 @@ class ExllamaV2Container:
async for value in iterate_in_threadpool(sync_generator):
yield value
@torch.inference_mode()
def generate_gen_sync(
self, prompt: str, abort_event: Optional[threading.Event] = None, **kwargs
):