Merge pull request #112 from DocShotgun/main

Separate new prompt tokens from those reused from cache in metric logging
This commit is contained in:
Brian Dashore
2024-05-27 18:04:43 -04:00
committed by GitHub
2 changed files with 9 additions and 2 deletions

View File

@@ -1144,6 +1144,7 @@ class ExllamaV2Container:
log_metrics(
result.get("time_enqueued"),
result.get("prompt_tokens"),
result.get("cached_tokens"),
result.get("time_prefill"),
result.get("new_tokens"),
result.get("time_generate"),