mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-27 17:51:48 +00:00
Use high priority stream for forward pass
This commit is contained in:
@@ -590,6 +590,7 @@ if args.speed:
|
||||
|
||||
logits = model.forward(ids[:, -1:], cache)
|
||||
sample = torch.argmax(logits[0, -1]).cpu().unsqueeze(0).unsqueeze(0)
|
||||
sample.clamp_(0, tokenizer.get_vocab_size() - 1)
|
||||
ids = torch.cat((ids, sample), dim=-1)
|
||||
|
||||
time_end = time.time()
|
||||
|
||||
Reference in New Issue
Block a user