mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-20 14:29:28 +00:00
Fix FP32 residual for paged attn
This commit is contained in:
@@ -628,7 +628,7 @@ class ExLlamaV2Attention(ExLlamaV2Module):
|
||||
else:
|
||||
hidden_states = self.o_proj.forward(attn_output, loras = loras)
|
||||
if self.post_layernorm:
|
||||
- hidden_states = self.post_layernorm.forward(hidden_states)
|
||||
+ hidden_states = self.post_layernorm.forward(hidden_states, output_fp32 = self.archparams.residual_stream_fp32)
|
||||
if self.has_residual:
|
||||
hidden_states += residual
|
||||
|
||||
|
||||
Reference in New Issue
Block a user