mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-08 04:50:13 +00:00
* It compiles * Seems to be working with coopmat * Vulkan needs f32 precision for flash attention * Vulkan: fix u_batch > 4096/n_active_experts for coopmat1. Without this fix we get an assert. We get the same assert in mainline too. --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
14 lines
327 B
Plaintext
14 lines
327 B
Plaintext
#version 450
|
|
|
|
#include "glu_head.comp"
|
|
|
|
// Constants for the tanh-based GELU approximation evaluated in op().
// GELU_COEF_A scales the cubic term; SQRT_2_OVER_PI is sqrt(2/pi).
const float GELU_COEF_A    = 0.044715f;
const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
|
|
|
|
// GEGLU element operator: tanh-approximated GELU of `a`, scaled by `b`.
//
// With v = SQRT_2_OVER_PI * a * (1 + GELU_COEF_A * a^2), the result is
//     0.5 * a * (1 + tanh(v)) * b
// where 1 + tanh(v) is evaluated as 2 - 2 / (exp(2v) + 1), so only exp()
// is needed.
//
// NOTE(review): presumably called once per element pair by glu_main.comp;
// confirm which operand that file passes as `a` and which as `b`.
float op(float a, float b) {
    const float inner    = SQRT_2_OVER_PI * a * (1.0f + GELU_COEF_A * a * a);
    const float exp_term = exp(2.0f * inner);
    // Equals 1 + tanh(inner).
    const float gate     = 2.0f - 2.0f / (exp_term + 1.0f);
    return 0.5f * a * gate * b;
}
|
|
|
|
#include "glu_main.comp"
|