mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-11 06:20:09 +00:00
* It compiles
* Seems to be working with coopmat
* Vulkan needs f32 precision for flash attention
* Vulkan: fix u_batch > 4096/n_active_experts for coopmat1. Without this fix we get an assert. We get the same assert in mainline too.
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
30 lines
859 B
Plaintext
30 lines
859 B
Plaintext
// Shader entry point: each invocation produces at most one element of data_d
// by applying op() to a pair of inputs selected according to p.mode.
// NOTE(review): p, data_a, data_b, data_d, D_TYPE and op() are declared
// outside this chunk; their exact types/layouts are not visible here.
void main() {
    // Flatten the 3D invocation ID into one linear element index
    // (stride 1 along x, 512 along y, 262144 = 512 * 512 along z).
    const uvec3 gid = gl_GlobalInvocationID;
    const uint elem = gid.x + gid.y * 512u + gid.z * 262144u;

    // Invocations past the element count have nothing to do.
    if (elem >= p.N) {
        return;
    }

    // Split the linear index into (row, column) using an output row length of p.ne20.
    const uint r = elem / p.ne20;
    const uint c = elem - r * p.ne20;

    // Position of the first operand in data_a; rows there are p.ne00 elements apart.
    const uint src = r * p.ne00 + c;

    if (p.mode == 0 || p.mode == 1) {
        // Default (mode 0) and Swapped (mode 1): both operands come from the same
        // data_a row, half a row apart. The two modes differ only in operand order.
        const uint half_width = p.ne00 / 2;
        const float first_half  = float(data_a[src]);
        const float second_half = float(data_a[src + half_width]);

        data_d[r * half_width + c] = D_TYPE(p.mode == 0 ? op(first_half, second_half)
                                                        : op(second_half, first_half));
    } else {
        // Split: operands come from data_a and data_b at the same index,
        // and the result is written to that index as well.
        data_d[src] = D_TYPE(op(float(data_a[src]), float(data_b[src])));
    }
}
|