mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 11:51:53 +00:00
ggml : refactor rope norm/neox (#7634)
* ggml : unify rope norm/neox (CPU) * ggml : fix compile warning * ggml : remove GLM rope mode ggml-ci * metal : better rope implementation ggml-ci * cuda : better rope implementation ggml-ci * naming : n_orig_ctx -> n_ctx_orig ggml-ci * dev : add reminders to update backends ggml-ci * vulkan : fix ggml_rope_ext() usage * cuda : fix array size + indents ggml-ci
This commit is contained in:
@@ -522,8 +522,8 @@ static struct ggml_tensor * forward(
|
||||
// wk shape [n_embd, n_embd, 1, 1]
|
||||
// Qcur shape [n_embd/n_head, n_head, N, 1]
|
||||
// Kcur shape [n_embd/n_head, n_head, N, 1]
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0);
|
||||
|
||||
// store key and value to memory
|
||||
{
|
||||
@@ -759,8 +759,8 @@ static struct ggml_tensor * forward_batch(
|
||||
// wk shape [n_embd, n_embd, 1, 1]
|
||||
// Qcur shape [n_embd/n_head, n_head, N, n_batch]
|
||||
// Kcur shape [n_embd/n_head, n_head, N, n_batch]
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0);
|
||||
assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
|
||||
assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);
|
||||
|
||||
@@ -1056,7 +1056,7 @@ static struct ggml_tensor * forward_lora(
|
||||
model->layers[il].wqb,
|
||||
cur)),
|
||||
n_embd/n_head, n_head, N),
|
||||
KQ_pos, n_rot, 0, 0);
|
||||
KQ_pos, n_rot, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0,
|
||||
ggml_reshape_3d(ctx0,
|
||||
ggml_mul_mat(ctx0,
|
||||
@@ -1065,7 +1065,7 @@ static struct ggml_tensor * forward_lora(
|
||||
model->layers[il].wkb,
|
||||
cur)),
|
||||
n_embd/n_head, n_head, N),
|
||||
KQ_pos, n_rot, 0, 0);
|
||||
KQ_pos, n_rot, 0);
|
||||
|
||||
// store key and value to memory
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user