diff --git a/src/llama.cpp b/src/llama.cpp index bd108845..e6a736b4 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -10920,11 +10920,8 @@ struct llm_build_context { } if (il == n_layer - 1 && inp_out_ids) { - struct ggml_tensor * inp_out_ids = build_inp_out_ids(); cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); - //cur = ggml_get_rows(ctx0, cur, inp_out_ids); - //inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -16309,7 +16306,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { } ggml_backend_tensor_set(lctx.inp_pos, pos_data.data(), 0, pos_data.size()*ggml_element_size(lctx.inp_pos)); } else { - ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos)); + ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*n_pos_per_embd*ggml_element_size(lctx.inp_pos)); } }