mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
WIP
This commit is contained in:
@@ -3471,6 +3471,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
|
||||
needs_f16_f32_copy = true;
|
||||
|
||||
} else {
|
||||
printf("%s on device %d -> %s on device %d\n", src->name, cuda_ctx_src->device, dst->name, cuda_ctx_dst->device);
|
||||
ggml_cuda_set_device(cuda_ctx_src->device);
|
||||
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream()));
|
||||
}
|
||||
|
||||
@@ -663,7 +663,8 @@ ggml_tensor * llm_build_context::llm_build_ffn(
|
||||
GGML_ASSERT((!split_u && !iextra->splits[id]) || (split_u && iextra->splits[id]));
|
||||
}
|
||||
if (!split_u) continue;
|
||||
auto cur = iextra ? iextra->splits[id] : input;
|
||||
//auto cur = iextra ? iextra->splits[id] : input;
|
||||
auto cur = input;
|
||||
if (ffn_norm && ffn_norm->extra) {
|
||||
auto norm = (ggml_split_tensor_t *)ffn_norm->extra;
|
||||
GGML_ASSERT(norm->splits[id]);
|
||||
@@ -671,7 +672,7 @@ ggml_tensor * llm_build_context::llm_build_ffn(
|
||||
cb(cur, "ffn_inp_normed", il_cb);
|
||||
}
|
||||
if (cur->type != GGML_TYPE_F32) {
|
||||
cur = ggml_cast(ctx, input, GGML_TYPE_F32);
|
||||
cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
|
||||
}
|
||||
cur = ggml_fused_up_gate(ctx, split_u, split_g, cur, unary_op);
|
||||
cb(cur, "ffn_up_gate", il_cb);
|
||||
@@ -9398,7 +9399,8 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
|
||||
GGML_ASSERT((!split_wq && !iextra->splits[id]) || (split_wq && iextra->splits[id]));
|
||||
}
|
||||
if (!split_wq) continue;
|
||||
auto cur = iextra ? iextra->splits[id] : input;
|
||||
//auto cur = iextra ? iextra->splits[id] : input;
|
||||
auto cur = input;
|
||||
if (attn_norm) {
|
||||
auto split_norm = attn_norm->splits[id];
|
||||
cur = llm_build_norm(ctx0, cur, hparams, split_norm, NULL, LLM_NORM_RMS, cb, il);
|
||||
|
||||
Reference in New Issue
Block a user