Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-04-29 02:41:47 +00:00)
Fused norm (#1086)
* Adding fused_norm — same idea as fused_rms_norm

* Avoid computing the attention reduce op for cohere2

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -691,6 +691,7 @@ extern "C" {
         GGML_OP_REDUCE,
         GGML_OP_FAKE_CPY,
+        GGML_OP_FUSED_NORM,

         GGML_OP_COUNT,
     };
@@ -1487,6 +1488,18 @@ extern "C" {
             struct ggml_tensor  * b,
             float                 eps);

+    GGML_API struct ggml_tensor * ggml_fused_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            float                 eps);
+
+    GGML_API struct ggml_tensor * ggml_fused_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            float                 eps);
+
     // group normalize along ne0*ne1*n_groups
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_group_norm(
Reference in New Issue
Block a user