From 6b0c8e02a898bc51e80a24a14628f89b12620ad8 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Thu, 25 Sep 2025 13:23:13 +0300 Subject: [PATCH] Add mtmd: clip.cpp compiles --- examples/mtmd/clip.cpp | 23 ++- ggml/include/ggml.h | 222 +++++++++++++++++++++- ggml/src/ggml.c | 415 +++++++++++++++++++++++++++++++++++------ 3 files changed, 585 insertions(+), 75 deletions(-) diff --git a/examples/mtmd/clip.cpp b/examples/mtmd/clip.cpp index 8d50c7cc..a3985bf5 100644 --- a/examples/mtmd/clip.cpp +++ b/examples/mtmd/clip.cpp @@ -4283,15 +4283,18 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima GGML_ABORT("Unknown projector type"); } - // ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); - ggml_backend_dev_t dev = ggml_backend_get_device(ctx->backend_cpu); - ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr; - if (reg) { - auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"); - if (ggml_backend_set_n_threads_fn) { - ggml_backend_set_n_threads_fn(ctx->backend_cpu, n_threads); - } + if (ctx->backend_cpu) { + ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); } + //// ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); + //ggml_backend_dev_t dev = ggml_backend_get_device(ctx->backend_cpu); + //ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr; + //if (reg) { + // auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"); + // if (ggml_backend_set_n_threads_fn) { + // ggml_backend_set_n_threads_fn(ctx->backend_cpu, n_threads); + // } + //} auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf); if (status != GGML_STATUS_SUCCESS) { @@ -4312,7 +4315,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima } // the last node is the embedding tensor - ggml_tensor * embeddings = ggml_graph_node(gf, -1); + //ggml_tensor * embeddings = ggml_graph_node(gf, -1); + GGML_ASSERT(gf->n_nodes > 0); + ggml_tensor * embeddings = gf->nodes[gf->n_nodes-1]; // sanity check (only support batch size of 1 for now) const int n_tokens_out = embeddings->ne[1]; diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 4ffb18d0..e782c1ad 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -235,8 +235,8 @@ #define GGML_MAX_DIMS 4 #define GGML_MAX_PARAMS 2048 #ifndef GGML_MAX_CONTEXTS -// Maximum number of model contexts (e.g., for model shards). -// Increase this value using -DGGML_MAX_CONTEXTS= in CMake +// Maximum number of model contexts (e.g., for model shards). +// Increase this value using -DGGML_MAX_CONTEXTS= in CMake // if you need to load more than 64 model shards. #define GGML_MAX_CONTEXTS 64 #endif @@ -256,6 +256,12 @@ #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 +#define GGML_ROPE_TYPE_NEOX 2 +#define GGML_ROPE_TYPE_MROPE 8 +#define GGML_ROPE_TYPE_VISION 24 + +#define GGML_MROPE_SECTIONS 4 + #define GGUF_MAGIC "GGUF" #define GGUF_VERSION 3 @@ -647,6 +653,9 @@ extern "C" { GGML_OP_SOFTCAP, GGML_OP_SOFT_CAP_MAX, + GGML_OP_CONV_2D, + GGML_OP_CONV_2D_DW, + GGML_OP_FLASH_ATTN_EXT, GGML_OP_FLASH_ATTN_BACK, GGML_OP_SSM_CONV, @@ -670,6 +679,9 @@ extern "C" { GGML_OP_CROSS_ENTROPY_LOSS, GGML_OP_CROSS_ENTROPY_LOSS_BACK, + + GGML_OP_GLU, + GGML_OP_COUNT, }; @@ -682,13 +694,14 @@ extern "C" { GGML_UNARY_OP_ELU, GGML_UNARY_OP_RELU, GGML_UNARY_OP_SIGMOID, - GGML_UNARY_OP_GELU, + GGML_UNARY_OP_GELU_ERF, GGML_UNARY_OP_GELU_QUICK, GGML_UNARY_OP_SILU, GGML_UNARY_OP_HARDSWISH, GGML_UNARY_OP_HARDSIGMOID, GGML_UNARY_OP_SWIGLU, GGML_UNARY_OP_SWIGLU_OAI, + GGML_UNARY_OP_GELU, GGML_UNARY_OP_COUNT, }; @@ -1273,6 +1286,16 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // GELU using erf (error function) when possible + // some backends may fallback to approximation based on Abramowitz and Stegun formula + GGML_API struct ggml_tensor * ggml_gelu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_gelu_erf_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a); @@ -1300,6 +1323,100 @@ extern "C" { float alpha, float limit); + // + // *** from mainline begin *** + // + enum ggml_glu_op { + GGML_GLU_OP_REGLU, + GGML_GLU_OP_GEGLU, + GGML_GLU_OP_SWIGLU, + GGML_GLU_OP_SWIGLU_OAI, + GGML_GLU_OP_GEGLU_ERF, + GGML_GLU_OP_GEGLU_QUICK, + + GGML_GLU_OP_COUNT, + }; + GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op); + GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor); + + GGML_API struct ggml_tensor * ggml_glu( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_glu_op op, + bool swapped); + + GGML_API struct ggml_tensor * ggml_glu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op); + + GGML_API struct ggml_tensor * ggml_reglu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_reglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_reglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_swiglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_swiglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_erf_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_erf_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu_quick( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_quick_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_quick_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // + // *** from mainline end *** + // + + // a - x // b - dy GGML_API struct ggml_tensor * ggml_silu_back( @@ -1764,6 +1881,23 @@ extern "C" { float beta_fast, float beta_slow); + GGML_API struct ggml_tensor * ggml_rope_multi( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + int n_dims, + int sections[GGML_MROPE_SECTIONS], + int mode, + int n_ctx_orig, + float freq_base, + float freq_scale, + float ext_factor, + float attn_factor, + float beta_fast, + float beta_slow); + + // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_rope_ext_inplace( struct ggml_context * ctx, @@ -1780,6 +1914,23 @@ extern "C" { float beta_fast, float beta_slow); + GGML_API struct ggml_tensor * ggml_rope_multi_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + int n_dims, + int sections[GGML_MROPE_SECTIONS], + int mode, + int n_ctx_orig, + float freq_base, + float freq_scale, + float ext_factor, + float attn_factor, + float beta_fast, + float beta_slow); + + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom( struct ggml_context * ctx, struct ggml_tensor * a, @@ -1932,6 +2083,34 @@ extern "C" { struct ggml_tensor * b, int stride); + // depthwise (via im2col and mul_mat) + GGML_API struct ggml_tensor * ggml_conv_2d_dw( + struct ggml_context * ctx, + struct ggml_tensor * a, // convolution kernel + struct ggml_tensor * b, // data + int s0, // stride dimension 0 + int s1, // stride dimension 1 + int p0, // padding dimension 0 + int p1, // padding dimension 1 + int d0, // dilation dimension 0 + int d1); // dilation dimension 1 + + // Depthwise 2D convolution + // may be faster than ggml_conv_2d_dw, but not available in all backends + // a: KW KH 1 C convolution kernel + // b: W H C N input data + // res: W_out H_out C N + GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + int stride0, + int stride1, + int pad0, + int pad1, + int dilation0, + int dilation1); + enum ggml_op_pool { GGML_OP_POOL_MAX, GGML_OP_POOL_AVG, @@ -1959,24 +2138,49 @@ extern "C" { float p0, float p1); + enum ggml_scale_mode { + GGML_SCALE_MODE_NEAREST = 0, + GGML_SCALE_MODE_BILINEAR = 1, + + GGML_SCALE_MODE_COUNT + }; + + enum ggml_scale_flag { + GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8) + }; + // nearest interpolate // multiplies ne0 and ne1 by scale factor // used in stable-diffusion GGML_API struct ggml_tensor * ggml_upscale( struct ggml_context * ctx, struct ggml_tensor * a, - int scale_factor); + int scale_factor, + enum ggml_scale_mode mode); - // nearest interpolate - // nearest interpolate to specified dimensions - // used in tortoise.cpp - GGML_API struct ggml_tensor * ggml_upscale_ext( + // interpolate + // interpolate scale to specified dimensions + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext( struct ggml_context * ctx, struct ggml_tensor * a, int ne0, int ne1, int ne2, - int ne3); + int ne3, + enum ggml_scale_mode mode), + "use ggml_interpolate instead"); + + // Up- or downsamples the input to the specified size. + // 2D scale modes (eg. bilinear) are applied to the first two dimensions. + GGML_API struct ggml_tensor * ggml_interpolate( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...] + // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0] GGML_API struct ggml_tensor * ggml_pad( diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index c8cd6b17..83530ec7 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -4090,6 +4090,9 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "SOFTCAP", "SOFT_CAP_MAX", + "CONV_2D", + "CONV_2D_DW", + "FLASH_ATTN_EXT", "FLASH_ATTN_BACK", "SSM_CONV", @@ -4114,9 +4117,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS_BACK", + + "GPU", }; -static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 82"); +static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -4187,6 +4192,9 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "k2*tanh(k1*x)", "soft_max(k2*tanh(k1*x))", + "conv_2d(x)", + "conv_2d_dw(x)", + "flash_attn_ext(x)", "flash_attn_back(x)", "ssm_conv(x)", @@ -4211,9 +4219,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss(x,y)", "cross_entropy_loss_back(x,y)", + + "glu(x)," }; -static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 82"); +static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -4234,9 +4244,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "HARDSIGMOID", "SWIGLU", "SWIGLU_OAI", + "GELU_ERF", }; -static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15"); +static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16"); static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); @@ -4552,6 +4563,23 @@ GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t) { return ggml_op_name(t->op); } +const char * ggml_glu_op_name(enum ggml_glu_op op) { + static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { + "REGLU", + "GEGLU", + "SWIGLU", + "SWIGLU_OAI", + "GEGLU_ERF", + "GEGLU_QUICK", + }; + return GGML_GLU_OP_NAME[op]; +} + +enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) { + GGML_ASSERT(tensor->op == GGML_OP_GLU); + return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0); +} + GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor) { return ggml_type_size(tensor->type); } @@ -6667,6 +6695,20 @@ struct ggml_tensor * ggml_gelu_inplace( return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); } +// ggml_gelu_erf + +struct ggml_tensor * ggml_gelu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF); +} + +struct ggml_tensor * ggml_gelu_erf_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF); +} + // ggml_gelu_quick struct ggml_tensor * ggml_gelu_quick( @@ -6782,6 +6824,152 @@ struct ggml_tensor * ggml_hardsigmoid( return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID); } +// =============== from mainline begin ===================================== + +// ggml_glu + +static struct ggml_tensor * ggml_glu_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op, + bool swapped) { + GGML_ASSERT(ggml_is_contiguous_1(a)); + + if (b) { + GGML_ASSERT(ggml_is_contiguous_1(b)); + GGML_ASSERT(ggml_are_same_shape(a, b)); + GGML_ASSERT(a->type == b->type); + } + + int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i]; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0); + + ggml_set_op_params_i32(result, 0, (int32_t) op); + ggml_set_op_params_i32(result, 1, (int32_t) swapped); + + result->op = GGML_OP_GLU; + result->src[0] = a; + result->src[1] = b; + + return result; +} + +struct ggml_tensor * ggml_glu( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_glu_op op, + bool swapped) { + return ggml_glu_impl(ctx, a, NULL, op, swapped); +} + +struct ggml_tensor * ggml_glu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op) { + return ggml_glu_impl(ctx, a, b, op, false); +} + +// ggml_reglu + +struct ggml_tensor * ggml_reglu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false); +} + +struct ggml_tensor * ggml_reglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true); +} + +struct ggml_tensor * ggml_reglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false); +} + +// ggml_geglu + +struct ggml_tensor * ggml_geglu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false); +} + +struct ggml_tensor * ggml_geglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true); +} + +struct ggml_tensor * ggml_geglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false); +} + +struct ggml_tensor * ggml_swiglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true); +} + +struct ggml_tensor * ggml_swiglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false); +} + +// ggml_geglu_erf + +struct ggml_tensor * ggml_geglu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false); +} + +struct ggml_tensor * ggml_geglu_erf_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true); +} + +struct ggml_tensor * ggml_geglu_erf_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false); +} + +// ggml_geglu_quick + +struct ggml_tensor * ggml_geglu_quick( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false); +} + +struct ggml_tensor * ggml_geglu_quick_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true); +} + +struct ggml_tensor * ggml_geglu_quick_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false); +} + +// =============== from mainline end ===================================== + // ggml_norm static struct ggml_tensor * ggml_norm_impl( @@ -8212,6 +8400,7 @@ static struct ggml_tensor * ggml_rope_impl( struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, + int sections[GGML_MROPE_SECTIONS], int mode, int n_ctx_orig, float freq_base, @@ -8225,32 +8414,36 @@ static struct ggml_tensor * ggml_rope_impl( GGML_ASSERT(ggml_is_vector(b)); GGML_ASSERT(b->type == GGML_TYPE_I32); - GGML_ASSERT(a->ne[2] == b->ne[0]); + + bool mrope_used = mode & GGML_ROPE_TYPE_MROPE; + if (mrope_used) { + GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token + } else { + GGML_ASSERT(a->ne[2] == b->ne[0]); + } if (c) { GGML_ASSERT(c->type == GGML_TYPE_F32); GGML_ASSERT(c->ne[0] >= n_dims / 2); } - bool is_node = false; - - if (a->grad) { - is_node = true; - } - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig }; + int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig }; memcpy(params + 5, &freq_base, sizeof(float)); memcpy(params + 6, &freq_scale, sizeof(float)); memcpy(params + 7, &ext_factor, sizeof(float)); memcpy(params + 8, &attn_factor, sizeof(float)); memcpy(params + 9, &beta_fast, sizeof(float)); memcpy(params + 10, &beta_slow, sizeof(float)); + if (mrope_used) { + memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS); + } else { + memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS); + } ggml_set_op_params(result, params, sizeof(params)); - result->op = GGML_OP_ROPE; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->op = GGML_OP_ROPE; result->src[0] = a; result->src[1] = b; result->src[2] = c; @@ -8265,7 +8458,7 @@ struct ggml_tensor * ggml_rope( int n_dims, int mode) { return ggml_rope_impl( - ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false + ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false ); } @@ -8276,7 +8469,7 @@ struct ggml_tensor * ggml_rope_inplace( int n_dims, int mode) { return ggml_rope_impl( - ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true + ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true ); } @@ -8295,7 +8488,7 @@ struct ggml_tensor * ggml_rope_ext( float beta_fast, float beta_slow) { return ggml_rope_impl( - ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, + ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, false ); } @@ -8315,7 +8508,49 @@ struct ggml_tensor * ggml_rope_ext_inplace( float beta_fast, float beta_slow) { return ggml_rope_impl( - ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, + ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow, true + ); +} + +struct ggml_tensor * ggml_rope_multi( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + int n_dims, + int sections[GGML_MROPE_SECTIONS], + int mode, + int n_ctx_orig, + float freq_base, + float freq_scale, + float ext_factor, + float attn_factor, + float beta_fast, + float beta_slow) { + return ggml_rope_impl( + ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow, false + ); +} + +struct ggml_tensor * ggml_rope_multi_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c, + int n_dims, + int sections[GGML_MROPE_SECTIONS], + int mode, + int n_ctx_orig, + float freq_base, + float freq_scale, + float ext_factor, + float attn_factor, + float beta_fast, + float beta_slow) { + return ggml_rope_impl( + ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, true ); } @@ -8334,7 +8569,7 @@ struct ggml_tensor * ggml_rope_custom( float beta_fast, float beta_slow) { return ggml_rope_impl( - ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale, + ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, false ); } @@ -8353,7 +8588,7 @@ struct ggml_tensor * ggml_rope_custom_inplace( float beta_fast, float beta_slow) { return ggml_rope_impl( - ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale, + ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, true ); } @@ -8634,6 +8869,71 @@ struct ggml_tensor * ggml_conv_2d_s1_ph( return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1); } +// ggml_conv_2d_dw + +struct ggml_tensor * ggml_conv_2d_dw( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + int s0, + int s1, + int p0, + int p1, + int d0, + int d1) { + struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]); + struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, + ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]), + s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW] + struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW] + + new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW] + struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b); + result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW] + + return result; +} + +// ggml_conv_2d_dw_direct + +struct ggml_tensor * ggml_conv_2d_dw_direct( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + int stride0, + int stride1, + int pad0, + int pad1, + int dilation0, + int dilation1) { + GGML_ASSERT(a->ne[2] == 1); + GGML_ASSERT(a->ne[3] == b->ne[2]); + int64_t ne[4]; + ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0); + ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1); + ne[2] = b->ne[2]; + ne[3] = b->ne[3]; + + struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); + + if (ggml_is_contiguous_channels(b)) { + // Result will be permuted the same way as input (CWHN order) + const int64_t type_size = ggml_type_size(result->type); + GGML_ASSERT(ggml_blck_size(result->type) == 1); + result->nb[0] = result->ne[2] * type_size; + result->nb[1] = result->ne[0] * result->nb[0]; + result->nb[2] = type_size; + } + + int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 }; + ggml_set_op_params(result, params, sizeof(params)); + + result->op = GGML_OP_CONV_2D_DW; + result->src[0] = a; + result->src[1] = b; + return result; +} + // ggml_conv_transpose_2d_p0 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) { @@ -8750,57 +9050,57 @@ struct ggml_tensor * ggml_pool_2d( return result; } -// ggml_upscale +// ggml_upscale -> ggml_interpolate -static struct ggml_tensor * ggml_upscale_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - int ne0, - int ne1, - int ne2, - int ne3) { - bool is_node = false; +static struct ggml_tensor * ggml_interpolate_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode) { + GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT); - if (a->grad) { - GGML_ABORT("fatal error"); // TODO: implement backward - is_node = true; - } + struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); - GGML_ASSERT(a->ne[0] <= ne0); - GGML_ASSERT(a->ne[1] <= ne1); - GGML_ASSERT(a->ne[2] <= ne2); - GGML_ASSERT(a->ne[3] <= ne3); + ggml_set_op_params_i32(result, 0, (int32_t)mode); - struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, - ne0, - ne1, - ne2, - ne3 - ); - - result->op = GGML_OP_UPSCALE; - - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->op = GGML_OP_UPSCALE; result->src[0] = a; return result; } struct ggml_tensor * ggml_upscale( - struct ggml_context * ctx, - struct ggml_tensor * a, - int scale_factor) { - return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]); + struct ggml_context * ctx, + struct ggml_tensor * a, + int scale_factor, + enum ggml_scale_mode mode) { + GGML_ASSERT(scale_factor > 1); + return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode); } struct ggml_tensor * ggml_upscale_ext( - struct ggml_context * ctx, - struct ggml_tensor * a, - int ne0, - int ne1, - int ne2, - int ne3) { - return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3); + struct ggml_context * ctx, + struct ggml_tensor * a, + int ne0, + int ne1, + int ne2, + int ne3, + enum ggml_scale_mode mode) { + return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); +} + +struct ggml_tensor * ggml_interpolate( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode) { + return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); } // ggml_pad @@ -21537,6 +21837,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor src1, src2, n_dims, + NULL, mode, n_ctx_orig, freq_base,