mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-14 15:57:37 +00:00
Add mtmd: clip.cpp compiles
This commit is contained in:
@@ -4283,15 +4283,18 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
GGML_ABORT("Unknown projector type");
|
||||
}
|
||||
|
||||
// ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);
|
||||
ggml_backend_dev_t dev = ggml_backend_get_device(ctx->backend_cpu);
|
||||
ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
|
||||
if (reg) {
|
||||
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
||||
if (ggml_backend_set_n_threads_fn) {
|
||||
ggml_backend_set_n_threads_fn(ctx->backend_cpu, n_threads);
|
||||
}
|
||||
if (ctx->backend_cpu) {
|
||||
ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);
|
||||
}
|
||||
//// ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);
|
||||
//ggml_backend_dev_t dev = ggml_backend_get_device(ctx->backend_cpu);
|
||||
//ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
|
||||
//if (reg) {
|
||||
// auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
||||
// if (ggml_backend_set_n_threads_fn) {
|
||||
// ggml_backend_set_n_threads_fn(ctx->backend_cpu, n_threads);
|
||||
// }
|
||||
//}
|
||||
|
||||
auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
|
||||
if (status != GGML_STATUS_SUCCESS) {
|
||||
@@ -4312,7 +4315,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
}
|
||||
|
||||
// the last node is the embedding tensor
|
||||
ggml_tensor * embeddings = ggml_graph_node(gf, -1);
|
||||
//ggml_tensor * embeddings = ggml_graph_node(gf, -1);
|
||||
GGML_ASSERT(gf->n_nodes > 0);
|
||||
ggml_tensor * embeddings = gf->nodes[gf->n_nodes-1];
|
||||
|
||||
// sanity check (only support batch size of 1 for now)
|
||||
const int n_tokens_out = embeddings->ne[1];
|
||||
|
||||
@@ -235,8 +235,8 @@
|
||||
#define GGML_MAX_DIMS 4
|
||||
#define GGML_MAX_PARAMS 2048
|
||||
#ifndef GGML_MAX_CONTEXTS
|
||||
// Maximum number of model contexts (e.g., for model shards).
|
||||
// Increase this value using -DGGML_MAX_CONTEXTS=<value> in CMake
|
||||
// Maximum number of model contexts (e.g., for model shards).
|
||||
// Increase this value using -DGGML_MAX_CONTEXTS=<value> in CMake
|
||||
// if you need to load more than 64 model shards.
|
||||
#define GGML_MAX_CONTEXTS 64
|
||||
#endif
|
||||
@@ -256,6 +256,12 @@
|
||||
#define GGML_EXIT_SUCCESS 0
|
||||
#define GGML_EXIT_ABORTED 1
|
||||
|
||||
#define GGML_ROPE_TYPE_NEOX 2
|
||||
#define GGML_ROPE_TYPE_MROPE 8
|
||||
#define GGML_ROPE_TYPE_VISION 24
|
||||
|
||||
#define GGML_MROPE_SECTIONS 4
|
||||
|
||||
#define GGUF_MAGIC "GGUF"
|
||||
|
||||
#define GGUF_VERSION 3
|
||||
@@ -647,6 +653,9 @@ extern "C" {
|
||||
GGML_OP_SOFTCAP,
|
||||
GGML_OP_SOFT_CAP_MAX,
|
||||
|
||||
GGML_OP_CONV_2D,
|
||||
GGML_OP_CONV_2D_DW,
|
||||
|
||||
GGML_OP_FLASH_ATTN_EXT,
|
||||
GGML_OP_FLASH_ATTN_BACK,
|
||||
GGML_OP_SSM_CONV,
|
||||
@@ -670,6 +679,9 @@ extern "C" {
|
||||
|
||||
GGML_OP_CROSS_ENTROPY_LOSS,
|
||||
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
||||
|
||||
GGML_OP_GLU,
|
||||
|
||||
GGML_OP_COUNT,
|
||||
};
|
||||
|
||||
@@ -682,13 +694,14 @@ extern "C" {
|
||||
GGML_UNARY_OP_ELU,
|
||||
GGML_UNARY_OP_RELU,
|
||||
GGML_UNARY_OP_SIGMOID,
|
||||
GGML_UNARY_OP_GELU,
|
||||
GGML_UNARY_OP_GELU_ERF,
|
||||
GGML_UNARY_OP_GELU_QUICK,
|
||||
GGML_UNARY_OP_SILU,
|
||||
GGML_UNARY_OP_HARDSWISH,
|
||||
GGML_UNARY_OP_HARDSIGMOID,
|
||||
GGML_UNARY_OP_SWIGLU,
|
||||
GGML_UNARY_OP_SWIGLU_OAI,
|
||||
GGML_UNARY_OP_GELU,
|
||||
|
||||
GGML_UNARY_OP_COUNT,
|
||||
};
|
||||
@@ -1273,6 +1286,16 @@ extern "C" {
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// GELU using erf (error function) when possible
|
||||
// some backends may fallback to approximation based on Abramowitz and Stegun formula
|
||||
GGML_API struct ggml_tensor * ggml_gelu_erf(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_gelu_quick(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
@@ -1300,6 +1323,100 @@ extern "C" {
|
||||
float alpha,
|
||||
float limit);
|
||||
|
||||
//
|
||||
// *** from mainline begin ***
|
||||
//
|
||||
enum ggml_glu_op {
|
||||
GGML_GLU_OP_REGLU,
|
||||
GGML_GLU_OP_GEGLU,
|
||||
GGML_GLU_OP_SWIGLU,
|
||||
GGML_GLU_OP_SWIGLU_OAI,
|
||||
GGML_GLU_OP_GEGLU_ERF,
|
||||
GGML_GLU_OP_GEGLU_QUICK,
|
||||
|
||||
GGML_GLU_OP_COUNT,
|
||||
};
|
||||
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
||||
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_glu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_glu_op op,
|
||||
bool swapped);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_glu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
enum ggml_glu_op op);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_reglu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_reglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
//
|
||||
// *** from mainline end ***
|
||||
//
|
||||
|
||||
|
||||
// a - x
|
||||
// b - dy
|
||||
GGML_API struct ggml_tensor * ggml_silu_back(
|
||||
@@ -1764,6 +1881,23 @@ extern "C" {
|
||||
float beta_fast,
|
||||
float beta_slow);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_rope_multi(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
int n_dims,
|
||||
int sections[GGML_MROPE_SECTIONS],
|
||||
int mode,
|
||||
int n_ctx_orig,
|
||||
float freq_base,
|
||||
float freq_scale,
|
||||
float ext_factor,
|
||||
float attn_factor,
|
||||
float beta_fast,
|
||||
float beta_slow);
|
||||
|
||||
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
|
||||
struct ggml_context * ctx,
|
||||
@@ -1780,6 +1914,23 @@ extern "C" {
|
||||
float beta_fast,
|
||||
float beta_slow);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
int n_dims,
|
||||
int sections[GGML_MROPE_SECTIONS],
|
||||
int mode,
|
||||
int n_ctx_orig,
|
||||
float freq_base,
|
||||
float freq_scale,
|
||||
float ext_factor,
|
||||
float attn_factor,
|
||||
float beta_fast,
|
||||
float beta_slow);
|
||||
|
||||
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
@@ -1932,6 +2083,34 @@ extern "C" {
|
||||
struct ggml_tensor * b,
|
||||
int stride);
|
||||
|
||||
// depthwise (via im2col and mul_mat)
|
||||
GGML_API struct ggml_tensor * ggml_conv_2d_dw(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a, // convolution kernel
|
||||
struct ggml_tensor * b, // data
|
||||
int s0, // stride dimension 0
|
||||
int s1, // stride dimension 1
|
||||
int p0, // padding dimension 0
|
||||
int p1, // padding dimension 1
|
||||
int d0, // dilation dimension 0
|
||||
int d1); // dilation dimension 1
|
||||
|
||||
// Depthwise 2D convolution
|
||||
// may be faster than ggml_conv_2d_dw, but not available in all backends
|
||||
// a: KW KH 1 C convolution kernel
|
||||
// b: W H C N input data
|
||||
// res: W_out H_out C N
|
||||
GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
int stride0,
|
||||
int stride1,
|
||||
int pad0,
|
||||
int pad1,
|
||||
int dilation0,
|
||||
int dilation1);
|
||||
|
||||
enum ggml_op_pool {
|
||||
GGML_OP_POOL_MAX,
|
||||
GGML_OP_POOL_AVG,
|
||||
@@ -1959,24 +2138,49 @@ extern "C" {
|
||||
float p0,
|
||||
float p1);
|
||||
|
||||
enum ggml_scale_mode {
|
||||
GGML_SCALE_MODE_NEAREST = 0,
|
||||
GGML_SCALE_MODE_BILINEAR = 1,
|
||||
|
||||
GGML_SCALE_MODE_COUNT
|
||||
};
|
||||
|
||||
enum ggml_scale_flag {
|
||||
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
||||
};
|
||||
|
||||
// nearest interpolate
|
||||
// multiplies ne0 and ne1 by scale factor
|
||||
// used in stable-diffusion
|
||||
GGML_API struct ggml_tensor * ggml_upscale(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int scale_factor);
|
||||
int scale_factor,
|
||||
enum ggml_scale_mode mode);
|
||||
|
||||
// nearest interpolate
|
||||
// nearest interpolate to specified dimensions
|
||||
// used in tortoise.cpp
|
||||
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
||||
// interpolate
|
||||
// interpolate scale to specified dimensions
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3);
|
||||
int ne3,
|
||||
enum ggml_scale_mode mode),
|
||||
"use ggml_interpolate instead");
|
||||
|
||||
// Up- or downsamples the input to the specified size.
|
||||
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
||||
GGML_API struct ggml_tensor * ggml_interpolate(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3,
|
||||
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
||||
|
||||
|
||||
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
||||
GGML_API struct ggml_tensor * ggml_pad(
|
||||
|
||||
415
ggml/src/ggml.c
415
ggml/src/ggml.c
@@ -4090,6 +4090,9 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||
"SOFTCAP",
|
||||
"SOFT_CAP_MAX",
|
||||
|
||||
"CONV_2D",
|
||||
"CONV_2D_DW",
|
||||
|
||||
"FLASH_ATTN_EXT",
|
||||
"FLASH_ATTN_BACK",
|
||||
"SSM_CONV",
|
||||
@@ -4114,9 +4117,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||
|
||||
"CROSS_ENTROPY_LOSS",
|
||||
"CROSS_ENTROPY_LOSS_BACK",
|
||||
|
||||
"GPU",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 82");
|
||||
static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
@@ -4187,6 +4192,9 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"k2*tanh(k1*x)",
|
||||
"soft_max(k2*tanh(k1*x))",
|
||||
|
||||
"conv_2d(x)",
|
||||
"conv_2d_dw(x)",
|
||||
|
||||
"flash_attn_ext(x)",
|
||||
"flash_attn_back(x)",
|
||||
"ssm_conv(x)",
|
||||
@@ -4211,9 +4219,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
|
||||
"cross_entropy_loss(x,y)",
|
||||
"cross_entropy_loss_back(x,y)",
|
||||
|
||||
"glu(x),"
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 82");
|
||||
static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
|
||||
|
||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||
|
||||
@@ -4234,9 +4244,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
||||
"HARDSIGMOID",
|
||||
"SWIGLU",
|
||||
"SWIGLU_OAI",
|
||||
"GELU_ERF",
|
||||
};
|
||||
|
||||
static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
|
||||
static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");
|
||||
|
||||
|
||||
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
||||
@@ -4552,6 +4563,23 @@ GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t) {
|
||||
return ggml_op_name(t->op);
|
||||
}
|
||||
|
||||
const char * ggml_glu_op_name(enum ggml_glu_op op) {
|
||||
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
|
||||
"REGLU",
|
||||
"GEGLU",
|
||||
"SWIGLU",
|
||||
"SWIGLU_OAI",
|
||||
"GEGLU_ERF",
|
||||
"GEGLU_QUICK",
|
||||
};
|
||||
return GGML_GLU_OP_NAME[op];
|
||||
}
|
||||
|
||||
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
|
||||
GGML_ASSERT(tensor->op == GGML_OP_GLU);
|
||||
return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
|
||||
}
|
||||
|
||||
GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
||||
return ggml_type_size(tensor->type);
|
||||
}
|
||||
@@ -6667,6 +6695,20 @@ struct ggml_tensor * ggml_gelu_inplace(
|
||||
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
|
||||
}
|
||||
|
||||
// ggml_gelu_erf
|
||||
|
||||
struct ggml_tensor * ggml_gelu_erf(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_gelu_erf_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
|
||||
}
|
||||
|
||||
// ggml_gelu_quick
|
||||
|
||||
struct ggml_tensor * ggml_gelu_quick(
|
||||
@@ -6782,6 +6824,152 @@ struct ggml_tensor * ggml_hardsigmoid(
|
||||
return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
|
||||
}
|
||||
|
||||
// =============== from mainline begin =====================================
|
||||
|
||||
// ggml_glu
|
||||
|
||||
static struct ggml_tensor * ggml_glu_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
enum ggml_glu_op op,
|
||||
bool swapped) {
|
||||
GGML_ASSERT(ggml_is_contiguous_1(a));
|
||||
|
||||
if (b) {
|
||||
GGML_ASSERT(ggml_is_contiguous_1(b));
|
||||
GGML_ASSERT(ggml_are_same_shape(a, b));
|
||||
GGML_ASSERT(a->type == b->type);
|
||||
}
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
|
||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
||||
ggml_set_op_params_i32(result, 1, (int32_t) swapped);
|
||||
|
||||
result->op = GGML_OP_GLU;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_glu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_glu_op op,
|
||||
bool swapped) {
|
||||
return ggml_glu_impl(ctx, a, NULL, op, swapped);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_glu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
enum ggml_glu_op op) {
|
||||
return ggml_glu_impl(ctx, a, b, op, false);
|
||||
}
|
||||
|
||||
// ggml_reglu
|
||||
|
||||
struct ggml_tensor * ggml_reglu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_reglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_reglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b) {
|
||||
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
|
||||
}
|
||||
|
||||
// ggml_geglu
|
||||
|
||||
struct ggml_tensor * ggml_geglu(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b) {
|
||||
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_swiglu_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_swiglu_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b) {
|
||||
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
|
||||
}
|
||||
|
||||
// ggml_geglu_erf
|
||||
|
||||
struct ggml_tensor * ggml_geglu_erf(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_erf_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_erf_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b) {
|
||||
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
|
||||
}
|
||||
|
||||
// ggml_geglu_quick
|
||||
|
||||
struct ggml_tensor * ggml_geglu_quick(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_quick_swapped(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_geglu_quick_split(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b) {
|
||||
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
|
||||
}
|
||||
|
||||
// =============== from mainline end =====================================
|
||||
|
||||
// ggml_norm
|
||||
|
||||
static struct ggml_tensor * ggml_norm_impl(
|
||||
@@ -8212,6 +8400,7 @@ static struct ggml_tensor * ggml_rope_impl(
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
int n_dims,
|
||||
int sections[GGML_MROPE_SECTIONS],
|
||||
int mode,
|
||||
int n_ctx_orig,
|
||||
float freq_base,
|
||||
@@ -8225,32 +8414,36 @@ static struct ggml_tensor * ggml_rope_impl(
|
||||
|
||||
GGML_ASSERT(ggml_is_vector(b));
|
||||
GGML_ASSERT(b->type == GGML_TYPE_I32);
|
||||
GGML_ASSERT(a->ne[2] == b->ne[0]);
|
||||
|
||||
bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
|
||||
if (mrope_used) {
|
||||
GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
|
||||
} else {
|
||||
GGML_ASSERT(a->ne[2] == b->ne[0]);
|
||||
}
|
||||
|
||||
if (c) {
|
||||
GGML_ASSERT(c->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(c->ne[0] >= n_dims / 2);
|
||||
}
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (a->grad) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
|
||||
int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
|
||||
memcpy(params + 5, &freq_base, sizeof(float));
|
||||
memcpy(params + 6, &freq_scale, sizeof(float));
|
||||
memcpy(params + 7, &ext_factor, sizeof(float));
|
||||
memcpy(params + 8, &attn_factor, sizeof(float));
|
||||
memcpy(params + 9, &beta_fast, sizeof(float));
|
||||
memcpy(params + 10, &beta_slow, sizeof(float));
|
||||
if (mrope_used) {
|
||||
memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS);
|
||||
} else {
|
||||
memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS);
|
||||
}
|
||||
ggml_set_op_params(result, params, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_ROPE;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->op = GGML_OP_ROPE;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
result->src[2] = c;
|
||||
@@ -8265,7 +8458,7 @@ struct ggml_tensor * ggml_rope(
|
||||
int n_dims,
|
||||
int mode) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
|
||||
ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
|
||||
);
|
||||
}
|
||||
|
||||
@@ -8276,7 +8469,7 @@ struct ggml_tensor * ggml_rope_inplace(
|
||||
int n_dims,
|
||||
int mode) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
|
||||
ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
|
||||
);
|
||||
}
|
||||
|
||||
@@ -8295,7 +8488,7 @@ struct ggml_tensor * ggml_rope_ext(
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, false
|
||||
);
|
||||
}
|
||||
@@ -8315,7 +8508,49 @@ struct ggml_tensor * ggml_rope_ext_inplace(
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, true
|
||||
);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_rope_multi(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
int n_dims,
|
||||
int sections[GGML_MROPE_SECTIONS],
|
||||
int mode,
|
||||
int n_ctx_orig,
|
||||
float freq_base,
|
||||
float freq_scale,
|
||||
float ext_factor,
|
||||
float attn_factor,
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, false
|
||||
);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_rope_multi_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
int n_dims,
|
||||
int sections[GGML_MROPE_SECTIONS],
|
||||
int mode,
|
||||
int n_ctx_orig,
|
||||
float freq_base,
|
||||
float freq_scale,
|
||||
float ext_factor,
|
||||
float attn_factor,
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, true
|
||||
);
|
||||
}
|
||||
@@ -8334,7 +8569,7 @@ struct ggml_tensor * ggml_rope_custom(
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, false
|
||||
);
|
||||
}
|
||||
@@ -8353,7 +8588,7 @@ struct ggml_tensor * ggml_rope_custom_inplace(
|
||||
float beta_fast,
|
||||
float beta_slow) {
|
||||
return ggml_rope_impl(
|
||||
ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow, true
|
||||
);
|
||||
}
|
||||
@@ -8634,6 +8869,71 @@ struct ggml_tensor * ggml_conv_2d_s1_ph(
|
||||
return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
|
||||
}
|
||||
|
||||
// ggml_conv_2d_dw
|
||||
|
||||
struct ggml_tensor * ggml_conv_2d_dw(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
int s0,
|
||||
int s1,
|
||||
int p0,
|
||||
int p1,
|
||||
int d0,
|
||||
int d1) {
|
||||
struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
|
||||
ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
||||
s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
||||
struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
||||
|
||||
new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
||||
struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
|
||||
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_conv_2d_dw_direct
|
||||
|
||||
struct ggml_tensor * ggml_conv_2d_dw_direct(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
int stride0,
|
||||
int stride1,
|
||||
int pad0,
|
||||
int pad1,
|
||||
int dilation0,
|
||||
int dilation1) {
|
||||
GGML_ASSERT(a->ne[2] == 1);
|
||||
GGML_ASSERT(a->ne[3] == b->ne[2]);
|
||||
int64_t ne[4];
|
||||
ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
|
||||
ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
|
||||
ne[2] = b->ne[2];
|
||||
ne[3] = b->ne[3];
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
|
||||
|
||||
if (ggml_is_contiguous_channels(b)) {
|
||||
// Result will be permuted the same way as input (CWHN order)
|
||||
const int64_t type_size = ggml_type_size(result->type);
|
||||
GGML_ASSERT(ggml_blck_size(result->type) == 1);
|
||||
result->nb[0] = result->ne[2] * type_size;
|
||||
result->nb[1] = result->ne[0] * result->nb[0];
|
||||
result->nb[2] = type_size;
|
||||
}
|
||||
|
||||
int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
|
||||
ggml_set_op_params(result, params, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_CONV_2D_DW;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_conv_transpose_2d_p0
|
||||
|
||||
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
|
||||
@@ -8750,57 +9050,57 @@ struct ggml_tensor * ggml_pool_2d(
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_upscale
|
||||
// ggml_upscale -> ggml_interpolate
|
||||
|
||||
static struct ggml_tensor * ggml_upscale_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3) {
|
||||
bool is_node = false;
|
||||
static struct ggml_tensor * ggml_interpolate_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3,
|
||||
uint32_t mode) {
|
||||
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
|
||||
|
||||
if (a->grad) {
|
||||
GGML_ABORT("fatal error"); // TODO: implement backward
|
||||
is_node = true;
|
||||
}
|
||||
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
||||
|
||||
GGML_ASSERT(a->ne[0] <= ne0);
|
||||
GGML_ASSERT(a->ne[1] <= ne1);
|
||||
GGML_ASSERT(a->ne[2] <= ne2);
|
||||
GGML_ASSERT(a->ne[3] <= ne3);
|
||||
ggml_set_op_params_i32(result, 0, (int32_t)mode);
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
|
||||
ne0,
|
||||
ne1,
|
||||
ne2,
|
||||
ne3
|
||||
);
|
||||
|
||||
result->op = GGML_OP_UPSCALE;
|
||||
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->op = GGML_OP_UPSCALE;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_upscale(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int scale_factor) {
|
||||
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int scale_factor,
|
||||
enum ggml_scale_mode mode) {
|
||||
GGML_ASSERT(scale_factor > 1);
|
||||
return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_upscale_ext(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3) {
|
||||
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3,
|
||||
enum ggml_scale_mode mode) {
|
||||
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_interpolate(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3,
|
||||
uint32_t mode) {
|
||||
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
||||
}
|
||||
|
||||
// ggml_pad
|
||||
@@ -21537,6 +21837,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
||||
src1,
|
||||
src2,
|
||||
n_dims,
|
||||
NULL,
|
||||
mode,
|
||||
n_ctx_orig,
|
||||
freq_base,
|
||||
|
||||
Reference in New Issue
Block a user