mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-13 15:30:03 +00:00
Merge mainline llama.cpp (#3)
* Merging mainline - WIP * Merging mainline - WIP AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower as it is so often the case with llama.cpp/ggml after some "improvements" have been made. * Merging mainline - fix Metal * Remove check --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -16,6 +16,10 @@
|
||||
#include "ggml-metal.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CANN
|
||||
#include "ggml-cann.h"
|
||||
#endif
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
|
||||
@@ -1001,6 +1005,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CANN
|
||||
new_clip->backend = ggml_backend_cann_init(0);
|
||||
LOG_TEE("%s: CLIP using CANN backend\n", __func__);
|
||||
#endif
|
||||
|
||||
|
||||
if (!new_clip->backend) {
|
||||
new_clip->backend = ggml_backend_cpu_init();
|
||||
@@ -1121,20 +1130,20 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
}
|
||||
if (n < 32)
|
||||
hparams.image_grid_pinpoints[n] = 0;
|
||||
} catch (std::runtime_error & e) {
|
||||
} catch (std::runtime_error & /*e*/) {
|
||||
hparams.image_grid_pinpoints[0]=0;
|
||||
}
|
||||
|
||||
try {
|
||||
int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE);
|
||||
strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx));
|
||||
} catch (std::runtime_error & e) {
|
||||
} catch (std::runtime_error & /*e*/) {
|
||||
strcpy(hparams.mm_patch_merge_type, "flat");
|
||||
}
|
||||
|
||||
try {
|
||||
hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6
|
||||
} catch(const std::exception& e) {
|
||||
} catch(const std::exception& /*e*/) {
|
||||
hparams.image_crop_resolution = hparams.image_size;
|
||||
}
|
||||
|
||||
@@ -1173,7 +1182,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
try {
|
||||
vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
|
||||
new_clip->has_class_embedding = true;
|
||||
} catch (const std::exception& e) {
|
||||
} catch (const std::exception& /*e*/) {
|
||||
new_clip->has_class_embedding = false;
|
||||
}
|
||||
|
||||
@@ -1181,7 +1190,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
|
||||
vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
|
||||
new_clip->has_pre_norm = true;
|
||||
} catch (std::exception & e) {
|
||||
} catch (std::exception & /*e*/) {
|
||||
new_clip->has_pre_norm = false;
|
||||
}
|
||||
|
||||
@@ -1189,21 +1198,21 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
|
||||
vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
|
||||
new_clip->has_post_norm = true;
|
||||
} catch (std::exception & e) {
|
||||
} catch (std::exception & /*e*/) {
|
||||
new_clip->has_post_norm = false;
|
||||
}
|
||||
|
||||
try {
|
||||
vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
|
||||
new_clip->has_patch_bias = true;
|
||||
} catch (std::exception & e) {
|
||||
} catch (std::exception & /*e*/) {
|
||||
new_clip->has_patch_bias = false;
|
||||
}
|
||||
|
||||
try {
|
||||
vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
|
||||
vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
|
||||
} catch(const std::exception& e) {
|
||||
} catch(const std::exception& /*e*/) {
|
||||
LOG_TEE("%s: failed to load vision model tensors\n", __func__);
|
||||
}
|
||||
|
||||
@@ -1215,26 +1224,26 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
// Yi-type llava
|
||||
vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight"));
|
||||
vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias"));
|
||||
} catch (std::runtime_error & e) { }
|
||||
} catch (std::runtime_error & /*e*/) { }
|
||||
try {
|
||||
// missing in Yi-type llava
|
||||
vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
|
||||
vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
|
||||
} catch (std::runtime_error & e) { }
|
||||
} catch (std::runtime_error & /*e*/) { }
|
||||
try {
|
||||
// Yi-type llava
|
||||
vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight"));
|
||||
vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias"));
|
||||
} catch (std::runtime_error & e) { }
|
||||
} catch (std::runtime_error & /*e*/) { }
|
||||
try {
|
||||
// Yi-type llava
|
||||
vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight"));
|
||||
vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias"));
|
||||
} catch (std::runtime_error & e) { }
|
||||
} catch (std::runtime_error & /*e*/) { }
|
||||
try {
|
||||
vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE);
|
||||
// LOG_TEE("%s: image_newline tensor (llava-1.6) found\n", __func__);
|
||||
} catch (std::runtime_error & e) { }
|
||||
} catch (std::runtime_error & /*e*/) { }
|
||||
} else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) {
|
||||
// MobileVLM projection
|
||||
vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight"));
|
||||
|
||||
Reference in New Issue
Block a user