mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 03:41:53 +00:00
Port of Qwen3-VL support from mainline (#883)
* Port of Qwen3-VL for latest ik_llama.cpp - convert_hf_to_gguf.py: not touched, use llama.cpp to convert the model instead - sycl and metal support for imrope not added - Vulkan support for imrope not tested - Code not tested * Bugfix: n_embd was declared multiple times https://github.com/ikawrakow/ik_llama.cpp/pull/883#issuecomment-3471179655 * Fix n_embd issue with qwen3vl * model.output tensor not required https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2480388389 * Improved logic for qkv combined tensors (59ceaf8fcb, r2480395800; 59ceaf8fcb, r2480398187) * Fix n_embd for merge_qkv() + cleaner code https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2481227395 * Revert TENSOR_NOT_REQUIRED
This commit is contained in:
@@ -24,22 +24,39 @@ enum e_model {
|
||||
MODEL_80M,
|
||||
MODEL_109M,
|
||||
MODEL_137M,
|
||||
MODEL_140M,
|
||||
MODEL_160M,
|
||||
MODEL_190M,
|
||||
MODEL_220M,
|
||||
MODEL_250M,
|
||||
MODEL_256M,
|
||||
MODEL_270M,
|
||||
MODEL_335M,
|
||||
MODEL_350M,
|
||||
MODEL_360M,
|
||||
MODEL_410M,
|
||||
MODEL_450M,
|
||||
MODEL_475M,
|
||||
MODEL_558M,
|
||||
MODEL_700M,
|
||||
MODEL_770M,
|
||||
MODEL_780M,
|
||||
MODEL_950M,
|
||||
MODEL_0_3B,
|
||||
MODEL_0_5B,
|
||||
MODEL_0_6B,
|
||||
MODEL_1B,
|
||||
MODEL_1_2B,
|
||||
MODEL_1_3B,
|
||||
MODEL_1_4B,
|
||||
MODEL_1_5B,
|
||||
MODEL_1_6B,
|
||||
MODEL_1_7B,
|
||||
MODEL_1_8B,
|
||||
MODEL_2B,
|
||||
MODEL_2_6B,
|
||||
MODEL_2_8B,
|
||||
MODEL_2_9B,
|
||||
MODEL_3B,
|
||||
MODEL_4B,
|
||||
MODEL_6B,
|
||||
@@ -54,17 +71,19 @@ enum e_model {
|
||||
MODEL_15B,
|
||||
MODEL_16B,
|
||||
MODEL_20B,
|
||||
MODEL_27B,
|
||||
MODEL_30B,
|
||||
MODEL_32B,
|
||||
MODEL_34B,
|
||||
MODEL_35B,
|
||||
MODEL_36B,
|
||||
MODEL_40B,
|
||||
MODEL_65B,
|
||||
MODEL_70B,
|
||||
MODEL_106B_A12B,
|
||||
MODEL_120B,
|
||||
MODEL_142B,
|
||||
MODEL_236B,
|
||||
MODEL_355B_A32B,
|
||||
MODEL_290B,
|
||||
MODEL_314B,
|
||||
MODEL_405B,
|
||||
MODEL_671B,
|
||||
@@ -72,22 +91,33 @@ enum e_model {
|
||||
MODEL_MEDIUM,
|
||||
MODEL_LARGE,
|
||||
MODEL_XL,
|
||||
MODEL_A1_7B,
|
||||
MODEL_A2_7B,
|
||||
MODEL_8x7B,
|
||||
MODEL_8x22B,
|
||||
MODEL_16x12B,
|
||||
MODEL_16x3_8B,
|
||||
MODEL_10B_128x3_66B,
|
||||
MODEL_21B_A3B, // Ernie MoE small
|
||||
MODEL_57B_A14B,
|
||||
MODEL_27B,
|
||||
MODEL_17B_16E,
|
||||
MODEL_17B_128E,
|
||||
MODEL_80B_A13B,
|
||||
MODEL_300B_A47B, // Ernie MoE big
|
||||
MODEL_A13B,
|
||||
MODEL_7B_A1B,
|
||||
MODEL_8B_A1B,
|
||||
MODEL_16B_A1B,
|
||||
MODEL_21B_A3B, // Ernie MoE small
|
||||
MODEL_30B_A3B,
|
||||
MODEL_80B_A13B,
|
||||
MODEL_100B_A6B,
|
||||
MODEL_106B_A12B,
|
||||
MODEL_235B_A22B,
|
||||
MODEL_300B_A47B, // Ernie MoE big
|
||||
MODEL_355B_A32B,
|
||||
MODEL_E2B,
|
||||
MODEL_E4B,
|
||||
};
|
||||
|
||||
|
||||
struct llama_layer_nextn {
|
||||
struct ggml_tensor * eh_proj = nullptr;
|
||||
struct ggml_tensor * embed_tokens = nullptr;
|
||||
|
||||
Reference in New Issue
Block a user