Port of Qwen3-VL support from mainline (#883)

* Port of Qwen3-VL for latest ik_llama.cpp

- convert_hf_to_gguf.py: not touched; use mainline llama.cpp to convert the model instead
- SYCL and Metal support for imrope not added (see the sketch after this list)
- Vulkan support for imrope not tested
- Code not tested
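
For context on the imrope items above: Qwen3-VL uses an interleaved multimodal RoPE, where each position has temporal/height/width components. The snippet below is only a rough, self-contained illustration of the assumed layout difference between standard mrope (contiguous blocks of rotary dimension pairs per component) and the interleaved variant (assumed here to cycle through the components pair by pair). Names, section sizes and the round-robin rule are assumptions for illustration, not the ggml kernel that a SYCL/Metal/Vulkan port would have to reproduce.

    // Conceptual sketch only -- an assumption, not the ggml implementation.
    #include <cstdio>

    enum pos_stream { POS_T, POS_H, POS_W, POS_STREAMS };

    // sections[i] = rotary dim pairs budgeted for position component i
    static pos_stream stream_for_pair(int pair, const int sections[POS_STREAMS], bool interleaved) {
        if (!interleaved) {
            // standard mrope: contiguous chunks [ t t t t | h h h | w w w ]
            if (pair < sections[POS_T])                   return POS_T;
            if (pair < sections[POS_T] + sections[POS_H]) return POS_H;
            return POS_W;
        }
        // interleaved variant (assumed): [ t h w t h w ... ] round-robin
        return (pos_stream)(pair % POS_STREAMS);
    }

    int main() {
        const int sections[POS_STREAMS] = {4, 3, 3}; // made-up section sizes
        const char names[] = "thw";
        for (int mode = 0; mode < 2; ++mode) {
            printf("%s: ", mode ? "imrope" : "mrope ");
            for (int pair = 0; pair < 10; ++pair) {
                printf("%c", names[stream_for_pair(pair, sections, mode != 0)]);
            }
            printf("\n");
        }
        return 0;
    }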

* Bugfix: n_embd was declared multiple times

https://github.com/ikawrakow/ik_llama.cpp/pull/883#issuecomment-3471179655

* Fix n_embd issue with qwen3vl

* model.output tensor not required

https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2480388389

* Improved logic for qkv combined tensors (illustrative sketch below)

59ceaf8fcb (r2480395800)
59ceaf8fcb (r2480398187)
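
The review threads above concern how Q, K and V are recovered when the model provides a single combined QKV weight. As a rough illustration of the usual llama.cpp-style pattern (one matmul, then views into the result), under assumed dimension names and not the exact code from this PR:

    #include "ggml.h"

    struct qkv_split {
        ggml_tensor * q;
        ggml_tensor * k;
        ggml_tensor * v;
    };

    // Sketch: project once with the fused weight, then slice out the rows
    // belonging to Q, K and V. Downstream code would typically still reshape
    // per head and/or make the views contiguous.
    static qkv_split split_fused_qkv(
            ggml_context * ctx,
            ggml_tensor  * wqkv,   // [n_embd, n_embd_q + n_embd_k + n_embd_v]
            ggml_tensor  * cur,    // [n_embd, n_tokens]
            int64_t n_embd_q,      // n_head    * n_embd_head_k
            int64_t n_embd_k,      // n_head_kv * n_embd_head_k
            int64_t n_embd_v) {    // n_head_kv * n_embd_head_v
        ggml_tensor * qkv = ggml_mul_mat(ctx, wqkv, cur); // [q+k+v rows, n_tokens]

        const int64_t n_tokens = cur->ne[1];
        const size_t  es       = ggml_element_size(qkv);

        qkv_split out;
        out.q = ggml_view_2d(ctx, qkv, n_embd_q, n_tokens, qkv->nb[1], 0);
        out.k = ggml_view_2d(ctx, qkv, n_embd_k, n_tokens, qkv->nb[1], n_embd_q * es);
        out.v = ggml_view_2d(ctx, qkv, n_embd_v, n_tokens, qkv->nb[1], (n_embd_q + n_embd_k) * es);
        return out;
    }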

* Fix n_embd for merge_qkv() + cleaner code (worked example below)

https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2481227395
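
A small worked example of why the slice sizes need to come from the head geometry rather than from n_embd (the numbers are Qwen3-30B-A3B-like and only illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t n_embd      = 2048; // token embedding width
        const int64_t n_head      = 32;   // query heads
        const int64_t n_head_kv   = 4;    // key/value heads (GQA)
        const int64_t n_embd_head = 128;  // explicit per-head dimension

        const int64_t n_embd_q = n_head    * n_embd_head; // 4096
        const int64_t n_embd_k = n_head_kv * n_embd_head; //  512
        const int64_t n_embd_v = n_head_kv * n_embd_head; //  512

        // The fused QKV weight has 4096 + 512 + 512 = 5120 output rows,
        // not 3 * n_embd = 6144, so deriving the split from n_embd alone
        // gives the wrong slice sizes.
        printf("qkv rows: %lld vs 3*n_embd: %lld\n",
               (long long)(n_embd_q + n_embd_k + n_embd_v),
               (long long)(3 * n_embd));
        return 0;
    }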

* Revert TENSOR_NOT_REQUIRED
Author: Thireus ☠
Date: 2025-11-04 17:20:54 +00:00
Committed by: GitHub
Parent: efcb5f9d9e
Commit: 86597623a5
21 changed files with 850 additions and 78 deletions


@@ -24,22 +24,39 @@ enum e_model {
MODEL_80M,
MODEL_109M,
MODEL_137M,
MODEL_140M,
MODEL_160M,
MODEL_190M,
MODEL_220M,
MODEL_250M,
MODEL_256M,
MODEL_270M,
MODEL_335M,
MODEL_350M,
MODEL_360M,
MODEL_410M,
MODEL_450M,
MODEL_475M,
MODEL_558M,
MODEL_700M,
MODEL_770M,
MODEL_780M,
MODEL_950M,
MODEL_0_3B,
MODEL_0_5B,
MODEL_0_6B,
MODEL_1B,
MODEL_1_2B,
MODEL_1_3B,
MODEL_1_4B,
MODEL_1_5B,
MODEL_1_6B,
MODEL_1_7B,
MODEL_1_8B,
MODEL_2B,
MODEL_2_6B,
MODEL_2_8B,
MODEL_2_9B,
MODEL_3B,
MODEL_4B,
MODEL_6B,
@@ -54,17 +71,19 @@ enum e_model {
MODEL_15B,
MODEL_16B,
MODEL_20B,
MODEL_27B,
MODEL_30B,
MODEL_32B,
MODEL_34B,
MODEL_35B,
MODEL_36B,
MODEL_40B,
MODEL_65B,
MODEL_70B,
MODEL_106B_A12B,
MODEL_120B,
MODEL_142B,
MODEL_236B,
MODEL_355B_A32B,
MODEL_290B,
MODEL_314B,
MODEL_405B,
MODEL_671B,
@@ -72,22 +91,33 @@ enum e_model {
MODEL_MEDIUM,
MODEL_LARGE,
MODEL_XL,
MODEL_A1_7B,
MODEL_A2_7B,
MODEL_8x7B,
MODEL_8x22B,
MODEL_16x12B,
MODEL_16x3_8B,
MODEL_10B_128x3_66B,
MODEL_21B_A3B, // Ernie MoE small
MODEL_57B_A14B,
MODEL_27B,
MODEL_17B_16E,
MODEL_17B_128E,
MODEL_80B_A13B,
MODEL_300B_A47B, // Ernie MoE big
MODEL_A13B,
MODEL_7B_A1B,
MODEL_8B_A1B,
MODEL_16B_A1B,
MODEL_21B_A3B, // Ernie MoE small
MODEL_30B_A3B,
MODEL_80B_A13B,
MODEL_100B_A6B,
MODEL_106B_A12B,
MODEL_235B_A22B,
MODEL_300B_A47B, // Ernie MoE big
MODEL_355B_A32B,
MODEL_E2B,
MODEL_E4B,
};
struct llama_layer_nextn {
struct ggml_tensor * eh_proj = nullptr;
struct ggml_tensor * embed_tokens = nullptr;