Port of Qwen3-VL support from mainline (#883)

* Port of Qwen3-VL for latest ik_llama.cpp

- convert_hf_to_gguf.py - Not touched, use llama.cpp to convert model instead
- sysl and metal support for imrope not added
- Vulkan support for imrope not tested
- Code not tested

* Bugfix n_embd was declared multiple times

https://github.com/ikawrakow/ik_llama.cpp/pull/883#issuecomment-3471179655

* Fix n_embd issue with qwen3vl

* model.output tensor not required

https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2480388389

* Improved logic for qkv combined tensors

59ceaf8fcb (r2480395800)
59ceaf8fcb (r2480398187)

* Fix n_embd for merge_qkv() + cleaner code

https://github.com/ikawrakow/ik_llama.cpp/pull/883#discussion_r2481227395

* Revert TENSOR_NOT_REQUIRED
This commit is contained in:
Thireus ☠
2025-11-04 17:20:54 +00:00
committed by GitHub
parent efcb5f9d9e
commit 86597623a5
21 changed files with 850 additions and 78 deletions

View File

@@ -29,6 +29,7 @@ layout (push_constant) uniform parameter {
uint s1;
uint s2;
int sections[4];
uint is_imrope;
uint is_back;
} p;

View File

@@ -32,17 +32,29 @@ void main() {
const uint sector = (i0 / 2) % sect_dims;
float theta_base = 0.0;
if (sector < p.sections[0]) {
theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= p.sections[0] && sector < sec_w) {
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w + p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
if (p.is_imrope != 0) {
if (sector % 3 == 1 && sector < 3 * p.sections[1]) {
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
} else if (sector % 3 == 2 && sector < 3 * p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
} else if (sector % 3 == 0 && sector < 3 * p.sections[0]) {
theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
} else {
theta_base = data_pos[channel_x + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
}
} else {
if (sector < p.sections[0]) {
theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= p.sections[0] && sector < sec_w) {
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w + p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
}
}
const float freq_factor = p.has_ff != 0 ? data_ff[i0/2] : 1.0f;