cuda: neox works

This commit is contained in:
Iwan Kawrakow
2025-11-01 17:54:25 +02:00
parent 9a790a8905
commit f2c4b3a8d1
4 changed files with 225 additions and 1 deletion

View File

@@ -3470,7 +3470,7 @@ ggml_cgraph * llm_build_context::build_qwen3moe() {
auto rope_cache = ggml_rope_cache(ctx0, inp_pos, nullptr, n_embd_head, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow);
-ggml_set_input(rope_cache);
+//ggml_set_input(rope_cache);
for (int il = 0; il < n_layer; ++il) {
struct ggml_tensor * inpSA = inpL;