cuda: neox works

2026-04-27 01:49:28 +00:00 · 2025-11-01 17:54:25 +02:00
parent 9a790a8905
commit f2c4b3a8d1
4 changed files with 225 additions and 1 deletion
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -3470,7 +3470,7 @@ ggml_cgraph * llm_build_context::build_qwen3moe() {

    auto rope_cache = ggml_rope_cache(ctx0, inp_pos, nullptr, n_embd_head, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
            ext_factor, attn_factor, beta_fast, beta_slow);
-    ggml_set_input(rope_cache);
+    //ggml_set_input(rope_cache);

    for (int il = 0; il < n_layer; ++il) {
        struct ggml_tensor * inpSA = inpL;