Revert "Better CPU prompt processing performance for SWA models (#696)" (#701)

This reverts commit 93a4f6089f. Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-04-30 19:31:48 +00:00 · 2025-08-17 15:44:02 +03:00
parent 7d14f8ea79
commit a3a523009e
5 changed files with 30 additions and 140 deletions
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -2043,10 +2043,6 @@ extern "C" {
            struct ggml_tensor * a,
            struct ggml_tensor * sinks);

-    GGML_API void ggml_flash_attn_ext_add_bounds(
-            struct ggml_tensor * a,
-            struct ggml_tensor * bounds);
-
    // TODO: needs to be adapted to ggml_flash_attn_ext
    GGML_API struct ggml_tensor * ggml_flash_attn_back(
           struct ggml_context * ctx,