gguf-split: fix the split output files naming (#1336)

* Fix the naming of split output files in gguf-split.cpp. With this fix, the original `.gguf` extension is no longer included in the output file name before the split numbering. Example: no more `model.gguf-00001-of-00200.gguf`; instead, `model-00001-of-00200.gguf`.
* Increase GGML_MAX_CONTEXTS to 2048
* Revert GGML_MAX_CONTEXTS to 64
2026-05-11 08:30:19 +00:00 · 2026-03-02 08:43:47 +01:00
parent d239dabcc6
commit d4ac5f1566
2 changed files with 8 additions and 2 deletions
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -318,10 +318,16 @@ struct split_strategy {
    void write() {
        int i_split = 0;
        int n_split = ctx_outs.size();
+        std::string output_prefix = params.output;
+        const std::string suffix = ".gguf";
+        if (output_prefix.size() >= suffix.size() && 
+            output_prefix.compare(output_prefix.size() - suffix.size(), suffix.size(), suffix) == 0) {
+            output_prefix.resize(output_prefix.size() - suffix.size());
+        }
        for (auto & ctx_out : ctx_outs) {
            // construct file path
            char split_path[PATH_MAX] = {0};
-            llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
+            llama_split_path(split_path, sizeof(split_path), output_prefix.c_str(), i_split, n_split);

            ensure_output_directory(split_path);

--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -238,7 +238,7 @@
 // Maximum number of model contexts (e.g., for model shards).
 // Increase this value using -DGGML_MAX_CONTEXTS=<value> in CMake
 // if you need to load more than 64 model shards.
-#define GGML_MAX_CONTEXTS 64
+#define GGML_MAX_CONTEXTS       64
 #endif
 #define GGML_MAX_SRC            10
 #ifndef GGML_MAX_NAME