// Patch summary (preamble text placed before the first `diff --git` header).
//
// NOTE(review): this patch appears to have been flattened onto a single line;
// the original line breaks and the leading ' ', '+' and '-' hunk markers must
// be restored before it can be applied with `git apply` / `patch`.
//
// File 1: examples/gguf-split/gguf-split.cpp, hunk inside split_strategy::write()
//   - Before the loop over ctx_outs, copies params.output into a local
//     `output_prefix` and, if it ends with ".gguf", truncates that suffix.
//   - The llama_split_path() call now receives output_prefix.c_str() instead
//     of params.output.c_str(); the other arguments are unchanged.
//   - Presumably the suffix is stripped because llama_split_path() appends its
//     own shard suffix and extension; confirm against its declaration.
//   - NOTE(review): the length check uses `>=`, so an output value of exactly
//     ".gguf" yields an empty prefix. Confirm that this is acceptable.
//
// File 2: ggml/include/ggml.h, hunk around GGML_MAX_CONTEXTS
//   - Removes and re-adds `#define GGML_MAX_CONTEXTS 64` with identical tokens.
//     Any difference is presumably whitespace-only and is not recoverable from
//     this flattened view; verify against the original patch.
diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index ecb7fab6..0ab8d833 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -318,10 +318,16 @@ struct split_strategy { void write() { int i_split = 0; int n_split = ctx_outs.size(); + std::string output_prefix = params.output; + const std::string suffix = ".gguf"; + if (output_prefix.size() >= suffix.size() && + output_prefix.compare(output_prefix.size() - suffix.size(), suffix.size(), suffix) == 0) { + output_prefix.resize(output_prefix.size() - suffix.size()); + } for (auto & ctx_out : ctx_outs) { // construct file path char split_path[PATH_MAX] = {0}; - llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split); + llama_split_path(split_path, sizeof(split_path), output_prefix.c_str(), i_split, n_split); ensure_output_directory(split_path); diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 2056b6a9..781deb1c 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -238,7 +238,7 @@ // Maximum number of model contexts (e.g., for model shards). // Increase this value using -DGGML_MAX_CONTEXTS= in CMake // if you need to load more than 64 model shards. -#define GGML_MAX_CONTEXTS 64 +#define GGML_MAX_CONTEXTS 64 #endif #define GGML_MAX_SRC 10 #ifndef GGML_MAX_NAME