Llama-quantize: Partial requant feature (#1313)

* Partial Requant feature for llama-quantize

- Inspired by the recently ported --dry-run feature.
- Allows partially requantizing a split quantized .gguf by requantizing only the splits that are missing from the destination directory.
- Works both for GGUFs that are split tensor by tensor and for those split by groups of several tensors (though the latter is not well tested beyond 2 tensors per split).
- Vibe coded.

* Create output directory if it doesn't exist in llama-quantize

* Create output directory if it doesn't exist in gguf-split

* Add exit when directory fails to be created on Windows

* Use std::filesystem

* cleanup
This commit is contained in:
Nexes the Elder
2026-02-25 07:25:15 +01:00
committed by GitHub
parent 68431b049a
commit 170467e835
5 changed files with 69 additions and 2 deletions

View File

@@ -7,6 +7,7 @@
#include <fstream>
#include <string>
#include <vector>
#include <filesystem>
#include <stdio.h>
#include <string.h>
@@ -190,6 +191,18 @@ static void zeros(std::ofstream & file, size_t n) {
}
}
// Makes sure the directory that will hold `filepath` exists, creating any
// missing intermediate directories along the way. A path with no parent
// component (a bare file name, or an empty string) is left untouched.
// If the directory cannot be created, the error is reported on stderr and
// the process exits with EXIT_FAILURE.
static void ensure_output_directory(const std::string & filepath) {
    const std::filesystem::path parent_dir = std::filesystem::path(filepath).parent_path();

    // Nothing to create when the file lives in the current directory.
    if (parent_dir.empty()) {
        return;
    }

    // Use the error_code overload so a failure is reported here instead of
    // surfacing as an exception.
    std::error_code err;
    std::filesystem::create_directories(parent_dir, err);
    if (!err) {
        return;
    }

    const std::string dir_name = parent_dir.string();
    const std::string reason   = err.message();
    fprintf(stderr, "Failed to create directory '%s': %s\n", dir_name.c_str(), reason.c_str());
    exit(EXIT_FAILURE);
}
struct split_strategy {
const split_params params;
std::ifstream & f_input;
@@ -310,6 +323,8 @@ struct split_strategy {
char split_path[PATH_MAX] = {0};
llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
ensure_output_directory(split_path);
// open the output file
printf("Writing file %s ... ", split_path);
fflush(stdout);
@@ -401,6 +416,8 @@ static void gguf_merge(const split_params & split_params) {
int n_split = 1;
int total_tensors = 0;
ensure_output_directory(split_params.output);
// avoid overwriting existing output file
if (std::ifstream(split_params.output.c_str())) {
fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());