Adding q6_0: CUDA cpy, so Q6_0 can be used for KV-cache

This commit is contained in:
Iwan Kawrakow
2024-10-02 10:50:37 +03:00
parent 4cdf9b333f
commit c255a14a45
2 changed files with 53 additions and 0 deletions

View File

@@ -2242,6 +2242,9 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
if (s == "q5_1") {
return GGML_TYPE_Q5_1;
}
if (s == "q6_0") {
return GGML_TYPE_Q6_0;
}
throw std::runtime_error("Invalid cache type: " + s);
}