Mirror of https://github.com/ikawrakow/ik_llama.cpp.git, synced 2026-04-29 19:01:47 +00:00
Merge mainline llama.cpp (#3)
* Merging mainline - WIP

* Merging mainline - WIP. AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower, as is so often the case with llama.cpp/ggml after some "improvements" have been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
@@ -23,6 +23,10 @@
 #include "ggml-cuda.h"
 #include "ggml-sycl.h"
+
+#ifdef GGML_USE_CANN
+#include "ggml-cann.h"
+#endif
 
 // utils
 static uint64_t get_time_ns() {
     using clock = std::chrono::high_resolution_clock;
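The new include follows the same compile-time gating as the CUDA and SYCL headers above it: the CANN (Ascend NPU) header is only compiled when the build system defines GGML_USE_CANN, so builds without the Ascend toolchain never reference it. A minimal standalone sketch of that gating pattern (HAVE_BACKEND and the stubs are illustrative, not part of ggml):

#include <cstdio>

// Illustrative gate: compile with -DHAVE_BACKEND to take the backend-specific
// path; without the flag, the generic stub is compiled instead.
#ifdef HAVE_BACKEND
static const char * backend_name() { return "backend enabled"; }
#else
static const char * backend_name() { return "backend disabled"; }
#endif

int main() {
    printf("%s\n", backend_name());
    return 0;
}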
@@ -120,6 +124,17 @@ static std::string get_gpu_info() {
             id += "/";
         }
     }
 #endif
+#ifdef GGML_USE_CANN
+    uint32_t count = ggml_backend_cann_get_device_count();
+    for (uint32_t i = 0; i < count; i++) {
+        char buf[128];
+        ggml_backend_cann_get_device_description(i, buf, sizeof(buf));
+        id += buf;
+        if (i < count - 1) {
+            id += "/";
+        }
+    }
+#endif
     // TODO: other backends
     return id;
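The CANN branch of get_gpu_info() mirrors the existing CUDA and SYCL branches: query the device count, fetch a short description per device into a fixed buffer, and join the descriptions with "/". A self-contained sketch of that join pattern, with hypothetical stand-ins for the two ggml_backend_cann_* calls (fake_get_device_count and fake_get_device_description exist only for this example):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>

// Hypothetical stand-ins for ggml_backend_cann_get_device_count() and
// ggml_backend_cann_get_device_description(); real code would call the backend.
static uint32_t fake_get_device_count() { return 2; }
static void fake_get_device_description(uint32_t i, char * buf, size_t buf_len) {
    snprintf(buf, buf_len, "Ascend 910B (device %u)", i);
}

static std::string get_gpu_info_sketch() {
    std::string id;
    uint32_t count = fake_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char buf[128];
        fake_get_device_description(i, buf, sizeof(buf));
        id += buf;
        if (i < count - 1) {
            id += "/";  // same separator the CUDA/SYCL branches use
        }
    }
    return id;
}

int main() {
    // Prints e.g. "Ascend 910B (device 0)/Ascend 910B (device 1)"
    printf("%s\n", get_gpu_info_sketch().c_str());
    return 0;
}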