mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-02 01:50:01 +00:00
CUDA call tracer
This commit is contained in:
@@ -98,6 +98,17 @@ static void ggml_cuda_log(enum ggml_log_level level, const char * format, ...) {
|
||||
}
|
||||
}
|
||||
|
||||
void Tracer::print_calls() const {
|
||||
if (num_calls == 0) return;
|
||||
GGML_CUDA_LOG_ERROR("========================== CUDA trace: %zu previous calls\n", num_calls);
|
||||
int first = std::max(int64_t(0), num_calls - 1 - kNumStored);
|
||||
for (int64_t i = num_calls-1; i >= first; --i) {
|
||||
auto& call = calls[i%kNumStored];
|
||||
GGML_CUDA_LOG_ERROR("%12zu: function %s, file %s, line %d\n", uint64_t(i), call.func.c_str(), call.file.c_str(), call.line);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
[[noreturn]]
|
||||
void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
|
||||
int id = -1; // in case cudaGetDevice fails
|
||||
@@ -106,6 +117,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in
|
||||
GGML_CUDA_LOG_ERROR("CUDA error: %s\n", msg);
|
||||
GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
|
||||
GGML_CUDA_LOG_ERROR(" %s\n", stmt);
|
||||
Tracer::instance().print_calls();
|
||||
// abort with GGML_ASSERT to get a stack trace
|
||||
GGML_ABORT("CUDA error");
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <cfloat>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#if defined(GGML_USE_HIPBLAS)
|
||||
#include "vendors/hip.h"
|
||||
@@ -66,11 +67,44 @@
|
||||
[[noreturn]]
|
||||
void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg);
|
||||
|
||||
// Keeps a record of the source locations of the most recent calls registered
// through register_call() / add_call(), so that they can be logged later with
// print_calls(). The record is also printed when the tracer is destroyed.
struct Tracer {
    // Number of slots in the ring buffer, i.e. how many calls are remembered.
    static constexpr int kNumStored = 32;

    // Source location of one registered call.
    struct Call {
        std::string func;
        std::string file;
        int line;
    };

    std::mutex mutex;                    // held by add_call() while it updates the members below
    std::array<Call, kNumStored> calls;  // ring buffer: call number i is stored in slot i % kNumStored
    int64_t num_calls = 0;               // total number of calls added so far

    // Returns the single shared tracer, a function-local static.
    static Tracer& instance() {
        static Tracer tracer;
        return tracer;
    }

    // Convenience wrapper: records a call in the shared tracer.
    static void register_call(const char * func, const char * file, int line) {
        instance().add_call(func, file, line);
    }

    // Stores the given location in the next ring-buffer slot, overwriting the
    // oldest entry once more than kNumStored calls have been added.
    void add_call(const char * func, const char * file, int line) {
        std::lock_guard<std::mutex> guard(mutex);
        Call & slot = calls[num_calls % kNumStored];
        slot = Call{func, file, line};
        ++num_calls;
    }

    // Logs the stored calls; defined out of line.
    void print_calls() const;

    ~Tracer() { print_calls(); }
};
|
||||
|
||||
// Evaluates `err` exactly once and compares the result with `success`.
// On mismatch, ggml_cuda_error() is called with the stringified expression,
// the current function/file/line and the message produced by `error_fn`.
// ggml_cuda_error() is declared [[noreturn]], so execution continues past the
// check only on success, in which case the call site is registered with the
// Tracer.
#define CUDA_CHECK_GEN(err, success, error_fn)                                   \
    do {                                                                         \
        auto err_ = (err);                                                       \
        if (err_ != (success)) {                                                 \
            ggml_cuda_error(#err, __func__, __FILE__, __LINE__, error_fn(err_)); \
        }                                                                        \
        Tracer::register_call(__func__, __FILE__, __LINE__);                     \
    } while (0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user