diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index 9c8c91f4..5faba723 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -98,6 +98,31 @@ static void ggml_cuda_log(enum ggml_log_level level, const char * format, ...) {
     }
 }
 
+// Log the most recently recorded CUDA calls, newest first. Only the last
+// kNumStored calls are still held by the ring buffer.
+void Tracer::print_calls() const {
+    // Copy the state under the lock so that add_call() on another thread cannot
+    // overwrite an entry while it is being printed, and so that logging does not
+    // run with the mutex held.
+    std::array<Call, kNumStored> recent;
+    int64_t total = 0;
+    {
+        std::lock_guard<std::mutex> lock(mutex);
+        total  = num_calls;
+        recent = calls;
+    }
+    if (total == 0) return;
+    GGML_CUDA_LOG_ERROR("========================== CUDA trace: %zu previous calls\n", size_t(total));
+    // Call i lives in slot i % kNumStored, so indices below total - kNumStored
+    // have already been overwritten and must not be printed.
+    const int64_t first = std::max(int64_t(0), total - kNumStored);
+    for (int64_t i = total - 1; i >= first; --i) {
+        const auto & call = recent[i % kNumStored];
+        GGML_CUDA_LOG_ERROR("%12zu: function %s, file %s, line %d\n", size_t(i), call.func.c_str(), call.file.c_str(), call.line);
+    }
+}
+
+
 [[noreturn]]
 void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
     int id = -1; // in case cudaGetDevice fails
@@ -106,6 +131,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in
     GGML_CUDA_LOG_ERROR("CUDA error: %s\n", msg);
     GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
     GGML_CUDA_LOG_ERROR(" %s\n", stmt);
+    Tracer::instance().print_calls();
     // abort with GGML_ASSERT to get a stack trace
     GGML_ABORT("CUDA error");
 }
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index a04a1929..db45a55f 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <mutex>
 
 #if defined(GGML_USE_HIPBLAS)
 #include "vendors/hip.h"
@@ -66,11 +67,53 @@
 [[noreturn]]
 void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg);
 
+// Ring buffer holding the last kNumStored successful CUDA calls. CUDA_CHECK_GEN
+// records every call that succeeds, and ggml_cuda_error() prints the buffer so
+// that a failure can be read in the context of the calls that preceded it.
+struct Tracer {
+    constexpr static int kNumStored = 32; // number of calls retained
+    struct Call {
+        std::string func;
+        std::string file;
+        int line;
+    };
+    // Guards calls and num_calls; mutable so that print_calls() const can lock it.
+    mutable std::mutex mutex;
+    std::array<Call, kNumStored> calls;
+    int64_t num_calls = 0; // total number of calls recorded, not capped at kNumStored
+
+    // Store one call site in slot num_calls % kNumStored, replacing what was there.
+    inline void add_call(const char * func, const char * file, int line) {
+        std::lock_guard<std::mutex> lock(mutex);
+        calls[num_calls%kNumStored] = {{func}, {file}, line};
+        ++num_calls;
+    }
+
+    // Single instance, created on first use.
+    static Tracer& instance() {
+        static Tracer tracer;
+        return tracer;
+    }
+
+    // Log the retained calls, newest first.
+    void print_calls() const;
+
+    // Record a call on the shared instance; used by CUDA_CHECK_GEN.
+    static inline void register_call(const char * func, const char * file, int line) {
+        instance().add_call(func, file, line);
+    }
+
+    // The trace is also printed when the instance is destroyed.
+    ~Tracer() { print_calls(); }
+};
+
 #define CUDA_CHECK_GEN(err, success, error_fn) \
     do { \
         auto err_ = (err); \
         if (err_ != (success)) { \
             ggml_cuda_error(#err, __func__, __FILE__, __LINE__, error_fn(err_)); \
+        } else { \
+            Tracer::register_call(__func__, __FILE__, __LINE__); \
         } \
     } while (0)
 