From 2a46644692cd92e479f16b02e682a1317742afff Mon Sep 17 00:00:00 2001
From: Saeed Maleki
Date: Mon, 24 Apr 2023 23:08:30 +0000
Subject: [PATCH] adding checks.hpp

---
Review note: line structure was restored and the macro defects were fixed in
this copy of the patch, so the blob id on the "index" line below still refers
to the originally recorded 55-line version of the file.

 src/include/checks.hpp | 75 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 src/include/checks.hpp

diff --git a/src/include/checks.hpp b/src/include/checks.hpp
new file mode 100644
index 00000000..ee5f7058
--- /dev/null
+++ b/src/include/checks.hpp
@@ -0,0 +1,75 @@
+/*************************************************************************
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * See LICENSE.txt for license information
+ ************************************************************************/
+
+#ifndef MSCCLPP_CHECKS_HPP_
+#define MSCCLPP_CHECKS_HPP_
+
+#include "debug.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include <stdexcept>
+#include <string>
+
+// Checking macros that throw std::runtime_error on failure. Each macro expands
+// to one statement without a trailing semicolon, so call sites write
+// `MACRO(...);` and the macros stay usable inside unbraced if/else branches.
+
+// Evaluates `call` once and throws unless the result is mscclppSuccess or
+// mscclppInProgress. The message embeds the stringified call expression.
+#define MSCCLPPTHROW(call) \
+  do { \
+    mscclppResult_t mscclppThrowRes_ = (call); \
+    if (mscclppThrowRes_ != mscclppSuccess && mscclppThrowRes_ != mscclppInProgress) { \
+      throw std::runtime_error(std::string("Call to " #call " failed with error code ") + \
+                               mscclppGetErrorString(mscclppThrowRes_)); \
+    } \
+  } while (false)
+
+// Evaluates `cmd` once and throws unless the result is cudaSuccess.
+#define CUDATHROW(cmd) \
+  do { \
+    cudaError_t cudaThrowErr_ = (cmd); \
+    if (cudaThrowErr_ != cudaSuccess) { \
+      throw std::runtime_error(std::string("Cuda failure '") + cudaGetErrorString(cudaThrowErr_) + "'"); \
+    } \
+  } while (false)
+
+// Check system calls
+
+// Runs `call` through SYSCHECKVALTHROW and discards the returned value.
+// `name` must be a string literal: it is pasted into the message literals.
+#define SYSCHECKTHROW(call, name) \
+  do { \
+    int sysCheckRetval_; \
+    SYSCHECKVALTHROW(call, name, sysCheckRetval_); \
+  } while (false)
+
+// Runs `call` through SYSCHECKSYNCTHROW, leaving its result in `retval`, and
+// throws with strerror(errno) when that result is -1.
+#define SYSCHECKVALTHROW(call, name, retval) \
+  do { \
+    SYSCHECKSYNCTHROW(call, name, retval); \
+    if ((retval) == -1) { \
+      throw std::runtime_error(std::string("Call to " name " failed : ") + strerror(errno)); \
+    } \
+  } while (false)
+
+// Assigns `call` to `retval`, re-evaluating it for as long as it yields -1 with
+// errno equal to EINTR, EWOULDBLOCK or EAGAIN; each retry is logged via INFO.
+// Contains no throw of its own: any other outcome simply leaves the loop.
+#define SYSCHECKSYNCTHROW(call, name, retval) \
+  do { \
+    (retval) = (call); \
+    if ((retval) == -1 && (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN)) { \
+      INFO(MSCCLPP_ALL, "Call to " name " returned %s, retrying", strerror(errno)); \
+    } else { \
+      break; \
+    } \
+  } while (true)
+
+#endif  // MSCCLPP_CHECKS_HPP_