mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-12 17:26:04 +00:00
added mscclppGetErrorString
This commit is contained in:
@@ -35,35 +35,4 @@
|
||||
ret func(args)
|
||||
#endif // end PROFAPI
|
||||
|
||||
/*
|
||||
static __inline__ int mscclppTypeSize(mscclppDataType_t type) {
|
||||
switch (type) {
|
||||
case mscclppInt8:
|
||||
case mscclppUint8:
|
||||
return 1;
|
||||
case mscclppFloat16:
|
||||
#if defined(__CUDA_BF16_TYPES_EXIST__)
|
||||
case mscclppBfloat16:
|
||||
#endif
|
||||
return 2;
|
||||
case mscclppInt32:
|
||||
case mscclppUint32:
|
||||
case mscclppFloat32:
|
||||
return 4;
|
||||
case mscclppInt64:
|
||||
case mscclppUint64:
|
||||
case mscclppFloat64:
|
||||
return 8;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#include "debug.h"
|
||||
#include "checks.h"
|
||||
#include "cudawrap.h"
|
||||
#include "utils.h"
|
||||
#include "nvtx.h"
|
||||
*/
|
||||
|
||||
#endif // end include guard
|
||||
|
||||
@@ -198,6 +198,16 @@ mscclppResult_t mscclppBootstrapAllGather(mscclppComm_t comm, void* data, int si
|
||||
*/
|
||||
mscclppResult_t mscclppCommDestroy(mscclppComm_t comm);
|
||||
|
||||
/* Return the string for the given error code.
|
||||
*
|
||||
* Output:
|
||||
* returns the string
|
||||
*
|
||||
* Inputs:
|
||||
* result: the error code that this function needs to translate
|
||||
*/
|
||||
const char* mscclppGetErrorString(mscclppResult_t result);
|
||||
|
||||
/* Connect to a remote rank. This function only prepares metadata for connection. The actual connection
|
||||
* is made by a following call of mscclppConnectionSetup(). Note that this function is two-way and a connection
|
||||
* from rank i to remote rank j needs to have a counterpart from rank j to rank i.
|
||||
|
||||
14
src/init.cc
14
src/init.cc
@@ -255,6 +255,20 @@ mscclppResult_t mscclppCommDestroy(mscclppComm_t comm){
|
||||
return mscclppSuccess;
|
||||
}
|
||||
|
||||
MSCCLPP_API(const char*, mscclppGetErrorString, mscclppResult_t code);
|
||||
/* Map an mscclppResult_t value to a human-readable message.
 *
 * Inputs:
 *   code: the result code to describe
 *
 * Output:
 *   returns a pointer to a string literal; any code without a dedicated
 *   case below yields "unknown result code"
 */
const char* mscclppGetErrorString(mscclppResult_t code) {
  /* Start from the fallback text so unlisted codes need no extra branch. */
  const char* message = "unknown result code";

  switch (code) {
    case mscclppSuccess:
      message = "no error";
      break;
    case mscclppUnhandledCudaError:
      message = "unhandled cuda error";
      break;
    case mscclppSystemError:
      message = "unhandled system error";
      break;
    case mscclppInternalError:
      message = "internal error";
      break;
    case mscclppInvalidArgument:
      message = "invalid argument";
      break;
    case mscclppInvalidUsage:
      message = "invalid usage";
      break;
    case mscclppRemoteError:
      message = "remote process exited or there was a network error";
      break;
    case mscclppInProgress:
      message = "MSCCL++ operation in progress";
      break;
    default:
      break;
  }

  return message;
}
|
||||
|
||||
MSCCLPP_API(mscclppResult_t, mscclppGetDeviceConnection, mscclppComm_t comm, int remoteRank, int tag, mscclppDevConn_t** devConn);
|
||||
mscclppResult_t mscclppGetDeviceConnection(mscclppComm_t comm, int remoteRank, int tag, mscclppDevConn_t** devConn){
|
||||
|
||||
@@ -18,9 +18,9 @@ static int nranksPerNode = 8;
|
||||
/* Evaluate `call` exactly once and check its mscclppResult_t result.
 * Any result other than mscclppSuccess or mscclppInProgress is reported on
 * stdout (file, line, and the text from mscclppGetErrorString) and then
 * returned from the enclosing function, which must therefore return a type
 * compatible with mscclppResult_t.
 * Wrapped in do { ... } while (0) so the macro behaves as a single statement. */
#define MSCCLPPCHECK(call) do { \
  mscclppResult_t res = (call); \
  if (res != mscclppSuccess && res != mscclppInProgress) { \
    /* Report where the failure happened and what it was */ \
    printf("Failure at %s:%d -> %s\n", __FILE__, __LINE__, mscclppGetErrorString(res)); \
    return res; \
  } \
} while (0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user