bind numa node to communicator

This commit is contained in:
Binyang Li
2023-03-31 08:05:49 +00:00
parent fe1d7fee9e
commit af5825b474
4 changed files with 28 additions and 7 deletions

View File

@@ -54,6 +54,7 @@ struct mscclppComm
int rank; // my rank in the communicator
int nRanks; // number of GPUs in communicator
int cudaDev; // my cuda device index
int numaNode; // my numa node number
// Flag to ask MSCCLPP kernels to abort
volatile uint32_t* abortFlag;

View File

@@ -373,6 +373,13 @@ void mscclppDefaultLogHandler(const char* msg);
*/
mscclppResult_t mscclppSetLogHandler(mscclppLogHandler_t handler);
/* Bind NUMA node for the communicator.
 *
 * The requested node is recorded on the communicator; this call does not
 * change the CPU/memory affinity of the calling thread itself.
 *
 * Inputs:
 *   comm:     the communicator whose NUMA node is being set
 *   numaNode: the NUMA node to be bound
 */
mscclppResult_t mscclppNumaBind(mscclppComm_t comm, int numaNode);
#ifdef __cplusplus
} // end extern "C"
#endif

View File

@@ -75,6 +75,7 @@ mscclppResult_t mscclppCommInitRank(mscclppComm_t* comm, int nranks, const char*
MSCCLPPCHECKGOTO(mscclppCalloc(&_comm, 1), res, fail);
_comm->rank = rank;
_comm->nRanks = nranks;
_comm->numaNode = -1;
// We assume that the user has set the device to the intended one already
CUDACHECK(cudaGetDevice(&_comm->cudaDev));
@@ -547,3 +548,10 @@ mscclppResult_t mscclppSetBootstrapConnTimeout(int timeout)
config->setBootstrapConnectionTimeoutConfig(timeout);
return mscclppSuccess;
}
// Records the NUMA node that should be used for this communicator.
//
// Only the communicator's numaNode field is written here; no binding is
// performed by this call. The proxy service reads the field later and calls
// NumaBind() with it when it is not -1.
//
// Inputs:
//   comm:     communicator to update; must not be null
//   numaNode: NUMA node to bind. -1 is the value assigned at communicator
//             init and means "not set by the user", in which case the proxy
//             service falls back to its own default choice.
//
// Returns mscclppInvalidArgument when comm is null, mscclppSuccess otherwise.
MSCCLPP_API(mscclppResult_t, mscclppNumaBind, mscclppComm_t comm, int numaNode);
mscclppResult_t mscclppNumaBind(mscclppComm_t comm, int numaNode)
{
  // Guard against a null handle: dereferencing it would crash the caller
  // instead of reporting the misuse through the result code.
  if (comm == nullptr) {
    return mscclppInvalidArgument;
  }
  comm->numaNode = numaNode;
  return mscclppSuccess;
}

View File

@@ -74,14 +74,19 @@ void* mscclppProxyService(void* _args)
PROXYCUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
bool isP2pProxy = (ibCtx == nullptr);
if (isP2pProxy) {
// TODO(chhwang): find numa node
// Current mapping is based on NDv4: GPU [0,1,2,3,4,5,6,7] -> NUMA [1,1,0,0,3,3,2,2]
// TODO(saemal): either ask user or detect it automatically
NumaBind((comm->cudaDev / 2) ^ 1);
p2pStream = args->proxyState->stream;
int numaNode = comm->numaNode;
if (numaNode != -1) {
NumaBind(numaNode);
} else {
NumaBind(ibCtx->numaNode);
if (isP2pProxy) {
// TODO(chhwang): find numa node
// Current mapping is based on NDv4: GPU [0,1,2,3,4,5,6,7] -> NUMA [1,1,0,0,3,3,2,2]
// TODO(saemal): either ask user or detect it automatically
NumaBind((comm->cudaDev / 2) ^ 1);
p2pStream = args->proxyState->stream;
} else {
NumaBind(ibCtx->numaNode);
}
}
free(_args); // allocated in mscclppProxyCreate