Mirror of https://github.com/microsoft/mscclpp.git (synced 2026-05-13 01:36:10 +00:00)
Bootstrap works without MPI_Bcast
This commit is contained in:
@@ -185,10 +185,13 @@ mscclppResult_t bootstrapCreateRoot(struct mscclppBootstrapHandle* handle, bool
|
||||
return mscclppSuccess;
|
||||
}
|
||||
|
||||
mscclppResult_t bootstrapGetUniqueId(struct mscclppBootstrapHandle* handle) {
|
||||
memset(handle, 0, sizeof(mscclppBootstrapHandle));
|
||||
MSCCLPPCHECK(getRandomData(&handle->magic, sizeof(handle->magic)));
|
||||
// #include <netinet/in.h>
|
||||
// #include <arpa/inet.h>
|
||||
|
||||
mscclppResult_t bootstrapGetUniqueId(struct mscclppBootstrapHandle* handle, bool isRoot) {
|
||||
memset(handle, 0, sizeof(mscclppBootstrapHandle));
|
||||
// MSCCLPPCHECK(getRandomData(&handle->magic, sizeof(handle->magic)));
|
||||
handle->magic = 0xdeadbeef;
|
||||
char* env = getenv("MSCCLPP_COMM_ID");
|
||||
if (env) {
|
||||
INFO(MSCCLPP_ENV, "MSCCLPP_COMM_ID set by environment to %s", env);
|
||||
@@ -196,10 +199,14 @@ mscclppResult_t bootstrapGetUniqueId(struct mscclppBootstrapHandle* handle) {
|
||||
WARN("Invalid MSCCLPP_COMM_ID, please use format: <ipv4>:<port> or [<ipv6>]:<port> or <hostname>:<port>");
|
||||
return mscclppInvalidArgument;
|
||||
}
|
||||
if (isRoot)
|
||||
MSCCLPPCHECK(bootstrapCreateRoot(handle, false));
|
||||
} else {
|
||||
memcpy(&handle->addr, &bootstrapNetIfAddr, sizeof(union mscclppSocketAddress));
|
||||
MSCCLPPCHECK(bootstrapCreateRoot(handle, false));
|
||||
}
|
||||
// printf("addr = %s port = %d\n", inet_ntoa(handle->addr.sin.sin_addr), (int)ntohs(handle->addr.sin.sin_port));
|
||||
// printf("addr = %s\n", inet_ntoa((*(struct sockaddr_in*)&handle->addr.sa).sin_addr));
|
||||
|
||||
return mscclppSuccess;
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ int main()
|
||||
int world_size;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
|
||||
// int a;
|
||||
// scanf("%d", &a);
|
||||
|
||||
mscclppResult_t res = bootstrapNetInit();
|
||||
if (res != mscclppSuccess) {
|
||||
@@ -19,15 +21,15 @@ int main()
|
||||
}
|
||||
|
||||
mscclppBootstrapHandle handle;
|
||||
if (rank == 0) {
|
||||
res = bootstrapGetUniqueId(&handle);
|
||||
if (true || rank == 0) {
|
||||
res = bootstrapGetUniqueId(&handle, rank == 0);
|
||||
if (res != mscclppSuccess) {
|
||||
printf("bootstrapGetUniqueId failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Bcast(&handle, sizeof(mscclppBootstrapHandle), MPI_BYTE, 0, MPI_COMM_WORLD);
|
||||
// MPI_Bcast(&handle, sizeof(mscclppBootstrapHandle), MPI_BYTE, 0, MPI_COMM_WORLD);
|
||||
|
||||
mscclppComm *comm;
|
||||
res = mscclppCalloc(&comm, 1);
|
||||
@@ -80,6 +82,6 @@ int main()
|
||||
|
||||
MPI_Finalize();
|
||||
|
||||
printf("Succeeded!\n");
|
||||
printf("Succeeded! %d\n", rank);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ static_assert(sizeof(struct mscclppBootstrapHandle) <= sizeof(mscclppUniqueId),
|
||||
|
||||
mscclppResult_t bootstrapNetInit();
|
||||
mscclppResult_t bootstrapCreateRoot(struct mscclppBootstrapHandle* handle, bool idFromEnv);
|
||||
mscclppResult_t bootstrapGetUniqueId(struct mscclppBootstrapHandle* handle);
|
||||
mscclppResult_t bootstrapGetUniqueId(struct mscclppBootstrapHandle* handle, bool isRoot = true);
|
||||
mscclppResult_t bootstrapInit(struct mscclppBootstrapHandle* handle, struct mscclppComm* comm);
|
||||
mscclppResult_t bootstrapAllGather(void* commState, void* allData, int size);
|
||||
mscclppResult_t bootstrapSend(void* commState, int peer, int tag, void* data, int size);
|
||||
|
||||
Reference in New Issue
Block a user