All tests are passing with memory registration

This commit is contained in:
Saeed Maleki
2023-05-01 22:25:14 +00:00
parent 8a5a7873e0
commit 5b7e76cae4
2 changed files with 16 additions and 4 deletions

View File

@@ -1,3 +1,4 @@
#include <algorithm>
#include "connection.hpp"
#include "checks.hpp"
#include "infiniband/verbs.h"
@@ -142,15 +143,25 @@ void IBConnection::flush()
void IBConnection::startSetup(std::shared_ptr<BaseBootstrap> bootstrap)
{
bootstrap->send(&qp->getInfo(), sizeof(qp->getInfo()), remoteRank(), tag());
bootstrap->send(&transport_, sizeof(transport_), remoteRank(), tag());
std::vector<char> ibQpTransport;
std::copy_n(reinterpret_cast<char*>(&qp->getInfo()), sizeof(qp->getInfo()), std::back_inserter(ibQpTransport));
std::copy_n(reinterpret_cast<char*>(&transport_), sizeof(transport_), std::back_inserter(ibQpTransport));
bootstrap->send(ibQpTransport.data(), ibQpTransport.size(), remoteRank(), tag());
}
void IBConnection::endSetup(std::shared_ptr<BaseBootstrap> bootstrap)
{
std::vector<char> ibQpTransport(sizeof(IbQpInfo) + sizeof(Transport));
bootstrap->recv(ibQpTransport.data(), ibQpTransport.size(), remoteRank(), tag());
IbQpInfo qpInfo;
bootstrap->recv(&qpInfo, sizeof(qpInfo), remoteRank(), tag());
bootstrap->recv(&remoteTransport_, sizeof(remoteTransport_), remoteRank(), tag());
auto it = ibQpTransport.begin();
std::copy_n(it, sizeof(qpInfo), reinterpret_cast<char*>(&qpInfo));
it += sizeof(qpInfo);
std::copy_n(it, sizeof(remoteTransport_), reinterpret_cast<char*>(&remoteTransport_));
it += sizeof(qpInfo);
qp->rtr(qpInfo);
qp->rts();
}

View File

@@ -30,6 +30,7 @@ RegisteredMemory::Impl::Impl(void* data, size_t size, int rank, TransportFlags t
transportInfo.ibLocal = true;
transportInfo.ibMrInfo = mr->getInfo();
this->transportInfos.push_back(transportInfo);
INFO(MSCCLPP_NET, "IB mr for address %p with size %ld is registered", data, size);
};
if (transports.has(Transport::IB0))
addIb(Transport::IB0);