deserializing registered memory is failing -- commented out

This commit is contained in:
Saeed Maleki
2023-04-27 23:57:51 +00:00
parent cbfc21851d
commit 962e63b11a
2 changed files with 27 additions and 8 deletions

View File

@@ -13,8 +13,11 @@ RegisteredMemory::Impl::Impl(void* data, size_t size, int rank, TransportFlags t
TransportInfo transportInfo;
transportInfo.transport = Transport::CudaIpc;
cudaIpcMemHandle_t handle;
// TODO: translate data to a base pointer
CUDATHROW(cudaIpcGetMemHandle(&handle, data));
void* baseDataPtr;
size_t baseDataSize; // dummy
CUTHROW(cuMemGetAddressRange((CUdeviceptr*)&baseDataPtr, &baseDataSize, (CUdeviceptr)data));
CUDATHROW(cudaIpcGetMemHandle(&handle, baseDataPtr));
transportInfo.cudaIpcHandle = handle;
this->transportInfos.push_back(transportInfo);
}
@@ -72,7 +75,7 @@ TransportFlags RegisteredMemory::transports()
return pimpl->transports;
}
std::vector<char> RegisteredMemory::serialize()
MSCCLPP_API_CPP std::vector<char> RegisteredMemory::serialize()
{
std::vector<char> result;
std::copy_n(reinterpret_cast<char*>(&pimpl->size), sizeof(pimpl->size), std::back_inserter(result));
@@ -97,7 +100,7 @@ std::vector<char> RegisteredMemory::serialize()
return result;
}
RegisteredMemory RegisteredMemory::deserialize(const std::vector<char>& data)
MSCCLPP_API_CPP RegisteredMemory RegisteredMemory::deserialize(const std::vector<char>& data)
{
return RegisteredMemory(std::make_shared<Impl>(data));
}
@@ -140,10 +143,7 @@ RegisteredMemory::Impl::Impl(const std::vector<char>& serialization)
if (transports.has(Transport::CudaIpc)) {
auto entry = getTransportInfo(Transport::CudaIpc);
void* baseDataPtr;
size_t baseDataSize; // dummy
CUTHROW(cuMemGetAddressRange((CUdeviceptr*)&baseDataPtr, &baseDataSize, (CUdeviceptr)data));
CUDATHROW(cudaIpcOpenMemHandle(&baseDataPtr, entry.cudaIpcHandle, cudaIpcMemLazyEnablePeerAccess));
CUDATHROW(cudaIpcOpenMemHandle(&data, entry.cudaIpcHandle, cudaIpcMemLazyEnablePeerAccess));
INFO(MSCCLPP_P2P, "Opened CUDA IPC handle for base point of %p", data);
}
}

View File

@@ -55,6 +55,25 @@ void test_communicator(int rank, int worldSize, int nranksPerNode)
CUDATHROW(cudaMalloc(&devicePtr, size));
auto registeredMemory = communicator->registerMemory(devicePtr, size, mscclpp::Transport::CudaIpc | myIbDevice);
for (int i = 0; i < worldSize; i++) {
if (i != rank){
auto serialized = registeredMemory.serialize();
int serializedSize = serialized.size();
bootstrap->send(&serializedSize, sizeof(int), i, 0);
bootstrap->send(serialized.data(), serializedSize, i, 1);
}
}
for (int i = 0; i < worldSize; i++) {
if (i != rank){
int deserializedSize;
bootstrap->recv(&deserializedSize, sizeof(int), i, 0);
std::vector<char> deserialized(deserializedSize);
bootstrap->recv(deserialized.data(), deserializedSize, i, 1);
// auto deserializedRegisteredMemory = mscclpp::RegisteredMemory::deserialize(deserialized);
}
}
if (bootstrap->getRank() == 0)
std::cout << "Memory registeration passed" << std::endl;