mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
Use GpuIpcMem for NVLS connections (#719)
* `NvlsConnection` now internally reuses `GpuIpcMem` for multicast memory handling.
* Removed unnecessary barriers from `connectNvlsCollective()` (the CUDA API handles this automatically).
* Updated `GpuIpcMem::map()` and `GpuIpcMem::mapMulticast()` to return a shared pointer with a custom deleter for unmapping, which prevents misuse of raw pointers and reduces the amount of state stored in the `GpuIpcMem` instance.
* For `RuntimeIpc` type handles, `cudaIpcOpenMemHandle` is now called in `GpuIpcMem::map()` instead of in the constructor of `GpuIpcMem`, for consistency with the other handle types.

---------

Co-authored-by: Binyang Li <binyli@microsoft.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Binyang2014 <9415966+Binyang2014@users.noreply.github.com>
This commit is contained in:
@@ -14,12 +14,12 @@ class NvlsConnection;
|
||||
struct SwitchChannel {
|
||||
private:
|
||||
void* devicePtr_;
|
||||
std::shared_ptr<char> mcPtr_;
|
||||
std::shared_ptr<void> mcPtr_;
|
||||
size_t bufferSize_;
|
||||
|
||||
public:
|
||||
using DeviceHandle = SwitchChannelDeviceHandle;
|
||||
SwitchChannel(void* devicePtr, std::shared_ptr<char> mcPtr, size_t bufferSize)
|
||||
SwitchChannel(void* devicePtr, std::shared_ptr<void> mcPtr, size_t bufferSize)
|
||||
: devicePtr_(devicePtr), mcPtr_(mcPtr), bufferSize_(bufferSize) {}
|
||||
DeviceHandle deviceHandle() const;
|
||||
void* getDevicePtr();
|
||||
@@ -34,10 +34,6 @@ class NvlsConnection {
|
||||
NvlsConnection() = delete;
|
||||
std::vector<char> serialize();
|
||||
|
||||
// Everyone needs to synchronize after creating an NVLS connection before adding devices
|
||||
void addDevice();
|
||||
void addDevice(int cudaDeviceId);
|
||||
|
||||
/// Bind the memory allocated via mscclpp::GpuBuffer to the multicast handle. The behavior
|
||||
/// is undefined if the devicePtr is not allocated by mscclpp::GpuBuffer.
|
||||
/// @param devicePtr The device pointer returned by `mscclpp::GpuBuffer::data()`.
|
||||
@@ -45,8 +41,6 @@ class NvlsConnection {
|
||||
/// @return SwitchChannel with devicePtr, mcPtr and bufferSize
|
||||
SwitchChannel bindAllocatedMemory(CUdeviceptr devicePtr, size_t size);
|
||||
|
||||
size_t getMultiCastMinGranularity();
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
std::shared_ptr<Impl> pimpl_;
|
||||
|
||||
Reference in New Issue
Block a user