Revised MemoryChannel interfaces (#508)

* Moved the `MemoryChannel::copy()` method out of the `MemoryChannel` class into a standalone function.
* Renamed `mscclpp::putPackets()` and `mscclpp::getPackets()` to `mscclpp::copyToPackets()` and `mscclpp::copyFromPackets()`, respectively, for consistency.
* Renamed `MemoryChannel::getPackets()` to `MemoryChannel::unpackPackets()` for clarity. Renamed `getPacketBuffer` to `packetBuffer`.
* Added the `MemoryChannel::unpackPacket()` method, which unpacks one packet in the buffer.
* Added the `BaseMemoryChannel` class, which contains only a semaphore without memory addresses.
* Removed the `MemoryDevice2DeviceSemaphoreDeviceHandle::signalPacket()` method, which lacks use cases.
2026-04-19 14:29:13 +00:00 · 2025-04-24 17:02:56 -07:00
parent 9df2bdb2bf
commit 710f6686dc
19 changed files with 518 additions and 499 deletions
--- a/python/test/memory_channel_test.cu
+++ b/python/test/memory_channel_test.cu
@@ -16,7 +16,7 @@ extern "C" __global__ void __launch_bounds__(1024, 1)
  if (bid < nranks && bid != my_rank) {
    if (use_packet) {
      channels[bid].putPackets(2 * my_offset, my_offset, size_per_rank, tid, blockDim.x, flag);
-      channels[bid].getPackets(2 * my_nghr_offset, my_nghr_offset, size_per_rank, tid, blockDim.x, flag);
+      channels[bid].unpackPackets(2 * my_nghr_offset, my_nghr_offset, size_per_rank, tid, blockDim.x, flag);
    } else {
      channels[bid].put(my_offset, my_offset, size_per_rank, tid, blockDim.x);
      __syncthreads();
--- a/python/test/port_channel_test.cu
+++ b/python/test/port_channel_test.cu
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT license.

-#include <mscclpp/packet_device.hpp>
+#include <mscclpp/copy_device.hpp>
 #include <mscclpp/port_channel_device.hpp>

 // be careful about using channels[my_rank] as it is inavlie and it is there just for simplicity of indexing
@@ -18,14 +18,14 @@ extern "C" __global__ void __launch_bounds__(1024, 1)
  __syncthreads();
  int flag = 123;
  if (use_packet) {
-    mscclpp::putPackets(scratch, 2 * my_offset, data, my_offset, size_per_rank, tid, nthreads, flag);
+    mscclpp::copyToPackets((char*)scratch + 2 * my_offset, (char*)data + my_offset, size_per_rank, tid, nthreads, flag);
    __syncthreads();
    if (tid < nranks && tid != my_rank) {
      channels[tid].put(2 * my_offset, 2 * my_offset, 2 * size_per_rank);
    }
    if (my_nghr != my_rank && my_nghr < nranks)
-      mscclpp::getPackets(scratch, 2 * my_nghr_offset, data, my_nghr_offset, size_per_rank, tid % nthreads_per_rank,
-                          nthreads_per_rank, flag);
+      mscclpp::copyFromPackets((char*)data + my_nghr_offset, (char*)scratch + 2 * my_nghr_offset, size_per_rank,
+                               tid % nthreads_per_rank, nthreads_per_rank, flag);
  } else {
    if (tid < nranks && tid != my_rank) {
      channels[tid].putWithSignalAndFlush(my_offset, my_offset, size_per_rank);