This commit is contained in:
Changho Hwang
2026-04-04 06:22:04 +00:00
parent f62633ad41
commit b04fa2daa7
4 changed files with 11 additions and 11 deletions

View File

@@ -91,8 +91,8 @@ IbMr::IbMr(ibv_pd* pd, void* buff, std::size_t size, bool isDataDirect) : mr_(nu
// routes DMA through the Data Direct engine for correct ordering and higher throughput.
// Fall back to the default (non-PCIe) mapping if the flag is unsupported.
#if (CUDA_VERSION >= 12030)
CUresult cuRes = cuMemGetHandleForAddressRange(
&fd, addr, rangeSize, CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE);
CUresult cuRes = cuMemGetHandleForAddressRange(&fd, addr, rangeSize, CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE);
if (cuRes != CUDA_SUCCESS || fd < 0) {
if (fd >= 0) ::close(fd);
fd = -1;

View File

@@ -110,9 +110,9 @@ class IBConnection : public BaseConnection {
bool gdrSignalForwarding_; // ibNoAtomic_ && gdrEnabled() — decided once at construction
std::thread recvThread_;
std::atomic<bool> stopRecvThread_;
std::atomic<bool> recvThreadError_; // Set by recv thread on fatal error
std::string recvThreadErrorMsg_; // Error message from recv thread (written before recvThreadError_ is set)
int localGpuDeviceId_; // Local GPU device ID for CUDA context and GDR mapping
std::atomic<bool> recvThreadError_; // Set by recv thread on fatal error
std::string recvThreadErrorMsg_; // Error message from recv thread (written before recvThreadError_ is set)
int localGpuDeviceId_; // Local GPU device ID for CUDA context and GDR mapping
// Signal forwarding design (HostNoAtomic mode):
// - Sender: 0-byte RDMA WRITE_WITH_IMM carrying the lower 32 bits of the token in imm_data.

View File

@@ -222,7 +222,7 @@ int TestRegistry::runAllTests(int argc, char* argv[]) {
skippedByFilter++;
continue;
}
totalToRun++;;
totalToRun++;
}
if (gMpiRank == 0) {

View File

@@ -517,12 +517,12 @@ void PortChannelOneToOneTest::testBandwidth(PingPongTestParams params) {
if (gEnv->rank >= numRanksToUse) return;
const int maxElem = 32 * 1024 * 1024; // 128 MB per direction
const int bufElem = maxElem * 2; // 2x for bidirectional
const int bufElem = maxElem * 2; // 2x for bidirectional
std::vector<mscclpp::PortChannel> portChannels;
std::shared_ptr<int> buff = mscclpp::GpuBuffer<int>(bufElem).memory();
setupMeshConnections(portChannels, params.useIPC, params.useIB, params.useEthernet, buff.get(),
bufElem * sizeof(int), nullptr, 0, params.ibMode);
setupMeshConnections(portChannels, params.useIPC, params.useIB, params.useEthernet, buff.get(), bufElem * sizeof(int),
nullptr, 0, params.ibMode);
std::vector<DeviceHandle<mscclpp::PortChannel>> portChannelHandles;
for (auto& ch : portChannels) portChannelHandles.push_back(ch.deviceHandle());
@@ -554,8 +554,8 @@ void PortChannelOneToOneTest::testBandwidth(PingPongTestParams params) {
double elapsedMsPerIter = elapsedUs / 1e3 / nIters;
double gbps = copyBytes / elapsedMsPerIter * 1e-6;
double sizeKB = copyBytes / 1024.0;
std::string label = (sizeKB >= 1024.0) ? (std::to_string((int)(sizeKB / 1024.0)) + " MB")
: (std::to_string((int)sizeKB) + " KB");
std::string label =
(sizeKB >= 1024.0) ? (std::to_string((int)(sizeKB / 1024.0)) + " MB") : (std::to_string((int)sizeKB) + " KB");
::mscclpp::test::reportPerfResult(label, gbps, "GB/s");
}
}