diff --git a/src/core/ib.cc b/src/core/ib.cc index 290511e4..557f0426 100644 --- a/src/core/ib.cc +++ b/src/core/ib.cc @@ -91,8 +91,8 @@ IbMr::IbMr(ibv_pd* pd, void* buff, std::size_t size, bool isDataDirect) : mr_(nu // routes DMA through the Data Direct engine for correct ordering and higher throughput. // Fall back to the default (non-PCIe) mapping if the flag is unsupported. #if (CUDA_VERSION >= 12030) - CUresult cuRes = cuMemGetHandleForAddressRange( - &fd, addr, rangeSize, CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE); + CUresult cuRes = cuMemGetHandleForAddressRange(&fd, addr, rangeSize, CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, + CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE); if (cuRes != CUDA_SUCCESS || fd < 0) { if (fd >= 0) ::close(fd); fd = -1; diff --git a/src/core/include/connection.hpp b/src/core/include/connection.hpp index 077a6c6a..22a9930f 100644 --- a/src/core/include/connection.hpp +++ b/src/core/include/connection.hpp @@ -110,9 +110,9 @@ class IBConnection : public BaseConnection { bool gdrSignalForwarding_; // ibNoAtomic_ && gdrEnabled() — decided once at construction std::thread recvThread_; std::atomic stopRecvThread_; - std::atomic recvThreadError_; // Set by recv thread on fatal error - std::string recvThreadErrorMsg_; // Error message from recv thread (written before recvThreadError_ is set) - int localGpuDeviceId_; // Local GPU device ID for CUDA context and GDR mapping + std::atomic recvThreadError_; // Set by recv thread on fatal error + std::string recvThreadErrorMsg_; // Error message from recv thread (written before recvThreadError_ is set) + int localGpuDeviceId_; // Local GPU device ID for CUDA context and GDR mapping // Signal forwarding design (HostNoAtomic mode): // - Sender: 0-byte RDMA WRITE_WITH_IMM carrying the lower 32 bits of the token in imm_data. diff --git a/test/framework.cc b/test/framework.cc index f62d8bbd..941fdcba 100644 --- a/test/framework.cc +++ b/test/framework.cc @@ -222,7 +222,7 @@ int TestRegistry::runAllTests(int argc, char* argv[]) { skippedByFilter++; continue; } - totalToRun++;; + totalToRun++; } if (gMpiRank == 0) { diff --git a/test/mp_unit/port_channel_tests.cu b/test/mp_unit/port_channel_tests.cu index 4a9c8f3c..166d7ed2 100644 --- a/test/mp_unit/port_channel_tests.cu +++ b/test/mp_unit/port_channel_tests.cu @@ -517,12 +517,12 @@ void PortChannelOneToOneTest::testBandwidth(PingPongTestParams params) { if (gEnv->rank >= numRanksToUse) return; const int maxElem = 32 * 1024 * 1024; // 128 MB per direction - const int bufElem = maxElem * 2; // 2x for bidirectional + const int bufElem = maxElem * 2; // 2x for bidirectional std::vector portChannels; std::shared_ptr buff = mscclpp::GpuBuffer(bufElem).memory(); - setupMeshConnections(portChannels, params.useIPC, params.useIB, params.useEthernet, buff.get(), - bufElem * sizeof(int), nullptr, 0, params.ibMode); + setupMeshConnections(portChannels, params.useIPC, params.useIB, params.useEthernet, buff.get(), bufElem * sizeof(int), + nullptr, 0, params.ibMode); std::vector> portChannelHandles; for (auto& ch : portChannels) portChannelHandles.push_back(ch.deviceHandle()); @@ -554,8 +554,8 @@ void PortChannelOneToOneTest::testBandwidth(PingPongTestParams params) { double elapsedMsPerIter = elapsedUs / 1e3 / nIters; double gbps = copyBytes / elapsedMsPerIter * 1e-6; double sizeKB = copyBytes / 1024.0; - std::string label = (sizeKB >= 1024.0) ? (std::to_string((int)(sizeKB / 1024.0)) + " MB") - : (std::to_string((int)sizeKB) + " KB"); + std::string label = + (sizeKB >= 1024.0) ? (std::to_string((int)(sizeKB / 1024.0)) + " MB") : (std::to_string((int)sizeKB) + " KB"); ::mscclpp::test::reportPerfResult(label, gbps, "GB/s"); } }