Fix cpplint error in main branch (#740)

Fix the legacy cpplint error in main branch.

---------

Co-authored-by: Qinghua Zhou <qinghuahzhou@microsoft.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Binyang Li <binyli@microsoft.com>
This commit is contained in:
Qinghua Zhou
2026-02-06 01:25:12 +08:00
committed by GitHub
parent dc747b1522
commit 620378b4fb
14 changed files with 75 additions and 75 deletions

View File

@@ -9,7 +9,7 @@
#include <sstream>
template <typename... Args>
void log(Args &&...args) {
void log(Args&&... args) {
std::stringstream ss;
(ss << ... << args);
ss << std::endl;
@@ -23,7 +23,7 @@ __device__ void spin_cycles(unsigned long long cycles) {
}
}
__global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, int iter) {
__global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle* devHandle, int iter) {
if (threadIdx.x + blockIdx.x * blockDim.x == 0) {
for (int i = 0; i < iter; ++i) {
devHandle->relaxedWait();
@@ -34,7 +34,7 @@ __global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, in
}
}
__global__ void gpuKernel1(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, int iter) {
__global__ void gpuKernel1(mscclpp::BaseMemoryChannelDeviceHandle* devHandle, int iter) {
if (threadIdx.x + blockIdx.x * blockDim.x == 0) {
for (int i = 0; i < iter; ++i) {
devHandle->relaxedSignal();
@@ -88,7 +88,7 @@ int main() {
mscclpp::Semaphore sema0(/*localSemaphoreStub*/ semaStub0, /*remoteSemaphoreStub*/ semaStub1);
mscclpp::BaseMemoryChannel memChan0(sema0);
mscclpp::BaseMemoryChannelDeviceHandle memChanHandle0 = memChan0.deviceHandle();
void *devHandle0;
void* devHandle0;
MSCCLPP_CUDATHROW(cudaMalloc(&devHandle0, sizeof(mscclpp::BaseMemoryChannelDeviceHandle)));
MSCCLPP_CUDATHROW(cudaMemcpy(devHandle0, &memChanHandle0, sizeof(memChanHandle0), cudaMemcpyHostToDevice));
@@ -98,14 +98,14 @@ int main() {
mscclpp::Semaphore sema1(/*localSemaphoreStub*/ semaStub1, /*remoteSemaphoreStub*/ semaStub0);
mscclpp::BaseMemoryChannel memChan1(sema1);
mscclpp::BaseMemoryChannelDeviceHandle memChanHandle1 = memChan1.deviceHandle();
void *devHandle1;
void* devHandle1;
MSCCLPP_CUDATHROW(cudaMalloc(&devHandle1, sizeof(mscclpp::BaseMemoryChannelDeviceHandle)));
MSCCLPP_CUDATHROW(cudaMemcpy(devHandle1, &memChanHandle1, sizeof(memChanHandle1), cudaMemcpyHostToDevice));
log("GPU 0: Launching gpuKernel0 ...");
MSCCLPP_CUDATHROW(cudaSetDevice(0));
gpuKernel0<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle *>(devHandle0), iter);
gpuKernel0<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle*>(devHandle0), iter);
MSCCLPP_CUDATHROW(cudaGetLastError());
log("GPU 1: Launching gpuKernel1 ...");
@@ -115,7 +115,7 @@ int main() {
MSCCLPP_CUDATHROW(cudaEventCreate(&start));
MSCCLPP_CUDATHROW(cudaEventCreate(&end));
MSCCLPP_CUDATHROW(cudaEventRecord(start));
gpuKernel1<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle *>(devHandle1), iter);
gpuKernel1<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle*>(devHandle1), iter);
MSCCLPP_CUDATHROW(cudaGetLastError());
MSCCLPP_CUDATHROW(cudaEventRecord(end));
MSCCLPP_CUDATHROW(cudaEventSynchronize(end));

View File

@@ -14,7 +14,7 @@
#define PORT_NUMBER "50505"
template <typename... Args>
void log(Args &&...args) {
void log(Args&&... args) {
std::stringstream ss;
(ss << ... << args);
ss << std::endl;
@@ -50,7 +50,7 @@ __device__ void spin_cycles(unsigned long long cycles) {
}
}
__global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, int iter) {
__global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle* devHandle, int iter) {
if (threadIdx.x + blockIdx.x * blockDim.x == 0) {
for (int i = 0; i < iter; ++i) {
devHandle->relaxedWait();
@@ -61,7 +61,7 @@ __global__ void gpuKernel0(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, in
}
}
__global__ void gpuKernel1(mscclpp::BaseMemoryChannelDeviceHandle *devHandle, int iter) {
__global__ void gpuKernel1(mscclpp::BaseMemoryChannelDeviceHandle* devHandle, int iter) {
if (threadIdx.x + blockIdx.x * blockDim.x == 0) {
for (int i = 0; i < iter; ++i) {
devHandle->relaxedSignal();
@@ -115,14 +115,14 @@ void worker(int gpuId) {
mscclpp::BaseMemoryChannel memChan(sema);
auto memChanHandle = memChan.deviceHandle();
void *devHandle;
void* devHandle;
MSCCLPP_CUDATHROW(cudaMalloc(&devHandle, sizeof(memChanHandle)));
MSCCLPP_CUDATHROW(cudaMemcpy(devHandle, &memChanHandle, sizeof(memChanHandle), cudaMemcpyHostToDevice));
log("GPU ", gpuId, ": Launching a GPU kernel ...");
if (gpuId == 0) {
gpuKernel0<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle *>(devHandle), iter);
gpuKernel0<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle*>(devHandle), iter);
MSCCLPP_CUDATHROW(cudaGetLastError());
MSCCLPP_CUDATHROW(cudaDeviceSynchronize());
} else {
@@ -130,7 +130,7 @@ void worker(int gpuId) {
MSCCLPP_CUDATHROW(cudaEventCreate(&start));
MSCCLPP_CUDATHROW(cudaEventCreate(&end));
MSCCLPP_CUDATHROW(cudaEventRecord(start));
gpuKernel1<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle *>(devHandle), iter);
gpuKernel1<<<1, 1>>>(reinterpret_cast<mscclpp::BaseMemoryChannelDeviceHandle*>(devHandle), iter);
MSCCLPP_CUDATHROW(cudaGetLastError());
MSCCLPP_CUDATHROW(cudaEventRecord(end));
MSCCLPP_CUDATHROW(cudaEventSynchronize(end));

View File

@@ -16,7 +16,7 @@
#define PORT_NUMBER "50505"
template <typename... Args>
void log(Args &&...args) {
void log(Args&&... args) {
std::stringstream ss;
(ss << ... << args);
ss << std::endl;
@@ -47,7 +47,7 @@ int wait_process(int pid) {
__device__ mscclpp::DeviceSyncer devSyncer;
__global__ void bidirPutKernel(mscclpp::MemoryChannelDeviceHandle *devHandle, size_t copyBytes, int myRank) {
__global__ void bidirPutKernel(mscclpp::MemoryChannelDeviceHandle* devHandle, size_t copyBytes, int myRank) {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid == 0) {
devHandle->relaxedSignal();
@@ -65,7 +65,7 @@ __global__ void bidirPutKernel(mscclpp::MemoryChannelDeviceHandle *devHandle, si
}
}
__global__ void bidirGetKernel(mscclpp::MemoryChannelDeviceHandle *devHandle, size_t copyBytes, int myRank) {
__global__ void bidirGetKernel(mscclpp::MemoryChannelDeviceHandle* devHandle, size_t copyBytes, int myRank) {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid == 0) {
devHandle->relaxedSignal();
@@ -79,7 +79,7 @@ __global__ void bidirGetKernel(mscclpp::MemoryChannelDeviceHandle *devHandle, si
devHandle->get(srcOffset, dstOffset, copyBytes, /*threadId*/ tid, /*numThreads*/ blockDim.x * gridDim.x);
}
__global__ void bidirPutPacketKernel(mscclpp::MemoryChannelDeviceHandle *devHandle, size_t copyBytes, int myRank,
__global__ void bidirPutPacketKernel(mscclpp::MemoryChannelDeviceHandle* devHandle, size_t copyBytes, int myRank,
uint32_t flag) {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid == 0) {
@@ -95,7 +95,7 @@ __global__ void bidirPutPacketKernel(mscclpp::MemoryChannelDeviceHandle *devHand
devHandle->unpackPackets(pktBufOffset, dstOffset, copyBytes, tid, blockDim.x * gridDim.x, flag);
}
void worker(int myRank, int gpuId, const std::string &ipPort) {
void worker(int myRank, int gpuId, const std::string& ipPort) {
MSCCLPP_CUDATHROW(cudaSetDevice(gpuId));
const int remoteRank = myRank == 0 ? 1 : 0;
const int nRanks = 2;
@@ -132,8 +132,8 @@ void worker(int myRank, int gpuId, const std::string &ipPort) {
auto memChanHandle = memChan.deviceHandle();
auto memPktChanHandle = memPktChan.deviceHandle();
void *devHandle;
void *devPktHandle;
void* devHandle;
void* devPktHandle;
MSCCLPP_CUDATHROW(cudaMalloc(&devHandle, sizeof(memChanHandle)));
MSCCLPP_CUDATHROW(cudaMalloc(&devPktHandle, sizeof(memPktChanHandle)));
MSCCLPP_CUDATHROW(cudaMemcpy(devHandle, &memChanHandle, sizeof(memChanHandle), cudaMemcpyHostToDevice));
@@ -145,18 +145,18 @@ void worker(int myRank, int gpuId, const std::string &ipPort) {
std::function<void(size_t)> kernels[3];
kernels[0] = [&](size_t copyBytes) {
bidirPutKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle *>(devHandle),
copyBytes, myRank);
bidirPutKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle*>(devHandle), copyBytes,
myRank);
};
kernels[1] = [&](size_t copyBytes) {
bidirGetKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle *>(devHandle),
copyBytes, myRank);
bidirGetKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle*>(devHandle), copyBytes,
myRank);
};
kernels[2] = [&](size_t copyBytes) {
static uint32_t flag = 1;
bidirPutPacketKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle *>(devPktHandle),
bidirPutPacketKernel<<<32, 1024, 0, stream>>>(reinterpret_cast<mscclpp::MemoryChannelDeviceHandle*>(devPktHandle),
copyBytes, myRank, flag++);
};
@@ -215,7 +215,7 @@ void worker(int myRank, int gpuId, const std::string &ipPort) {
bootstrap->barrier();
}
int main(int argc, char **argv) {
int main(int argc, char** argv) {
if (argc == 1) {
int pid0 = spawn_process([]() { worker(0, 0, "lo:127.0.0.1:" PORT_NUMBER); });
int pid1 = spawn_process([]() { worker(1, 1, "lo:127.0.0.1:" PORT_NUMBER); });
@@ -241,7 +241,7 @@ int main(int argc, char **argv) {
try {
rank = std::stoi(argv[2]);
gpuId = std::stoi(argv[3]);
} catch (const std::exception &) {
} catch (const std::exception&) {
log("Error: rank and gpu_id must be valid integers.");
return -1;
}

View File

@@ -16,7 +16,7 @@
#define PORT_NUMBER "50505"
template <typename... Args>
void log(Args &&...args) {
void log(Args&&... args) {
std::stringstream ss;
(ss << ... << args);
ss << std::endl;
@@ -45,7 +45,7 @@ int wait_process(int pid) {
return -1;
}
__global__ void bidirPutKernel(mscclpp::PortChannelDeviceHandle *devHandle, size_t copyBytes, int myRank) {
__global__ void bidirPutKernel(mscclpp::PortChannelDeviceHandle* devHandle, size_t copyBytes, int myRank) {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid == 0) {
devHandle->signal();
@@ -58,7 +58,7 @@ __global__ void bidirPutKernel(mscclpp::PortChannelDeviceHandle *devHandle, size
}
}
void worker(int rank, int gpuId, const std::string &ipPort, mscclpp::Transport transport) {
void worker(int rank, int gpuId, const std::string& ipPort, mscclpp::Transport transport) {
MSCCLPP_CUDATHROW(cudaSetDevice(gpuId));
const int myRank = rank;
const int remoteRank = myRank == 0 ? 1 : 0;
@@ -90,7 +90,7 @@ void worker(int rank, int gpuId, const std::string &ipPort, mscclpp::Transport t
auto portChanHandle = portChan.deviceHandle();
void *devHandle;
void* devHandle;
MSCCLPP_CUDATHROW(cudaMalloc(&devHandle, sizeof(portChanHandle)));
MSCCLPP_CUDATHROW(cudaMemcpy(devHandle, &portChanHandle, sizeof(portChanHandle), cudaMemcpyHostToDevice));
@@ -100,7 +100,7 @@ void worker(int rank, int gpuId, const std::string &ipPort, mscclpp::Transport t
std::function<void(size_t)> kernels[1];
kernels[0] = [&](size_t copyBytes) {
bidirPutKernel<<<1, 1, 0, stream>>>(reinterpret_cast<mscclpp::PortChannelDeviceHandle *>(devHandle), copyBytes,
bidirPutKernel<<<1, 1, 0, stream>>>(reinterpret_cast<mscclpp::PortChannelDeviceHandle*>(devHandle), copyBytes,
myRank);
};
@@ -166,7 +166,7 @@ void worker(int rank, int gpuId, const std::string &ipPort, mscclpp::Transport t
bootstrap->barrier();
}
mscclpp::Transport parseTransport(const std::string &transportStr) {
mscclpp::Transport parseTransport(const std::string& transportStr) {
if (transportStr == "CudaIpc") return mscclpp::Transport::CudaIpc;
if (transportStr == "IB0") return mscclpp::Transport::IB0;
if (transportStr == "IB1") return mscclpp::Transport::IB1;
@@ -180,7 +180,7 @@ mscclpp::Transport parseTransport(const std::string &transportStr) {
throw std::runtime_error("Unknown transport: " + transportStr);
}
int main(int argc, char **argv) {
int main(int argc, char** argv) {
if (argc == 1) {
int pid0 = spawn_process([]() { worker(0, 0, "lo:127.0.0.1:" PORT_NUMBER, mscclpp::Transport::CudaIpc); });
int pid1 = spawn_process([]() { worker(1, 1, "lo:127.0.0.1:" PORT_NUMBER, mscclpp::Transport::CudaIpc); });

View File

@@ -19,11 +19,11 @@
#else // defined(DEBUG_BUILD)
#if defined(MSCCLPP_DEVICE_HIP)
extern "C" __device__ void __assert_fail(const char *__assertion, const char *__file, unsigned int __line,
const char *__function);
extern "C" __device__ void __assert_fail(const char* __assertion, const char* __file, unsigned int __line,
const char* __function);
#else // !defined(MSCCLPP_DEVICE_HIP)
extern "C" __host__ __device__ void __assert_fail(const char *__assertion, const char *__file, unsigned int __line,
const char *__function) __THROW;
extern "C" __host__ __device__ void __assert_fail(const char* __assertion, const char* __file, unsigned int __line,
const char* __function) __THROW;
#endif // !defined(MSCCLPP_DEVICE_HIP)
/// Assert a condition on the device and print a message if the condition is false.

View File

@@ -11,17 +11,17 @@ using namespace mscclpp;
#define REGISTER_EXCEPTION_TRANSLATOR(name_) \
nb::register_exception_translator( \
[](const std::exception_ptr &p, void *payload) { \
[](const std::exception_ptr& p, void* payload) { \
try { \
std::rethrow_exception(p); \
} catch (const name_ &e) { \
PyErr_SetObject(reinterpret_cast<PyObject *>(payload), \
} catch (const name_& e) { \
PyErr_SetObject(reinterpret_cast<PyObject*>(payload), \
PyTuple_Pack(2, PyLong_FromLong(long(e.getErrorCode())), PyUnicode_FromString(e.what()))); \
} \
}, \
m.attr(#name_).ptr());
void register_error(nb::module_ &m) {
void register_error(nb::module_& m) {
nb::enum_<ErrorCode>(m, "CppErrorCode")
.value("SystemError", ErrorCode::SystemError)
.value("InternalError", ErrorCode::InternalError)

View File

@@ -8,7 +8,7 @@
namespace nb = nanobind;
void register_npkit(nb::module_ &m) {
void register_npkit(nb::module_& m) {
nb::module_ sub_m = m.def_submodule("cpp_npkit", "NPKit functions");
sub_m.def("init", &NpKit::Init);
sub_m.def("dump", &NpKit::Dump);

View File

@@ -6,7 +6,7 @@ int getDeviceNumaNode(int cudaDev);
void numaBind(int node);
}; // namespace mscclpp
void register_numa(nb::module_ &m) {
void register_numa(nb::module_& m) {
nb::module_ sub_m = m.def_submodule("cpp_numa", "numa functions");
sub_m.def("get_device_numa_node", &mscclpp::getDeviceNumaNode);
sub_m.def("numa_bind", &mscclpp::numaBind);

View File

@@ -23,14 +23,14 @@ void CudaIpcStream::setStreamIfNeeded() {
}
}
void CudaIpcStream::memcpyD2D(void *dst, const void *src, size_t nbytes) {
void CudaIpcStream::memcpyD2D(void* dst, const void* src, size_t nbytes) {
CudaDeviceGuard deviceGuard(deviceId_);
setStreamIfNeeded();
MSCCLPP_CUDATHROW(cudaMemcpyAsync(dst, src, nbytes, cudaMemcpyDeviceToDevice, *stream_));
dirty_ = true;
}
void CudaIpcStream::memcpyH2D(void *dst, const void *src, size_t nbytes) {
void CudaIpcStream::memcpyH2D(void* dst, const void* src, size_t nbytes) {
CudaDeviceGuard deviceGuard(deviceId_);
setStreamIfNeeded();
MSCCLPP_CUDATHROW(cudaMemcpyAsync(dst, src, nbytes, cudaMemcpyHostToDevice, *stream_));
@@ -48,7 +48,7 @@ void CudaIpcStream::sync() {
Context::Impl::Impl() {}
IbCtx *Context::Impl::getIbContext(Transport ibTransport) {
IbCtx* Context::Impl::getIbContext(Transport ibTransport) {
// Find IB context or create it
auto it = ibContexts_.find(ibTransport);
if (it == ibContexts_.end()) {
@@ -70,7 +70,7 @@ MSCCLPP_API_CPP Context::Context() : pimpl_(std::make_unique<Impl>()) {}
MSCCLPP_API_CPP Context::~Context() = default;
MSCCLPP_API_CPP RegisteredMemory Context::registerMemory(void *ptr, size_t size, TransportFlags transports) {
MSCCLPP_API_CPP RegisteredMemory Context::registerMemory(void* ptr, size_t size, TransportFlags transports) {
return RegisteredMemory(std::make_shared<RegisteredMemory::Impl>(ptr, size, transports, *pimpl_));
}
@@ -78,7 +78,7 @@ MSCCLPP_API_CPP Endpoint Context::createEndpoint(EndpointConfig config) {
return Endpoint(std::make_shared<Endpoint::Impl>(config, *pimpl_));
}
MSCCLPP_API_CPP Connection Context::connect(const Endpoint &localEndpoint, const Endpoint &remoteEndpoint) {
MSCCLPP_API_CPP Connection Context::connect(const Endpoint& localEndpoint, const Endpoint& remoteEndpoint) {
if (localEndpoint.device().type == DeviceType::GPU && localEndpoint.device().id < 0) {
throw Error("No GPU device ID provided for local endpoint", ErrorCode::InvalidUsage);
}

View File

@@ -24,9 +24,9 @@ class CudaIpcStream {
public:
CudaIpcStream(int deviceId);
void memcpyD2D(void *dst, const void *src, size_t nbytes);
void memcpyD2D(void* dst, const void* src, size_t nbytes);
void memcpyH2D(void *dst, const void *src, size_t nbytes);
void memcpyH2D(void* dst, const void* src, size_t nbytes);
void sync();
@@ -44,7 +44,7 @@ struct Context::Impl {
Impl();
IbCtx *getIbContext(Transport ibTransport);
IbCtx* getIbContext(Transport ibTransport);
std::shared_ptr<uint64_t> getToken();
};

View File

@@ -46,7 +46,7 @@ struct GpuIpcMemHandle {
char handle[64];
} fabric;
static void deleter(GpuIpcMemHandle *handle);
static void deleter(GpuIpcMemHandle* handle);
// We make GpuIpcMemHandle trivially copyable for easy serialization,
// and thus it cannot have explicit destructors.
@@ -61,7 +61,7 @@ struct GpuIpcMemHandle {
using Base::Base;
// Allow implicit conversion from Base
UniquePtr(Base &&other) : Base(std::move(other)) {}
UniquePtr(Base&& other) : Base(std::move(other)) {}
};
static UniquePtr create(const CUdeviceptr ptr);
@@ -70,7 +70,7 @@ struct GpuIpcMemHandle {
using UniqueGpuIpcMemHandle = GpuIpcMemHandle::UniquePtr;
std::ostream &operator<<(std::ostream &os, const GpuIpcMemHandle::TypeFlags &typeFlags);
std::ostream& operator<<(std::ostream& os, const GpuIpcMemHandle::TypeFlags& typeFlags);
static_assert(std::is_trivially_copyable_v<GpuIpcMemHandle>);
@@ -82,7 +82,7 @@ class GpuIpcMem : public std::enable_shared_from_this<GpuIpcMem> {
/// Create a GpuIpcMem instance from a GpuIpcMemHandle.
/// @param handle The handle to import.
/// @return A shared_ptr to the created GpuIpcMem instance.
static std::shared_ptr<GpuIpcMem> create(const GpuIpcMemHandle &handle);
static std::shared_ptr<GpuIpcMem> create(const GpuIpcMemHandle& handle);
~GpuIpcMem();
@@ -102,7 +102,7 @@ class GpuIpcMem : public std::enable_shared_from_this<GpuIpcMem> {
std::shared_ptr<void> mapMulticast(int numDevices, size_t mcOffset, CUdeviceptr bufferAddr, size_t bufferSize);
private:
GpuIpcMem(const GpuIpcMemHandle &handle);
GpuIpcMem(const GpuIpcMemHandle& handle);
GpuIpcMemHandle handle_;
CUmemGenericAllocationHandle allocHandle_;

View File

@@ -12,12 +12,12 @@ namespace mscclpp {
struct IBVerbs {
private:
static void *dlsym(const std::string &symbol, bool allowReturnNull = false);
static void* dlsym(const std::string& symbol, bool allowReturnNull = false);
public:
#define REGISTER_IBV_FUNC_WITH_NAME(name__, func__) \
template <typename... Args> \
static inline auto(name__)(Args && ...args) { \
static inline auto(name__)(Args && ... args) { \
static_assert(sizeof(&::func__) > 0, #func__ " is expected be a function, not a macro"); \
static decltype(&::func__) impl = nullptr; \
if (!impl) impl = reinterpret_cast<decltype(impl)>(IBVerbs::dlsym(#func__)); \
@@ -46,7 +46,7 @@ struct IBVerbs {
REGISTER_IBV_FUNC(ibv_wc_status_str)
static bool isDmabufSupported();
static struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd *, uint64_t, size_t, uint64_t, int, int);
static struct ibv_mr* ibv_reg_dmabuf_mr(struct ibv_pd*, uint64_t, size_t, uint64_t, int, int);
///
/// Below is for cases where the API (may be / is) a macro. Refer to `infiniband/verbs.h`.
@@ -57,8 +57,8 @@ struct IBVerbs {
#else // defined(ibv_get_device_list)
#undef ibv_get_device_list
REGISTER_IBV_FUNC(ibv_static_providers)
static inline struct ibv_device **ibv_get_device_list(int *num_devices) {
using FuncType = struct ibv_device **(*)(int *);
static inline struct ibv_device** ibv_get_device_list(int* num_devices) {
using FuncType = struct ibv_device** (*)(int*);
static FuncType impl = nullptr;
if (!impl) impl = reinterpret_cast<FuncType>(IBVerbs::dlsym("ibv_get_device_list"));
IBVerbs::ibv_static_providers(NULL, _RDMA_STATIC_PREFIX(RDMA_STATIC_PROVIDERS), NULL);
@@ -67,21 +67,21 @@ struct IBVerbs {
#endif // defined(ibv_get_device_list)
#undef ibv_query_port
static inline int ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) {
static inline int ibv_query_port(struct ibv_context* context, uint8_t port_num, struct ibv_port_attr* port_attr) {
static decltype(&::ibv_query_port) impl = nullptr;
if (!impl) impl = reinterpret_cast<decltype(impl)>(IBVerbs::dlsym("ibv_query_port"));
struct verbs_context *vctx = verbs_get_ctx_op(context, query_port);
struct verbs_context* vctx = verbs_get_ctx_op(context, query_port);
if (!vctx) {
int rc;
::memset(port_attr, 0, sizeof(*port_attr));
rc = impl(context, port_num, (struct _compat_ibv_port_attr *)port_attr);
rc = impl(context, port_num, (struct _compat_ibv_port_attr*)port_attr);
return rc;
}
return vctx->query_port(context, port_num, port_attr, sizeof(*port_attr));
}
#undef ibv_reg_mr
static inline struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) {
static inline struct ibv_mr* ibv_reg_mr(struct ibv_pd* pd, void* addr, size_t length, int access) {
static decltype(&::ibv_reg_mr) impl = nullptr;
static decltype(&::ibv_reg_mr_iova2) impl_iova2 = nullptr;
int is_access_const = __builtin_constant_p(((int)(access)&IBV_ACCESS_OPTIONAL_RANGE) == 0);
@@ -98,11 +98,11 @@ struct IBVerbs {
/// Below is for cases where the API (may be / is) a static function. Refer to `infiniband/verbs.h`.
///
static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) {
static inline int ibv_post_send(struct ibv_qp* qp, struct ibv_send_wr* wr, struct ibv_send_wr** bad_wr) {
return qp->context->ops.post_send(qp, wr, bad_wr);
}
static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) {
static inline int ibv_poll_cq(struct ibv_cq* cq, int num_entries, struct ibv_wc* wc) {
return cq->context->ops.poll_cq(cq, num_entries, wc);
}
};

View File

@@ -8,11 +8,11 @@
extern "C" __attribute__((visibility("default"))) unsigned int la_version(unsigned int) { return LAV_CURRENT; }
extern "C" __attribute__((visibility("default"))) char *la_objsearch(const char *name, uintptr_t *, unsigned int) {
const char *library = "libmscclpp_nccl.so";
extern "C" __attribute__((visibility("default"))) char* la_objsearch(const char* name, uintptr_t*, unsigned int) {
const char* library = "libmscclpp_nccl.so";
if (strcmp(name, "libnccl.so.2") && strcmp(name, "libnccl.so") && strcmp(name, "librccl.so") &&
strcmp(name, "librccl.so.1")) {
return (char *)name;
return (char*)name;
}
return (char *)library;
return (char*)library;
}

View File

@@ -12,10 +12,10 @@
__constant__ mscclpp::PortChannelDeviceHandle gPortChannel;
__global__ void kernelLocalPortChannelTest(void *dst, void *src, size_t bytes, int *ret) {
__global__ void kernelLocalPortChannelTest(void* dst, void* src, size_t bytes, int* ret) {
if (blockIdx.x == 0) {
// sender
int *ptr = reinterpret_cast<int *>(src);
int* ptr = reinterpret_cast<int*>(src);
for (size_t idx = threadIdx.x; idx < bytes / sizeof(int); idx += blockDim.x) {
ptr[idx] = MAGIC_CONST;
}
@@ -29,7 +29,7 @@ __global__ void kernelLocalPortChannelTest(void *dst, void *src, size_t bytes, i
gPortChannel.wait();
}
__syncthreads();
int *ptr = reinterpret_cast<int *>(dst);
int* ptr = reinterpret_cast<int*>(dst);
for (size_t idx = threadIdx.x; idx < bytes / sizeof(int); idx += blockDim.x) {
if (ptr[idx] != MAGIC_CONST) {
*ret = 1; // Error: value mismatch