Add a new IB stack impl that doesn't use RDMA atomics (#728)

* Added configurable InfiniBand (IB) signaling mode.
`EndpointConfig::Ib::Mode` enum selects the mode (`Default`, `Host`,
`HostNoAtomic`). `Default` is equivalent to `Host` unless specified
otherwise by the environment variable `MSCCLPP_IBV_MODE`. `Host` corresponds to the
previous implementation using RDMA atomics for signaling, while
`HostNoAtomic` uses write-with-immediate instead.
* Updated the Python bindings and API accordingly.
This commit is contained in:
Changho Hwang
2026-02-10 10:07:53 +09:00
committed by GitHub
parent c12822a7af
commit 42be3660e0
20 changed files with 648 additions and 222 deletions

View File

@@ -147,22 +147,31 @@ void register_core(nb::module_& m) {
return ss.str();
});
nb::enum_<EndpointConfig::Ib::Mode>(m, "CppIbMode")
.value("Default", EndpointConfig::Ib::Mode::Default)
.value("Host", EndpointConfig::Ib::Mode::Host)
.value("HostNoAtomic", EndpointConfig::Ib::Mode::HostNoAtomic);
nb::class_<EndpointConfig::Ib>(m, "CppEndpointConfigIb")
.def(nb::init<>())
.def(nb::init<int, int, int, int, int, int, int>(), nb::arg("device_index") = -1,
.def(nb::init<int, int, int, int, int, int, int, int, EndpointConfig::Ib::Mode>(), nb::arg("device_index") = -1,
nb::arg("port") = EndpointConfig::Ib::DefaultPort,
nb::arg("gid_index") = EndpointConfig::Ib::DefaultGidIndex,
nb::arg("max_cq_size") = EndpointConfig::Ib::DefaultMaxCqSize,
nb::arg("max_cq_poll_num") = EndpointConfig::Ib::DefaultMaxCqPollNum,
nb::arg("max_send_wr") = EndpointConfig::Ib::DefaultMaxSendWr,
nb::arg("max_wr_per_send") = EndpointConfig::Ib::DefaultMaxWrPerSend)
nb::arg("max_recv_wr") = EndpointConfig::Ib::DefaultMaxRecvWr,
nb::arg("max_wr_per_send") = EndpointConfig::Ib::DefaultMaxWrPerSend,
nb::arg("mode") = EndpointConfig::Ib::Mode::Default)
.def_rw("device_index", &EndpointConfig::Ib::deviceIndex)
.def_rw("port", &EndpointConfig::Ib::port)
.def_rw("gid_index", &EndpointConfig::Ib::gidIndex)
.def_rw("max_cq_size", &EndpointConfig::Ib::maxCqSize)
.def_rw("max_cq_poll_num", &EndpointConfig::Ib::maxCqPollNum)
.def_rw("max_send_wr", &EndpointConfig::Ib::maxSendWr)
.def_rw("max_wr_per_send", &EndpointConfig::Ib::maxWrPerSend);
.def_rw("max_recv_wr", &EndpointConfig::Ib::maxRecvWr)
.def_rw("max_wr_per_send", &EndpointConfig::Ib::maxWrPerSend)
.def_rw("mode", &EndpointConfig::Ib::mode);
nb::class_<RegisteredMemory>(m, "CppRegisteredMemory")
.def(nb::init<>())
@@ -223,9 +232,15 @@ void register_core(nb::module_& m) {
.def_prop_rw(
"ib_max_send_wr", [](EndpointConfig& self) { return self.ib.maxSendWr; },
[](EndpointConfig& self, int v) { self.ib.maxSendWr = v; })
.def_prop_rw(
"ib_max_recv_wr", [](EndpointConfig& self) { return self.ib.maxRecvWr; },
[](EndpointConfig& self, int v) { self.ib.maxRecvWr = v; })
.def_prop_rw(
"ib_max_wr_per_send", [](EndpointConfig& self) { return self.ib.maxWrPerSend; },
[](EndpointConfig& self, int v) { self.ib.maxWrPerSend = v; })
.def_prop_rw(
"ib_mode", [](EndpointConfig& self) { return self.ib.mode; },
[](EndpointConfig& self, EndpointConfig::Ib::Mode v) { self.ib.mode = v; })
.def_rw("max_write_queue_size", &EndpointConfig::maxWriteQueueSize);
nb::class_<Context>(m, "CppContext")