mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-06-06 07:52:00 +00:00
FIFO improvements (#557)
* Revert `MSCCLPP_FIFO_USE_TAIL_REPLICA=1` back to the default.
* Optimize `FifoDeviceHandle`.
* Do not use `cudaHostAllocWriteCombined` that increases latency.
* Pin host memory for `Host2DeviceSemaphore::outboundSemaphore_`.
* Fix proxy NUMA binding issues.
* Prevent graph capture inside proxy threads.
* Now `CudaIpcConnection` skips stream sync when unnecessary.
* Now any type of connection needs to hold a shared pointer to the context for memory safety.
* Now a context should be always managed by a shared pointer for memory safety.
* Minor docs & interface improvements.
* Minor fix in `mscclpp-test` correctness test.
This commit is contained in:
@@ -148,7 +148,7 @@ void register_core(nb::module_& m) {
|
||||
.def_rw("ib_max_wr_per_send", &EndpointConfig::ibMaxWrPerSend);
|
||||
|
||||
nb::class_<Context>(m, "Context")
|
||||
.def(nb::init<>())
|
||||
.def_static("create", &Context::create)
|
||||
.def(
|
||||
"register_memory",
|
||||
[](Communicator* self, uintptr_t ptr, size_t size, TransportFlags transports) {
|
||||
|
||||
@@ -16,7 +16,7 @@ void register_port_channel(nb::module_& m) {
|
||||
.def("stop_proxy", &BaseProxyService::stopProxy);
|
||||
|
||||
nb::class_<ProxyService, BaseProxyService>(m, "ProxyService")
|
||||
.def(nb::init<size_t>(), nb::arg("fifoSize") = DEFAULT_FIFO_SIZE)
|
||||
.def(nb::init<int>(), nb::arg("fifoSize") = DEFAULT_FIFO_SIZE)
|
||||
.def("start_proxy", &ProxyService::startProxy)
|
||||
.def("stop_proxy", &ProxyService::stopProxy)
|
||||
.def("build_and_add_semaphore", &ProxyService::buildAndAddSemaphore, nb::arg("comm"), nb::arg("connection"))
|
||||
|
||||
@@ -36,18 +36,12 @@ class MyProxyService {
|
||||
connections_(conns),
|
||||
allRegMem_(allRegMem),
|
||||
semaphores_(semaphores),
|
||||
proxy_([&](mscclpp::ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); }, [&]() { bindThread(); }) {
|
||||
proxy_([&](mscclpp::ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); }) {
|
||||
int cudaDevice;
|
||||
MSCCLPP_CUDATHROW(cudaGetDevice(&cudaDevice));
|
||||
deviceNumaNode_ = mscclpp::getDeviceNumaNode(cudaDevice);
|
||||
}
|
||||
|
||||
void bindThread() {
|
||||
if (deviceNumaNode_ >= 0) {
|
||||
mscclpp::numaBind(deviceNumaNode_);
|
||||
}
|
||||
}
|
||||
|
||||
mscclpp::ProxyHandlerResult handleTrigger(mscclpp::ProxyTrigger) {
|
||||
int dataSizePerRank = dataSize_ / nranks_;
|
||||
for (int r = 1; r < nranks_; ++r) {
|
||||
@@ -64,7 +58,7 @@ class MyProxyService {
|
||||
|
||||
void stop() { proxy_.stop(); }
|
||||
|
||||
mscclpp::FifoDeviceHandle fifoDeviceHandle() { return proxy_.fifo().deviceHandle(); }
|
||||
mscclpp::FifoDeviceHandle fifoDeviceHandle() { return proxy_.fifo()->deviceHandle(); }
|
||||
};
|
||||
|
||||
void init_mscclpp_proxy_test_module(nb::module_ &m) {
|
||||
|
||||
Reference in New Issue
Block a user