mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-04-20 14:59:29 +00:00
updates
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
# Find the GDRCopy libraries
|
||||
# Find the GDRCopy libraries (>= 2.5 required for gdr_pin_buffer_v2 / GDR_PIN_FLAG_FORCE_PCIE)
|
||||
#
|
||||
# The following variables are optionally searched for defaults
|
||||
# GDRCOPY_ROOT_DIR: Base directory where all GDRCopy components are found
|
||||
@@ -32,6 +32,17 @@ find_library(GDRCOPY_LIBRARIES
|
||||
/usr/lib
|
||||
/usr/lib/x86_64-linux-gnu)
|
||||
|
||||
if(GDRCOPY_INCLUDE_DIRS)
|
||||
include(CheckSymbolExists)
|
||||
set(CMAKE_REQUIRED_INCLUDES ${GDRCOPY_INCLUDE_DIRS})
|
||||
check_symbol_exists(gdr_pin_buffer_v2 "gdrapi.h" GDRCOPY_HAS_PIN_BUFFER_V2)
|
||||
unset(CMAKE_REQUIRED_INCLUDES)
|
||||
if(NOT GDRCOPY_HAS_PIN_BUFFER_V2)
|
||||
message(STATUS "GDRCopy found but too old (gdr_pin_buffer_v2 not available). Requires >= 2.5.")
|
||||
set(GDRCOPY_INCLUDE_DIRS GDRCOPY_INCLUDE_DIRS-NOTFOUND)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(GDRCopy DEFAULT_MSG GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
|
||||
mark_as_advanced(GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
|
||||
|
||||
@@ -316,15 +316,17 @@ IBConnection::IBConnection(std::shared_ptr<Context> context, const Endpoint& loc
|
||||
localSignalGpuPtr_ = reinterpret_cast<uint64_t*>(localImpl.ibSignalGpuBuffer_.get());
|
||||
}
|
||||
|
||||
// When the QP is mlx5 and the signal GPU buffer MR is a Data Direct DMABUF
|
||||
// (registered via mlx5dv_reg_dmabuf_mr with MLX5DV_REG_DMABUF_ACCESS_DATA_DIRECT),
|
||||
// and the semaphore token write also goes through Data Direct (via GDRCopy to a
|
||||
// Data Direct DMABUF MR), all writes are visible in GPU memory when the CQE is
|
||||
// polled. This allows reading the token from imm_data instead of the signal GPU buffer.
|
||||
// Data Direct requires all three conditions:
|
||||
// 1. Signal GPU buffer MR registered with MLX5DV_REG_DMABUF_ACCESS_DATA_DIRECT
|
||||
// 2. Local signal GPU GDRCopy mapping pinned with GDR_PIN_FLAG_FORCE_PCIE
|
||||
// 3. remoteUpdateDstAddr GDRCopy mapping is valid (checked later, at setRemoteUpdateDstAddr time)
|
||||
// When all conditions are met, RDMA data writes and GDRCopy token writes both go
|
||||
// through the Data Direct engine, guaranteeing GPU memory visibility at CQE poll time.
|
||||
auto qp = qp_.lock();
|
||||
dataDirectEnabled_ = localImpl.ibSignalGpuMr_ && localImpl.ibSignalGpuMr_->isDataDirect();
|
||||
dataDirectEnabled_ = localImpl.ibSignalGpuMr_ && localImpl.ibSignalGpuMr_->isDataDirect() &&
|
||||
localSignalGpuMap_ && localSignalGpuMap_->valid();
|
||||
if (dataDirectEnabled_) {
|
||||
INFO(CONN, "IBConnection: Data Direct enabled (mlx5 + DMABUF)");
|
||||
INFO(CONN, "IBConnection: Data Direct enabled");
|
||||
}
|
||||
|
||||
// Pre-post receive requests for incoming write-with-imm
|
||||
@@ -361,6 +363,11 @@ void IBConnection::setRemoteUpdateDstAddr(std::shared_ptr<uint64_t> gpuMem) {
|
||||
if (gdrEnabled()) {
|
||||
if (gpuMem) {
|
||||
remoteUpdateDstAddrMap_ = std::make_unique<GdrMap>(std::move(gpuMem), localGpuDeviceId_);
|
||||
// Data Direct requires the token write mapping to also use FORCE_PCIE
|
||||
if (dataDirectEnabled_ && !(remoteUpdateDstAddrMap_ && remoteUpdateDstAddrMap_->valid())) {
|
||||
dataDirectEnabled_ = false;
|
||||
INFO(CONN, "IBConnection: Data Direct disabled (remoteUpdateDstAddr GDRCopy mapping not available)");
|
||||
}
|
||||
} else {
|
||||
remoteUpdateDstAddrMap_.reset();
|
||||
}
|
||||
|
||||
@@ -80,7 +80,12 @@ GdrContext::~GdrContext() {
|
||||
// GdrMap
|
||||
|
||||
GdrMap::GdrMap(std::shared_ptr<void> gpuMem, int deviceId)
|
||||
: ctx_(gdrContext()), gpuMem_(std::move(gpuMem)), mh_{}, barPtr_(nullptr), hostDstPtr_(nullptr), mappedSize_(0) {
|
||||
: ctx_(gdrContext()),
|
||||
gpuMem_(std::move(gpuMem)),
|
||||
mh_{},
|
||||
barPtr_(nullptr),
|
||||
hostDstPtr_(nullptr),
|
||||
mappedSize_(0) {
|
||||
// Ensure CUDA device context is active for gdr_pin_buffer
|
||||
CudaDeviceGuard deviceGuard(deviceId);
|
||||
|
||||
|
||||
@@ -125,10 +125,10 @@ class IBConnection : public BaseConnection {
|
||||
uint64_t* localSignalGpuPtr_;
|
||||
|
||||
// When true, recvThreadFunc reads the token from imm_data (from CQE) instead of the
|
||||
// signal GPU buffer via GDRCopy. Enabled when the QP is mlx5 and the signal GPU buffer
|
||||
// MR is a Data Direct DMABUF. Memory consistency is guaranteed because both the RDMA
|
||||
// data write and the semaphore token write (via GDRCopy) go through the Data Direct path,
|
||||
// so all writes are visible in GPU memory when the CQE is polled.
|
||||
// signal GPU buffer via GDRCopy. Enabled only when all Data Direct conditions are met:
|
||||
// the signal GPU buffer MR is registered with MLX5DV_REG_DMABUF_ACCESS_DATA_DIRECT,
|
||||
// and all GDRCopy mappings (local signal buffer and remoteUpdateDstAddr) are valid,
|
||||
// so both RDMA data writes and GDRCopy token writes go through the Data Direct engine.
|
||||
bool dataDirectEnabled_;
|
||||
|
||||
void recvThreadFunc();
|
||||
|
||||
Reference in New Issue
Block a user