Work towards a nanobind wrapper

This commit is contained in:
Crutcher Dunnavant
2023-03-23 21:36:09 +00:00
parent 0f31dafed5
commit be96f38ba3
14 changed files with 334 additions and 0 deletions

2
.gitignore vendored
View File

@@ -1,2 +1,4 @@
.vscode/
build/
__pycache__
.*.swp

2
python/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
.*.swp

53
python/CMakeLists.txt Normal file
View File

@@ -0,0 +1,53 @@
# Build script for `_py_mscclpp`, the nanobind extension module that wraps the
# MSCCLPP C API for Python.

# cmake_minimum_required() has to come before project(): it establishes the
# policy defaults that project() relies on.
cmake_minimum_required(VERSION 3.18...3.22)
project(mscclpp)

# Default to an optimized build when the user did not pick a build type and a
# single-config generator is in use.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# Create CMake targets for all Python components needed by nanobind.
# A single lookup replaces the previous pair of find_package(Python) calls; the
# stricter of the two minimum versions (3.9) is kept so the effective
# requirement does not change.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.26)
  find_package(Python 3.9 COMPONENTS Interpreter Development.Module Development.SABIModule REQUIRED)
else()
  find_package(Python 3.9 COMPONENTS Interpreter Development.Module REQUIRED)
endif()

# Detect the installed nanobind package and import it into CMake.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import nanobind; print(nanobind.cmake_dir())"
  RESULT_VARIABLE nb_query_status
  OUTPUT_VARIABLE NB_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Fail here with an actionable message instead of a confusing
# "could not find nanobind" error further down.
if(NOT "${nb_query_status}" STREQUAL "0" OR "${NB_DIR}" STREQUAL "")
  message(FATAL_ERROR
    "Could not query nanobind's CMake directory using ${Python_EXECUTABLE}. "
    "Is nanobind installed for this interpreter (pip install -r requirements.txt)?")
endif()
list(APPEND CMAKE_PREFIX_PATH "${NB_DIR}")
find_package(nanobind CONFIG REQUIRED)

# Locations of the external pieces this module builds against. Both are cache
# entries so they can be overridden with -DCUDA_DIR=... / -DMSCCLPP_DIR=...;
# the defaults are the previously hard-coded values.
set(CUDA_DIR "/usr/local/cuda" CACHE PATH "Root of the CUDA toolkit installation")
set(MSCCLPP_DIR "${CMAKE_CURRENT_LIST_DIR}/../build" CACHE PATH
  "MSCCLPP build tree providing include/ and lib/")

nanobind_add_module(
  _py_mscclpp
  NOSTRIP
  NB_SHARED
  src/_py_mscclpp.cpp
)

# Nothing links against a Python extension module, so its usage requirements
# are PRIVATE rather than PUBLIC.
target_include_directories(
  _py_mscclpp
  PRIVATE
  "${CUDA_DIR}/include"
  "${MSCCLPP_DIR}/include"
)

# lib64 is searched in addition to lib because CUDA toolkit installs on 64-bit
# Linux normally place their libraries there.
target_link_directories(
  _py_mscclpp
  PRIVATE
  "${CUDA_DIR}/lib"
  "${CUDA_DIR}/lib64"
  "${MSCCLPP_DIR}/lib"
)

target_link_libraries(
  _py_mscclpp
  PRIVATE
  mscclpp
)

4
python/Makefile Normal file
View File

@@ -0,0 +1,4 @@
# `test` names an action, not a file: declare it phony so a stray file or
# directory called "test" can never make the target look up to date.
.PHONY: test

# Build the extension if necessary and run the Python test suite.
test:
	./test.sh

60
python/README.md Normal file
View File

@@ -0,0 +1,60 @@
This assumes that some things are built/installed
```
# assumes WORKDIR has:
# git clone git@github.com:NVIDIA/gdrcopy.git
# git clone git@github.com:microsoft/mscclpp.git
uname -r
# 5.4.0-1090-azure
# install
apt update
apt install -y \
build-essential devscripts debhelper check \
libsubunit-dev fakeroot pkg-config dkms \
nvidia-dkms-525-server \
linux-headers-5.4.0-1090-azure
cd $WORKDIR/gdrcopy
sed -i 's/\(-L \$(CUDA)\/lib64\)/\1 \1\/stubs/' tests/Makefile
cd packages
CUDA=/usr/local/cuda ./build-deb-packages.sh
dpkg -i gdrdrv-dkms_2.3-1_amd64.Ubuntu20_04.deb
dpkg -i libgdrapi_2.3-1_amd64.Ubuntu20_04.deb
dpkg -i gdrcopy-tests_2.3-1_amd64.Ubuntu20_04+cuda11.6.deb
dpkg -i gdrcopy_2.3-1_amd64.Ubuntu20_04.deb
# validate:
# $ sanity
# Running suite(s): Sanity
# 100%: Checks: 27, Failures: 0, Errors: 0
# dkms install -m gdrdrv/2.3
cd $WORKDIR/mscclpp
## numactl
apt install -y numactl libnuma-dev libnuma1
# if not mpi testing
USE_MPI_FOR_TESTS=0 make -j
```
Rough build attempts
```
# cd to this directory:
cmake -S . -B build
cmake --build build --clean-first -v
# this should contain libmscclpp.so, but does not
ldd build/_py_mscclpp.cpython-39-x86_64-linux-gnu.so
# this will fail due to a missing symbol
( cd build;
LD_LIBRARY_PATH="$PWD/../../build/lib:$LD_LIBRARY_PATH" python -c 'import _py_mscclpp' )
```

View File

@@ -0,0 +1,13 @@
from . import _py_mscclpp
# Re-export the public names of the compiled `_py_mscclpp` extension so that
# callers can simply write `mscclpp.<name>`.
MscclppUniqueId = _py_mscclpp.MscclppUniqueId
MscclppComm = _py_mscclpp.MscclppComm
MSCCLPP_UNIQUE_ID_BYTES = _py_mscclpp.MSCCLPP_UNIQUE_ID_BYTES

# Names exported by `from mscclpp import *`.
__all__ = (
    "MscclppUniqueId",
    "MSCCLPP_UNIQUE_ID_BYTES",
    "MscclppComm",
)

Binary file not shown.

View File

@@ -0,0 +1 @@
../build/_py_mscclpp.cpython-39-x86_64-linux-gnu.so

View File

@@ -0,0 +1,49 @@
import unittest
import hamcrest
import mscclpp
class UniqueIdTest(unittest.TestCase):
    # Tests for the MscclppUniqueId binding: direct construction is rejected,
    # and ids round-trip through their bytes representation.

    def test_no_constructor(self) -> None:
        # The class exposes only static factories; calling it directly must fail.
        direct_call = hamcrest.calling(mscclpp.MscclppUniqueId).with_args()
        rejects_construction = hamcrest.raises(TypeError, "no constructor")
        hamcrest.assert_that(direct_call, rejects_construction)

    def test_getUniqueId(self) -> None:
        myId = mscclpp.MscclppUniqueId.from_context()

        # A freshly generated id serialises to exactly the advertised size.
        hamcrest.assert_that(
            myId.bytes(),
            hamcrest.has_length(mscclpp.MSCCLPP_UNIQUE_ID_BYTES),
        )

        # from_bytes should work
        copy = mscclpp.MscclppUniqueId.from_bytes(myId.bytes())
        hamcrest.assert_that(copy.bytes(), hamcrest.equal_to(myId.bytes()))

        # bad size
        short_input = hamcrest.calling(
            mscclpp.MscclppUniqueId.from_bytes
        ).with_args(b'abc')
        size_error = hamcrest.raises(
            ValueError,
            f"Requires exactly {mscclpp.MSCCLPP_UNIQUE_ID_BYTES} bytes; found 3"
        )
        hamcrest.assert_that(short_input, size_error)
class CommsTest(unittest.TestCase):
    # Tests for the MscclppComm binding.

    # Previously this test was disabled by naming it `_test`, which hides it
    # from test discovery entirely. An explicit skip keeps it visible in test
    # reports until the hang is resolved.
    # NOTE(review): presumably rank 0 blocks waiting for a second rank that is
    # never started (world_size=2) -- confirm before re-enabling.
    @unittest.skip("MscclppComm.init_rank_from_address hangs forever")
    def test_init_rank_from_address(self) -> None:
        comm = mscclpp.MscclppComm.init_rank_from_address(
            address="127.0.0.1:50000",
            rank=0,
            world_size=2,
        )
        comm.close()

3
python/requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
nanobind
pytest
PyHamcrest

7
python/setup.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Configure and build the `_py_mscclpp` extension into ./build, then print the
# shared libraries the resulting module links against.
# Run from the directory containing this script.

# -e: stop at the first failing command; -x: echo commands as they run.
set -ex

cmake -S . -B build
cmake --build build --clean-first -v

# The CMake target is `_py_mscclpp` (leading underscore); the old path without
# the underscore never existed, so this step always failed under `set -e`.
# The glob avoids hard-coding the interpreter/platform tag (e.g. cpython-39).
ldd build/_py_mscclpp.*.so

129
python/src/_py_mscclpp.cpp Normal file
View File

@@ -0,0 +1,129 @@
#include <nanobind/nanobind.h>
#include <nanobind/stl/string.h>
#include <mscclpp.h>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <stdexcept>
namespace nb = nanobind;
using namespace nb::literals;
// Minimal printf-style stand-in for std::format, which requires C++20.
// Formats `args` according to `format` and returns the result as a
// std::string; throws std::runtime_error if snprintf reports a failure.
template <typename... Args>
std::string string_format(const std::string& format, Args... args) {
  // First pass writes nothing: it only reports how many characters are
  // needed. One more is added for the terminating '\0'.
  const int required = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
  if (required <= 0) {
    throw std::runtime_error("Error during formatting.");
  }

  const auto capacity = static_cast<size_t>(required);
  std::unique_ptr<char[]> scratch(new char[capacity]);
  std::snprintf(scratch.get(), capacity, format.c_str(), args...);

  // Copy everything except the trailing '\0' into the result.
  const char* const first = scratch.get();
  return std::string(first, first + capacity - 1);
}
// Converts an mscclpp status code into either a value or a C++ exception.
// Returns `val` when `status` is mscclppSuccess. Otherwise formats the
// message with string_format(format, args...) and throws:
//   - std::runtime_error for CUDA/system/internal/remote errors, as well as
//     for mscclppInProgress and mscclppNumResults;
//   - std::invalid_argument for invalid argument/usage and for any status
//     value not listed here.
// The message is only formatted on the failure path.
template <typename Val, typename... Args>
Val maybe(mscclppResult_t status, Val val, const std::string& format, Args... args) {
  if (status == mscclppSuccess) {
    return val;
  }

  switch (status) {
    case mscclppUnhandledCudaError:
    case mscclppSystemError:
    case mscclppInternalError:
    case mscclppRemoteError:
    case mscclppInProgress:
    case mscclppNumResults:
      throw std::runtime_error(string_format(format, args...));

    case mscclppInvalidArgument:
    case mscclppInvalidUsage:
    default:
      throw std::invalid_argument(string_format(format, args...));
  }
}
// Thin wrapper around a raw mscclppComm_t handle. This struct (rather than
// mscclppComm_t itself) is the type registered with nanobind as the Python
// class "MscclppComm".
struct MscclppComm {
// Handle filled in by the mscclppCommInitRank* calls in the bindings; the
// bindings reset it to 0 after a successful mscclppCommDestroy.
mscclppComm_t internal;
};
NB_MODULE(_py_mscclpp, m) {
m.doc() = "Python bindings for MSCCLPP";
m.attr("MSCCLPP_UNIQUE_ID_BYTES") = MSCCLPP_UNIQUE_ID_BYTES;
nb::class_<mscclppUniqueId>(m, "MscclppUniqueId")
.def_static("from_context", []() {
mscclppUniqueId uniqueId;
return maybe(
mscclppGetUniqueId(&uniqueId),
uniqueId,
"Failed to get MSCCLP Unique Id."
);
})
.def_static("from_bytes", [](nb::bytes source) {
if (source.size() != MSCCLPP_UNIQUE_ID_BYTES) {
throw std::invalid_argument(
string_format(
"Requires exactly %d bytes; found %d",
MSCCLPP_UNIQUE_ID_BYTES,
source.size()
)
);
}
mscclppUniqueId uniqueId;
std::memcpy(uniqueId.internal, source.c_str(), sizeof(uniqueId.internal));
return uniqueId;
})
.def("bytes", [](mscclppUniqueId id){
return nb::bytes(id.internal, sizeof(id.internal));
});
nb::class_<MscclppComm>(m, "MscclppComm")
.def_static(
"init_rank_from_address",
[](const std::string &address, int rank, int world_size) {
MscclppComm comm = { 0 };
return maybe(
mscclppCommInitRank(&comm.internal, world_size, rank, address.c_str()),
comm,
"Failed to initialize comms: %s rank=%d world_size=%d",
address,
rank,
world_size);
},
"address"_a, "rank"_a, "world_size"_a,
"Initialize comms given an IP address, rank, and world_size"
)
.def_static("init_rank_from_id", [](const mscclppUniqueId &id, int rank, int world_size) {
MscclppComm comm = { 0 };
return maybe(
mscclppCommInitRankFromId(&comm.internal, world_size, id, rank),
comm,
"Failed to initialize comms: %02X%s rank=%d world_size=%d",
id.internal,
rank,
world_size);
})
.def("close", [](MscclppComm &comm) {
maybe(
mscclppCommDestroy(comm.internal),
nb::none(),
"Failed to close comm channel"
);
comm.internal = 0;
})
.def("__del__", [](MscclppComm &comm) {
maybe(
mscclppCommDestroy(comm.internal),
nb::none(),
"Failed to close comm channel"
);
comm.internal = 0;
});
}

11
python/test.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Build the extension (configuring it first when needed) and run the tests.

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# No build/ directory yet means the project has not been configured.
if [ ! -d build ]; then
    ./setup.sh
fi

# Incremental rebuild so the tests exercise the current sources.
cmake --build build

pytest mscclpp