Expose NVLS multicast granularity option for GpuBuffer (#815)

Add a public GpuBufferGranularity enum (MultiCastMinimum, MultiCastRecommended)
and let GpuBuffer choose the NVLS multicast allocation granularity via a
constructor argument, defaulting to MultiCastMinimum to minimize memory
usage. Expose the same option through the C++ and Python (nanobind)
APIs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Binyang Li
2026-06-04 13:16:18 -07:00
committed by GitHub
parent c9f8be64bb
commit 7c390fffd6
5 changed files with 39 additions and 7 deletions

View File

@@ -114,8 +114,13 @@ static nb::capsule toDlpack(GpuBuffer<char> buffer, std::string dataType, std::v
void register_gpu_utils(nb::module_& m) {
m.def("is_nvls_supported", &isNvlsSupported);
nb::enum_<GpuBufferGranularity>(m, "CppGpuBufferGranularity")
.value("MultiCastMinimum", GpuBufferGranularity::MultiCastMinimum)
.value("MultiCastRecommended", GpuBufferGranularity::MultiCastRecommended);
nb::class_<GpuBuffer<char>>(m, "CppRawGpuBuffer")
.def(nb::init<size_t>(), nb::arg("nelems"))
.def(nb::init<size_t, GpuBufferGranularity>(), nb::arg("nelems"),
nb::arg("granularity") = GpuBufferGranularity::MultiCastMinimum)
.def("nelems", &GpuBuffer<char>::nelems)
.def("bytes", &GpuBuffer<char>::bytes)
.def("data", [](GpuBuffer<char>& self) { return reinterpret_cast<uintptr_t>(self.data()); })