mirror of
https://github.com/pybind/pybind11.git
synced 2026-04-20 14:59:27 +00:00
Avoid heap allocation for function calls with a small number of args (#5824)
* Avoid heap allocation for function calls with a small number of arguments
We don't have access to llvm::SmallVector or similar, but given the
limited subset of the `std::vector` API that
`function_call::args{,_convert}` need and the "reserve-then-fill"
usage pattern, it is relatively straightforward to implement custom
containers that get the job done.
Seems to improve the time to call the collatz function in
pybind/pybind11_benchmark significantly; numbers are a little noisy
but there's a clear improvement from "about 60 ns per call" to "about
45 ns per call" on my machine (M4 Max Mac), as measured with
`timeit.repeat('collatz(4)', 'from pybind11_benchmark import
collatz')`.
* clang-tidy
* more clang-tidy
* clang-tidy NOLINTBEGIN/END instead of NOLINTNEXTLINE
* forgot to increase inline size after removing std::variant
* constexpr arg_vector_small_size, use move instead of swap to hopefully clarify second_pass_convert
* rename test_embed to test_low_level
* rename test_low_level to test_with_catch
* Be careful to NOINLINE slow paths
* rename array/vector members to iarray/hvector. Move comment per request. Add static_asserts for our untagged union implementation per request.
* drop is_standard_layout assertions; see https://github.com/pybind/pybind11/pull/5824#issuecomment-3308616072
This commit is contained in:
@@ -647,8 +647,8 @@ if(NOT PYBIND11_CUDA_TESTS)
|
||||
# Test pure C++ code (not depending on Python). Provides the `test_pure_cpp` target.
|
||||
add_subdirectory(pure_cpp)
|
||||
|
||||
# Test embedding the interpreter. Provides the `cpptest` target.
|
||||
add_subdirectory(test_embed)
|
||||
# Test C++ code that depends on Python, such as embedding the interpreter. Provides the `cpptest` target.
|
||||
add_subdirectory(test_with_catch)
|
||||
|
||||
# Test CMake build using functions and targets from subdirectory or installed location
|
||||
add_subdirectory(test_cmake_build)
|
||||
|
||||
@@ -76,6 +76,7 @@ conduit_headers = {
|
||||
}
|
||||
|
||||
detail_headers = {
|
||||
"include/pybind11/detail/argument_vector.h",
|
||||
"include/pybind11/detail/class.h",
|
||||
"include/pybind11/detail/common.h",
|
||||
"include/pybind11/detail/cpp_conduit.h",
|
||||
|
||||
@@ -26,11 +26,11 @@ add_custom_target(
|
||||
DEPENDS test_subdirectory_embed)
|
||||
|
||||
# Test custom export group -- PYBIND11_EXPORT_NAME
|
||||
add_library(test_embed_lib ../embed.cpp)
|
||||
target_link_libraries(test_embed_lib PRIVATE pybind11::embed)
|
||||
add_library(test_with_catch_lib ../embed.cpp)
|
||||
target_link_libraries(test_with_catch_lib PRIVATE pybind11::embed)
|
||||
|
||||
install(
|
||||
TARGETS test_embed_lib
|
||||
TARGETS test_with_catch_lib
|
||||
EXPORT test_export
|
||||
ARCHIVE DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
|
||||
@@ -33,10 +33,11 @@ if(PYBIND11_TEST_SMART_HOLDER)
|
||||
-DPYBIND11_RUN_TESTING_WITH_SMART_HOLDER_AS_DEFAULT_BUT_NEVER_USE_IN_PRODUCTION_PLEASE)
|
||||
endif()
|
||||
|
||||
add_executable(test_embed catch.cpp test_interpreter.cpp test_subinterpreter.cpp)
|
||||
pybind11_enable_warnings(test_embed)
|
||||
add_executable(test_with_catch catch.cpp test_args_convert_vector.cpp test_argument_vector.cpp
|
||||
test_interpreter.cpp test_subinterpreter.cpp)
|
||||
pybind11_enable_warnings(test_with_catch)
|
||||
|
||||
target_link_libraries(test_embed PRIVATE pybind11::embed Catch2::Catch2 Threads::Threads)
|
||||
target_link_libraries(test_with_catch PRIVATE pybind11::embed Catch2::Catch2 Threads::Threads)
|
||||
|
||||
if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
|
||||
file(COPY test_interpreter.py test_trampoline.py DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
@@ -44,8 +45,8 @@ endif()
|
||||
|
||||
add_custom_target(
|
||||
cpptest
|
||||
COMMAND "$<TARGET_FILE:test_embed>"
|
||||
DEPENDS test_embed
|
||||
COMMAND "$<TARGET_FILE:test_with_catch>"
|
||||
DEPENDS test_with_catch
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
pybind11_add_module(external_module THIN_LTO external_module.cpp)
|
||||
@@ -19,7 +19,7 @@ namespace py = pybind11;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Setup for TEST_CASE in test_interpreter.cpp, tagging on a large random number:
|
||||
std::string updated_pythonpath("pybind11_test_embed_PYTHONPATH_2099743835476552");
|
||||
std::string updated_pythonpath("pybind11_test_with_catch_PYTHONPATH_2099743835476552");
|
||||
const char *preexisting_pythonpath = getenv("PYTHONPATH");
|
||||
if (preexisting_pythonpath != nullptr) {
|
||||
#if defined(_WIN32)
|
||||
80
tests/test_with_catch/test_args_convert_vector.cpp
Normal file
80
tests/test_with_catch/test_args_convert_vector.cpp
Normal file
@@ -0,0 +1,80 @@
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "catch.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
using args_convert_vector = py::detail::args_convert_vector<py::detail::arg_vector_small_size>;
|
||||
|
||||
namespace {
|
||||
// Builds the list of sample containers shared by the tests in this file.
// The result holds one default-constructed Container followed by
// Container(sz, b) for every size in the list below and every fill value
// b in {false, true}, in that order (21 entries in total).
// Container must be default-constructible and constructible from
// (std::size_t, bool); this file instantiates it with std::vector<bool> and
// args_convert_vector so that both get identical contents at each index.
// NOTE(review): the sizes presumably straddle storage boundaries of
// args_convert_vector (inline capacity, word sizes) -- confirm against
// argument_vector.h.
template <typename Container>
|
||||
std::vector<Container> get_sample_vectors() {
|
||||
std::vector<Container> result;
|
||||
// Sample 0: the default-constructed (empty) container.
result.emplace_back();
|
||||
for (const auto sz : {0, 4, 5, 6, 31, 32, 33, 63, 64, 65}) {
|
||||
for (const bool b : {false, true}) {
|
||||
// The size literals deduce as int; cast so the (size, value) constructor is selected.
result.emplace_back(static_cast<std::size_t>(sz), b);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Asserts (via Catch REQUIRE) that `actual` has the same size as `expected`
// and that every element compares equal at the same index.
// The size check runs first, so the element loop never indexes past the end
// of `expected`.
void require_vector_matches_sample(const args_convert_vector &actual,
|
||||
const std::vector<bool> &expected) {
|
||||
REQUIRE(actual.size() == expected.size());
|
||||
for (size_t ii = 0; ii < actual.size(); ++ii) {
|
||||
REQUIRE(actual[ii] == expected[ii]);
|
||||
}
|
||||
}
|
||||
|
||||
// Differential test driver: applies the same logical mutation to every sample
// args_convert_vector and to the std::vector<bool> built from the same
// constructor arguments, then requires that the two still match.
//   actual_mutation_func   - callable invoked with each args_convert_vector&.
//   expected_mutation_func - callable invoked with each std::vector<bool>&.
// Both sample lists come from get_sample_vectors, so index ii refers to
// containers constructed from identical arguments.
template <typename ActualMutationFunc, typename ExpectedMutationFunc>
|
||||
void mutation_test_with_samples(ActualMutationFunc actual_mutation_func,
|
||||
ExpectedMutationFunc expected_mutation_func) {
|
||||
auto sample_contents = get_sample_vectors<std::vector<bool>>();
|
||||
auto samples = get_sample_vectors<args_convert_vector>();
|
||||
for (size_t ii = 0; ii < samples.size(); ++ii) {
|
||||
auto &actual = samples[ii];
|
||||
auto &expected = sample_contents[ii];
|
||||
|
|
||||
// Mutate both in place, then compare the results element by element.
actual_mutation_func(actual);
|
||||
expected_mutation_func(expected);
|
||||
require_vector_matches_sample(actual, expected);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// I would like to write [capture](auto& vec) block inline, but we
|
||||
// have to work with C++11, which doesn't have generic lambdas.
|
||||
// MUTATION_LAMBDA(capture, block) expands to TWO comma-separated lambdas with
// the same capture list and the same body: the first takes
// args_convert_vector&, the second takes std::vector<bool>&. The expansion is
// meant to be used as the two arguments of mutation_test_with_samples.
// `block` is pasted unparenthesized as the lambda body, hence the NOLINT
// region around the definition.
// NOLINTBEGIN(bugprone-macro-parentheses)
|
||||
#define MUTATION_LAMBDA(capture, block) \
|
||||
[capture](args_convert_vector & vec) block, [capture](std::vector<bool> & vec) block
|
||||
// NOLINTEND(bugprone-macro-parentheses)
|
||||
|
|
||||
// For readability, rather than having ugly empty arguments.
|
||||
// Expands to nothing, yielding an empty capture list `[]` in MUTATION_LAMBDA.
#define NO_CAPTURE
|
||||
|
||||
// Baseline check: with a no-op mutation, every sample args_convert_vector must
// already match the std::vector<bool> constructed from the same arguments.
// This exercises construction, size() and operator[] only.
TEST_CASE("check sample args_convert_vector contents") {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(NO_CAPTURE, { (void) vec; }));
|
||||
}
|
||||
|
||||
// Appends one element (once with false, once with true) to every sample and
// checks the result against std::vector<bool>::push_back on the same input.
TEST_CASE("args_convert_vector push_back") {
|
||||
for (const bool b : {false, true}) {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(b, { vec.push_back(b); }));
|
||||
}
|
||||
}
|
||||
|
||||
// Calls reserve(ii) for ii in 0..3 on every sample and checks that size and
// contents still match the std::vector<bool> reference, i.e. that reserve
// does not change the observable elements.
TEST_CASE("args_convert_vector reserve") {
|
||||
for (std::size_t ii = 0; ii < 4; ++ii) {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(ii, { vec.reserve(ii); }));
|
||||
}
|
||||
}
|
||||
|
||||
// Combines the two mutations above: reserve(ii) for ii in 0..3 followed by a
// push_back of false/true, compared against the same sequence applied to
// std::vector<bool>.
TEST_CASE("args_convert_vector reserve then push_back") {
|
||||
for (std::size_t ii = 0; ii < 4; ++ii) {
|
||||
for (const bool b : {false, true}) {
|
||||
// `=` captures both ii and b by copy; a two-name capture list would contain
// a comma and be split into separate macro arguments.
mutation_test_with_samples(MUTATION_LAMBDA(=, {
|
||||
vec.reserve(ii);
|
||||
vec.push_back(b);
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
94
tests/test_with_catch/test_argument_vector.cpp
Normal file
94
tests/test_with_catch/test_argument_vector.cpp
Normal file
@@ -0,0 +1,94 @@
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "catch.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
// 2 is chosen because it is the smallest number (keeping tests short)
|
||||
// where we can create non-empty vectors whose size is the inline size
|
||||
// plus or minus 1.
|
||||
using argument_vector = py::detail::argument_vector<2>;
|
||||
|
||||
namespace {
|
||||
// Converts a std::vector<py::handle> into an argument_vector holding the same
// handles in the same order. Reserves v.size() slots up front and then
// push_backs each element (reserve-then-fill).
argument_vector to_argument_vector(const std::vector<py::handle> &v) {
|
||||
argument_vector result;
|
||||
result.reserve(v.size());
|
||||
for (const auto x : v) {
|
||||
result.push_back(x);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the reference contents for the sample vectors: four lists of
// handles with sizes 0, 1, 2 and 3. With the inline size of 2 chosen for
// argument_vector in this file, that covers the empty case plus sizes of
// inline size minus one, exactly the inline size, and inline size plus one.
std::vector<std::vector<py::handle>> get_sample_argument_vector_contents() {
|
||||
return std::vector<std::vector<py::handle>>{
|
||||
{},
|
||||
{py::handle(Py_None)},
|
||||
{py::handle(Py_None), py::handle(Py_False)},
|
||||
{py::handle(Py_None), py::handle(Py_False), py::handle(Py_True)},
|
||||
};
|
||||
}
|
||||
|
||||
// Builds one argument_vector per entry of get_sample_argument_vector_contents(),
// in the same order, so index ii in both lists refers to the same contents.
std::vector<argument_vector> get_sample_argument_vectors() {
|
||||
std::vector<argument_vector> result;
|
||||
for (const auto &vec : get_sample_argument_vector_contents()) {
|
||||
result.push_back(to_argument_vector(vec));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Asserts (via Catch REQUIRE) that `actual` and `expected` have the same size
// and that the handles at each index wrap the same PyObject pointer
// (compared via ptr(), i.e. identity rather than Python-level equality).
void require_vector_matches_sample(const argument_vector &actual,
|
||||
const std::vector<py::handle> &expected) {
|
||||
REQUIRE(actual.size() == expected.size());
|
||||
for (size_t ii = 0; ii < actual.size(); ++ii) {
|
||||
REQUIRE(actual[ii].ptr() == expected[ii].ptr());
|
||||
}
|
||||
}
|
||||
|
||||
// Differential test driver: applies the same logical mutation to every sample
// argument_vector and to the std::vector<py::handle> it was built from, then
// requires that the two still match.
//   actual_mutation_func   - callable invoked with each argument_vector&.
//   expected_mutation_func - callable invoked with each std::vector<py::handle>&.
template <typename ActualMutationFunc, typename ExpectedMutationFunc>
|
||||
void mutation_test_with_samples(ActualMutationFunc actual_mutation_func,
|
||||
ExpectedMutationFunc expected_mutation_func) {
|
||||
auto sample_contents = get_sample_argument_vector_contents();
|
||||
auto samples = get_sample_argument_vectors();
|
||||
for (size_t ii = 0; ii < samples.size(); ++ii) {
|
||||
auto &actual = samples[ii];
|
||||
auto &expected = sample_contents[ii];
|
||||
|
|
||||
// Mutate both in place, then compare the results element by element.
actual_mutation_func(actual);
|
||||
expected_mutation_func(expected);
|
||||
require_vector_matches_sample(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// I would like to write [capture](auto& vec) block inline, but we
|
||||
// have to work with C++11, which doesn't have generic lambdas.
|
||||
// MUTATION_LAMBDA(capture, block) expands to TWO comma-separated lambdas with
// the same capture list and the same body: the first takes argument_vector&,
// the second takes std::vector<py::handle>&. The expansion is meant to be
// used as the two arguments of mutation_test_with_samples.
// `block` is pasted unparenthesized as the lambda body, hence the NOLINT
// region around the definition.
// NOLINTBEGIN(bugprone-macro-parentheses)
|
||||
#define MUTATION_LAMBDA(capture, block) \
|
||||
[capture](argument_vector & vec) block, [capture](std::vector<py::handle> & vec) block
|
||||
// NOLINTEND(bugprone-macro-parentheses)
|
||||
|
|
||||
// For readability, rather than having ugly empty arguments.
|
||||
// Expands to nothing, yielding an empty capture list `[]` in MUTATION_LAMBDA.
#define NO_CAPTURE
|
||||
|
||||
// Baseline check: with a no-op mutation, every sample argument_vector must
// already match the std::vector<py::handle> it was built from. This exercises
// reserve/push_back (through to_argument_vector), size() and operator[].
TEST_CASE("check sample argument_vector contents") {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(NO_CAPTURE, { (void) vec; }));
|
||||
}
|
||||
|
||||
// Appends one handle to Py_None to every sample and compares against the same
// append on std::vector<py::handle>.
// NOTE(review): despite the test name, the body calls emplace_back, not
// push_back; push_back is only exercised indirectly via to_argument_vector.
TEST_CASE("argument_vector push_back") {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(NO_CAPTURE, { vec.emplace_back(Py_None); }));
|
||||
}
|
||||
|
||||
// Calls reserve(ii) for ii in 0..3 on every sample (sizes 0..3) and checks
// that size and contents still match the std::vector<py::handle> reference.
// The requested capacities fall below, at, and above the inline size of 2.
TEST_CASE("argument_vector reserve") {
|
||||
for (std::size_t ii = 0; ii < 4; ++ii) {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(ii, { vec.reserve(ii); }));
|
||||
}
|
||||
}
|
||||
|
||||
// Combines the two mutations above: reserve(ii) for ii in 0..3 followed by
// appending a handle to Py_True (via emplace_back), compared against the same
// sequence applied to std::vector<py::handle>.
TEST_CASE("argument_vector reserve then push_back") {
|
||||
for (std::size_t ii = 0; ii < 4; ++ii) {
|
||||
mutation_test_with_samples(MUTATION_LAMBDA(ii, {
|
||||
vec.reserve(ii);
|
||||
vec.emplace_back(Py_True);
|
||||
}));
|
||||
}
|
||||
}
|
||||
@@ -94,8 +94,9 @@ PYBIND11_EMBEDDED_MODULE(throw_error_already_set, ) {
|
||||
TEST_CASE("PYTHONPATH is used to update sys.path") {
|
||||
// The setup for this TEST_CASE is in catch.cpp!
|
||||
auto sys_path = py::str(py::module_::import("sys").attr("path")).cast<std::string>();
|
||||
REQUIRE_THAT(sys_path,
|
||||
Catch::Matchers::Contains("pybind11_test_embed_PYTHONPATH_2099743835476552"));
|
||||
REQUIRE_THAT(
|
||||
sys_path,
|
||||
Catch::Matchers::Contains("pybind11_test_with_catch_PYTHONPATH_2099743835476552"));
|
||||
}
|
||||
|
||||
TEST_CASE("Pass classes and data between modules defined in C++ and Python") {
|
||||
Reference in New Issue
Block a user