Files
pybind11/tests/test_with_catch/test_argument_vector.cpp
Scott Wolchok 30748f863f Avoid heap allocation for function calls with a small number of args (#5824)
* Avoid heap allocation for function calls with a small number of arguments

We don't have access to llvm::SmallVector or similar, but given the
limited subset of the `std::vector` API that
`function_call::args{,_convert}` need and the "reserve-then-fill"
usage pattern, it is relatively straightforward to implement custom
containers that get the job done.

Seems to improve the time to call the collatz function in
pybind/pybind11_benchmark significantly; numbers are a little noisy
but there's a clear improvement from "about 60 ns per call" to "about
45 ns per call" on my machine (M4 Max Mac), as measured with
`timeit.repeat('collatz(4)', 'from pybind11_benchmark import
collatz')`.

* clang-tidy

* more clang-tidy

* clang-tidy NOLINTBEGIN/END instead of NOLINTNEXTLINE

* forgot to increase inline size after removing std::variant

* constexpr arg_vector_small_size, use move instead of swap to hopefully clarify second_pass_convert

* rename test_embed to test_low_level

* rename test_low_level to test_with_catch

* Be careful to NOINLINE slow paths

* rename array/vector members to iarray/hvector. Move comment per request. Add static_asserts for our untagged union implementation per request.

* drop is_standard_layout assertions; see https://github.com/pybind/pybind11/pull/5824#issuecomment-3308616072
2025-09-19 13:44:40 -07:00

95 lines
3.1 KiB
C++

#include "pybind11/pybind11.h"
#include "catch.hpp"
namespace py = pybind11;
// The argument_vector instantiation exercised by the tests below.
// 2 is chosen because it is the smallest number (keeping tests short)
// where we can create non-empty vectors whose size is the inline size
// plus or minus 1 (i.e. sizes 1, 2 and 3 are all non-empty).
using argument_vector = py::detail::argument_vector<2>;
namespace {
// Copies the handles in `v`, in order, into a freshly constructed
// argument_vector. Capacity for v.size() elements is reserved before any
// element is appended.
argument_vector to_argument_vector(const std::vector<py::handle> &v) {
    argument_vector converted;
    converted.reserve(v.size());
    for (const py::handle &element : v) {
        converted.push_back(element);
    }
    return converted;
}
// Returns the reference data for the tests: four handle lists of sizes
// 0, 1, 2 and 3, built from the None, False and True singletons.
std::vector<std::vector<py::handle>> get_sample_argument_vector_contents() {
    const py::handle none_handle = py::handle(Py_None);
    const py::handle false_handle = py::handle(Py_False);
    const py::handle true_handle = py::handle(Py_True);
    return {
        {},
        {none_handle},
        {none_handle, false_handle},
        {none_handle, false_handle, true_handle},
    };
}
std::vector<argument_vector> get_sample_argument_vectors() {
std::vector<argument_vector> result;
for (const auto &vec : get_sample_argument_vector_contents()) {
result.push_back(to_argument_vector(vec));
}
return result;
}
// Asserts (via Catch's REQUIRE) that `actual` has the same number of elements
// as `expected` and that, position by position, the two containers hold
// handles whose ptr() values are equal (pointer identity, not Python-level
// equality).
void require_vector_matches_sample(const argument_vector &actual,
const std::vector<py::handle> &expected) {
// Size is checked first: a failing REQUIRE aborts the current test case, so
// the loop below never indexes `expected` past its end.
REQUIRE(actual.size() == expected.size());
for (size_t ii = 0; ii < actual.size(); ++ii) {
REQUIRE(actual[ii].ptr() == expected[ii].ptr());
}
}
// Differential test driver. For every sample, applies `actual_mutation_func`
// to the argument_vector and `expected_mutation_func` to the matching
// std::vector<py::handle>, then requires that both containers still hold the
// same handles. The two callables are expected to perform the same logical
// mutation on their respective container types.
template <typename ActualMutationFunc, typename ExpectedMutationFunc>
void mutation_test_with_samples(ActualMutationFunc actual_mutation_func,
                                ExpectedMutationFunc expected_mutation_func) {
    auto reference_vectors = get_sample_argument_vector_contents();
    auto vectors_under_test = get_sample_argument_vectors();
    const size_t sample_count = vectors_under_test.size();
    for (size_t idx = 0; idx < sample_count; ++idx) {
        auto &under_test = vectors_under_test[idx];
        auto &reference = reference_vectors[idx];
        actual_mutation_func(under_test);
        expected_mutation_func(reference);
        require_vector_matches_sample(under_test, reference);
    }
}
} // namespace
// I would like to write [capture](auto& vec) block inline, but we
// have to work with C++11, which doesn't have generic lambdas.
//
// MUTATION_LAMBDA(capture, block) expands to two comma-separated lambdas
// sharing the same capture list and body: the first takes an
// argument_vector &, the second a std::vector<py::handle> &. Because the
// expansion contains a top-level comma, it supplies two function arguments
// when used as the argument list of a call.
// Caveat: `block` must not contain a comma outside parentheses; braces do not
// shield commas from being treated as macro argument separators.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define MUTATION_LAMBDA(capture, block) \
[capture](argument_vector & vec) block, [capture](std::vector<py::handle> & vec) block
// NOLINTEND(bugprone-macro-parentheses)
// For readability, rather than having ugly empty arguments.
// Expands to nothing, so MUTATION_LAMBDA(NO_CAPTURE, ...) produces lambdas
// with an empty capture list `[]`.
#define NO_CAPTURE
TEST_CASE("check sample argument_vector contents") {
    // No-op mutation: verifies that the freshly built sample argument_vectors
    // already match the reference std::vector contents.
    mutation_test_with_samples(MUTATION_LAMBDA(NO_CAPTURE, {
        (void) vec;
    }));
}
TEST_CASE("argument_vector push_back") {
    // Appends a single Py_None element (via emplace_back) to every sample and
    // to its reference vector, then compares the results.
    mutation_test_with_samples(MUTATION_LAMBDA(NO_CAPTURE, {
        vec.emplace_back(Py_None);
    }));
}
TEST_CASE("argument_vector reserve") {
    // Requests capacities 0 through 3 on every sample (sizes 0 through 3) and
    // checks that reserve() leaves the stored handles unchanged.
    for (std::size_t capacity = 0; capacity < 4; ++capacity) {
        mutation_test_with_samples(MUTATION_LAMBDA(capacity, {
            vec.reserve(capacity);
        }));
    }
}
TEST_CASE("argument_vector reserve then push_back") {
    // Same capacity sweep as the plain reserve test, but appends Py_True
    // after reserving, so the append runs against each reserved capacity.
    for (std::size_t capacity = 0; capacity < 4; ++capacity) {
        mutation_test_with_samples(MUTATION_LAMBDA(capacity, {
            vec.reserve(capacity);
            vec.emplace_back(Py_True);
        }));
    }
}