From 3262000195616afedaf95185f1fe965c7eb82be6 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Sun, 5 Oct 2025 11:07:25 -0700 Subject: [PATCH] Add fast_type_map, use it authoritatively for local types and as a hint for global types (ABI breaking) (#5842) * Add fast_type_map, use it authoritatively for local types and as a hint for global types nanobind has a similar two-level lookup strategy, added and explained by https://github.com/wjakob/nanobind/commit/b515b1f7f2f4ecc0357818e6201c94a9f4cbfdc2 In this PR I've ported this approach to pybind11. To avoid an ABI break, I've kept the fast maps to the `local_internals`. I think this should be safe because any particular module should see its `local_internals` reset at least as often as the global `internals`, and misses in the fast "hint" map for global types fall back to the global `internals`. Performance seems to have improved. Using my patched fork of pybind11_benchmark (https://github.com/swolchok/pybind11_benchmark/tree/benchmark-updates, specifically commit hash b6613d12607104d547b1c10a8145d1b3e9937266), I run bench.py and observe the MyInt case. Each time, I do 3 runs and just report all 3. master, Mac: 75.9, 76.9, 75.3 nsec/loop this PR, Mac: 73.8, 73.8, 73.6 nsec/loop master, Linux box: 188, 187, 188 nsec/loop this PR, Linux box: 164, 165, 164 nsec/loop Note that the "real" percentage improvement is larger than implied by the above because master does not yet include #5824. * simplify unsafe_reset_local_internals in test * pre-implement PYBIND11_INTERNALS_VERSION 12 * use PYBIND11_INTERNALS_VERSION 12 on Python 3.14 per suggestion * Implement reviewer comments: revert PY_VERSION_HEX change, fix REVIEW comment, add two-level lookup comments. ci.yml coming separately * Use the inplace build to smoke test ABI bump? * [skip ci] Remove "smoke" from comment. This is full testing, just only on a few platforms. --------- Co-authored-by: Ralf W. Grosse-Kunstleve --- .github/workflows/ci.yml | 4 ++- include/pybind11/detail/class.h | 6 +++- include/pybind11/detail/internals.h | 24 +++++++++++++-- include/pybind11/detail/type_caster_base.h | 34 ++++++++++++++++++---- include/pybind11/pybind11.h | 22 ++++++++++---- tests/test_with_catch/external_module.cpp | 17 ++++++++++- 6 files changed, 91 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 460cd82e6..15f031ad6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -203,7 +203,8 @@ jobs: if: runner.os != 'Windows' run: echo "CMAKE_GENERATOR=Ninja" >> "$GITHUB_ENV" - # More-or-less randomly adding a few extra flags here + # More-or-less randomly adding a few extra flags here. + # In particular, use this one to test the next ABI bump (internals version). - name: Configure run: > cmake -S. -B. @@ -213,6 +214,7 @@ jobs: -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DCMAKE_CXX_STANDARD=14 + -DPYBIND11_INTERNALS_VERSION=10000000 # Checks to makes sure defining `_` is allowed # Triggers EHsc missing error on Windows diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h index 74b70489c..b2ee3cc69 100644 --- a/include/pybind11/detail/class.h +++ b/include/pybind11/detail/class.h @@ -221,10 +221,14 @@ extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { auto tindex = std::type_index(*tinfo->cpptype); internals.direct_conversions.erase(tindex); + auto &local_internals = get_local_internals(); if (tinfo->module_local) { - get_local_internals().registered_types_cpp.erase(tindex); + local_internals.registered_types_cpp.erase(tinfo->cpptype); } else { internals.registered_types_cpp.erase(tindex); +#if PYBIND11_INTERNALS_VERSION >= 12 + internals.registered_types_cpp_fast.erase(tinfo->cpptype); +#endif } internals.registered_types_py.erase(tinfo->type); diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index d23ee6ec9..ead330d28 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -39,7 +39,6 @@ /// further ABI-incompatible changes may be made before the ABI is officially /// changed to the new version. #ifndef PYBIND11_INTERNALS_VERSION -// REMINDER for next version bump: remove loader_life_support_tls # define PYBIND11_INTERNALS_VERSION 11 #endif @@ -177,6 +176,12 @@ struct type_equal_to { }; #endif +// For now, we don't bother adding a fancy hash for pointers and just +// let the standard library use the identity hash function if that's +// what it wants to do (e.g., as in libstdc++). +template +using fast_type_map = std::unordered_map; + template using type_map = std::unordered_map; @@ -237,6 +242,15 @@ struct internals { pymutex mutex; pymutex exception_translator_mutex; #endif +#if PYBIND11_INTERNALS_VERSION >= 12 + // non-normative but fast "hint" for registered_types_cpp. Meant + // to be used as the first level of a two-level lookup: successful + // lookups are correct, but unsuccessful lookups need to try + // registered_types_cpp and then backfill this map if they find + // anything. + fast_type_map registered_types_cpp_fast; +#endif + // std::type_index -> pybind11's type information type_map registered_types_cpp; // PyTypeObject* -> base type_info(s) @@ -261,7 +275,9 @@ struct internals { PyObject *instance_base = nullptr; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: thread_specific_storage tstate; +#if PYBIND11_INTERNALS_VERSION <= 11 thread_specific_storage loader_life_support_tls; // OBSOLETE (PR #5830) +#endif // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: PyInterpreterState *istate = nullptr; @@ -302,7 +318,11 @@ struct internals { // impact any other modules, because the only things accessing the local internals is the // module that contains them. struct local_internals { - type_map registered_types_cpp; + // It should be safe to use fast_type_map here because this entire + // data structure is scoped to our single module, and thus a single + // DSO and single instance of type_info for any particular type. + fast_type_map registered_types_cpp; + std::forward_list registered_exception_translators; PyTypeObject *function_record_py_type = nullptr; }; diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 3564bb242..a2512a552 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -205,21 +205,43 @@ PYBIND11_NOINLINE detail::type_info *get_type_info(PyTypeObject *type) { return bases.front(); } -inline detail::type_info *get_local_type_info(const std::type_index &tp) { - auto &locals = get_local_internals().registered_types_cpp; - auto it = locals.find(tp); +inline detail::type_info *get_local_type_info(const std::type_info &tp) { + const auto &locals = get_local_internals().registered_types_cpp; + auto it = locals.find(&tp); if (it != locals.end()) { return it->second; } return nullptr; } -inline detail::type_info *get_global_type_info(const std::type_index &tp) { +inline detail::type_info *get_global_type_info(const std::type_info &tp) { + // This is a two-level lookup. Hopefully we find the type info in + // registered_types_cpp_fast, but if not we try + // registered_types_cpp and fill registered_types_cpp_fast for + // next time. return with_internals([&](internals &internals) { detail::type_info *type_info = nullptr; +#if PYBIND11_INTERNALS_VERSION >= 12 + auto &fast_types = internals.registered_types_cpp_fast; +#endif auto &types = internals.registered_types_cpp; - auto it = types.find(tp); +#if PYBIND11_INTERNALS_VERSION >= 12 + auto fast_it = fast_types.find(&tp); + if (fast_it != fast_types.end()) { +# ifndef NDEBUG + auto types_it = types.find(std::type_index(tp)); + assert(types_it != types.end()); + assert(types_it->second == fast_it->second); +# endif + return fast_it->second; + } +#endif // PYBIND11_INTERNALS_VERSION >= 12 + + auto it = types.find(std::type_index(tp)); if (it != types.end()) { +#if PYBIND11_INTERNALS_VERSION >= 12 + fast_types.emplace(&tp, it->second); +#endif type_info = it->second; } return type_info; @@ -228,7 +250,7 @@ inline detail::type_info *get_global_type_info(const std::type_index &tp) { /// Return the type info for a given C++ type; on lookup failure can either throw or return /// nullptr. -PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_index &tp, +PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_info &tp, bool throw_if_missing = false) { if (auto *ltype = get_local_type_info(tp)) { return ltype; diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 515aab141..fcf10f199 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1637,10 +1637,14 @@ protected: with_internals([&](internals &internals) { auto tindex = std::type_index(*rec.type); tinfo->direct_conversions = &internals.direct_conversions[tindex]; + auto &local_internals = get_local_internals(); if (rec.module_local) { - get_local_internals().registered_types_cpp[tindex] = tinfo; + local_internals.registered_types_cpp[rec.type] = tinfo; } else { internals.registered_types_cpp[tindex] = tinfo; +#if PYBIND11_INTERNALS_VERSION >= 12 + internals.registered_types_cpp_fast[rec.type] = tinfo; +#endif } PYBIND11_WARNING_PUSH @@ -2138,10 +2142,18 @@ public: if (has_alias) { with_internals([&](internals &internals) { - auto &instances = record.module_local ? get_local_internals().registered_types_cpp - : internals.registered_types_cpp; - instances[std::type_index(typeid(type_alias))] - = instances[std::type_index(typeid(type))]; + auto &local_internals = get_local_internals(); + if (record.module_local) { + local_internals.registered_types_cpp[&typeid(type_alias)] + = local_internals.registered_types_cpp[&typeid(type)]; + } else { + type_info *const val + = internals.registered_types_cpp[std::type_index(typeid(type))]; + internals.registered_types_cpp[std::type_index(typeid(type_alias))] = val; +#if PYBIND11_INTERNALS_VERSION >= 12 + internals.registered_types_cpp_fast[&typeid(type_alias)] = val; +#endif + } }); } def("_pybind11_conduit_v1_", cpp_conduit_method); diff --git a/tests/test_with_catch/external_module.cpp b/tests/test_with_catch/external_module.cpp index 3465e8b37..933ee3a6f 100644 --- a/tests/test_with_catch/external_module.cpp +++ b/tests/test_with_catch/external_module.cpp @@ -6,11 +6,26 @@ namespace py = pybind11; * modules aren't preserved over a finalize/initialize. */ +namespace { +// Compare unsafe_reset_internals_for_single_interpreter in +// test_subinterpreter.cpp. +void unsafe_reset_local_internals() { + // NOTE: This code is NOT SAFE unless the caller guarantees no other threads are alive + // NOTE: This code is tied to the precise implementation of the internals holder + + py::detail::get_local_internals_pp_manager().unref(); + py::detail::get_local_internals(); +} +} // namespace + PYBIND11_MODULE(external_module, m, py::mod_gil_not_used(), py::multiple_interpreters::per_interpreter_gil()) { - + // At least one test ("Single Subinterpreter") wants to reset + // internals. We have separate local internals because we are a + // separate DSO, so ours need to be reset too! + unsafe_reset_local_internals(); class A { public: explicit A(int value) : v{value} {};