mirror of
https://github.com/pybind/pybind11.git
synced 2026-03-14 20:27:47 +00:00
Limit busy-wait loops in per-subinterpreter GIL test
Add explicit timeouts to the busy-wait coordination loops in the Per-Subinterpreter GIL test in tests/test_with_catch/test_subinterpreter.cpp. Previously those loops spun indefinitely waiting for shared atomics like `started` and `sync` to change, which is fine when CPython's free-threading and per-interpreter GIL behavior matches the test's expectations but becomes pathologically bad when that behavior regresses: the `test_with_catch` executable can then hang forever, causing our 3.14t CI jobs to time out after 90 minutes. This change keeps the structure and intent of the test but adds a std::chrono::steady_clock deadline to each of the coordination loops, using a conservative 10 second bound. Worker threads record a failure and return if they hit the timeout, while the main thread fails the test via Catch2 instead of hanging. That way, if future CPython free-threading patches change the semantics again, the test will fail quickly and produced a diagnosable error instead of wedging the CI job.
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
PYBIND11_WARNING_DISABLE_MSVC(4996)
|
||||
|
||||
# include <catch.hpp>
|
||||
# include <chrono>
|
||||
# include <cstdlib>
|
||||
# include <fstream>
|
||||
# include <functional>
|
||||
@@ -309,6 +310,9 @@ TEST_CASE("Per-Subinterpreter GIL") {
|
||||
sync = 0;
|
||||
failure = 0;
|
||||
|
||||
using clock = std::chrono::steady_clock;
|
||||
constexpr auto wait_timeout = std::chrono::seconds(10);
|
||||
|
||||
// REQUIRE throws on failure, so we can't use it within the thread
|
||||
# define T_REQUIRE(status) \
|
||||
do { \
|
||||
@@ -318,8 +322,16 @@ TEST_CASE("Per-Subinterpreter GIL") {
|
||||
} while (0)
|
||||
|
||||
auto &&thread_main = [&](int num) {
|
||||
while (started == 0)
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
{
|
||||
auto deadline = clock::now() + wait_timeout;
|
||||
while (started == 0) {
|
||||
if (clock::now() > deadline) {
|
||||
T_REQUIRE(false);
|
||||
return;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
++started;
|
||||
|
||||
py::gil_scoped_acquire gil;
|
||||
@@ -370,15 +382,31 @@ TEST_CASE("Per-Subinterpreter GIL") {
|
||||
|
||||
// wait for something to set sync to our thread number
|
||||
// we are holding our subinterpreter's GIL
|
||||
while (sync != num)
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
{
|
||||
auto deadline = clock::now() + wait_timeout;
|
||||
while (sync != num) {
|
||||
if (clock::now() > deadline) {
|
||||
T_REQUIRE(false);
|
||||
return;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
// now change it so the next thread can move on
|
||||
++sync;
|
||||
|
||||
// but keep holding the GIL until after the next thread moves on as well
|
||||
while (sync == num + 1)
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
{
|
||||
auto deadline = clock::now() + wait_timeout;
|
||||
while (sync == num + 1) {
|
||||
if (clock::now() > deadline) {
|
||||
T_REQUIRE(false);
|
||||
return;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
// one last check before quitting the thread, the internals should be different
|
||||
auto sub_int
|
||||
@@ -395,8 +423,15 @@ TEST_CASE("Per-Subinterpreter GIL") {
|
||||
++started;
|
||||
|
||||
// ok now wait for the threads to start
|
||||
while (started != 3)
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
{
|
||||
auto deadline = clock::now() + wait_timeout;
|
||||
while (started != 3) {
|
||||
if (clock::now() > deadline) {
|
||||
FAIL("Timeout while waiting for worker threads to start");
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
// we still hold the main GIL, at this point both threads are waiting on the main GIL
|
||||
// IN THE CASE of free threading, the threads are waiting on sync (because there is no GIL)
|
||||
@@ -414,8 +449,15 @@ TEST_CASE("Per-Subinterpreter GIL") {
|
||||
sync = 1;
|
||||
|
||||
// wait for thread 2 to advance
|
||||
while (sync != 3)
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
{
|
||||
auto deadline = clock::now() + wait_timeout;
|
||||
while (sync != 3) {
|
||||
if (clock::now() > deadline) {
|
||||
FAIL("Timeout while waiting for sync to reach 3");
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
// we know now that thread 1 has run and may be finishing
|
||||
// and thread 2 is waiting for permission to advance
|
||||
|
||||
Reference in New Issue
Block a user