summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRémi Verschelde <rverschelde@gmail.com>2023-05-15 13:42:52 +0200
committerRémi Verschelde <rverschelde@gmail.com>2023-05-15 13:42:52 +0200
commit1d83a4c5a588ae839983ed9a7227c7d2911a856c (patch)
tree3d1cd0ffbd5186a38da183abbd433696a8bec7b8
parent78f9da7a9fac3cf4388639f34f1671e6d7e87533 (diff)
parent6189ab5291e54dfe090a081cf292e3d6f9c6b8b1 (diff)
downloadredot-engine-1d83a4c5a588ae839983ed9a7227c7d2911a856c.tar.gz
Merge pull request #72249 from RandomShaper/robust_sync
Robustify multi-threading primitives
-rw-r--r--core/core_bind.cpp24
-rw-r--r--core/os/semaphore.h55
-rw-r--r--core/os/thread.cpp4
-rw-r--r--doc/classes/Mutex.xml6
-rw-r--r--doc/classes/Semaphore.xml4
-rw-r--r--doc/classes/Thread.xml5
6 files changed, 92 insertions, 6 deletions
diff --git a/core/core_bind.cpp b/core/core_bind.cpp
index 8fa7aad0ac..8afca5a2df 100644
--- a/core/core_bind.cpp
+++ b/core/core_bind.cpp
@@ -1178,14 +1178,30 @@ void Thread::_start_func(void *ud) {
String func_name = t->target_callable.is_custom() ? t->target_callable.get_custom()->get_as_text() : String(t->target_callable.get_method());
::Thread::set_name(func_name);
+ // To avoid a circular reference between the thread and the script which can possibly contain a reference
+ // to the thread, we will do the call (keeping a reference up to that point) and then break chains with it.
+ // When the call returns, we will reference the thread again if possible.
+ ObjectID th_instance_id = t->get_instance_id();
+ Callable target_callable = t->target_callable;
+ t = Ref<Thread>();
+
Callable::CallError ce;
- t->target_callable.callp(nullptr, 0, t->ret, ce);
- if (ce.error != Callable::CallError::CALL_OK) {
+ Variant ret;
+ target_callable.callp(nullptr, 0, ret, ce);
+ // If script properly kept a reference to the thread, we should be able to re-reference it now
+ // (well, or if the call failed, since we had to break chains anyway because the outcome isn't known upfront).
+ t = Ref<Thread>(ObjectDB::get_instance(th_instance_id));
+ if (t.is_valid()) {
+ t->ret = ret;
t->running.clear();
- ERR_FAIL_MSG("Could not call function '" + func_name + "' to start thread " + t->get_id() + ": " + Variant::get_callable_error_text(t->target_callable, nullptr, 0, ce) + ".");
+ } else {
+ // We could print a warning here, but the Thread object will be eventually destroyed
+ // noticing wait_to_finish() hasn't been called on it, and it will print a warning itself.
}
- t->running.clear();
+ if (ce.error != Callable::CallError::CALL_OK) {
+ ERR_FAIL_MSG("Could not call function '" + func_name + "' to start thread " + t->get_id() + ": " + Variant::get_callable_error_text(t->target_callable, nullptr, 0, ce) + ".");
+ }
}
Error Thread::start(const Callable &p_callable, Priority p_priority) {
diff --git a/core/os/semaphore.h b/core/os/semaphore.h
index a992a4587d..66dfb3ee02 100644
--- a/core/os/semaphore.h
+++ b/core/os/semaphore.h
@@ -33,6 +33,9 @@
#include "core/error/error_list.h"
#include "core/typedefs.h"
+#ifdef DEBUG_ENABLED
+#include "core/error/error_macros.h"
+#endif
#include <condition_variable>
#include <mutex>
@@ -42,6 +45,9 @@ private:
mutable std::mutex mutex;
mutable std::condition_variable condition;
mutable uint32_t count = 0; // Initialized as locked.
+#ifdef DEBUG_ENABLED
+ mutable uint32_t awaiters = 0;
+#endif
public:
_ALWAYS_INLINE_ void post() const {
@@ -52,10 +58,16 @@ public:
_ALWAYS_INLINE_ void wait() const {
std::unique_lock lock(mutex);
+#ifdef DEBUG_ENABLED
+ ++awaiters;
+#endif
while (!count) { // Handle spurious wake-ups.
condition.wait(lock);
}
- count--;
+ --count;
+#ifdef DEBUG_ENABLED
+ --awaiters;
+#endif
}
_ALWAYS_INLINE_ bool try_wait() const {
@@ -67,6 +79,47 @@ public:
return false;
}
}
+
+#ifdef DEBUG_ENABLED
+ ~Semaphore() {
+ // Destroying an std::condition_variable when not all threads waiting on it have been notified
+ // invokes undefined behavior (e.g., it may be nicely destroyed or it may be awaited forever.)
+ // That means other threads could still be running the body of std::condition_variable::wait()
+ // but already past the safety checkpoint. That's the case for instance if that function is already
+ // waiting to lock again.
+ //
+ // We will make the rule a bit more restrictive and simpler to understand at the same time: there
+ // should not be any threads at any stage of the waiting by the time the semaphore is destroyed.
+ //
+ // We do so because of the following reasons:
+ // - We have the guideline that threads must be awaited (i.e., completed), so the waiting thread
+ // must be completely done by the time the thread controlling it finally destroys the semaphore.
+ // Therefore, only a coding mistake could make the program run into such a attempt at premature
+ // destruction of the semaphore.
+ // - In scripting, given that Semaphores are wrapped by RefCounted classes, in general it can't
+ // happen that a thread is trying to destroy a Semaphore while another is still doing whatever with
+ // it, so the simplification is mostly transparent to script writers.
+ // - The redefined rule can be checked for failure to meet it, which is what this implementation does.
+ // This is useful to detect a few cases of potential misuse; namely:
+ // a) In scripting:
+ // * The coder is naughtily dealing with the reference count causing a semaphore to die prematurely.
+ // * The coder is letting the project reach its termination without having cleanly finished threads
+ // that await on semaphores (or at least, let the usual semaphore-controlled loop exit).
+ // b) In the native side, where Semaphore is not a ref-counted beast and certain coding mistakes can
+ // lead to its premature destruction as well.
+ //
+ // Let's let users know they are doing it wrong, but apply a, somewhat hacky, countermeasure against UB
+ // in debug builds.
+ std::lock_guard lock(mutex);
+ if (awaiters) {
+ WARN_PRINT(
+ "A Semaphore object is being destroyed while one or more threads are still waiting on it.\n"
+ "Please call post() on it as necessary to prevent such a situation and so ensure correct cleanup.");
+ // And now, the hacky countermeasure (i.e., leak the condition variable).
+ new (&condition) std::condition_variable();
+ }
+ }
+#endif
};
#endif // SEMAPHORE_H
diff --git a/core/os/thread.cpp b/core/os/thread.cpp
index c067ad1a6a..03e2c5409d 100644
--- a/core/os/thread.cpp
+++ b/core/os/thread.cpp
@@ -101,7 +101,9 @@ Thread::Thread() {
Thread::~Thread() {
if (id != UNASSIGNED_ID) {
#ifdef DEBUG_ENABLED
- WARN_PRINT("A Thread object has been destroyed without wait_to_finish() having been called on it. Please do so to ensure correct cleanup of the thread.");
+ WARN_PRINT(
+ "A Thread object is being destroyed without its completion having been realized.\n"
+ "Please call wait_to_finish() on it to ensure correct cleanup.");
#endif
thread.detach();
}
diff --git a/doc/classes/Mutex.xml b/doc/classes/Mutex.xml
index 78202229ed..ab1fad9624 100644
--- a/doc/classes/Mutex.xml
+++ b/doc/classes/Mutex.xml
@@ -5,6 +5,11 @@
</brief_description>
<description>
A synchronization mutex (mutual exclusion). This is used to synchronize multiple [Thread]s, and is equivalent to a binary [Semaphore]. It guarantees that only one thread can ever acquire the lock at a time. A mutex can be used to protect a critical section; however, be careful to avoid deadlocks.
+ It's of the recursive kind, so it can be locked multiple times by one thread, provided it also unlocks it as many times.
+ [b]Warning:[/b]
+ To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met:
+ - By the time a [Mutex]'s reference count reaches zero and therefore it is destroyed, no threads (including the one on which the destruction will happen) must have it locked.
+ - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not have any mutex locked.
</description>
<tutorials>
<link title="Using multiple threads">$DOCS_URL/tutorials/performance/using_multiple_threads.html</link>
@@ -29,6 +34,7 @@
<description>
Unlocks this [Mutex], leaving it to other threads.
[b]Note:[/b] If a thread called [method lock] or [method try_lock] multiple times while already having ownership of the mutex, it must also call [method unlock] the same number of times in order to unlock it correctly.
+ [b]Warning:[/b] Calling [method unlock] more times that [method lock] on a given thread, thus ending up trying to unlock a non-locked mutex, is wrong and may causes crashes or deadlocks.
</description>
</method>
</methods>
diff --git a/doc/classes/Semaphore.xml b/doc/classes/Semaphore.xml
index c95917b7bb..32eb69fe8c 100644
--- a/doc/classes/Semaphore.xml
+++ b/doc/classes/Semaphore.xml
@@ -5,6 +5,10 @@
</brief_description>
<description>
A synchronization semaphore which can be used to synchronize multiple [Thread]s. Initialized to zero on creation. Be careful to avoid deadlocks. For a binary version, see [Mutex].
+ [b]Warning:[/b]
+ To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met:
+ - By the time a [Semaphore]'s reference count reaches zero and therefore it is destroyed, no threads must be waiting on it.
+ - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not be waiting on any semaphore.
</description>
<tutorials>
<link title="Using multiple threads">$DOCS_URL/tutorials/performance/using_multiple_threads.html</link>
diff --git a/doc/classes/Thread.xml b/doc/classes/Thread.xml
index 2b4ef65f0c..fcb803b15a 100644
--- a/doc/classes/Thread.xml
+++ b/doc/classes/Thread.xml
@@ -6,6 +6,11 @@
<description>
A unit of execution in a process. Can run methods on [Object]s simultaneously. The use of synchronization via [Mutex] or [Semaphore] is advised if working with shared objects.
[b]Note:[/b] Breakpoints won't break on code if it's running in a thread. This is a current limitation of the GDScript debugger.
+ [b]Warning:[/b]
+ To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met by the time a [Thread]'s reference count reaches zero and therefore it is destroyed:
+ - It must not have any [Mutex] objects locked.
+ - It must not be waiting on any [Semaphore] objects.
+ - [method wait_to_finish] should have been called on it.
</description>
<tutorials>
<link title="Using multiple threads">$DOCS_URL/tutorials/performance/using_multiple_threads.html</link>