summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Yuri Sizov <yuris@humnom.net> 2023-07-27 15:23:01 +0200
committer: Yuri Sizov <yuris@humnom.net> 2023-07-27 15:23:01 +0200
commit: 4e22ce827a7cf769df4bc773802c4279b4d2297e (patch)
tree: 2256cea96879e7251571fbd2f2812467070277c9
parent: 4aae6d3f235dbf3565eca45e025ddb3ecfaadb44 (diff)
parent: f34d580861a5b3942ffd7f24d37ccd4a8c58b524 (diff)
download: redot-engine-4e22ce827a7cf769df4bc773802c4279b4d2297e.tar.gz
Merge pull request #79953 from RandomShaper/revert_wtp_for_range
Revert "Implement parallel foreach() for easier multithreading"
-rw-r--r-- core/object/worker_thread_pool.h 21
-rw-r--r-- modules/raycast/raycast_occlusion_cull.cpp 33
-rw-r--r-- modules/raycast/raycast_occlusion_cull.h 10
-rw-r--r-- tests/core/threads/test_worker_thread_pool.h 26
4 files changed, 40 insertions, 50 deletions
diff --git a/core/object/worker_thread_pool.h b/core/object/worker_thread_pool.h
index 9fe8497eaf..d4d9387765 100644
--- a/core/object/worker_thread_pool.h
+++ b/core/object/worker_thread_pool.h
@@ -202,25 +202,4 @@ public:
~WorkerThreadPool();
};
-template <typename F>
-static _FORCE_INLINE_ void for_range(int i_begin, int i_end, bool parallel, String name, F f) {
- if (!parallel) {
- for (int i = i_begin; i < i_end; i++) {
- f(i);
- }
- return;
- }
-
- auto wrapper = [&](int i, void *unused) {
- f(i + i_begin);
- };
-
- WorkerThreadPool *wtp = WorkerThreadPool::get_singleton();
- WorkerThreadPool::GroupID gid = wtp->add_template_group_task(
- &wrapper, &decltype(wrapper)::operator(), nullptr,
- i_end - i_begin, -1,
- true, name);
- wtp->wait_for_group_task_completion(gid);
-}
-
#endif // WORKER_THREAD_POOL_H
diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp
index eee0de967e..69fbf87483 100644
--- a/modules/raycast/raycast_occlusion_cull.cpp
+++ b/modules/raycast/raycast_occlusion_cull.cpp
@@ -355,14 +355,41 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
// Embree requires the last element to be readable by a 16-byte SSE load instruction, so we add padding to be safe.
occ_inst->xformed_vertices.resize(vertices_size + 1);
- for_range(0, vertices_size, vertices_size > 1024, SNAME("RaycastOcclusionCull"), [&](const int i) {
- occ_inst->xformed_vertices[i] = occ_inst->xform.xform(occ->vertices[i]);
- });
+ const Vector3 *read_ptr = occ->vertices.ptr();
+ Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
+
+ if (vertices_size > 1024) {
+ TransformThreadData td;
+ td.xform = occ_inst->xform;
+ td.read = read_ptr;
+ td.write = write_ptr;
+ td.vertex_count = vertices_size;
+ td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
+ WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull"));
+ WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
+
+ } else {
+ _transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size);
+ }
occ_inst->indices.resize(occ->indices.size());
memcpy(occ_inst->indices.ptr(), occ->indices.ptr(), occ->indices.size() * sizeof(int32_t));
}
+void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data) {
+	const uint64_t vertex_total = p_data->vertex_count; // Worker p_thread transforms the p_thread-th of thread_count contiguous vertex slices.
+	const uint64_t total_threads = p_data->thread_count; // 64-bit math: p_thread * vertex_count could wrap around in uint32_t and yield wrong slice bounds.
+	const uint64_t from = p_thread * vertex_total / total_threads;
+	const uint64_t to = (uint64_t(p_thread) + 1) * vertex_total / total_threads; // Equals vertex_total exactly for the last slice, so no special case is needed.
+	_transform_vertices_range(p_data->read, p_data->write, p_data->xform, static_cast<int>(from), static_cast<int>(to)); // Half-open slice [from, to); the callee takes int bounds.
+}
+
+void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) {
+	for (int vertex = p_from; vertex < p_to; ++vertex) { // Half-open range [p_from, p_to); an empty range writes nothing.
+		p_write[vertex] = p_xform.xform(p_read[vertex]); // Output slot receives the transformed input from the same index.
+	}
+}
+
void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) {
Scenario *scenario = (Scenario *)p_ud;
int commit_idx = 1 - (scenario->current_scene_idx);
diff --git a/modules/raycast/raycast_occlusion_cull.h b/modules/raycast/raycast_occlusion_cull.h
index 7a5346878b..c4e733b664 100644
--- a/modules/raycast/raycast_occlusion_cull.h
+++ b/modules/raycast/raycast_occlusion_cull.h
@@ -121,6 +121,14 @@ private:
const uint32_t *masks;
};
+	struct TransformThreadData { // Job description read by _transform_vertices_thread(); every member now has a defined default.
+		uint32_t thread_count = 0; // Number of slices the vertex array is split into; also passed as the group task's element count.
+		uint32_t vertex_count = 0; // Total number of vertices to transform.
+		Transform3D xform; // Transform applied to every vertex.
+		const Vector3 *read = nullptr; // Source vertices.
+		Vector3 *write = nullptr; // Destination; element i receives the transform of read[i].
+	};
+
Thread *commit_thread = nullptr;
bool commit_done = true;
bool dirty = false;
@@ -136,6 +144,8 @@ private:
void _update_dirty_instance_thread(int p_idx, RID *p_instances);
void _update_dirty_instance(int p_idx, RID *p_instances);
+ void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
+ void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
static void _commit_scene(void *p_ud);
bool update();
diff --git a/tests/core/threads/test_worker_thread_pool.h b/tests/core/threads/test_worker_thread_pool.h
index ef0b475715..e9a762b57b 100644
--- a/tests/core/threads/test_worker_thread_pool.h
+++ b/tests/core/threads/test_worker_thread_pool.h
@@ -106,32 +106,6 @@ TEST_CASE("[WorkerThreadPool] Process elements using group tasks") {
}
}
-TEST_CASE("[WorkerThreadPool] Parallel foreach") {
- const int count_max = 256;
-
- for (int midpoint = 0; midpoint < count_max; midpoint++) {
- LocalVector<int> c;
- c.resize(count_max);
-
- for_range(0, count_max, true, String(), [&](int i) {
- c[i] = 1;
- });
- c.sort();
- CHECK(c[0] == 1);
- CHECK(c[0] == c[count_max - 1]);
-
- for_range(0, midpoint, false, String(), [&](int i) {
- c[i]++;
- });
- for_range(midpoint, count_max, true, String(), [&](int i) {
- c[i]++;
- });
- c.sort();
- CHECK(c[0] == 2);
- CHECK(c[0] == c[count_max - 1]);
- }
-}
-
} // namespace TestWorkerThreadPool
#endif // TEST_WORKER_THREAD_POOL_H