diff options
author | jfons <joan.fonssanchez@gmail.com> | 2021-05-20 12:49:33 +0200 |
---|---|---|
committer | jfons <joan.fonssanchez@gmail.com> | 2021-05-21 17:00:24 +0200 |
commit | 767e374dced69b45db0afb30ca2ccf0bbbeef672 (patch) | |
tree | a712cecc2c8cc2c6d6ecdc4a50020d423ddb4c0c /thirdparty/embree/common/algorithms/parallel_prefix_sum.h | |
parent | 42b6602f1d4b108cecb94b94c0d2b645acaebd4f (diff) | |
download | redot-engine-767e374dced69b45db0afb30ca2ccf0bbbeef672.tar.gz |
Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the
official repo instead of using Embree-aarch64.
`thirdparty/embree/patches/godot-changes.patch` should now contain
an accurate diff of the changes done to the library.
Diffstat (limited to 'thirdparty/embree/common/algorithms/parallel_prefix_sum.h')
-rw-r--r-- | thirdparty/embree/common/algorithms/parallel_prefix_sum.h | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/thirdparty/embree/common/algorithms/parallel_prefix_sum.h b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h new file mode 100644 index 0000000000..208bb4e480 --- /dev/null +++ b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h @@ -0,0 +1,85 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "parallel_for.h" + +namespace embree +{ + template<typename Value> + struct ParallelPrefixSumState + { + enum { MAX_TASKS = 64 }; + Value counts[MAX_TASKS]; + Value sums [MAX_TASKS]; + }; + + template<typename Index, typename Value, typename Func, typename Reduction> + __forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction) + { + /* calculate number of tasks to use */ + const size_t numThreads = TaskScheduler::threadCount(); + const size_t numBlocks = (last-first+minStepSize-1)/minStepSize; + const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS)); + + /* perform parallel prefix sum */ + parallel_for(taskCount, [&](const size_t taskIndex) + { + const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount; + const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount; + state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]); + }); + + /* calculate prefix sum */ + Value sum=identity; + for (size_t i=0; i<taskCount; i++) + { + const Value c = state.counts[i]; + state.sums[i] = sum; + sum=reduction(sum,c); + } + + return sum; + } + + /*! parallel calculation of prefix sums */ + template<typename SrcArray, typename DstArray, typename Value, typename Add> + __forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096) + { + /* perform single threaded prefix operation for small N */ + if (N < SINGLE_THREAD_THRESHOLD) + { + Value sum=identity; + for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum; + return sum; + } + + /* perform parallel prefix operation for large N */ + else + { + ParallelPrefixSumState<Value> state; + + /* initial run just sets up start values for subtasks */ + parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value { + + Value s = identity; + for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]); + return s; + + }, add); + + /* final run calculates prefix sum */ + return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value { + + Value s = identity; + for (size_t i=r.begin(); i<r.end(); i++) { + dst[i] = add(sum,s); + s = add(s,src[i]); + } + return s; + + }, add); + } + } +} |