diff options
Diffstat (limited to 'thirdparty/embree/kernels/builders/primrefgen_presplit.h')
-rw-r--r-- | thirdparty/embree/kernels/builders/primrefgen_presplit.h | 568 |
1 files changed, 330 insertions, 238 deletions
diff --git a/thirdparty/embree/kernels/builders/primrefgen_presplit.h b/thirdparty/embree/kernels/builders/primrefgen_presplit.h index aa2026a85e..db9010995d 100644 --- a/thirdparty/embree/kernels/builders/primrefgen_presplit.h +++ b/thirdparty/embree/kernels/builders/primrefgen_presplit.h @@ -3,10 +3,12 @@ #pragma once -#include "../builders/primrefgen.h" +#include "../../common/algorithms/parallel_reduce.h" +#include "../../common/algorithms/parallel_sort.h" #include "../builders/heuristic_spatial.h" #include "../builders/splitter.h" +#include "../../common/algorithms/parallel_partition.h" #include "../../common/algorithms/parallel_for_for.h" #include "../../common/algorithms/parallel_for_for_prefix_sum.h" @@ -14,15 +16,87 @@ #define CHECK_PRESPLIT(x) #define GRID_SIZE 1024 +//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6 #define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5 #define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG) -#define PRIORITY_CUTOFF_THRESHOLD 1.0f +//#define PRIORITY_CUTOFF_THRESHOLD 2.0f #define PRIORITY_SPLIT_POS_WEIGHT 1.5f namespace embree { namespace isa { + struct SplittingGrid + { + __forceinline SplittingGrid(const BBox3fa& bounds) + { + base = bounds.lower; + const Vec3fa diag = bounds.size(); + extend = max(diag.x,max(diag.y,diag.z)); + scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend; + } + + __forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const + { + /* compute morton code */ + const Vec3fa lower = prim.lower; + const Vec3fa upper = prim.upper; + const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f); + const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f); + Vec3ia ilower(floor(glower)); + Vec3ia iupper(floor(gupper)); + + /* this ignores dimensions that are empty */ + iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)); + + /* compute a morton code for the lower and upper grid coordinates. */ + const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); + const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); + + /* if all bits are equal then we cannot split */ + if (unlikely(lower_code == upper_code)) + return false; + + /* compute octree level and dimension to perform the split in */ + const unsigned int diff = 31 - lzcnt(lower_code^upper_code); + const unsigned int level = diff / 3; + const unsigned int dim = diff % 3; + + /* now we compute the grid position of the split */ + const unsigned int isplit = iupper[dim] & ~((1<<level)-1); + + /* compute world space position of split */ + const float inv_grid_size = 1.0f / GRID_SIZE; + const float fsplit = base[dim] + isplit * inv_grid_size * extend; + assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit); + + dim_o = dim; + fsplit_o = fsplit; + return true; + } + + __forceinline Vec2i computeMC(const PrimRef& ref) const + { + const Vec3fa lower = ref.lower; + const Vec3fa upper = ref.upper; + const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f); + const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f); + Vec3ia ilower(floor(glower)); + Vec3ia iupper(floor(gupper)); + + /* this ignores dimensions that are empty */ + iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)); + + /* compute a morton code for the lower and upper grid coordinates. */ + const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); + const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); + return Vec2i(lower_code,upper_code); + } + + Vec3fa base; + float scale; + float extend; + }; struct PresplitItem { @@ -32,30 +106,30 @@ namespace embree }; unsigned int index; - __forceinline operator unsigned() const - { - return reinterpret_cast<const unsigned&>(priority); - } - __forceinline bool operator < (const PresplitItem& item) const - { - return (priority < item.priority); + __forceinline operator unsigned() const { + return data; } - template<typename Mesh> - __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc) + template<typename ProjectedPrimitiveAreaFunc> + __forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc) { - const unsigned int geomID = ref.geomID(); - const unsigned int primID = ref.primID(); const float area_aabb = area(ref.bounds()); - const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID); + const float area_prim = primitiveArea(ref); + if (area_prim == 0.0f) return 0.0f; const unsigned int diff = 31 - lzcnt(mc.x^mc.y); - assert(area_prim <= area_aabb); - //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f); - const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) )); + //assert(area_prim <= area_aabb); // may trigger due to numerical issues + const float area_diff = max(0.0f, area_aabb - area_prim); + //const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f); + const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) )); + //const float priority = sqrtf(sqrtf( area_diff ) ); + //const float priority = sqrtfarea_diff; + //const float priority = area_diff; // 104 fps !!!!!!!!!! + //const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps + //const float priority = area_aabb * max(area_aabb/area_prim,32.0f); + //const float priority = area_prim; assert(priority >= 0.0f && priority < FLT_LARGE); return priority; } - }; @@ -63,77 +137,96 @@ namespace embree return cout << "index " << item.index << " priority " << item.priority; }; - template<typename SplitterFactory> - void splitPrimitive(SplitterFactory &Splitter, - const PrimRef &prim, - const unsigned int geomID, - const unsigned int primID, - const unsigned int split_level, - const Vec3fa &grid_base, - const float grid_scale, - const float grid_extend, +#if 1 + + template<typename Splitter> + void splitPrimitive(const Splitter& splitter, + const PrimRef& prim, + const unsigned int splitprims, + const SplittingGrid& grid, PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], unsigned int& numSubPrims) { - assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG); - if (split_level == 0) + assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); + + if (splitprims == 1) { assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); subPrims[numSubPrims++] = prim; } else { - const Vec3fa lower = prim.lower; - const Vec3fa upper = prim.upper; - const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f); - const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f); - Vec3ia ilower(floor(glower)); - Vec3ia iupper(floor(gupper)); - - /* this ignores dimensions that are empty */ - iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper))); - - /* compute a morton code for the lower and upper grid coordinates. */ - const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); - const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); - - /* if all bits are equal then we cannot split */ - if(unlikely(lower_code == upper_code)) + unsigned int dim; float fsplit; + if (!grid.split_pos(prim, dim, fsplit)) { assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); subPrims[numSubPrims++] = prim; return; } - - /* compute octree level and dimension to perform the split in */ - const unsigned int diff = 31 - lzcnt(lower_code^upper_code); - const unsigned int level = diff / 3; - const unsigned int dim = diff % 3; + + /* split primitive */ + PrimRef left,right; + splitter(prim,dim,fsplit,left,right); + assert(!left.bounds().empty()); + assert(!right.bounds().empty()); + + const unsigned int splitprims_left = splitprims/2; + const unsigned int splitprims_right = splitprims - splitprims_left; + splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims); + splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims); + } + } + +#else + + template<typename Splitter> + void splitPrimitive(const Splitter& splitter, + const PrimRef& prim, + const unsigned int targetSubPrims, + const SplittingGrid& grid, + PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], + unsigned int& numSubPrims) + { + assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE); - /* now we compute the grid position of the split */ - const unsigned int isplit = iupper[dim] & ~((1<<level)-1); - - /* compute world space position of split */ - const float inv_grid_size = 1.0f / GRID_SIZE; - const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend; + auto compare = [] ( const PrimRef& a, const PrimRef& b ) { + return area(a.bounds()) < area(b.bounds()); + }; + + subPrims[numSubPrims++] = prim; + + while (numSubPrims < targetSubPrims) + { + /* get top heap element */ + std::pop_heap(subPrims+0,subPrims+numSubPrims, compare); + PrimRef top = subPrims[--numSubPrims]; - assert(prim.lower[dim] <= fsplit && - prim.upper[dim] >= fsplit); - + unsigned int dim; float fsplit; + if (!grid.split_pos(top, dim, fsplit)) + { + assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); + subPrims[numSubPrims++] = top; + return; + } + /* split primitive */ - const auto splitter = Splitter(prim); - BBox3fa left,right; - splitter(prim.bounds(),dim,fsplit,left,right); - assert(!left.empty()); - assert(!right.empty()); + PrimRef left,right; + splitter(top,dim,fsplit,left,right); + assert(!left.bounds().empty()); + assert(!right.bounds().empty()); - - splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); + subPrims[numSubPrims++] = left; + std::push_heap(subPrims+0, subPrims+numSubPrims, compare); + + subPrims[numSubPrims++] = right; + std::push_heap(subPrims+0, subPrims+numSubPrims, compare); } } - +#endif + +#if !defined(RTHWIF_STANDALONE) + template<typename Mesh, typename SplitterFactory> PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) { @@ -155,87 +248,40 @@ namespace embree } return pinfo; } +#endif - __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref) + template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector> + PrimInfo createPrimRefArray_presplit(size_t numPrimRefs, + PrimVector& prims, + const PrimInfo& pinfo, + const SplitPrimitiveFunc& splitPrimitive, + const ProjectedPrimitiveAreaFunc& primitiveArea) { - const Vec3fa lower = ref.lower; - const Vec3fa upper = ref.upper; - const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f); - const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f); - Vec3ia ilower(floor(glower)); - Vec3ia iupper(floor(gupper)); - - /* this ignores dimensions that are empty */ - iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)); - - /* compute a morton code for the lower and upper grid coordinates. */ - const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); - const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); - return Vec2i(lower_code,upper_code); - } - - template<typename Mesh, typename SplitterFactory> - PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { static const size_t MIN_STEP_SIZE = 128; - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator2 iter(scene,types,mblur); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != numPrimRefs) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - /* use correct number of primitives */ size_t numPrimitives = pinfo.size(); - const size_t alloc_numPrimitives = prims.size(); - const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives; - - /* set up primitive splitter */ - SplitterFactory Splitter(scene); - - - DBG_PRESPLIT( - const size_t org_numPrimitives = pinfo.size(); - PRINT(numPrimitives); - PRINT(alloc_numPrimitives); - PRINT(numSplitPrimitivesBudget); - ); + const size_t numPrimitivesExt = prims.size(); + const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives; /* allocate double buffer presplit items */ - const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives; - PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64); - PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64); + avector<PresplitItem> preSplitItem0(numPrimitivesExt); + avector<PresplitItem> preSplitItem1(numPrimitivesExt); /* compute grid */ - const Vec3fa grid_base = pinfo.geomBounds.lower; - const Vec3fa grid_diag = pinfo.geomBounds.size(); - const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z)); - const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend; - + SplittingGrid grid(pinfo.geomBounds); + /* init presplit items and get total sum */ const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float { float sum = 0.0f; for (size_t i=r.begin(); i<r.end(); i++) { - presplitItem[i].index = (unsigned int)i; - const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]); + preSplitItem0[i].index = (unsigned int)i; + const Vec2i mc = grid.computeMC(prims[i]); /* if all bits are equal then we cannot split */ - presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f; + preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f; /* FIXME: sum undeterministic */ - sum += presplitItem[i].priority; + sum += preSplitItem0[i].priority; } return sum; },[](const float& a, const float& b) -> float { return a+b; }); @@ -245,132 +291,178 @@ namespace embree parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void { for (size_t i=r.begin(); i<r.end(); i++) { - if (presplitItem[i].priority > 0.0f) - { - const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum; - if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims - { - presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f); - //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG); - assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG); - } - else - presplitItem[i].priority = 0.0f; + if (preSplitItem0[i].priority <= 0.0f) { + preSplitItem0[i].data = 1; + continue; } + + const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum; + if (rel_p < 1) { + preSplitItem0[i].data = 1; + continue; + } + + //preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f); + preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f); + preSplitItem0[i].data = 1 << preSplitItem0[i].data; + assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE); } }); - auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; }; - size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024); + auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; }; + size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024); + assert(center <= numPrimitives); /* anything to split ? */ - if (center < numPrimitives) + if (center >= numPrimitives) + return pinfo; + + size_t numPrimitivesToSplit = numPrimitives - center; + assert(preSplitItem0[center].data >= 1.0f); + + /* sort presplit items in ascending order */ + radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024); + + CHECK_PRESPLIT( + parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void { + for (size_t i=r.begin(); i<r.end(); i++) + assert(preSplitItem0[i-1].data <= preSplitItem0[i].data); + }); + ); + + unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data(); + unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit; + + /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */ + const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t { + size_t sum = 0; + for (size_t i=t.begin(); i<t.end(); i++) + { + const unsigned int primrefID = preSplitItem0[i].index; + const unsigned int splitprims = preSplitItem0[i].data; + assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); + + unsigned int numSubPrims = 0; + PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; + splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims); + assert(numSubPrims); + + numSubPrims--; // can reuse slot + sum+=numSubPrims; + preSplitItem0[i].data = (numSubPrims << 16) | splitprims; + + primOffset0[i-center] = numSubPrims; + } + return sum; + },[](const size_t& a, const size_t& b) -> size_t { return a+b; }); + + /* if we are over budget, need to shrink the range */ + if (totalNumSubPrims > numSplitPrimitivesBudget) { - size_t numPrimitivesToSplit = numPrimitives - center; - assert(presplitItem[center].priority >= 1.0f); - - /* sort presplit items in ascending order */ - radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024); - - CHECK_PRESPLIT( - parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void { - for (size_t i=r.begin(); i<r.end(); i++) - assert(presplitItem[i-1].priority <= presplitItem[i].priority); - }); - ); - - unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem; - unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit; - - /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */ - const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t { - size_t sum = 0; - for (size_t i=t.begin(); i<t.end(); i++) - { - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; - assert(presplitItem[i].priority >= 1.0f); - const unsigned int primrefID = presplitItem[i].index; - const float prio = presplitItem[i].priority; - const unsigned int geomID = prims[primrefID].geomID(); - const unsigned int primID = prims[primrefID].primID(); - const unsigned int split_levels = (unsigned int)prio; - unsigned int numSubPrims = 0; - splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - assert(numSubPrims); - numSubPrims--; // can reuse slot - sum+=numSubPrims; - presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels; - primOffset0[i-center] = numSubPrims; - } - return sum; - },[](const size_t& a, const size_t& b) -> size_t { return a+b; }); + size_t new_center = numPrimitives-1; + size_t sum = 0; + for (;new_center>=center;new_center--) + { + const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16; + if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break; + sum += numSubPrims; + } + new_center++; - /* if we are over budget, need to shrink the range */ - if (totalNumSubPrims > numSplitPrimitivesBudget) + primOffset0 += new_center - center; + numPrimitivesToSplit -= new_center - center; + center = new_center; + assert(numPrimitivesToSplit == (numPrimitives - center)); + } + + /* parallel prefix sum to compute offsets for storing sub-primitives */ + const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>()); + assert(numPrimitives+offset <= numPrimitivesExt); + + /* iterate over range, and split primitives into sub primitives and append them to prims array */ + parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void { + for (size_t j=rn.begin(); j<rn.end(); j++) { - size_t new_center = numPrimitives-1; - size_t sum = 0; - for (;new_center>=center;new_center--) - { - const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG; - if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break; - sum += numSubPrims; - } - new_center++; - - primOffset0 += new_center - center; - numPrimitivesToSplit -= new_center - center; - center = new_center; - assert(numPrimitivesToSplit == (numPrimitives - center)); + const unsigned int primrefID = preSplitItem0[j].index; + const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF; + assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); + + unsigned int numSubPrims = 0; + PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; + splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims); + + const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16; + assert(numSubPrims-1 == numSubPrimsExpected); + + const size_t newID = numPrimitives + primOffset1[j-center]; + assert(newID+numSubPrims-1 <= numPrimitivesExt); + + prims[primrefID] = subPrims[0]; + for (size_t i=1;i<numSubPrims;i++) + prims[newID+i-1] = subPrims[i]; } + }); - /* parallel prefix sum to compute offsets for storing sub-primitives */ - const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>()); - assert(numPrimitives+offset <= alloc_numPrimitives); - - /* iterate over range, and split primitives into sub primitives and append them to prims array */ - parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void { - for (size_t j=rn.begin(); j<rn.end(); j++) - { - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; - const unsigned int primrefID = presplitItem[j].index; - const unsigned int geomID = prims[primrefID].geomID(); - const unsigned int primID = prims[primrefID].primID(); - const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1); - - assert(split_levels); - assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG); - unsigned int numSubPrims = 0; - splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - const size_t newID = numPrimitives + primOffset1[j-center]; - assert(newID+numSubPrims-1 <= alloc_numPrimitives); - prims[primrefID] = subPrims[0]; - for (size_t i=1;i<numSubPrims;i++) - prims[newID+i-1] = subPrims[i]; - } - }); - - numPrimitives += offset; - DBG_PRESPLIT( - PRINT(pinfo.size()); - PRINT(numPrimitives); - PRINT((float)numPrimitives/org_numPrimitives)); - } + numPrimitives += offset; /* recompute centroid bounding boxes */ - pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo { + const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo { PrimInfo p(empty); for (size_t j=r.begin(); j<r.end(); j++) p.add_center2(prims[j]); return p; }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - assert(pinfo.size() == numPrimitives); + assert(pinfo1.size() == numPrimitives); - /* free double buffer presplit items */ - alignedFree(tmp_presplitItem); - alignedFree(presplitItem); - return pinfo; + return pinfo1; + } + +#if !defined(RTHWIF_STANDALONE) + + template<typename Mesh, typename SplitterFactory> + PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) + { + ParallelForForPrefixSumState<PrimInfo> pstate; + Scene::Iterator2 iter(scene,types,mblur); + + /* first try */ + progressMonitor(0); + pstate.init(iter,size_t(1024)); + PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { + return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID); + }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); + + /* if we need to filter out geometry, run again */ + if (pinfo.size() != numPrimRefs) + { + progressMonitor(0); + pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { + return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID); + }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); + } + + + SplitterFactory Splitter(scene); + + auto split_primitive = [&] (const PrimRef &prim, + const unsigned int splitprims, + const SplittingGrid& grid, + PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], + unsigned int& numSubPrims) + { + const auto splitter = Splitter(prim); + splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims); + }; + + auto primitiveArea = [&] (const PrimRef &ref) { + const unsigned int geomID = ref.geomID(); + const unsigned int primID = ref.primID(); + return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID); + }; + + return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea); } +#endif } } |