summary refs log tree commit diff stats
path: root/thirdparty/embree/kernels/subdiv/bezier_curve.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/kernels/subdiv/bezier_curve.h')
-rw-r--r-- thirdparty/embree/kernels/subdiv/bezier_curve.h 100
1 files changed, 79 insertions, 21 deletions
diff --git a/thirdparty/embree/kernels/subdiv/bezier_curve.h b/thirdparty/embree/kernels/subdiv/bezier_curve.h
index a5adad5cc9..257e0afd40 100644
--- a/thirdparty/embree/kernels/subdiv/bezier_curve.h
+++ b/thirdparty/embree/kernels/subdiv/bezier_curve.h
@@ -185,9 +185,10 @@ namespace embree
/* Builds a scalar curve from this one: each of the four control points
   v0..v3 is replaced by its dot product with dx, and the four results
   become the control values of a CubicBezierCurve<float>. */
__forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const {
return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
}
-
- __forceinline CubicBezierCurve<vfloatx> vxfm(const Vertex& dx) const {
- return CubicBezierCurve<vfloatx>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
+
+ template<int W> // W selects the vfloat<W> element type of the result; the removed version was fixed to vfloatx
+ __forceinline CubicBezierCurve<vfloat<W>> vxfm(const Vertex& dx) const { // same four dot products as xfm(dx), returned as a CubicBezierCurve<vfloat<W>>
+ return CubicBezierCurve<vfloat<W>>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx)); // each dot(vN,dx) is handed to the vfloat<W> curve constructor
}
__forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const {
@@ -286,7 +287,7 @@ namespace embree
{
const float u0 = 0.0f, u1 = 1.0f;
const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
+ const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
const Vec2vfx P3 = shift_right_1(P0);
const Vec2vfx dP3du = shift_right_1(dP0du);
@@ -299,7 +300,7 @@ namespace embree
{
const float u0 = u.lower, u1 = u.upper;
const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
+ const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
const Vec2vfx P3 = shift_right_1(P0);
const Vec2vfx dP3du = shift_right_1(dP0du);
@@ -307,6 +308,33 @@ namespace embree
const Vec2vfx P2 = P3 - dP3du;
return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
}
+
+ template<int W> // W: width parameter of the vfloat<W> / Vec2vf<W> types used below
+ __forceinline CubicBezierCurve<Vec2vf<W>> split(const BBox1f& u, int i, int N) const // mirrors the preceding vfloatx-based code, but with an explicit start index i and divisor N instead of the fixed VSIZEX-1 subdivision
+ {
+ const float u0 = u.lower, u1 = u.upper; // bounds of the parameter range being subdivided
+ const float dscale = (u1-u0)*(1.0f/(3.0f*N)); // (u1-u0)/(3*N): factor applied to the derivatives below; NOTE(review): N == 0 divides by zero here
+ const vfloat<W> vu0 = lerp(u0,u1,(vfloat<W>(i)+vfloat<W>(StepTy()))*(1.0f/N)); // parameters between u0 and u1; presumably lane k gets fraction (i+k)/N if StepTy() yields 0,1,2,... - TODO confirm
+ Vec2vf<W> P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vf<W>(dscale); // evalN fills P0 and dP0du (position and du-derivative, going by the names); the derivative is then scaled by dscale
+ const Vec2vf<W> P3 = shift_right_1(P0); // presumably moves lane k+1 into lane k, giving each segment's end point - confirm shift_right_1 semantics
+ const Vec2vf<W> dP3du = shift_right_1(dP0du); // same shift applied to the scaled derivatives
+ const Vec2vf<W> P1 = P0 + dP0du; // second control point: start point plus scaled start derivative
+ const Vec2vf<W> P2 = P3 - dP3du; // third control point: end point minus scaled end derivative
+ return CubicBezierCurve<Vec2vf<W>>(P0,P1,P2,P3); // the four control points form the returned curve
+ }
+
+ __forceinline CubicBezierCurve<Vec2f> split1(const BBox1f& u, int i, int N) const // scalar counterpart of split<W>(u,i,N): builds the control points for the single sub-range from fraction i/N to (i+1)/N of u
+ {
+ const float u0 = u.lower, u1 = u.upper; // bounds of the parameter range being subdivided
+ const float dscale = (u1-u0)*(1.0f/(3.0f*N)); // (u1-u0)/(3*N): factor applied to both derivatives; NOTE(review): N == 0 divides by zero here
+ const float vu0 = lerp(u0,u1,(float(i)+0)*(1.0f/N)); // start parameter: fraction i/N between u0 and u1
+ const float vu1 = lerp(u0,u1,(float(i)+1)*(1.0f/N)); // end parameter: fraction (i+1)/N between u0 and u1
+ Vec2fa P0, dP0du; eval(vu0,P0,dP0du); dP0du = dP0du * Vec2fa(dscale); // eval fills point and derivative at vu0; the derivative is then scaled by dscale
+ Vec2fa P3, dP3du; eval(vu1,P3,dP3du); dP3du = dP3du * Vec2fa(dscale); // same at vu1 (evaluated directly, no lane shift as in split<W>)
+ const Vec2fa P1 = P0 + dP0du; // second control point: start point plus scaled start derivative
+ const Vec2fa P2 = P3 - dP3du; // third control point: end point minus scaled end derivative
+ return CubicBezierCurve<Vec2f>(P0,P1,P2,P3); // NOTE(review): locals are Vec2fa but the result is a Vec2f curve; relies on a conversion - confirm this is intended
+ }
__forceinline void eval(float t, Vertex& p, Vertex& dp) const
{
@@ -461,20 +489,20 @@ namespace embree
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
}
- template<int M>
- __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
+ template<int M, typename Vec>
+ __forceinline void veval(const vfloat<M>& t, Vec& p, Vec& dp) const
{
- const Vec4vf<M> p00 = v0;
- const Vec4vf<M> p01 = v1;
- const Vec4vf<M> p02 = v2;
- const Vec4vf<M> p03 = v3;
+ const Vec p00 = v0;
+ const Vec p01 = v1;
+ const Vec p02 = v2;
+ const Vec p03 = v3;
- const Vec4vf<M> p10 = lerp(p00,p01,t);
- const Vec4vf<M> p11 = lerp(p01,p02,t);
- const Vec4vf<M> p12 = lerp(p02,p03,t);
- const Vec4vf<M> p20 = lerp(p10,p11,t);
- const Vec4vf<M> p21 = lerp(p11,p12,t);
- const Vec4vf<M> p30 = lerp(p20,p21,t);
+ const Vec p10 = lerp(p00,p01,t);
+ const Vec p11 = lerp(p01,p02,t);
+ const Vec p12 = lerp(p02,p03,t);
+ const Vec p20 = lerp(p10,p11,t);
+ const Vec p21 = lerp(p11,p12,t);
+ const Vec p30 = lerp(p20,p21,t);
p = p30;
dp = vfloat<M>(3.0f)*(p21-p20);
@@ -485,10 +513,17 @@ namespace embree
{
assert(size <= PrecomputedBezierBasis::N);
assert(ofs <= size);
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ assert(size > 0);
+ const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
+ Vec p,dp; veval<M>(t,p,dp);
+ return p;
+#else
return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0),
madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1),
madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2),
vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3))));
+#endif
}
template<int M, typename Vec = Vec4vf<M>>
@@ -496,10 +531,17 @@ namespace embree
{
assert(size <= PrecomputedBezierBasis::N);
assert(ofs <= size);
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ assert(size > 0);
+ const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
+ Vec p,dp; veval<M>(t,p,dp);
+ return p;
+#else
return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0),
madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1),
madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2),
vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3))));
+#endif
}
template<int M, typename Vec = Vec4vf<M>>
@@ -507,10 +549,17 @@ namespace embree
{
assert(size <= PrecomputedBezierBasis::N);
assert(ofs <= size);
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ assert(size > 0);
+ const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
+ Vec p,dp; veval<M>(t,p,dp);
+ return dp;
+#else
return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0),
madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1),
madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2),
vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3))));
+#endif
}
template<int M, typename Vec = Vec4vf<M>>
@@ -518,10 +567,17 @@ namespace embree
{
assert(size <= PrecomputedBezierBasis::N);
assert(ofs <= size);
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ assert(size > 0);
+ const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
+ Vec p,dp; veval<M>(t,p,dp);
+ return dp;
+#else
return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0),
madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1),
madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2),
vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3))));
+#endif
}
/* calculates bounds of bezier curve geometry */
@@ -532,7 +588,7 @@ namespace embree
Vec3vfx pl(pos_inf), pu(neg_inf);
for (int i=0; i<=N; i+=VSIZEX)
{
- vintx vi = vintx(i)+vintx(step);
+ vintx vi = vintx(i)+vintx(StepTy());
vboolx valid = vi <= vintx(N);
const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
@@ -554,7 +610,7 @@ namespace embree
Vec4vfx pl(pos_inf), pu(neg_inf);
for (int i=0; i<=N; i+=VSIZEX)
{
- vintx vi = vintx(i)+vintx(step);
+ vintx vi = vintx(i)+vintx(StepTy());
vboolx valid = vi <= vintx(N);
const Vec4vfx p = eval0<VSIZEX>(i,N);
const Vec4vfx dp = derivative0<VSIZEX>(i,N);
@@ -587,7 +643,7 @@ namespace embree
Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
for (int i=0; i<N; i+=VSIZEX)
{
- vboolx valid = vintx(i)+vintx(step) < vintx(N);
+ vboolx valid = vintx(i)+vintx(StepTy()) < vintx(N);
const Vec4vfx pi = eval0<VSIZEX>(i,N);
pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
@@ -645,6 +701,7 @@ namespace embree
typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa; // shorthand for the Vec2fa instantiation
typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa; // shorthand for the Vec3fa instantiation
typedef CubicBezierCurve<Vec3fa> BezierCurve3fa; // second name for the same type as CubicBezierCurve3fa
+ typedef CubicBezierCurve<Vec3ff> BezierCurve3ff; // alias added by this change for the Vec3ff instantiation
template<> __forceinline int CubicBezierCurve<float>::maxRoots() const
{
@@ -660,8 +717,9 @@ namespace embree
return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3);
}
+ struct CurveGeometry; // FIXME: this code should move !
template<typename CurveGeometry>
- __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
+ __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
{
return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),