summaryrefslogtreecommitdiffstats
path: root/thirdparty/embree/common/math
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/math')
-rw-r--r--thirdparty/embree/common/math/affinespace.h2
-rw-r--r--thirdparty/embree/common/math/bbox.h5
-rw-r--r--thirdparty/embree/common/math/col3.h2
-rw-r--r--thirdparty/embree/common/math/col4.h2
-rw-r--r--thirdparty/embree/common/math/color.h10
-rw-r--r--thirdparty/embree/common/math/color_sycl.h219
-rw-r--r--thirdparty/embree/common/math/emath.h (renamed from thirdparty/embree/common/math/math.h)15
-rw-r--r--thirdparty/embree/common/math/lbbox.h42
-rw-r--r--thirdparty/embree/common/math/linearspace2.h1
-rw-r--r--thirdparty/embree/common/math/linearspace3.h8
-rw-r--r--thirdparty/embree/common/math/math_sycl.h279
-rw-r--r--thirdparty/embree/common/math/range.h2
-rw-r--r--thirdparty/embree/common/math/vec2.h5
-rw-r--r--thirdparty/embree/common/math/vec2fa.h9
-rw-r--r--thirdparty/embree/common/math/vec2fa_sycl.h270
-rw-r--r--thirdparty/embree/common/math/vec3.h23
-rw-r--r--thirdparty/embree/common/math/vec3ba.h9
-rw-r--r--thirdparty/embree/common/math/vec3ba_sycl.h115
-rw-r--r--thirdparty/embree/common/math/vec3fa.h10
-rw-r--r--thirdparty/embree/common/math/vec3fa_sycl.h617
-rw-r--r--thirdparty/embree/common/math/vec3ia.h9
-rw-r--r--thirdparty/embree/common/math/vec3ia_sycl.h178
-rw-r--r--thirdparty/embree/common/math/vec4.h25
23 files changed, 1841 insertions, 16 deletions
diff --git a/thirdparty/embree/common/math/affinespace.h b/thirdparty/embree/common/math/affinespace.h
index 9d4a0f0846..f3e5404639 100644
--- a/thirdparty/embree/common/math/affinespace.h
+++ b/thirdparty/embree/common/math/affinespace.h
@@ -337,7 +337,7 @@ namespace embree
if (D) *D = sqrtf(D_x);
return true;
}
-
+
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
{
Vec3fa::storeu(&ptr->l.vx, source.l.vx);
diff --git a/thirdparty/embree/common/math/bbox.h b/thirdparty/embree/common/math/bbox.h
index e4eb3df9a4..651b29a8fe 100644
--- a/thirdparty/embree/common/math/bbox.h
+++ b/thirdparty/embree/common/math/bbox.h
@@ -56,6 +56,11 @@ namespace embree
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
}
+ /*! intersects two boxes */
+ __forceinline static const BBox intersect (const BBox& a, const BBox& b) {
+ return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
+ }
+
/*! enlarge box by some scaling factor */
__forceinline BBox enlarge_by(const float a) const {
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
diff --git a/thirdparty/embree/common/math/col3.h b/thirdparty/embree/common/math/col3.h
index 3f50c04393..4576bc517d 100644
--- a/thirdparty/embree/common/math/col3.h
+++ b/thirdparty/embree/common/math/col3.h
@@ -3,7 +3,7 @@
#pragma once
-#include "math.h"
+#include "emath.h"
namespace embree
{
diff --git a/thirdparty/embree/common/math/col4.h b/thirdparty/embree/common/math/col4.h
index 788508516b..4ef916cc3a 100644
--- a/thirdparty/embree/common/math/col4.h
+++ b/thirdparty/embree/common/math/col4.h
@@ -3,7 +3,7 @@
#pragma once
-#include "math.h"
+#include "emath.h"
namespace embree
{
diff --git a/thirdparty/embree/common/math/color.h b/thirdparty/embree/common/math/color.h
index e62e4ad2a4..8b28ff9447 100644
--- a/thirdparty/embree/common/math/color.h
+++ b/thirdparty/embree/common/math/color.h
@@ -3,6 +3,10 @@
#pragma once
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "color_sycl.h"
+#else
+
#include "constants.h"
#include "col3.h"
#include "col4.h"
@@ -64,6 +68,10 @@ namespace embree
d.b = (unsigned char)(s[2]);
d.a = (unsigned char)(s[3]);
}
+ __forceinline void set(float &f) const
+ {
+ f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
+ }
////////////////////////////////////////////////////////////////////////////////
/// Constants
@@ -256,3 +264,5 @@ namespace embree
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}
+
+#endif
diff --git a/thirdparty/embree/common/math/color_sycl.h b/thirdparty/embree/common/math/color_sycl.h
new file mode 100644
index 0000000000..41b89ddecc
--- /dev/null
+++ b/thirdparty/embree/common/math/color_sycl.h
@@ -0,0 +1,219 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "constants.h"
+#include "col3.h"
+#include "col4.h"
+
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE RGBA Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct Color4
+ {
+ struct { float r,g,b,a; };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color4 () {}
+ //__forceinline Color4 ( const __m128 a ) : m128(a) {}
+
+ __forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
+ __forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
+
+ __forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
+ __forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
+ __forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
+ __forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}
+
+ //__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
+ //__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Set
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
+ __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
+
+ __forceinline void set(Col3uc& d) const
+ {
+ d.r = (unsigned char)(clamp(r)*255.0f);
+ d.g = (unsigned char)(clamp(g)*255.0f);
+ d.b = (unsigned char)(clamp(b)*255.0f);
+ }
+
+ __forceinline void set(Col4uc& d) const
+ {
+ d.r = (unsigned char)(clamp(r)*255.0f);
+ d.g = (unsigned char)(clamp(g)*255.0f);
+ d.b = (unsigned char)(clamp(b)*255.0f);
+ d.a = (unsigned char)(clamp(a)*255.0f);
+ }
+ __forceinline void set(float &f) const
+ {
+ f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
+ __forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
+ //__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ //__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE RGB Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct Color
+ {
+ struct { float r,g,b; };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color () {}
+ //__forceinline Color ( const __m128 a ) : m128(a) {}
+
+ __forceinline explicit Color (const float v) : r(v), g(v), b(v) {}
+ __forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {}
+
+ //__forceinline Color ( const Color& other ) : m128(other.m128) {}
+ //__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
+
+ //__forceinline Color ( const Color4& other ) : m128(other.m128) {}
+ //__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Set
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
+ __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
+
+#if 0
+ __forceinline void set(Col3uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ }
+ __forceinline void set(Col4uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ d.a = 255;
+ }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {}
+ __forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {}
+ //__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ //__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator +( const Color& a ) { return a; }
+ __forceinline const Color operator -( const Color& a ) { return Color(-a.r, -a.g, -a.b); }
+ __forceinline const Color abs ( const Color& a ) { return Color(abs(a.r), abs(a.g), abs(a.b)); }
+ __forceinline const Color rcp ( const Color& a ) { return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b); }
+ __forceinline const Color rsqrt( const Color& a ) { return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b)); }
+ __forceinline const Color sqrt ( const Color& a ) { return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator +( const Color& a, const Color& b ) { return Color(a.r+b.r, a.g+b.g, a.b+b.b); }
+ __forceinline const Color operator -( const Color& a, const Color& b ) { return Color(a.r-b.r, a.g-b.g, a.b-b.b); }
+ __forceinline const Color operator *( const Color& a, const Color& b ) { return Color(a.r*b.r, a.g*b.g, a.b*b.b); }
+ __forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
+ __forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
+ __forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
+ __forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
+
+ __forceinline const Color min( const Color& a, const Color& b ) { return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b)); }
+ __forceinline const Color max( const Color& a, const Color& b ) { return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
+ __forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
+ __forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
+ __forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
+ __forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
+ __forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
+ __forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
+ __forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
+ __forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; }
+ __forceinline bool operator !=( const Color& a, const Color& b ) { return a.r != b.r || a.g != b.g || a.b != b.b; }
+ __forceinline bool operator < ( const Color& a, const Color& b ) {
+ if (a.r != b.r) return a.r < b.r;
+ if (a.g != b.g) return a.g < b.g;
+ if (a.b != b.b) return a.b < b.b;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color select( bool s, const Color& t, const Color& f ) {
+ return s ? t : f;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Special Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ /*! computes luminance of a color */
+ __forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
+
+ /*! output operator */
+ inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
+ return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
+ }
+}
diff --git a/thirdparty/embree/common/math/math.h b/thirdparty/embree/common/math/emath.h
index 7930c17727..22a89a7669 100644
--- a/thirdparty/embree/common/math/math.h
+++ b/thirdparty/embree/common/math/emath.h
@@ -8,6 +8,10 @@
#include "constants.h"
#include <cmath>
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "math_sycl.h"
+#else
+
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
@@ -44,6 +48,9 @@ namespace embree
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
+ __forceinline int asInt (const float& a) { return *((int*)&a); }
+ __forceinline float asFloat(const int& a) { return *((float*)&a); }
+
#if defined(__WIN32__)
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
@@ -351,7 +358,11 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
- __forceinline bool all(bool s) { return s; }
+ __forceinline bool none(bool s) { return !s; }
+ __forceinline bool all (bool s) { return s; }
+ __forceinline bool any (bool s) { return s; }
+
+ __forceinline unsigned movemask (bool s) { return (unsigned)s; }
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
@@ -453,3 +464,5 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
return x | (y << 1) | (z << 2);
}
}
+
+#endif
diff --git a/thirdparty/embree/common/math/lbbox.h b/thirdparty/embree/common/math/lbbox.h
index 2b397a05c8..7619199780 100644
--- a/thirdparty/embree/common/math/lbbox.h
+++ b/thirdparty/embree/common/math/lbbox.h
@@ -179,6 +179,48 @@ namespace embree
bounds1 = b1;
}
+ /*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
+ __forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
+ {
+ const BBox3f bounds0 = lbounds.bounds0;
+ const BBox3f bounds1 = lbounds.bounds1;
+
+ /* normalize global target_time_range to local time_range_in */
+ const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
+ (target_time_range.upper-time_range_in.lower)/time_range_in.size());
+
+ const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
+
+ /* compute bounds at begin and end of clipped time range */
+ BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
+ BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
+
+ /* make sure that b0 is properly bounded at time_range_in.lower */
+ {
+ const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
+ const T dlower = min(bounds0.lower-bt.lower, T(zero));
+ const T dupper = max(bounds0.upper-bt.upper, T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+
+ /* make sure that b1 is properly bounded at time_range_in.upper */
+ {
+ const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
+ const T dlower = min(bounds1.lower-bt.lower, T(zero));
+ const T dupper = max(bounds1.upper-bt.upper, T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+
+ this->bounds0 = b0;
+ this->bounds1 = b1;
+ }
+
+ /*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
+ __forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
+ : LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
+
public:
__forceinline bool empty() const {
diff --git a/thirdparty/embree/common/math/linearspace2.h b/thirdparty/embree/common/math/linearspace2.h
index 184ee695fb..e58f61ea6b 100644
--- a/thirdparty/embree/common/math/linearspace2.h
+++ b/thirdparty/embree/common/math/linearspace2.h
@@ -18,6 +18,7 @@ namespace embree
/*! default matrix constructor */
__forceinline LinearSpace2 ( ) {}
+
__forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
__forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }
diff --git a/thirdparty/embree/common/math/linearspace3.h b/thirdparty/embree/common/math/linearspace3.h
index 9eaa2cc2bb..f6d2318fa0 100644
--- a/thirdparty/embree/common/math/linearspace3.h
+++ b/thirdparty/embree/common/math/linearspace3.h
@@ -19,6 +19,7 @@ namespace embree
/*! default matrix constructor */
__forceinline LinearSpace3 ( ) {}
+
__forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
__forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
@@ -90,17 +91,20 @@ namespace embree
Vector vx,vy,vz;
};
+#if !defined(__SYCL_DEVICE_ONLY__)
+
/*! compute transposed matrix */
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
}
-
+#endif
+
template<typename T>
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
return xfm.transposed();
}
-
+
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
diff --git a/thirdparty/embree/common/math/math_sycl.h b/thirdparty/embree/common/math/math_sycl.h
new file mode 100644
index 0000000000..ffb047569c
--- /dev/null
+++ b/thirdparty/embree/common/math/math_sycl.h
@@ -0,0 +1,279 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/platform.h"
+#include "../sys/intrinsics.h"
+#include "constants.h"
+#include <cmath>
+
+namespace embree
+{
+ __forceinline bool isvalid ( const float& v ) {
+ return (v > -FLT_LARGE) & (v < +FLT_LARGE);
+ }
+
+ __forceinline int cast_f2i(float f) {
+ return __builtin_bit_cast(int,f);
+ }
+
+ __forceinline float cast_i2f(int i) {
+ return __builtin_bit_cast(float,i);
+ }
+
+ __forceinline int toInt (const float& a) { return int(a); }
+ __forceinline float toFloat(const int& a) { return float(a); }
+
+ __forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); }
+ __forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); }
+
+ //__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
+ __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
+ __forceinline float sqr ( const float x ) { return x*x; }
+
+ __forceinline float rcp ( const float x ) {
+ return sycl::native::recip(x);
+ }
+
+ __forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); }
+ //__forceinline float signmsk ( const float x ) {
+ // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
+ //}
+ //__forceinline float xorf( const float x, const float y ) {
+ // return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
+ //}
+ //__forceinline float andf( const float x, const unsigned y ) {
+ // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
+ //}
+
+ __forceinline float rsqrt( const float x ) {
+ return sycl::rsqrt(x);
+ }
+
+ //__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
+ //__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
+ //__forceinline int roundf(float f) { return (int)(f + 0.5f); }
+
+ __forceinline float abs ( const float x ) { return sycl::fabs(x); }
+ __forceinline float acos ( const float x ) { return sycl::acos(x); }
+ __forceinline float asin ( const float x ) { return sycl::asin(x); }
+ __forceinline float atan ( const float x ) { return sycl::atan(x); }
+ __forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }
+ __forceinline float cos ( const float x ) { return sycl::cos(x); }
+ __forceinline float cosh ( const float x ) { return sycl::cosh(x); }
+ __forceinline float exp ( const float x ) { return sycl::exp(x); }
+ __forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }
+ __forceinline float log ( const float x ) { return sycl::log(x); }
+ __forceinline float log10( const float x ) { return sycl::log10(x); }
+ __forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }
+ __forceinline float sin ( const float x ) { return sycl::sin(x); }
+ __forceinline float sinh ( const float x ) { return sycl::sinh(x); }
+ __forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }
+ __forceinline float tan ( const float x ) { return sycl::tan(x); }
+ __forceinline float tanh ( const float x ) { return sycl::tanh(x); }
+ __forceinline float floor( const float x ) { return sycl::floor(x); }
+ __forceinline float ceil ( const float x ) { return sycl::ceil(x); }
+ __forceinline float frac ( const float x ) { return x-floor(x); }
+
+ //__forceinline double abs ( const double x ) { return ::fabs(x); }
+ //__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
+ //__forceinline double acos ( const double x ) { return ::acos (x); }
+ //__forceinline double asin ( const double x ) { return ::asin (x); }
+ //__forceinline double atan ( const double x ) { return ::atan (x); }
+ //__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
+ //__forceinline double cos ( const double x ) { return ::cos (x); }
+ //__forceinline double cosh ( const double x ) { return ::cosh (x); }
+ //__forceinline double exp ( const double x ) { return ::exp (x); }
+ //__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
+ //__forceinline double log ( const double x ) { return ::log (x); }
+ //__forceinline double log10( const double x ) { return ::log10(x); }
+ //__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
+ //__forceinline double rcp ( const double x ) { return 1.0/x; }
+ //__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
+ //__forceinline double sin ( const double x ) { return ::sin (x); }
+ //__forceinline double sinh ( const double x ) { return ::sinh (x); }
+ //__forceinline double sqr ( const double x ) { return x*x; }
+ //__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
+ //__forceinline double tan ( const double x ) { return ::tan (x); }
+ //__forceinline double tanh ( const double x ) { return ::tanh (x); }
+ //__forceinline double floor( const double x ) { return ::floor (x); }
+ //__forceinline double ceil ( const double x ) { return ::ceil (x); }
+
+/*
+#if defined(__SSE4_1__)
+ __forceinline float mini(float a, float b) {
+ const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
+ const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
+ const __m128i ci = _mm_min_epi32(ai,bi);
+ return _mm_cvtss_f32(_mm_castsi128_ps(ci));
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline float maxi(float a, float b) {
+ const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
+ const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
+ const __m128i ci = _mm_max_epi32(ai,bi);
+ return _mm_cvtss_f32(_mm_castsi128_ps(ci));
+ }
+#endif
+*/
+
+ template<typename T>
+ __forceinline T twice(const T& a) { return a+a; }
+
+ __forceinline int min(int a, int b) { return sycl::min(a,b); }
+ __forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }
+ __forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }
+ __forceinline float min(float a, float b) { return sycl::fmin(a,b); }
+ __forceinline double min(double a, double b) { return sycl::fmin(a,b); }
+#if defined(__X86_64__)
+ __forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }
+#endif
+
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
+
+// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
+// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
+// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
+
+ __forceinline int max(int a, int b) { return sycl::max(a,b); }
+ __forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }
+ __forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }
+ __forceinline float max(float a, float b) { return sycl::fmax(a,b); }
+ __forceinline double max(double a, double b) { return sycl::fmax(a,b); }
+#if defined(__X86_64__)
+ __forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }
+#endif
+
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
+
+// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
+// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
+// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
+
+ template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
+ template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
+
+ template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
+ template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
+ template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
+ template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
+
+ __forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }
+ __forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }
+ __forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }
+ __forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }
+
+ /*! random functions */
+/*
+ template<typename T> T random() { return T(0); }
+ template<> __forceinline int random() { return int(rand()); }
+ template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
+ template<> __forceinline float random() { return rand()/float(RAND_MAX); }
+ template<> __forceinline double random() { return rand()/double(RAND_MAX); }
+*/
+
+ /*! selects */
+ __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
+ __forceinline int select(bool s, int t, int f) { return s ? t : f; }
+ __forceinline float select(bool s, float t, float f) { return s ? t : f; }
+
+ __forceinline bool none(bool s) { return !s; }
+ __forceinline bool all (bool s) { return s; }
+ __forceinline bool any (bool s) { return s; }
+
+ __forceinline unsigned movemask (bool s) { return (unsigned)s; }
+
+ __forceinline float lerp(const float v0, const float v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ template<typename T>
+ __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
+ return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
+ }
+
+ /*! exchange */
+ template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
+
+ /* load/store */
+ template<typename Ty> struct mem;
+
+ template<> struct mem<float> {
+ static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
+ static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
+
+ static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
+ static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
+ };
+
+ /*! bit reverse operation */
+ template<class T>
+ __forceinline T bitReverse(const T& vin)
+ {
+ T v = vin;
+ v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+ v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+ v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+ v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+ v = ( v >> 16 ) | ( v << 16);
+ return v;
+ }
+
+ /*! bit interleave operation */
+ template<class T>
+ __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
+ {
+ T x = xin, y = yin, z = zin;
+ x = (x | (x << 16)) & 0x030000FF;
+ x = (x | (x << 8)) & 0x0300F00F;
+ x = (x | (x << 4)) & 0x030C30C3;
+ x = (x | (x << 2)) & 0x09249249;
+
+ y = (y | (y << 16)) & 0x030000FF;
+ y = (y | (y << 8)) & 0x0300F00F;
+ y = (y | (y << 4)) & 0x030C30C3;
+ y = (y | (y << 2)) & 0x09249249;
+
+ z = (z | (z << 16)) & 0x030000FF;
+ z = (z | (z << 8)) & 0x0300F00F;
+ z = (z | (z << 4)) & 0x030C30C3;
+ z = (z | (z << 2)) & 0x09249249;
+
+ return x | (y << 1) | (z << 2);
+ }
+
+ /*! bit interleave operation for 64bit data types*/
+ template<class T>
+ __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
+ T x = xin & 0x1fffff;
+ T y = yin & 0x1fffff;
+ T z = zin & 0x1fffff;
+
+ x = (x | x << 32) & 0x1f00000000ffff;
+ x = (x | x << 16) & 0x1f0000ff0000ff;
+ x = (x | x << 8) & 0x100f00f00f00f00f;
+ x = (x | x << 4) & 0x10c30c30c30c30c3;
+ x = (x | x << 2) & 0x1249249249249249;
+
+ y = (y | y << 32) & 0x1f00000000ffff;
+ y = (y | y << 16) & 0x1f0000ff0000ff;
+ y = (y | y << 8) & 0x100f00f00f00f00f;
+ y = (y | y << 4) & 0x10c30c30c30c30c3;
+ y = (y | y << 2) & 0x1249249249249249;
+
+ z = (z | z << 32) & 0x1f00000000ffff;
+ z = (z | z << 16) & 0x1f0000ff0000ff;
+ z = (z | z << 8) & 0x100f00f00f00f00f;
+ z = (z | z << 4) & 0x10c30c30c30c30c3;
+ z = (z | z << 2) & 0x1249249249249249;
+
+ return x | (y << 1) | (z << 2);
+ }
+}
diff --git a/thirdparty/embree/common/math/range.h b/thirdparty/embree/common/math/range.h
index 909fadb995..f397615ea2 100644
--- a/thirdparty/embree/common/math/range.h
+++ b/thirdparty/embree/common/math/range.h
@@ -4,7 +4,7 @@
#pragma once
#include "../sys/platform.h"
-#include "../math/math.h"
+#include "../math/emath.h"
namespace embree
{
diff --git a/thirdparty/embree/common/math/vec2.h b/thirdparty/embree/common/math/vec2.h
index f6d98ffa0d..4e641ec249 100644
--- a/thirdparty/embree/common/math/vec2.h
+++ b/thirdparty/embree/common/math/vec2.h
@@ -3,7 +3,7 @@
#pragma once
-#include "math.h"
+#include "emath.h"
namespace embree
{
@@ -34,7 +34,7 @@ namespace embree
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
- __forceinline Vec2( const Vec2fa& other );
+ Vec2( const Vec2fa& other );
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
@@ -232,4 +232,5 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
+
}
diff --git a/thirdparty/embree/common/math/vec2fa.h b/thirdparty/embree/common/math/vec2fa.h
index 4f222894c2..d57e549e68 100644
--- a/thirdparty/embree/common/math/vec2fa.h
+++ b/thirdparty/embree/common/math/vec2fa.h
@@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
-#include "math.h"
+#include "emath.h"
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "vec2fa_sycl.h"
+#else
+
#include "../simd/sse.h"
namespace embree
@@ -316,3 +321,5 @@ namespace embree
typedef Vec2fa Vec2fa_t;
}
+
+#endif
diff --git a/thirdparty/embree/common/math/vec2fa_sycl.h b/thirdparty/embree/common/math/vec2fa_sycl.h
new file mode 100644
index 0000000000..62d62bdd01
--- /dev/null
+++ b/thirdparty/embree/common/math/vec2fa_sycl.h
@@ -0,0 +1,270 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "emath.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ struct Vec3fa;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec2fa Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec2fa
+ {
+ //ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 2 };
+ struct { float x,y; };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa( ) {}
+ //__forceinline Vec2fa( const __m128 a ) : m128(a) {}
+ explicit Vec2fa(const Vec3fa& a);
+
+ __forceinline explicit Vec2fa( const vfloat<4>& a ) {
+ x = a[0];
+ y = a[1];
+ }
+
+ __forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
+ __forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
+
+ __forceinline Vec2fa ( const Vec2fa& other ) { x = other.x; y = other.y; }
+ __forceinline Vec2fa& operator =( const Vec2fa& other ) { x = other.x; y = other.y; return *this; }
+
+ __forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}
+ __forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}
+
+ //__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec2fa load( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec2fa(ptr[0],ptr[1]);
+ }
+
+ static __forceinline Vec2fa loadu( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec2fa(ptr[0],ptr[1]);
+ }
+
+ static __forceinline void storeu ( void* a, const Vec2fa& v ) {
+ float* ptr = (float*)a;
+ ptr[0] = v.x; ptr[1] = v.y;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {}
+ __forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {}
+ __forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
+ __forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
+ //__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
+ __forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); }
+ __forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
+ __forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
+
+ //__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
+ __forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); }
+ __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
+ __forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
+
+ __forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); }
+
+ __forceinline Vec2fa zero_fix(const Vec2fa& a) {
+ const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
+ const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
+ return Vec2fa(x,y);
+ }
+ __forceinline Vec2fa rcp_safe(const Vec2fa& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec2fa log ( const Vec2fa& a ) {
+ return Vec2fa(sycl::log(a.x),sycl::log(a.y));
+ }
+
+ __forceinline Vec2fa exp ( const Vec2fa& a ) {
+ return Vec2fa(sycl::exp(a.x),sycl::exp(a.y));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); }
+ __forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); }
+ __forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); }
+ __forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
+ __forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
+ __forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); }
+ __forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); }
+ __forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); }
+
+ __forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) {
+ return Vec2fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y));
+ }
+ __forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) {
+ return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y));
+ }
+
+/*
+#if defined(__SSE4_1__)
+ __forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+ __forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
+ return Vec2fa(powf(a.x,b),powf(a.y,b));
+ }
+*/
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); }
+ __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); }
+ __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); }
+ __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); }
+
+ __forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
+ __forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
+ __forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
+ __forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
+ __forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
+ __forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
+ __forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
+ __forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
+ __forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
+ __forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
+ __forceinline float reduce_min(const Vec2fa& v) { return sycl::fmin(v.x,v.y); }
+ __forceinline float reduce_max(const Vec2fa& v) { return sycl::fmax(v.x,v.y); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return a.x == b.x && a.y == b.y; }
+ __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return a.x != b.x || a.y != b.y; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
+ return reduce_add(a*b);
+ }
+
+ __forceinline Vec2fa cross ( const Vec2fa& a ) {
+ return Vec2fa(-a.y,a.x);
+ }
+
+ __forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
+ return Vec2fa(s ? t.x : f.x, s ? t.y : f.y);
+ }
+
+ __forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec2fa& a )
+ {
+ const Vec2fa b = abs(a);
+ if (b.x > b.y) return 0;
+ else return 1;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(sycl::trunc(a.x),sycl::trunc(a.y)); }
+ __forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(sycl::floor(a.x),sycl::floor(a.y)); }
+ __forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(sycl::ceil (a.x),sycl::ceil (a.y)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
+ return cout << "(" << a.x << ", " << a.y << ")";
+ }
+
+ /*template<>
+ __forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
+ {
+ v = 0;
+ const unsigned int lid = get_sub_group_local_id();
+ if (lid == 0) v = a.x;
+ if (lid == 1) v = a.y;
+ }*/
+
+ typedef Vec2fa Vec2fa_t;
+}
diff --git a/thirdparty/embree/common/math/vec3.h b/thirdparty/embree/common/math/vec3.h
index 254f6c4011..d5e78befe8 100644
--- a/thirdparty/embree/common/math/vec3.h
+++ b/thirdparty/embree/common/math/vec3.h
@@ -3,7 +3,7 @@
#pragma once
-#include "math.h"
+#include "emath.h"
namespace embree
{
@@ -286,6 +286,8 @@ namespace embree
template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
+#if !defined(__SYCL_DEVICE_ONLY__)
+
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
@@ -333,4 +335,23 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
#endif
+
+#else
+
+#if defined(__SSE__)
+ template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
+ x = a.x; y = a.y; z = a.z;
+ }
+#endif
+#if defined(__AVX__)
+ template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
+ x = a.x; y = a.y; z = a.z;
+ }
+#endif
+#if defined(__AVX512F__)
+ template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
+ x = a.x; y = a.y; z = a.z;
+ }
+#endif
+#endif
}
diff --git a/thirdparty/embree/common/math/vec3ba.h b/thirdparty/embree/common/math/vec3ba.h
index a021b522dc..bf24a2a3b6 100644
--- a/thirdparty/embree/common/math/vec3ba.h
+++ b/thirdparty/embree/common/math/vec3ba.h
@@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
-#include "math.h"
+#include "emath.h"
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "vec3ba_sycl.h"
+#else
+
#include "../simd/sse.h"
namespace embree
@@ -118,3 +123,5 @@ namespace embree
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
}
+
+#endif
diff --git a/thirdparty/embree/common/math/vec3ba_sycl.h b/thirdparty/embree/common/math/vec3ba_sycl.h
new file mode 100644
index 0000000000..a2fa13de6c
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3ba_sycl.h
@@ -0,0 +1,115 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "emath.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3ba Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3ba
+ {
+ //ALIGNED_STRUCT_(16);
+
+ struct { bool x,y,z; };
+
+ typedef bool Scalar;
+ enum { N = 3 };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba( ) {}
+ //__forceinline Vec3ba( const __m128 input ) : m128(input) {}
+
+ __forceinline Vec3ba( const Vec3ba& other ) : x(other.x), y(other.y), z(other.z) {}
+ __forceinline Vec3ba& operator =(const Vec3ba& other) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ __forceinline explicit Vec3ba( bool a ) : x(a), y(a), z(a) {}
+ __forceinline Vec3ba( bool a, bool b, bool c) : x(a), y(b), z(c) {}
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {}
+ __forceinline Vec3ba( TrueTy ) : x(true), y(true), z(true) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ //__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba operator !( const Vec3ba& a ) { return Vec3ba(!a.x,!a.y,!a.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x & b.x, a.y & b.y, a.z & b.z); }
+ __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x | b.x, a.y | b.y, a.z | b.z); }
+ __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
+ __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
+ __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators + Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
+ return a.x == b.x && a.y == b.y && a.z == b.z;
+ }
+ __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
+ return a.x != b.x || a.y != b.y || a.z != b.z;
+ }
+/*
+ __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ return false;
+ }
+*/
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reduction Operations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool reduce_and( const Vec3ba& a ) { return a.x & a.y & a.z; }
+ __forceinline bool reduce_or ( const Vec3ba& a ) { return a.x | a.y | a.z; }
+
+ __forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); }
+ __forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); }
+ __forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
+ return cout;
+ }
+}
diff --git a/thirdparty/embree/common/math/vec3fa.h b/thirdparty/embree/common/math/vec3fa.h
index 8564cf6d10..967e75da74 100644
--- a/thirdparty/embree/common/math/vec3fa.h
+++ b/thirdparty/embree/common/math/vec3fa.h
@@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
-#include "math.h"
+#include "emath.h"
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "vec3fa_sycl.h"
+#else
+
#include "../simd/sse.h"
namespace embree
@@ -441,7 +446,6 @@ namespace embree
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
-
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
@@ -783,3 +787,5 @@ namespace embree
typedef Vec3fx Vec3ff;
}
+
+#endif
diff --git a/thirdparty/embree/common/math/vec3fa_sycl.h b/thirdparty/embree/common/math/vec3fa_sycl.h
new file mode 100644
index 0000000000..5fdb00ab99
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3fa_sycl.h
@@ -0,0 +1,617 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "emath.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3fa Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3fa
+ {
+ //ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 3 };
+ struct { float x,y,z, do_not_use; };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa( ) {}
+ //__forceinline Vec3fa( const __m128 a ) : m128(a) {}
+ //__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}
+
+ __forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
+ //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ __forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; }
+ __forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ __forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}
+ __forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}
+
+ __forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {}
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+ __forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!
+
+ //friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec3fa load( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec3fa(ptr[0],ptr[1],ptr[2]);
+ }
+
+ static __forceinline Vec3fa loadu( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec3fa(ptr[0],ptr[1],ptr[2]);
+ }
+
+ static __forceinline void storeu ( void* a, const Vec3fa& v ) {
+ float* ptr = (float*)a;
+ ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {}
+ __forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {}
+ __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
+ __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
+ __forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
+ __forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
+ __forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
+
+ //__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
+ __forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
+ __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
+ __forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
+
+ __forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
+
+ __forceinline Vec3fa zero_fix(const Vec3fa& a) {
+ const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
+ const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
+ const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
+ return Vec3fa(x,y,z);
+ }
+ __forceinline Vec3fa rcp_safe(const Vec3fa& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec3fa log ( const Vec3fa& a ) {
+ return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
+ }
+
+ __forceinline Vec3fa exp ( const Vec3fa& a ) {
+ return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
+ __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
+ __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
+ __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
+ __forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
+ __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
+ __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
+ __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
+
+ __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
+ return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
+ }
+ __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
+ return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
+ }
+
+/*
+#if defined(__SSE4_1__)
+ __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+*/
+ __forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
+ return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
+ __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
+ __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
+ __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
+
+ __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
+ __forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
+ __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
+ __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
+ __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
+ __forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
+ __forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
+ __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
+ __forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
+ __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
+ __forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
+ __forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
+ __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
+
+ __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
+ __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
+ __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
+ __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
+ __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
+ __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
+
+ __forceinline bool isvalid ( const Vec3fa& v ) {
+ return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite ( const Vec3fa& a ) {
+ return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
+ return reduce_add(a*b);
+ }
+
+ __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) {
+ return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
+ }
+
+ __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
+ __forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
+ __forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
+
+ __forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
+ const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
+ }
+
+ /*! differentiated normalization */
+ __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
+ {
+ const float pp = dot(p,p);
+ const float pdp = dot(p,dp);
+ return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
+ return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z);
+ }
+
+ __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
+ return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
+ }
+
+ __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec3fa& a )
+ {
+ const Vec3fa b = abs(a);
+ if (b.x > b.y) {
+ if (b.x > b.z) return 0; else return 2;
+ } else {
+ if (b.y > b.z) return 1; else return 2;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); }
+ __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); }
+ __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
+ }
+
+ __forceinline Vec2fa::Vec2fa(const Vec3fa& a)
+ : x(a.x), y(a.y) {}
+
+ __forceinline Vec3ia::Vec3ia( const Vec3fa& a )
+ : x((int)a.x), y((int)a.y), z((int)a.z) {}
+
+ typedef Vec3fa Vec3fa_t;
+
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3fx Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3fx
+ {
+ //ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 3 };
+ struct { float x,y,z; union { int a; unsigned u; float w; }; };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx( ) {}
+ //__forceinline Vec3fx( const __m128 a ) : m128(a) {}
+ __forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
+
+ __forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
+ __forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
+
+ __forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
+ //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ //__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
+ //__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
+
+ __forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
+ __forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
+
+ __forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
+ __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
+ __forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
+
+ //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
+ //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
+ __forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
+
+ __forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+ __forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
+
+ //friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec3fx load( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
+ }
+
+ static __forceinline Vec3fx loadu( const void* const a ) {
+ const float* ptr = (const float*)a;
+ return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
+ }
+
+ static __forceinline void storeu ( void* a, const Vec3fx& v ) {
+ float* ptr = (float*)a;
+ ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
+ __forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
+ __forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
+ __forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
+ __forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
+ __forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
+ __forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
+
+ //__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
+ __forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
+ __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
+ __forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
+
+ __forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
+
+ __forceinline Vec3fx zero_fix(const Vec3fx& a) {
+ const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
+ const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
+ const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
+ return Vec3fx(x,y,z);
+ }
+ __forceinline Vec3fx rcp_safe(const Vec3fx& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec3fx log ( const Vec3fx& a ) {
+ return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
+ }
+
+ __forceinline Vec3fx exp ( const Vec3fx& a ) {
+ return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
+ __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
+ __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
+ __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
+ __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
+ __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
+ __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
+ __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
+
+ __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
+ return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w));
+ }
+ __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
+ return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w));
+ }
+
+/*
+#if defined(__SSE4_1__)
+ __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+ __forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
+ return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
+ }
+*/
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
+ __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
+ __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
+ __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
+
+ __forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
+ __forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
+ __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
+ __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
+ __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
+ __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
+ __forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
+ __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
+ __forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
+ __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
+ __forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
+ __forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
+ __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
+
+ __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
+ __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
+ __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
+ __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
+ __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
+ __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
+
+ __forceinline bool isvalid ( const Vec3fx& v ) {
+ return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite ( const Vec3fx& a ) {
+ return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
+ }
+
+ __forceinline bool isvalid4 ( const Vec3fx& v ) {
+ const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE;
+ const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE;
+ const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE;
+ const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE;
+ return valid_x & valid_y & valid_z & valid_w;
+ }
+
+ __forceinline bool is_finite4 ( const Vec3fx& v ) {
+ const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX;
+ const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX;
+ const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX;
+ const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX;
+ return finite_x & finite_y & finite_z & finite_w;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
+ return reduce_add(a*b);
+ }
+
+ __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) {
+ return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
+ }
+
+ __forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
+ __forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
+ __forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
+
+ __forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
+ const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
+ }
+
+ /*! differentiated normalization */
+ __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
+ {
+ const float pp = dot(p,p);
+ const float pdp = dot(p,dp);
+ return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
+ return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w);
+ }
+
+ __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
+ return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
+ }
+
+ __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec3fx& a )
+ {
+ const Vec3fx b = abs(a);
+ if (b.x > b.y) {
+ if (b.x > b.z) return 0; else return 2;
+ } else {
+ if (b.y > b.z) return 1; else return 2;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
+ __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
+ __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")";
+ }
+
+ typedef Vec3fx Vec3ff;
+
+ //__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
+ // : x(a.x), y(a.y) {}
+
+ //__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
+ // : x((int)a.x), y((int)a.y), z((int)a.z) {}
+}
diff --git a/thirdparty/embree/common/math/vec3ia.h b/thirdparty/embree/common/math/vec3ia.h
index d4cc3125cd..1472fe9135 100644
--- a/thirdparty/embree/common/math/vec3ia.h
+++ b/thirdparty/embree/common/math/vec3ia.h
@@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
-#include "math.h"
+#include "emath.h"
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+# include "vec3ia_sycl.h"
+#else
+
#include "../simd/sse.h"
namespace embree
@@ -194,3 +199,5 @@ namespace embree
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
}
+
+#endif
diff --git a/thirdparty/embree/common/math/vec3ia_sycl.h b/thirdparty/embree/common/math/vec3ia_sycl.h
new file mode 100644
index 0000000000..5a3d396373
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3ia_sycl.h
@@ -0,0 +1,178 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "emath.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3ia Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3ia
+ {
+ ALIGNED_STRUCT_(16);
+
+ struct { int x,y,z; };
+
+ typedef int Scalar;
+ enum { N = 3 };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia( ) {}
+ //__forceinline Vec3ia( const __m128i a ) : m128(a) {}
+
+ __forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {}
+ __forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ __forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {}
+ __forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {}
+ //__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
+ __forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {}
+
+ __forceinline explicit Vec3ia( const Vec3fa& a );
+
+ //__forceinline operator const __m128i&() const { return m128; }
+ //__forceinline operator __m128i&() { return m128; }
+ __forceinline operator vint4() const { return vint4(x,y,z,z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {}
+ __forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {}
+ __forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {}
+ __forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); }
+ __forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); }
+ __forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); }
+ __forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
+ __forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
+
+ __forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); }
+ __forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
+ __forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
+
+ __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); }
+ __forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
+ __forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
+
+ __forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); }
+ __forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
+ __forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
+
+ __forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); }
+ __forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
+ __forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
+
+ __forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); }
+ __forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
+ __forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
+
+ __forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); }
+ __forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); }
+
+ __forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); }
+ __forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); }
+ __forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
+ __forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
+
+ __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
+ __forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
+
+ __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
+ __forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
+
+ __forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
+ __forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
+
+ __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
+ __forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
+
+ __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
+ __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
+ __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
+ __forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); }
+ __forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return a.x == b.x & a.y == b.y & a.z == b.z; }
+ __forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return a.x != b.x & a.y != b.y & a.z != b.z; }
+
+/*
+ __forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ return false;
+ }
+*/
+ __forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
+ __forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
+ __forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
+ const int x = m.x ? t.x : f.x;
+ const int y = m.y ? t.y : f.y;
+ const int z = m.z ? t.z : f.z;
+ return Vec3ia(x,y,z);
+ }
+
+ __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); }
+ __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
+ return cout;
+ }
+}
diff --git a/thirdparty/embree/common/math/vec4.h b/thirdparty/embree/common/math/vec4.h
index 10c53f47b4..5647859257 100644
--- a/thirdparty/embree/common/math/vec4.h
+++ b/thirdparty/embree/common/math/vec4.h
@@ -3,7 +3,7 @@
#pragma once
-#include "math.h"
+#include "emath.h"
#include "vec3.h"
namespace embree
@@ -221,6 +221,8 @@ namespace embree
{
template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
+#if !defined(__SYCL_DEVICE_ONLY__)
+
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
@@ -240,4 +242,25 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
#endif
+
+#else
+
+#if defined(__SSE__)
+ template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
+ x = a.x; y = a.y; z = a.z; w = a.w;
+ }
+#endif
+#if defined(__AVX__)
+ template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
+ x = a.x; y = a.y; z = a.z; w = a.w;
+ }
+#endif
+#if defined(__AVX512F__)
+ template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
+ x = a.x; y = a.y; z = a.z; w = a.w;
+ }
+#endif
+
+#endif
}
+