diff options
Diffstat (limited to 'thirdparty/embree/common/math')
23 files changed, 1841 insertions, 16 deletions
diff --git a/thirdparty/embree/common/math/affinespace.h b/thirdparty/embree/common/math/affinespace.h index 9d4a0f0846..f3e5404639 100644 --- a/thirdparty/embree/common/math/affinespace.h +++ b/thirdparty/embree/common/math/affinespace.h @@ -337,7 +337,7 @@ namespace embree if (D) *D = sqrtf(D_x); return true; } - + __forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr) { Vec3fa::storeu(&ptr->l.vx, source.l.vx); diff --git a/thirdparty/embree/common/math/bbox.h b/thirdparty/embree/common/math/bbox.h index e4eb3df9a4..651b29a8fe 100644 --- a/thirdparty/embree/common/math/bbox.h +++ b/thirdparty/embree/common/math/bbox.h @@ -56,6 +56,11 @@ namespace embree return BBox(min(a.lower, b.lower), max(a.upper, b.upper)); } + /*! intersects two boxes */ + __forceinline static const BBox intersect (const BBox& a, const BBox& b) { + return BBox(max(a.lower, b.lower), min(a.upper, b.upper)); + } + /*! enlarge box by some scaling factor */ __forceinline BBox enlarge_by(const float a) const { return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper)); diff --git a/thirdparty/embree/common/math/col3.h b/thirdparty/embree/common/math/col3.h index 3f50c04393..4576bc517d 100644 --- a/thirdparty/embree/common/math/col3.h +++ b/thirdparty/embree/common/math/col3.h @@ -3,7 +3,7 @@ #pragma once -#include "math.h" +#include "emath.h" namespace embree { diff --git a/thirdparty/embree/common/math/col4.h b/thirdparty/embree/common/math/col4.h index 788508516b..4ef916cc3a 100644 --- a/thirdparty/embree/common/math/col4.h +++ b/thirdparty/embree/common/math/col4.h @@ -3,7 +3,7 @@ #pragma once -#include "math.h" +#include "emath.h" namespace embree { diff --git a/thirdparty/embree/common/math/color.h b/thirdparty/embree/common/math/color.h index e62e4ad2a4..8b28ff9447 100644 --- a/thirdparty/embree/common/math/color.h +++ b/thirdparty/embree/common/math/color.h @@ -3,6 +3,10 @@ #pragma once +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "color_sycl.h" +#else + #include "constants.h" #include "col3.h" #include "col4.h" @@ -64,6 +68,10 @@ namespace embree d.b = (unsigned char)(s[2]); d.a = (unsigned char)(s[3]); } + __forceinline void set(float &f) const + { + f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance. + } //////////////////////////////////////////////////////////////////////////////// /// Constants @@ -256,3 +264,5 @@ namespace embree return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")"; } } + +#endif diff --git a/thirdparty/embree/common/math/color_sycl.h b/thirdparty/embree/common/math/color_sycl.h new file mode 100644 index 0000000000..41b89ddecc --- /dev/null +++ b/thirdparty/embree/common/math/color_sycl.h @@ -0,0 +1,219 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "constants.h" +#include "col3.h" +#include "col4.h" + +#include "../simd/sse.h" + +namespace embree +{ + //////////////////////////////////////////////////////////////////////////////// + /// SSE RGBA Color Class + //////////////////////////////////////////////////////////////////////////////// + + struct Color4 + { + struct { float r,g,b,a; }; + + //////////////////////////////////////////////////////////////////////////////// + /// Construction + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Color4 () {} + //__forceinline Color4 ( const __m128 a ) : m128(a) {} + + __forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {} + __forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {} + + __forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {} + __forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {} + __forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {} + __forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {} + + //__forceinline Color4 ( const Color4& other ) : m128(other.m128) {} + //__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; } + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + + //////////////////////////////////////////////////////////////////////////////// + /// Set + //////////////////////////////////////////////////////////////////////////////// + + __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; } + __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; } + + __forceinline void set(Col3uc& d) const + { + d.r = (unsigned char)(clamp(r)*255.0f); + d.g = (unsigned char)(clamp(g)*255.0f); + d.b = (unsigned char)(clamp(b)*255.0f); + } + + __forceinline void set(Col4uc& d) const + { + d.r = (unsigned char)(clamp(r)*255.0f); + d.g = (unsigned char)(clamp(g)*255.0f); + d.b = (unsigned char)(clamp(b)*255.0f); + d.a = (unsigned char)(clamp(a)*255.0f); + } + __forceinline void set(float &f) const + { + f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance. + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {} + __forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {} + //__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {} + //__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {} + }; + + //////////////////////////////////////////////////////////////////////////////// + /// SSE RGB Color Class + //////////////////////////////////////////////////////////////////////////////// + + struct Color + { + struct { float r,g,b; }; + + //////////////////////////////////////////////////////////////////////////////// + /// Construction + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Color () {} + //__forceinline Color ( const __m128 a ) : m128(a) {} + + __forceinline explicit Color (const float v) : r(v), g(v), b(v) {} + __forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {} + + //__forceinline Color ( const Color& other ) : m128(other.m128) {} + //__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; } + + //__forceinline Color ( const Color4& other ) : m128(other.m128) {} + //__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; } + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + + //////////////////////////////////////////////////////////////////////////////// + /// Set + //////////////////////////////////////////////////////////////////////////////// + + __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; } + __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; } + +#if 0 + __forceinline void set(Col3uc& d) const + { + vfloat4 s = clamp(vfloat4(m128))*255.0f; + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); + } + __forceinline void set(Col4uc& d) const + { + vfloat4 s = clamp(vfloat4(m128))*255.0f; + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); + d.a = 255; + } +#endif + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {} + __forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {} + //__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {} + //__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {} + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const Color operator +( const Color& a ) { return a; } + __forceinline const Color operator -( const Color& a ) { return Color(-a.r, -a.g, -a.b); } + __forceinline const Color abs ( const Color& a ) { return Color(abs(a.r), abs(a.g), abs(a.b)); } + __forceinline const Color rcp ( const Color& a ) { return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b); } + __forceinline const Color rsqrt( const Color& a ) { return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b)); } + __forceinline const Color sqrt ( const Color& a ) { return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const Color operator +( const Color& a, const Color& b ) { return Color(a.r+b.r, a.g+b.g, a.b+b.b); } + __forceinline const Color operator -( const Color& a, const Color& b ) { return Color(a.r-b.r, a.g-b.g, a.b-b.b); } + __forceinline const Color operator *( const Color& a, const Color& b ) { return Color(a.r*b.r, a.g*b.g, a.b*b.b); } + __forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); } + __forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; } + __forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); } + __forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); } + + __forceinline const Color min( const Color& a, const Color& b ) { return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b)); } + __forceinline const Color max( const Color& a, const Color& b ) { return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; } + __forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; } + __forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; } + __forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; } + __forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; } + __forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Reductions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; } + __forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; } + __forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); } + __forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; } + __forceinline bool operator !=( const Color& a, const Color& b ) { return a.r != b.r || a.g != b.g || a.b != b.b; } + __forceinline bool operator < ( const Color& a, const Color& b ) { + if (a.r != b.r) return a.r < b.r; + if (a.g != b.g) return a.g < b.g; + if (a.b != b.b) return a.b < b.b; + return false; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const Color select( bool s, const Color& t, const Color& f ) { + return s ? t : f; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Special Operators + //////////////////////////////////////////////////////////////////////////////// + + /*! computes luminance of a color */ + __forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); } + + /*! output operator */ + inline std::ostream& operator<<(std::ostream& cout, const Color& a) { + return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")"; + } +} diff --git a/thirdparty/embree/common/math/math.h b/thirdparty/embree/common/math/emath.h index 7930c17727..22a89a7669 100644 --- a/thirdparty/embree/common/math/math.h +++ b/thirdparty/embree/common/math/emath.h @@ -8,6 +8,10 @@ #include "constants.h" #include <cmath> +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "math_sycl.h" +#else + #if defined(__ARM_NEON) #include "../simd/arm/emulation.h" #else @@ -44,6 +48,9 @@ namespace embree __forceinline int toInt (const float& a) { return int(a); } __forceinline float toFloat(const int& a) { return float(a); } + __forceinline int asInt (const float& a) { return *((int*)&a); } + __forceinline float asFloat(const int& a) { return *((float*)&a); } + #if defined(__WIN32__) __forceinline bool finite ( const float x ) { return _finite(x) != 0; } #endif @@ -351,7 +358,11 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur __forceinline int select(bool s, int t, int f) { return s ? t : f; } __forceinline float select(bool s, float t, float f) { return s ? t : f; } - __forceinline bool all(bool s) { return s; } + __forceinline bool none(bool s) { return !s; } + __forceinline bool all (bool s) { return s; } + __forceinline bool any (bool s) { return s; } + + __forceinline unsigned movemask (bool s) { return (unsigned)s; } __forceinline float lerp(const float v0, const float v1, const float t) { return madd(1.0f-t,v0,t*v1); @@ -453,3 +464,5 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur return x | (y << 1) | (z << 2); } } + +#endif diff --git a/thirdparty/embree/common/math/lbbox.h b/thirdparty/embree/common/math/lbbox.h index 2b397a05c8..7619199780 100644 --- a/thirdparty/embree/common/math/lbbox.h +++ b/thirdparty/embree/common/math/lbbox.h @@ -179,6 +179,48 @@ namespace embree bounds1 = b1; } + /*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */ + __forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range) + { + const BBox3f bounds0 = lbounds.bounds0; + const BBox3f bounds1 = lbounds.bounds1; + + /* normalize global target_time_range to local time_range_in */ + const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(), + (target_time_range.upper-time_range_in.lower)/time_range_in.size()); + + const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper)); + + /* compute bounds at begin and end of clipped time range */ + BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower); + BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper); + + /* make sure that b0 is properly bounded at time_range_in.lower */ + { + const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size()); + const T dlower = min(bounds0.lower-bt.lower, T(zero)); + const T dupper = max(bounds0.upper-bt.upper, T(zero)); + b0.lower += dlower; b1.lower += dlower; + b0.upper += dupper; b1.upper += dupper; + } + + /* make sure that b1 is properly bounded at time_range_in.upper */ + { + const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size()); + const T dlower = min(bounds1.lower-bt.lower, T(zero)); + const T dupper = max(bounds1.upper-bt.upper, T(zero)); + b0.lower += dlower; b1.lower += dlower; + b0.upper += dupper; b1.upper += dupper; + } + + this->bounds0 = b0; + this->bounds1 = b1; + } + + /*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */ + __forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range) + : LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {} + public: __forceinline bool empty() const { diff --git a/thirdparty/embree/common/math/linearspace2.h b/thirdparty/embree/common/math/linearspace2.h index 184ee695fb..e58f61ea6b 100644 --- a/thirdparty/embree/common/math/linearspace2.h +++ b/thirdparty/embree/common/math/linearspace2.h @@ -18,6 +18,7 @@ namespace embree /*! default matrix constructor */ __forceinline LinearSpace2 ( ) {} + __forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; } __forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; } diff --git a/thirdparty/embree/common/math/linearspace3.h b/thirdparty/embree/common/math/linearspace3.h index 9eaa2cc2bb..f6d2318fa0 100644 --- a/thirdparty/embree/common/math/linearspace3.h +++ b/thirdparty/embree/common/math/linearspace3.h @@ -19,6 +19,7 @@ namespace embree /*! default matrix constructor */ __forceinline LinearSpace3 ( ) {} + __forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; } __forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; } @@ -90,17 +91,20 @@ namespace embree Vector vx,vy,vz; }; +#if !defined(__SYCL_DEVICE_ONLY__) + /*! compute transposed matrix */ template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const { vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz); return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz)); } - +#endif + template<typename T> __forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) { return xfm.transposed(); } - + //////////////////////////////////////////////////////////////////////////////// // Unary Operators //////////////////////////////////////////////////////////////////////////////// diff --git a/thirdparty/embree/common/math/math_sycl.h b/thirdparty/embree/common/math/math_sycl.h new file mode 100644 index 0000000000..ffb047569c --- /dev/null +++ b/thirdparty/embree/common/math/math_sycl.h @@ -0,0 +1,279 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/platform.h" +#include "../sys/intrinsics.h" +#include "constants.h" +#include <cmath> + +namespace embree +{ + __forceinline bool isvalid ( const float& v ) { + return (v > -FLT_LARGE) & (v < +FLT_LARGE); + } + + __forceinline int cast_f2i(float f) { + return __builtin_bit_cast(int,f); + } + + __forceinline float cast_i2f(int i) { + return __builtin_bit_cast(float,i); + } + + __forceinline int toInt (const float& a) { return int(a); } + __forceinline float toFloat(const int& a) { return float(a); } + + __forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); } + __forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); } + + //__forceinline bool finite ( const float x ) { return _finite(x) != 0; } + __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; } + __forceinline float sqr ( const float x ) { return x*x; } + + __forceinline float rcp ( const float x ) { + return sycl::native::recip(x); + } + + __forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); } + //__forceinline float signmsk ( const float x ) { + // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); + //} + //__forceinline float xorf( const float x, const float y ) { + // return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y))); + //} + //__forceinline float andf( const float x, const unsigned y ) { + // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y)))); + //} + + __forceinline float rsqrt( const float x ) { + return sycl::rsqrt(x); + } + + //__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); } + //__forceinline double nextafter(double x, double y) { return _nextafter(x, y); } + //__forceinline int roundf(float f) { return (int)(f + 0.5f); } + + __forceinline float abs ( const float x ) { return sycl::fabs(x); } + __forceinline float acos ( const float x ) { return sycl::acos(x); } + __forceinline float asin ( const float x ) { return sycl::asin(x); } + __forceinline float atan ( const float x ) { return sycl::atan(x); } + __forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); } + __forceinline float cos ( const float x ) { return sycl::cos(x); } + __forceinline float cosh ( const float x ) { return sycl::cosh(x); } + __forceinline float exp ( const float x ) { return sycl::exp(x); } + __forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); } + __forceinline float log ( const float x ) { return sycl::log(x); } + __forceinline float log10( const float x ) { return sycl::log10(x); } + __forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); } + __forceinline float sin ( const float x ) { return sycl::sin(x); } + __forceinline float sinh ( const float x ) { return sycl::sinh(x); } + __forceinline float sqrt ( const float x ) { return sycl::sqrt(x); } + __forceinline float tan ( const float x ) { return sycl::tan(x); } + __forceinline float tanh ( const float x ) { return sycl::tanh(x); } + __forceinline float floor( const float x ) { return sycl::floor(x); } + __forceinline float ceil ( const float x ) { return sycl::ceil(x); } + __forceinline float frac ( const float x ) { return x-floor(x); } + + //__forceinline double abs ( const double x ) { return ::fabs(x); } + //__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; } + //__forceinline double acos ( const double x ) { return ::acos (x); } + //__forceinline double asin ( const double x ) { return ::asin (x); } + //__forceinline double atan ( const double x ) { return ::atan (x); } + //__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); } + //__forceinline double cos ( const double x ) { return ::cos (x); } + //__forceinline double cosh ( const double x ) { return ::cosh (x); } + //__forceinline double exp ( const double x ) { return ::exp (x); } + //__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); } + //__forceinline double log ( const double x ) { return ::log (x); } + //__forceinline double log10( const double x ) { return ::log10(x); } + //__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); } + //__forceinline double rcp ( const double x ) { return 1.0/x; } + //__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); } + //__forceinline double sin ( const double x ) { return ::sin (x); } + //__forceinline double sinh ( const double x ) { return ::sinh (x); } + //__forceinline double sqr ( const double x ) { return x*x; } + //__forceinline double sqrt ( const double x ) { return ::sqrt (x); } + //__forceinline double tan ( const double x ) { return ::tan (x); } + //__forceinline double tanh ( const double x ) { return ::tanh (x); } + //__forceinline double floor( const double x ) { return ::floor (x); } + //__forceinline double ceil ( const double x ) { return ::ceil (x); } + +/* +#if defined(__SSE4_1__) + __forceinline float mini(float a, float b) { + const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); + const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); + const __m128i ci = _mm_min_epi32(ai,bi); + return _mm_cvtss_f32(_mm_castsi128_ps(ci)); + } +#endif + +#if defined(__SSE4_1__) + __forceinline float maxi(float a, float b) { + const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); + const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); + const __m128i ci = _mm_max_epi32(ai,bi); + return _mm_cvtss_f32(_mm_castsi128_ps(ci)); + } +#endif +*/ + + template<typename T> + __forceinline T twice(const T& a) { return a+a; } + + __forceinline int min(int a, int b) { return sycl::min(a,b); } + __forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); } + __forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); } + __forceinline float min(float a, float b) { return sycl::fmin(a,b); } + __forceinline double min(double a, double b) { return sycl::fmin(a,b); } +#if defined(__X86_64__) + __forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); } +#endif + + template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); } + template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } + template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); } + +// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); } +// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); } +// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); } + + __forceinline int max(int a, int b) { return sycl::max(a,b); } + __forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); } + __forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); } + __forceinline float max(float a, float b) { return sycl::fmax(a,b); } + __forceinline double max(double a, double b) { return sycl::fmax(a,b); } +#if defined(__X86_64__) + __forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); } +#endif + + template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); } + template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } + template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); } + +// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); } +// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); } +// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); } + + template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); } + template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); } + + template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); } + template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); } + template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); } + template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); } + + __forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); } + __forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); } + __forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); } + __forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); } + + /*! random functions */ +/* + template<typename T> T random() { return T(0); } + template<> __forceinline int random() { return int(rand()); } + template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); } + template<> __forceinline float random() { return rand()/float(RAND_MAX); } + template<> __forceinline double random() { return rand()/double(RAND_MAX); } +*/ + + /*! selects */ + __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; } + __forceinline int select(bool s, int t, int f) { return s ? t : f; } + __forceinline float select(bool s, float t, float f) { return s ? t : f; } + + __forceinline bool none(bool s) { return !s; } + __forceinline bool all (bool s) { return s; } + __forceinline bool any (bool s) { return s; } + + __forceinline unsigned movemask (bool s) { return (unsigned)s; } + + __forceinline float lerp(const float v0, const float v1, const float t) { + return madd(1.0f-t,v0,t*v1); + } + + template<typename T> + __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) { + return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3))); + } + + /*! exchange */ + template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; } + + /* load/store */ + template<typename Ty> struct mem; + + template<> struct mem<float> { + static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } + static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } + + static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } + static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } + }; + + /*! bit reverse operation */ + template<class T> + __forceinline T bitReverse(const T& vin) + { + T v = vin; + v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); + v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); + v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); + v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); + v = ( v >> 16 ) | ( v << 16); + return v; + } + + /*! bit interleave operation */ + template<class T> + __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin) + { + T x = xin, y = yin, z = zin; + x = (x | (x << 16)) & 0x030000FF; + x = (x | (x << 8)) & 0x0300F00F; + x = (x | (x << 4)) & 0x030C30C3; + x = (x | (x << 2)) & 0x09249249; + + y = (y | (y << 16)) & 0x030000FF; + y = (y | (y << 8)) & 0x0300F00F; + y = (y | (y << 4)) & 0x030C30C3; + y = (y | (y << 2)) & 0x09249249; + + z = (z | (z << 16)) & 0x030000FF; + z = (z | (z << 8)) & 0x0300F00F; + z = (z | (z << 4)) & 0x030C30C3; + z = (z | (z << 2)) & 0x09249249; + + return x | (y << 1) | (z << 2); + } + + /*! bit interleave operation for 64bit data types*/ + template<class T> + __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){ + T x = xin & 0x1fffff; + T y = yin & 0x1fffff; + T z = zin & 0x1fffff; + + x = (x | x << 32) & 0x1f00000000ffff; + x = (x | x << 16) & 0x1f0000ff0000ff; + x = (x | x << 8) & 0x100f00f00f00f00f; + x = (x | x << 4) & 0x10c30c30c30c30c3; + x = (x | x << 2) & 0x1249249249249249; + + y = (y | y << 32) & 0x1f00000000ffff; + y = (y | y << 16) & 0x1f0000ff0000ff; + y = (y | y << 8) & 0x100f00f00f00f00f; + y = (y | y << 4) & 0x10c30c30c30c30c3; + y = (y | y << 2) & 0x1249249249249249; + + z = (z | z << 32) & 0x1f00000000ffff; + z = (z | z << 16) & 0x1f0000ff0000ff; + z = (z | z << 8) & 0x100f00f00f00f00f; + z = (z | z << 4) & 0x10c30c30c30c30c3; + z = (z | z << 2) & 0x1249249249249249; + + return x | (y << 1) | (z << 2); + } +} diff --git a/thirdparty/embree/common/math/range.h b/thirdparty/embree/common/math/range.h index 909fadb995..f397615ea2 100644 --- a/thirdparty/embree/common/math/range.h +++ b/thirdparty/embree/common/math/range.h @@ -4,7 +4,7 @@ #pragma once #include "../sys/platform.h" -#include "../math/math.h" +#include "../math/emath.h" namespace embree { diff --git a/thirdparty/embree/common/math/vec2.h b/thirdparty/embree/common/math/vec2.h index f6d98ffa0d..4e641ec249 100644 --- a/thirdparty/embree/common/math/vec2.h +++ b/thirdparty/embree/common/math/vec2.h @@ -3,7 +3,7 @@ #pragma once -#include "math.h" +#include "emath.h" namespace embree { @@ -34,7 +34,7 @@ namespace embree __forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {} __forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; } - __forceinline Vec2( const Vec2fa& other ); + Vec2( const Vec2fa& other ); template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {} template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; } @@ -232,4 +232,5 @@ namespace embree #if defined(__AVX512F__) template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {} #endif + } diff --git a/thirdparty/embree/common/math/vec2fa.h b/thirdparty/embree/common/math/vec2fa.h index 4f222894c2..d57e549e68 100644 --- a/thirdparty/embree/common/math/vec2fa.h +++ b/thirdparty/embree/common/math/vec2fa.h @@ -4,7 +4,12 @@ #pragma once #include "../sys/alloc.h" -#include "math.h" +#include "emath.h" + +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "vec2fa_sycl.h" +#else + #include "../simd/sse.h" namespace embree @@ -316,3 +321,5 @@ namespace embree typedef Vec2fa Vec2fa_t; } + +#endif diff --git a/thirdparty/embree/common/math/vec2fa_sycl.h b/thirdparty/embree/common/math/vec2fa_sycl.h new file mode 100644 index 0000000000..62d62bdd01 --- /dev/null +++ b/thirdparty/embree/common/math/vec2fa_sycl.h @@ -0,0 +1,270 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/alloc.h" +#include "emath.h" +#include "../simd/sse.h" + +namespace embree +{ + struct Vec3fa; + + //////////////////////////////////////////////////////////////////////////////// + /// SSE Vec2fa Type + //////////////////////////////////////////////////////////////////////////////// + + struct __aligned(16) Vec2fa + { + //ALIGNED_STRUCT_(16); + + typedef float Scalar; + enum { N = 2 }; + struct { float x,y; }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa( ) {} + //__forceinline Vec2fa( const __m128 a ) : m128(a) {} + explicit Vec2fa(const Vec3fa& a); + + __forceinline explicit Vec2fa( const vfloat<4>& a ) { + x = a[0]; + y = a[1]; + } + + __forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; } + __forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; } + + __forceinline Vec2fa ( const Vec2fa& other ) { x = other.x; y = other.y; } + __forceinline Vec2fa& operator =( const Vec2fa& other ) { x = other.x; y = other.y; return *this; } + + __forceinline explicit Vec2fa( const float a ) : x(a), y(a) {} + __forceinline Vec2fa( const float x, const float y) : x(x), y(y) {} + + //__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {} + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + + static __forceinline Vec2fa load( const void* const a ) { + const float* ptr = (const float*)a; + return Vec2fa(ptr[0],ptr[1]); + } + + static __forceinline Vec2fa loadu( const void* const a ) { + const float* ptr = (const float*)a; + return Vec2fa(ptr[0],ptr[1]); + } + + static __forceinline void storeu ( void* a, const Vec2fa& v ) { + float* ptr = (float*)a; + ptr[0] = v.x; ptr[1] = v.y; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {} + __forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {} + __forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {} + __forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {} + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + //__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; } + //__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; } + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa operator +( const Vec2fa& a ) { return a; } + __forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); } + __forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); } + __forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); } + + //__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); } + __forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); } + __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); } + __forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); } + + __forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); } + + __forceinline Vec2fa zero_fix(const Vec2fa& a) { + const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x; + const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y; + return Vec2fa(x,y); + } + __forceinline Vec2fa rcp_safe(const Vec2fa& a) { + return rcp(zero_fix(a)); + } + __forceinline Vec2fa log ( const Vec2fa& a ) { + return Vec2fa(sycl::log(a.x),sycl::log(a.y)); + } + + __forceinline Vec2fa exp ( const Vec2fa& a ) { + return Vec2fa(sycl::exp(a.x),sycl::exp(a.y)); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); } + __forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); } + __forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); } + __forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); } + __forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; } + __forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); } + __forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); } + __forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); } + + __forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { + return Vec2fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y)); + } + __forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { + return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y)); + } + +/* +#if defined(__SSE4_1__) + __forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_min_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif + +#if defined(__SSE4_1__) + __forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_max_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif + + __forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) { + return Vec2fa(powf(a.x,b),powf(a.y,b)); + } +*/ + + //////////////////////////////////////////////////////////////////////////////// + /// Ternary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); } + __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); } + __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); } + __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); } + + __forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); } + __forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); } + __forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); } + __forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; } + __forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; } + __forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; } + __forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; } + __forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; } + __forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Reductions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; } + __forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; } + __forceinline float reduce_min(const Vec2fa& v) { return sycl::fmin(v.x,v.y); } + __forceinline float reduce_max(const Vec2fa& v) { return sycl::fmax(v.x,v.y); } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return a.x == b.x && a.y == b.y; } + __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return a.x != b.x || a.y != b.y; } + + //////////////////////////////////////////////////////////////////////////////// + /// Euclidian Space Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) { + return reduce_add(a*b); + } + + __forceinline Vec2fa cross ( const Vec2fa& a ) { + return Vec2fa(-a.y,a.x); + } + + __forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); } + __forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); } + __forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); } + __forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); } + __forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); } + __forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); } + + //////////////////////////////////////////////////////////////////////////////// + /// Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) { + return Vec2fa(s ? t.x : f.x, s ? t.y : f.y); + } + + __forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) { + return madd(1.0f-t,v0,t*v1); + } + + __forceinline int maxDim ( const Vec2fa& a ) + { + const Vec2fa b = abs(a); + if (b.x > b.y) return 0; + else return 1; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Rounding Functions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(sycl::trunc(a.x),sycl::trunc(a.y)); } + __forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(sycl::floor(a.x),sycl::floor(a.y)); } + __forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(sycl::ceil (a.x),sycl::ceil (a.y)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Output Operators + //////////////////////////////////////////////////////////////////////////////// + + inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) { + return cout << "(" << a.x << ", " << a.y << ")"; + } + + /*template<> + __forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a) + { + v = 0; + const unsigned int lid = get_sub_group_local_id(); + if (lid == 0) v = a.x; + if (lid == 1) v = a.y; + }*/ + + typedef Vec2fa Vec2fa_t; +} diff --git a/thirdparty/embree/common/math/vec3.h b/thirdparty/embree/common/math/vec3.h index 254f6c4011..d5e78befe8 100644 --- a/thirdparty/embree/common/math/vec3.h +++ b/thirdparty/embree/common/math/vec3.h @@ -3,7 +3,7 @@ #pragma once -#include "math.h" +#include "emath.h" namespace embree { @@ -286,6 +286,8 @@ namespace embree template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; } +#if !defined(__SYCL_DEVICE_ONLY__) + #if defined(__AVX__) template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; @@ -333,4 +335,23 @@ namespace embree #if defined(__AVX512F__) template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {} #endif + +#else + +#if defined(__SSE__) + template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) { + x = a.x; y = a.y; z = a.z; + } +#endif +#if defined(__AVX__) + template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) { + x = a.x; y = a.y; z = a.z; + } +#endif +#if defined(__AVX512F__) + template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) { + x = a.x; y = a.y; z = a.z; + } +#endif +#endif } diff --git a/thirdparty/embree/common/math/vec3ba.h b/thirdparty/embree/common/math/vec3ba.h index a021b522dc..bf24a2a3b6 100644 --- a/thirdparty/embree/common/math/vec3ba.h +++ b/thirdparty/embree/common/math/vec3ba.h @@ -4,7 +4,12 @@ #pragma once #include "../sys/alloc.h" -#include "math.h" +#include "emath.h" + +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "vec3ba_sycl.h" +#else + #include "../simd/sse.h" namespace embree @@ -118,3 +123,5 @@ namespace embree return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")"; } } + +#endif diff --git a/thirdparty/embree/common/math/vec3ba_sycl.h b/thirdparty/embree/common/math/vec3ba_sycl.h new file mode 100644 index 0000000000..a2fa13de6c --- /dev/null +++ b/thirdparty/embree/common/math/vec3ba_sycl.h @@ -0,0 +1,115 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/alloc.h" +#include "emath.h" +#include "../simd/sse.h" + +namespace embree +{ + //////////////////////////////////////////////////////////////////////////////// + /// SSE Vec3ba Type + //////////////////////////////////////////////////////////////////////////////// + + struct __aligned(16) Vec3ba + { + //ALIGNED_STRUCT_(16); + + struct { bool x,y,z; }; + + typedef bool Scalar; + enum { N = 3 }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ba( ) {} + //__forceinline Vec3ba( const __m128 input ) : m128(input) {} + + __forceinline Vec3ba( const Vec3ba& other ) : x(other.x), y(other.y), z(other.z) {} + __forceinline Vec3ba& operator =(const Vec3ba& other) { x = other.x; y = other.y; z = other.z; return *this; } + + __forceinline explicit Vec3ba( bool a ) : x(a), y(a), z(a) {} + __forceinline Vec3ba( bool a, bool b, bool c) : x(a), y(b), z(c) {} + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {} + __forceinline Vec3ba( TrueTy ) : x(true), y(true), z(true) {} + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + //__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } + //__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } + }; + + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ba operator !( const Vec3ba& a ) { return Vec3ba(!a.x,!a.y,!a.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x & b.x, a.y & b.y, a.z & b.z); } + __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x | b.x, a.y | b.y, a.z | b.z); } + __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; } + __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; } + __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) { + return a.x == b.x && a.y == b.y && a.z == b.z; + } + __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) { + return a.x != b.x || a.y != b.y || a.z != b.z; + } +/* + __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) { + if (a.x != b.x) return a.x < b.x; + if (a.y != b.y) return a.y < b.y; + if (a.z != b.z) return a.z < b.z; + return false; + } +*/ + //////////////////////////////////////////////////////////////////////////////// + /// Reduction Operations + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool reduce_and( const Vec3ba& a ) { return a.x & a.y & a.z; } + __forceinline bool reduce_or ( const Vec3ba& a ) { return a.x | a.y | a.z; } + + __forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); } + __forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); } + __forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); } + + //////////////////////////////////////////////////////////////////////////////// + /// Output Operators + //////////////////////////////////////////////////////////////////////////////// + + inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) { + return cout; + } +} diff --git a/thirdparty/embree/common/math/vec3fa.h b/thirdparty/embree/common/math/vec3fa.h index 8564cf6d10..967e75da74 100644 --- a/thirdparty/embree/common/math/vec3fa.h +++ b/thirdparty/embree/common/math/vec3fa.h @@ -4,7 +4,12 @@ #pragma once #include "../sys/alloc.h" -#include "math.h" +#include "emath.h" + +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "vec3fa_sycl.h" +#else + #include "../simd/sse.h" namespace embree @@ -441,7 +446,6 @@ namespace embree //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; } __forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; } - __forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; } __forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {} @@ -783,3 +787,5 @@ namespace embree typedef Vec3fx Vec3ff; } + +#endif diff --git a/thirdparty/embree/common/math/vec3fa_sycl.h b/thirdparty/embree/common/math/vec3fa_sycl.h new file mode 100644 index 0000000000..5fdb00ab99 --- /dev/null +++ b/thirdparty/embree/common/math/vec3fa_sycl.h @@ -0,0 +1,617 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/alloc.h" +#include "emath.h" +#include "../simd/sse.h" + +namespace embree +{ + //////////////////////////////////////////////////////////////////////////////// + /// SSE Vec3fa Type + //////////////////////////////////////////////////////////////////////////////// + + struct __aligned(16) Vec3fa + { + //ALIGNED_STRUCT_(16); + + typedef float Scalar; + enum { N = 3 }; + struct { float x,y,z, do_not_use; }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa( ) {} + //__forceinline Vec3fa( const __m128 a ) : m128(a) {} + //__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {} + + __forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; } + //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; } + + __forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; } + __forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; } + + __forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {} + __forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {} + + __forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {} + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + __forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!! + + //friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + + static __forceinline Vec3fa load( const void* const a ) { + const float* ptr = (const float*)a; + return Vec3fa(ptr[0],ptr[1],ptr[2]); + } + + static __forceinline Vec3fa loadu( const void* const a ) { + const float* ptr = (const float*)a; + return Vec3fa(ptr[0],ptr[1],ptr[2]); + } + + static __forceinline void storeu ( void* a, const Vec3fa& v ) { + float* ptr = (float*)a; + ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {} + __forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {} + __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {} + __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {} + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } + __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; } + __forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); } + __forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); } + __forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); } + + //__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); } + __forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); } + __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); } + __forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); } + + __forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); } + + __forceinline Vec3fa zero_fix(const Vec3fa& a) { + const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x; + const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y; + const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z; + return Vec3fa(x,y,z); + } + __forceinline Vec3fa rcp_safe(const Vec3fa& a) { + return rcp(zero_fix(a)); + } + __forceinline Vec3fa log ( const Vec3fa& a ) { + return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z)); + } + + __forceinline Vec3fa exp ( const Vec3fa& a ) { + return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z)); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); } + __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); } + __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); } + __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); } + __forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; } + __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); } + __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); } + __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); } + + __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { + return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z)); + } + __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { + return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z)); + } + +/* +#if defined(__SSE4_1__) + __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_min_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif + +#if defined(__SSE4_1__) + __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_max_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif +*/ + __forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) { + return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b)); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Ternary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); } + __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); } + __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); } + __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); } + + __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); } + __forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); } + __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); } + __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; } + __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; } + __forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; } + __forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; } + __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; } + __forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Reductions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; } + __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; } + __forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); } + __forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; } + __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; } + + __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); } + __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); } + __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); } + __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); } + __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); } + __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); } + + __forceinline bool isvalid ( const Vec3fa& v ) { + return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE))); + } + + __forceinline bool is_finite ( const Vec3fa& a ) { + return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX))); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Euclidian Space Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) { + return reduce_add(a*b); + } + + __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) { + return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); + } + + __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); } + __forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); } + __forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); } + __forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); } + __forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); } + __forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); } + __forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); } + __forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); } + + __forceinline Vec3fa normalize_safe( const Vec3fa& a ) { + const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d); + } + + /*! differentiated normalization */ + __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp) + { + const float pp = dot(p,p); + const float pdp = dot(p,dp); + return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) { + return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z); + } + + __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) { + return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z); + } + + __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) { + return madd(1.0f-t,v0,t*v1); + } + + __forceinline int maxDim ( const Vec3fa& a ) + { + const Vec3fa b = abs(a); + if (b.x > b.y) { + if (b.x > b.z) return 0; else return 2; + } else { + if (b.y > b.z) return 1; else return 2; + } + } + + //////////////////////////////////////////////////////////////////////////////// + /// Rounding Functions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); } + __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); } + __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Output Operators + //////////////////////////////////////////////////////////////////////////////// + + inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) { + return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; + } + + __forceinline Vec2fa::Vec2fa(const Vec3fa& a) + : x(a.x), y(a.y) {} + + __forceinline Vec3ia::Vec3ia( const Vec3fa& a ) + : x((int)a.x), y((int)a.y), z((int)a.z) {} + + typedef Vec3fa Vec3fa_t; + + + + //////////////////////////////////////////////////////////////////////////////// + /// SSE Vec3fx Type + //////////////////////////////////////////////////////////////////////////////// + + struct __aligned(16) Vec3fx + { + //ALIGNED_STRUCT_(16); + + typedef float Scalar; + enum { N = 3 }; + struct { float x,y,z; union { int a; unsigned u; float w; }; }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx( ) {} + //__forceinline Vec3fx( const __m128 a ) : m128(a) {} + __forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {} + + __forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {} + __forceinline operator Vec3fa() const { return Vec3fa(x,y,z); } + + __forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; } + //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; } + + //__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; } + //__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; } + + __forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {} + __forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {} + + __forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {} + __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {} + __forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {} + + //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly! + //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly! + __forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {} + + __forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {} + + //__forceinline operator const __m128&() const { return m128; } + //__forceinline operator __m128&() { return m128; } + __forceinline operator vfloat4() const { return vfloat4(x,y,z,w); } + + //friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + + static __forceinline Vec3fx load( const void* const a ) { + const float* ptr = (const float*)a; + return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]); + } + + static __forceinline Vec3fx loadu( const void* const a ) { + const float* ptr = (const float*)a; + return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]); + } + + static __forceinline void storeu ( void* a, const Vec3fx& v ) { + float* ptr = (float*)a; + ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {} + __forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {} + __forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {} + __forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {} + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } + __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; } + __forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); } + __forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); } + __forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); } + + //__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); } + __forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); } + __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); } + __forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); } + + __forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); } + + __forceinline Vec3fx zero_fix(const Vec3fx& a) { + const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x; + const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y; + const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z; + return Vec3fx(x,y,z); + } + __forceinline Vec3fx rcp_safe(const Vec3fx& a) { + return rcp(zero_fix(a)); + } + __forceinline Vec3fx log ( const Vec3fx& a ) { + return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z)); + } + + __forceinline Vec3fx exp ( const Vec3fx& a ) { + return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z)); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); } + __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); } + __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); } + __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); } + __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; } + __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); } + __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); } + __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); } + + __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { + return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w)); + } + __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { + return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w)); + } + +/* +#if defined(__SSE4_1__) + __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_min_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif + +#if defined(__SSE4_1__) + __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) { + const vint4 ai = _mm_castps_si128(a); + const vint4 bi = _mm_castps_si128(b); + const vint4 ci = _mm_max_epi32(ai,bi); + return _mm_castsi128_ps(ci); + } +#endif + + __forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) { + return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b)); + } +*/ + + //////////////////////////////////////////////////////////////////////////////// + /// Ternary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); } + __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); } + __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); } + __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); } + + __forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); } + __forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); } + __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); } + __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; } + __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; } + __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; } + __forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; } + __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; } + __forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Reductions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; } + __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; } + __forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); } + __forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; } + __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; } + + __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); } + __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); } + __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); } + __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); } + __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); } + __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); } + + __forceinline bool isvalid ( const Vec3fx& v ) { + return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE))); + } + + __forceinline bool is_finite ( const Vec3fx& a ) { + return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX))); + } + + __forceinline bool isvalid4 ( const Vec3fx& v ) { + const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE; + const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE; + const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE; + const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE; + return valid_x & valid_y & valid_z & valid_w; + } + + __forceinline bool is_finite4 ( const Vec3fx& v ) { + const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX; + const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX; + const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX; + const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX; + return finite_x & finite_y & finite_z & finite_w; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Euclidian Space Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) { + return reduce_add(a*b); + } + + __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) { + return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); + } + + __forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); } + __forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); } + __forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); } + __forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); } + __forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); } + __forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); } + __forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); } + __forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); } + + __forceinline Vec3fx normalize_safe( const Vec3fx& a ) { + const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d); + } + + /*! differentiated normalization */ + __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp) + { + const float pp = dot(p,p); + const float pdp = dot(p,dp); + return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) { + return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w); + } + + __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) { + return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z); + } + + __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) { + return madd(1.0f-t,v0,t*v1); + } + + __forceinline int maxDim ( const Vec3fx& a ) + { + const Vec3fx b = abs(a); + if (b.x > b.y) { + if (b.x > b.z) return 0; else return 2; + } else { + if (b.y > b.z) return 1; else return 2; + } + } + + //////////////////////////////////////////////////////////////////////////////// + /// Rounding Functions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); } + __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); } + __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Output Operators + //////////////////////////////////////////////////////////////////////////////// + + inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) { + return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")"; + } + + typedef Vec3fx Vec3ff; + + //__forceinline Vec2fa::Vec2fa(const Vec3fx& a) + // : x(a.x), y(a.y) {} + + //__forceinline Vec3ia::Vec3ia( const Vec3fx& a ) + // : x((int)a.x), y((int)a.y), z((int)a.z) {} +} diff --git a/thirdparty/embree/common/math/vec3ia.h b/thirdparty/embree/common/math/vec3ia.h index d4cc3125cd..1472fe9135 100644 --- a/thirdparty/embree/common/math/vec3ia.h +++ b/thirdparty/embree/common/math/vec3ia.h @@ -4,7 +4,12 @@ #pragma once #include "../sys/alloc.h" -#include "math.h" +#include "emath.h" + +#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) +# include "vec3ia_sycl.h" +#else + #include "../simd/sse.h" namespace embree @@ -194,3 +199,5 @@ namespace embree return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; } } + +#endif diff --git a/thirdparty/embree/common/math/vec3ia_sycl.h b/thirdparty/embree/common/math/vec3ia_sycl.h new file mode 100644 index 0000000000..5a3d396373 --- /dev/null +++ b/thirdparty/embree/common/math/vec3ia_sycl.h @@ -0,0 +1,178 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/alloc.h" +#include "emath.h" +#include "../simd/sse.h" + +namespace embree +{ + //////////////////////////////////////////////////////////////////////////////// + /// SSE Vec3ia Type + //////////////////////////////////////////////////////////////////////////////// + + struct __aligned(16) Vec3ia + { + ALIGNED_STRUCT_(16); + + struct { int x,y,z; }; + + typedef int Scalar; + enum { N = 3 }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia( ) {} + //__forceinline Vec3ia( const __m128i a ) : m128(a) {} + + __forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {} + __forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; } + + __forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {} + __forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {} + //__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {} + __forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {} + + __forceinline explicit Vec3ia( const Vec3fa& a ); + + //__forceinline operator const __m128i&() const { return m128; } + //__forceinline operator __m128i&() { return m128; } + __forceinline operator vint4() const { return vint4(x,y,z,z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {} + __forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {} + __forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {} + __forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {} + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } + __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } + }; + + + //////////////////////////////////////////////////////////////////////////////// + /// Unary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); } + __forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); } + __forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Binary Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); } + __forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); } + __forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; } + + __forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); } + __forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); } + __forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; } + + __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); } + __forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); } + __forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; } + + __forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); } + __forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); } + __forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; } + + __forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); } + __forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); } + __forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; } + + __forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); } + __forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); } + __forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; } + + __forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); } + __forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); } + + __forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); } + __forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); } + __forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); } + + //////////////////////////////////////////////////////////////////////////////// + /// Assignment Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; } + __forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; } + + __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; } + __forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; } + + __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; } + __forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; } + + __forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; } + __forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; } + + __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; } + __forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; } + + __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; } + __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; } + + //////////////////////////////////////////////////////////////////////////////// + /// Reductions + //////////////////////////////////////////////////////////////////////////////// + + __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; } + __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; } + __forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); } + __forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Comparison Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return a.x == b.x & a.y == b.y & a.z == b.z; } + __forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return a.x != b.x & a.y != b.y & a.z != b.z; } + +/* + __forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) { + if (a.x != b.x) return a.x < b.x; + if (a.y != b.y) return a.y < b.y; + if (a.z != b.z) return a.z < b.z; + return false; + } +*/ + __forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); } + __forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); } + __forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); } + + //////////////////////////////////////////////////////////////////////////////// + /// Select + //////////////////////////////////////////////////////////////////////////////// + + __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) { + const int x = m.x ? t.x : f.x; + const int y = m.y ? t.y : f.y; + const int z = m.z ? t.z : f.z; + return Vec3ia(x,y,z); + } + + __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); } + __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); } + + //////////////////////////////////////////////////////////////////////////////// + /// Output Operators + //////////////////////////////////////////////////////////////////////////////// + + inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) { + return cout; + } +} diff --git a/thirdparty/embree/common/math/vec4.h b/thirdparty/embree/common/math/vec4.h index 10c53f47b4..5647859257 100644 --- a/thirdparty/embree/common/math/vec4.h +++ b/thirdparty/embree/common/math/vec4.h @@ -3,7 +3,7 @@ #pragma once -#include "math.h" +#include "emath.h" #include "vec3.h" namespace embree @@ -221,6 +221,8 @@ namespace embree { template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; } +#if !defined(__SYCL_DEVICE_ONLY__) + #if defined(__AVX__) template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; @@ -240,4 +242,25 @@ namespace embree #if defined(__AVX512F__) template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {} #endif + +#else + +#if defined(__SSE__) + template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) { + x = a.x; y = a.y; z = a.z; w = a.w; + } +#endif +#if defined(__AVX__) + template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) { + x = a.x; y = a.y; z = a.z; w = a.w; + } +#endif +#if defined(__AVX512F__) + template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) { + x = a.x; y = a.y; z = a.z; w = a.w; + } +#endif + +#endif } + |