summaryrefslogtreecommitdiffstats
path: root/thirdparty/embree/common/sys
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/sys')
-rw-r--r--thirdparty/embree/common/sys/alloc.cpp212
-rw-r--r--thirdparty/embree/common/sys/alloc.h93
-rw-r--r--thirdparty/embree/common/sys/atomic.h4
-rw-r--r--thirdparty/embree/common/sys/barrier.h2
-rw-r--r--thirdparty/embree/common/sys/estring.cpp (renamed from thirdparty/embree/common/sys/string.cpp)2
-rw-r--r--thirdparty/embree/common/sys/estring.h (renamed from thirdparty/embree/common/sys/string.h)36
-rw-r--r--thirdparty/embree/common/sys/intrinsics.h113
-rw-r--r--thirdparty/embree/common/sys/mutex.h4
-rw-r--r--thirdparty/embree/common/sys/platform.h245
-rw-r--r--thirdparty/embree/common/sys/sycl.h307
-rw-r--r--thirdparty/embree/common/sys/sysinfo.cpp11
-rw-r--r--thirdparty/embree/common/sys/thread.cpp2
-rw-r--r--thirdparty/embree/common/sys/vector.h22
13 files changed, 956 insertions, 97 deletions
diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp
index abdd269069..71616a3982 100644
--- a/thirdparty/embree/common/sys/alloc.cpp
+++ b/thirdparty/embree/common/sys/alloc.cpp
@@ -12,33 +12,177 @@
namespace embree
{
- void* alignedMalloc(size_t size, size_t align)
+ size_t total_allocations = 0;
+
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ __thread sycl::context* tls_context_tutorial = nullptr;
+ __thread sycl::device* tls_device_tutorial = nullptr;
+
+ __thread sycl::context* tls_context_embree = nullptr;
+ __thread sycl::device* tls_device_embree = nullptr;
+
+ void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
+ {
+ // -- GODOT start --
+ // if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
+ // if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
+ if (tls_context_embree != nullptr) {
+ abort();
+ }
+ if (tls_device_embree != nullptr) {
+ abort();
+ }
+ // -- GODOT end --
+ tls_context_embree = context;
+ tls_device_embree = device;
+ }
+
+ void disableUSMAllocEmbree()
+ {
+ // -- GODOT start --
+ // if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
+ // if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
+ if (tls_context_embree == nullptr) {
+ abort();
+ }
+ if (tls_device_embree == nullptr) {
+ abort();
+ }
+ // -- GODOT end --
+ tls_context_embree = nullptr;
+ tls_device_embree = nullptr;
+ }
+
+ void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
+ {
+ //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
+ //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
+ tls_context_tutorial = context;
+ tls_device_tutorial = device;
+ }
+
+ void disableUSMAllocTutorial()
+ {
+ // -- GODOT start --
+ // if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
+ // if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
+ if (tls_context_tutorial == nullptr) {
+ abort();
+ }
+ if (tls_device_tutorial == nullptr) {
+ abort();
+ }
+ // -- GODOT end --
+
+ tls_context_tutorial = nullptr;
+ tls_device_tutorial = nullptr;
+ }
+
+#endif
+
+ void* alignedMalloc(size_t size, size_t align)
{
if (size == 0)
return nullptr;
-
+
assert((align & (align-1)) == 0);
void* ptr = _mm_malloc(size,align);
-
- if (size != 0 && ptr == nullptr)
- // -- GODOT start --
- // throw std::bad_alloc();
+ // -- GODOT start --
+ // if (size != 0 && ptr == nullptr)
+ // throw std::bad_alloc();
+ if (size != 0 && ptr == nullptr) {
abort();
- // -- GODOT end --
-
+ }
+ // -- GODOT end --
return ptr;
}
-
+
void alignedFree(void* ptr)
{
if (ptr)
_mm_free(ptr);
}
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
+ {
+ assert(context);
+ assert(device);
+
+ if (size == 0)
+ return nullptr;
+
+ assert((align & (align-1)) == 0);
+ total_allocations++;
+
+ void* ptr = nullptr;
+ if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
+ ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
+ else
+ ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
+
+ // -- GODOT start --
+ // if (size != 0 && ptr == nullptr)
+ // throw std::bad_alloc();
+ if (size != 0 && ptr == nullptr) {
+ abort();
+ }
+ // -- GODOT end --
+
+ return ptr;
+ }
+
+ static MutexSys g_alloc_mutex;
+
+ void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
+ {
+ if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
+ if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode);
+ return nullptr;
+ }
+
+ void alignedSYCLFree(sycl::context* context, void* ptr)
+ {
+ assert(context);
+ if (ptr) {
+ sycl::free(ptr,*context);
+ }
+ }
+
+ void alignedSYCLFree(void* ptr)
+ {
+ if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
+ if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr);
+ }
+
+#endif
+
+ void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
+ {
+#if defined(EMBREE_SYCL_SUPPORT)
+ if (tls_context_embree || tls_context_tutorial)
+ return alignedSYCLMalloc(size,align,mode);
+ else
+#endif
+ return alignedMalloc(size,align);
+ }
+
+ void alignedUSMFree(void* ptr)
+ {
+#if defined(EMBREE_SYCL_SUPPORT)
+ if (tls_context_embree || tls_context_tutorial)
+ return alignedSYCLFree(ptr);
+ else
+#endif
+ return alignedFree(ptr);
+ }
+
static bool huge_pages_enabled = false;
static MutexSys os_init_mutex;
- __forceinline bool isHugePageCandidate(const size_t bytes)
+ __forceinline bool isHugePageCandidate(const size_t bytes)
{
if (!huge_pages_enabled)
return false;
@@ -133,7 +277,9 @@ namespace embree
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
// -- GODOT start --
// if (ptr == nullptr) throw std::bad_alloc();
- if (ptr == nullptr) abort();
+ if (ptr == nullptr) {
+ abort();
+ }
// -- GODOT end --
hugepages = false;
return ptr;
@@ -150,11 +296,13 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
- if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
- // -- GODOT start --
- // throw std::bad_alloc();
+ // -- GODOT start --
+ // if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
+ // throw std::bad_alloc();
+ if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
abort();
- // -- GODOT end --
+ }
+ // -- GODOT end --
return bytesNew;
}
@@ -164,11 +312,13 @@ namespace embree
if (bytes == 0)
return;
- if (!VirtualFree(ptr,0,MEM_RELEASE))
- // -- GODOT start --
- // throw std::bad_alloc();
+ // -- GODOT start --
+ // if (!VirtualFree(ptr,0,MEM_RELEASE))
+ // throw std::bad_alloc();
+ if (!VirtualFree(ptr,0,MEM_RELEASE)) {
abort();
- // -- GODOT end --
+ }
+ // -- GODOT end --
}
void os_advise(void *ptr, size_t bytes)
@@ -274,7 +424,9 @@ namespace embree
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
// -- GODOT start --
// if (ptr == MAP_FAILED) throw std::bad_alloc();
- if (ptr == MAP_FAILED) abort();
+ if (ptr == MAP_FAILED) {
+ abort();
+ }
// -- GODOT end --
hugepages = false;
@@ -291,11 +443,13 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
- if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
- // -- GODOT start --
- // throw std::bad_alloc();
+ // -- GODOT start --
+ // if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
+ // throw std::bad_alloc();
+ if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
abort();
- // -- GODOT end --
+ }
+ // -- GODOT end --
return bytesNew;
}
@@ -308,11 +462,13 @@ namespace embree
/* for hugepages we need to also align the size */
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytes = (bytes+pageSize-1) & ~(pageSize-1);
- if (munmap(ptr,bytes) == -1)
- // -- GODOT start --
- // throw std::bad_alloc();
+ // -- GODOT start --
+ // if (munmap(ptr,bytes) == -1)
+ // throw std::bad_alloc();
+ if (munmap(ptr,bytes) == -1) {
abort();
- // -- GODOT end --
+ }
+ // -- GODOT end --
}
/* hint for transparent huge pages (THP) */
diff --git a/thirdparty/embree/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h
index 4fa474ec1d..28b17f988d 100644
--- a/thirdparty/embree/common/sys/alloc.h
+++ b/thirdparty/embree/common/sys/alloc.h
@@ -9,20 +9,72 @@
namespace embree
{
-#define ALIGNED_STRUCT_(align) \
- void* operator new(size_t size) { return alignedMalloc(size,align); } \
- void operator delete(void* ptr) { alignedFree(ptr); } \
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ /* enables SYCL USM allocation */
+ void enableUSMAllocEmbree(sycl::context* context, sycl::device* device);
+ void enableUSMAllocTutorial(sycl::context* context, sycl::device* device);
+
+ /* disables SYCL USM allocation */
+ void disableUSMAllocEmbree();
+ void disableUSMAllocTutorial();
+
+#endif
+
+#define ALIGNED_STRUCT_(align) \
+ void* operator new(size_t size) { return alignedMalloc(size,align); } \
+ void operator delete(void* ptr) { alignedFree(ptr); } \
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
void operator delete[](void* ptr) { alignedFree(ptr); }
+
+#define ALIGNED_STRUCT_USM_(align) \
+ void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \
+ void operator delete(void* ptr) { alignedUSMFree(ptr); } \
+ void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \
+ void operator delete[](void* ptr) { alignedUSMFree(ptr); }
+
+#define ALIGNED_CLASS_(align) \
+ public: \
+ ALIGNED_STRUCT_(align) \
+ private:
-#define ALIGNED_CLASS_(align) \
+#define ALIGNED_CLASS_USM_(align) \
public: \
- ALIGNED_STRUCT_(align) \
+ ALIGNED_STRUCT_USM_(align) \
private:
+
+ enum EmbreeUSMMode {
+ EMBREE_USM_SHARED = 0,
+ EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
+ EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
+ };
/*! aligned allocation */
void* alignedMalloc(size_t size, size_t align);
void alignedFree(void* ptr);
+
+ /*! aligned allocation using SYCL USM */
+ void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY);
+ void alignedUSMFree(void* ptr);
+
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ /*! aligned allocation using SYCL USM */
+ void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
+ void alignedSYCLFree(sycl::context* context, void* ptr);
+
+ // deleter functor to use as deleter in std unique or shared pointers that
+ // capture raw pointers created by sycl::malloc and it's variants
+ template<typename T>
+ struct sycl_deleter
+ {
+ void operator()(T const* ptr)
+ {
+ alignedUSMFree((void*)ptr);
+ }
+ };
+
+#endif
/*! allocator that performs aligned allocations */
template<typename T, size_t alignment>
@@ -95,6 +147,37 @@ namespace embree
bool hugepages;
};
+ /*! allocator that newer performs allocations */
+ template<typename T>
+ struct no_allocator
+ {
+ typedef T value_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+
+ __forceinline pointer allocate( size_type n ) {
+ // -- GODOT start --
+ // throw std::runtime_error("no allocation supported");
+ abort();
+ // -- GODOT end --
+ }
+
+ __forceinline void deallocate( pointer p, size_type n ) {
+ }
+
+ __forceinline void construct( pointer p, const_reference val ) {
+ new (p) T(val);
+ }
+
+ __forceinline void destroy( pointer p ) {
+ p->~T();
+ }
+ };
+
/*! allocator for IDs */
template<typename T, size_t max_id>
struct IDPool
diff --git a/thirdparty/embree/common/sys/atomic.h b/thirdparty/embree/common/sys/atomic.h
index 67af254f36..cf9909aad9 100644
--- a/thirdparty/embree/common/sys/atomic.h
+++ b/thirdparty/embree/common/sys/atomic.h
@@ -36,7 +36,7 @@ namespace embree
};
template<typename T>
- __forceinline void atomic_min(std::atomic<T>& aref, const T& bref)
+ __forceinline void _atomic_min(std::atomic<T>& aref, const T& bref)
{
const T b = bref.load();
while (true) {
@@ -47,7 +47,7 @@ namespace embree
}
template<typename T>
- __forceinline void atomic_max(std::atomic<T>& aref, const T& bref)
+ __forceinline void _atomic_max(std::atomic<T>& aref, const T& bref)
{
const T b = bref.load();
while (true) {
diff --git a/thirdparty/embree/common/sys/barrier.h b/thirdparty/embree/common/sys/barrier.h
index c56513a2ed..e1580f41a9 100644
--- a/thirdparty/embree/common/sys/barrier.h
+++ b/thirdparty/embree/common/sys/barrier.h
@@ -34,7 +34,7 @@ namespace embree
void* opaque;
};
- /*! fast active barrier using atomitc counter */
+ /*! fast active barrier using atomic counter */
struct BarrierActive
{
public:
diff --git a/thirdparty/embree/common/sys/string.cpp b/thirdparty/embree/common/sys/estring.cpp
index f42fdc8536..c66c5c5b84 100644
--- a/thirdparty/embree/common/sys/string.cpp
+++ b/thirdparty/embree/common/sys/estring.cpp
@@ -1,7 +1,7 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
-#include "string.h"
+#include "estring.h"
#include <algorithm>
#include <ctype.h>
diff --git a/thirdparty/embree/common/sys/string.h b/thirdparty/embree/common/sys/estring.h
index 820076b21c..63051ad3c3 100644
--- a/thirdparty/embree/common/sys/string.h
+++ b/thirdparty/embree/common/sys/estring.h
@@ -28,6 +28,42 @@ namespace embree
std::streamsize precision;
};
+ struct IndentOStream : public std::streambuf
+ {
+ explicit IndentOStream(std::ostream &ostream, int indent = 2)
+ : streambuf(ostream.rdbuf())
+ , start_of_line(true)
+ , ident_str(indent, ' ')
+ , stream(&ostream)
+ {
+ // set streambuf of ostream to this and save original streambuf
+ stream->rdbuf(this);
+ }
+
+ virtual ~IndentOStream()
+ {
+ if (stream != NULL) {
+ // restore old streambuf
+ stream->rdbuf(streambuf);
+ }
+ }
+
+ protected:
+ virtual int overflow(int ch) {
+ if (start_of_line && ch != '\n') {
+ streambuf->sputn(ident_str.data(), ident_str.size());
+ }
+ start_of_line = ch == '\n';
+ return streambuf->sputc(ch);
+ }
+
+ private:
+ std::streambuf *streambuf;
+ bool start_of_line;
+ std::string ident_str;
+ std::ostream *stream;
+ };
+
std::string toLowerCase(const std::string& s);
std::string toUpperCase(const std::string& s);
diff --git a/thirdparty/embree/common/sys/intrinsics.h b/thirdparty/embree/common/sys/intrinsics.h
index 2c2f6eccda..f5074bb29d 100644
--- a/thirdparty/embree/common/sys/intrinsics.h
+++ b/thirdparty/embree/common/sys/intrinsics.h
@@ -64,7 +64,7 @@ namespace embree
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
-#if defined(__WIN32__)
+#if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER)
__forceinline size_t read_tsc()
{
@@ -89,7 +89,7 @@ namespace embree
#endif
}
-#if defined(__X86_64__)
+#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__)
return _tzcnt_u64(v);
@@ -113,7 +113,7 @@ namespace embree
return i;
}
-#if defined(__X86_64__)
+#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bscf(size_t& v)
{
size_t i = bsf(v);
@@ -138,7 +138,7 @@ namespace embree
#endif
}
-#if defined(__X86_64__)
+#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsr(size_t v) {
#if defined(__AVX2__)
return 63 -_lzcnt_u64(v);
@@ -196,49 +196,6 @@ namespace embree
#else
-#if defined(__i386__) && defined(__PIC__)
-
- __forceinline void __cpuid(int out[4], int op)
- {
- asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
- "cpuid\n\t"
- "xchg{l}\t{%%}ebx, %1\n\t"
- : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
- : "0"(op));
- }
-
- __forceinline void __cpuid_count(int out[4], int op1, int op2)
- {
- asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
- "cpuid\n\t"
- "xchg{l}\t{%%}ebx, %1\n\t"
- : "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
- : "0" (op1), "2" (op2));
- }
-
-#elif defined(__X86_ASM__)
-
- __forceinline void __cpuid(int out[4], int op) {
-#if defined(__ARM_NEON)
- if (op == 0) { // Get CPU name
- out[0] = 0x41524d20;
- out[1] = 0x41524d20;
- out[2] = 0x41524d20;
- out[3] = 0x41524d20;
- }
-#else
- asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
-#endif
- }
-
-#if !defined(__ARM_NEON)
- __forceinline void __cpuid_count(int out[4], int op1, int op2) {
- asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
- }
-#endif
-
-#endif
-
__forceinline uint64_t read_tsc() {
#if defined(__X86_ASM__)
uint32_t high,low;
@@ -263,6 +220,13 @@ namespace embree
#endif
#endif
}
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ __forceinline unsigned int bsf(unsigned v) {
+ return sycl::ctz(v);
+ }
+
+#else
#if defined(__64BIT__)
__forceinline unsigned bsf(unsigned v)
@@ -280,6 +244,13 @@ namespace embree
#endif
}
#endif
+#endif
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ __forceinline size_t bsf(size_t v) {
+ return sycl::ctz(v);
+ }
+#else
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__) && !defined(__aarch64__)
@@ -294,6 +265,7 @@ namespace embree
return __builtin_ctzl(v);
#endif
}
+#endif
__forceinline int bscf(int& v)
{
@@ -434,6 +406,41 @@ namespace embree
#endif
+#if !defined(__WIN32__)
+
+#if defined(__i386__) && defined(__PIC__)
+
+ __forceinline void __cpuid(int out[4], int op)
+ {
+ asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
+ "cpuid\n\t"
+ "xchg{l}\t{%%}ebx, %1\n\t"
+ : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+ : "0"(op));
+ }
+
+ __forceinline void __cpuid_count(int out[4], int op1, int op2)
+ {
+ asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
+ "cpuid\n\t"
+ "xchg{l}\t{%%}ebx, %1\n\t"
+ : "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
+ : "0" (op1), "2" (op2));
+ }
+
+#elif defined(__X86_ASM__)
+
+ __forceinline void __cpuid(int out[4], int op) {
+ asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
+ }
+
+ __forceinline void __cpuid_count(int out[4], int op1, int op2) {
+ asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
+ }
+
+#endif
+#endif
+
////////////////////////////////////////////////////////////////////////////////
/// All Platforms
////////////////////////////////////////////////////////////////////////////////
@@ -459,8 +466,16 @@ namespace embree
#endif
#endif
-#if defined(__SSE4_2__) || defined(__ARM_NEON)
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+ __forceinline unsigned int popcnt(unsigned int in) {
+ return sycl::popcount(in);
+ }
+
+#else
+
+#if defined(__SSE4_2__) || defined(__ARM_NEON)
+
__forceinline int popcnt(int in) {
return _mm_popcnt_u32(in);
}
@@ -476,6 +491,8 @@ namespace embree
#endif
#endif
+
+#endif
#if defined(__X86_ASM__)
__forceinline uint64_t rdtsc()
diff --git a/thirdparty/embree/common/sys/mutex.h b/thirdparty/embree/common/sys/mutex.h
index 26af6c582c..0f7345cf45 100644
--- a/thirdparty/embree/common/sys/mutex.h
+++ b/thirdparty/embree/common/sys/mutex.h
@@ -86,8 +86,8 @@ namespace embree
class PaddedSpinLock : public SpinLock
{
- private:
- char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
+ private:
+ MAYBE_UNUSED char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
};
/*! safe mutex lock and unlock helper */
template<typename Mutex> class Lock {
diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h
index 728bf6ed7d..d4a9b9e119 100644
--- a/thirdparty/embree/common/sys/platform.h
+++ b/thirdparty/embree/common/sys/platform.h
@@ -3,7 +3,9 @@
#pragma once
+#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
+#endif
#include <cstddef>
#include <cassert>
@@ -18,6 +20,30 @@
#include <cstring>
#include <stdint.h>
#include <functional>
+#include <mutex>
+
+#if defined(EMBREE_SYCL_SUPPORT)
+
+#define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#pragma clang diagnostic ignored "-W#pragma-messages"
+
+#include <sycl/sycl.hpp>
+
+#pragma clang diagnostic pop
+
+#include "sycl.h"
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+#define CONSTANT __attribute__((opencl_constant))
+#else
+#define CONSTANT
+#endif
+
+#endif
+
////////////////////////////////////////////////////////////////////////////////
/// detect platform
@@ -115,7 +141,7 @@
#else
#define __restrict__ //__restrict // causes issues with MSVC
#endif
-#if !defined(__thread)
+#if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER)
#define __thread __declspec(thread)
#endif
#if !defined(__aligned)
@@ -148,6 +174,10 @@
#define MAYBE_UNUSED
#endif
+#if !defined(_unused)
+#define _unused(x) ((void)(x))
+#endif
+
#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly
#define DELETED
#else
@@ -155,7 +185,7 @@
#endif
#if !defined(likely)
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__)
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#else
@@ -171,22 +201,27 @@
/* debug printing macros */
#define STRING(x) #x
#define TOSTRING(x) STRING(x)
-#define PING embree_cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
+#define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl
#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
+#define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl
+#define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
+#define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
+#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
+
#if defined(DEBUG) // only report file and line in debug mode
// -- GODOT start --
- // #define THROW_RUNTIME_ERROR(str)
+ // #define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#define THROW_RUNTIME_ERROR(str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
// -- GODOT end --
#else
// -- GODOT start --
- // #define THROW_RUNTIME_ERROR(str)
+ // #define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(str);
#define THROW_RUNTIME_ERROR(str) \
abort();
@@ -323,13 +358,209 @@ __forceinline std::string toString(long long value) {
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated
#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// SYCL specific
+////////////////////////////////////////////////////////////////////////////////
+
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+
+#define sycl_printf0(format, ...) { \
+ static const CONSTANT char fmt[] = format; \
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
+ sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
+ }
+
+#define sycl_printf0_(format) { \
+ static const CONSTANT char fmt[] = format; \
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
+ sycl::ext::oneapi::experimental::printf(fmt); \
+ }
+
+#else
+
+#define sycl_printf0(format, ...) { \
+ static const CONSTANT char fmt[] = format; \
+ sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
+ }
+
+#define sycl_printf0_(format) { \
+ static const CONSTANT char fmt[] = format; \
+ sycl::ext::oneapi::experimental::printf(fmt); \
+ }
+
+#endif
+
+#define sycl_printf(format, ...) { \
+ static const CONSTANT char fmt[] = format; \
+ sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
+ }
+
+#define sycl_printf_(format) { \
+ static const CONSTANT char fmt[] = format; \
+ sycl::ext::oneapi::experimental::printf(fmt); \
+ }
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
+
+namespace embree
+{
+ struct sycl_ostream_ {
+ sycl_ostream_ (bool uniform) : uniform(uniform) {}
+ bool uniform = false;
+ };
+ struct sycl_endl_ {};
+
+#define embree_ostream embree::sycl_ostream_
+#define embree_cout embree::sycl_ostream_(false)
+#define embree_cout_uniform embree::sycl_ostream_(true)
+#define embree_endl embree::sycl_endl_()
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%i",i);
+ }
+ else
+ sycl_printf("%i ",i);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%u",i);
+ } else
+ sycl_printf("%u ",i);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%f",f);
+ } else
+ sycl_printf("%f ",f);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%f",d);
+ } else
+ sycl_printf("%f ",d);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, uint64_t l)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%lu",l);
+ } else
+ sycl_printf("%lu ",l);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%l",l);
+ } else
+ sycl_printf("%l ",l);
+
+ return cout;
+ }
+
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p)
+ {
+ if (cout.uniform) {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%p",p);
+ } else
+ sycl_printf("%p ",p);
+
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c)
+ {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf("%s",c);
+ return cout;
+ }
+
+ inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_)
+ {
+ if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
+ sycl_printf_("\n");
+ return cout;
+ }
+}
+
+#else
-/* embree output stream */
#define embree_ostream std::ostream&
#define embree_cout std::cout
#define embree_cout_uniform std::cout
#define embree_endl std::endl
-
+
+#endif
+
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ /* printing out sycle vector types */
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v) {
+ return out << "(" << v.x() << "," << v.y() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v) {
+ return out << "(" << v.x() << "," << v.y() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v) {
+ return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
+ }
+ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v) {
+ return out << "(" << v.x() << "," << v.y() << ")";
+ }
+
+#endif
+
+inline void tab(std::ostream& cout, int n) {
+ for (int i=0; i<n; i++) cout << " ";
+}
+
+inline std::string tab(int depth) {
+ return std::string(2*depth,' ');
+}
+
////////////////////////////////////////////////////////////////////////////////
/// Some macros for static profiling
////////////////////////////////////////////////////////////////////////////////
diff --git a/thirdparty/embree/common/sys/sycl.h b/thirdparty/embree/common/sys/sycl.h
new file mode 100644
index 0000000000..2558eb052f
--- /dev/null
+++ b/thirdparty/embree/common/sys/sycl.h
@@ -0,0 +1,307 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "platform.h"
+
+using sycl::float16;
+using sycl::float8;
+using sycl::float4;
+using sycl::float3;
+using sycl::float2;
+using sycl::int16;
+using sycl::int8;
+using sycl::int4;
+using sycl::int3;
+using sycl::int2;
+using sycl::uint16;
+using sycl::uint8;
+using sycl::uint4;
+using sycl::uint3;
+using sycl::uint2;
+using sycl::uchar16;
+using sycl::uchar8;
+using sycl::uchar4;
+using sycl::uchar3;
+using sycl::uchar2;
+using sycl::ushort16;
+using sycl::ushort8;
+using sycl::ushort4;
+using sycl::ushort3;
+using sycl::ushort2;
+
+#ifdef __SYCL_DEVICE_ONLY__
+#define GLOBAL __attribute__((opencl_global))
+#define LOCAL __attribute__((opencl_local))
+
+SYCL_EXTERNAL extern int work_group_reduce_add(int x);
+SYCL_EXTERNAL extern float work_group_reduce_min(float x);
+SYCL_EXTERNAL extern float work_group_reduce_max(float x);
+
+SYCL_EXTERNAL extern float atomic_min(volatile GLOBAL float *p, float val);
+SYCL_EXTERNAL extern float atomic_min(volatile LOCAL float *p, float val);
+SYCL_EXTERNAL extern float atomic_max(volatile GLOBAL float *p, float val);
+SYCL_EXTERNAL extern float atomic_max(volatile LOCAL float *p, float val);
+
+SYCL_EXTERNAL extern "C" unsigned int intel_sub_group_ballot(bool valid);
+
+SYCL_EXTERNAL extern "C" void __builtin_IB_assume_uniform(void *p);
+
+// Load message caching control
+
+ enum LSC_LDCC {
+ LSC_LDCC_DEFAULT,
+ LSC_LDCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
+ LSC_LDCC_L1UC_L3C, // Override to L1 uncached and L3 cached
+ LSC_LDCC_L1C_L3UC, // Override to L1 cached and L3 uncached
+ LSC_LDCC_L1C_L3C, // Override to L1 cached and L3 cached
+ LSC_LDCC_L1S_L3UC, // Override to L1 streaming load and L3 uncached
+ LSC_LDCC_L1S_L3C, // Override to L1 streaming load and L3 cached
+ LSC_LDCC_L1IAR_L3C, // Override to L1 invalidate-after-read, and L3 cached
+ };
+
+
+
+// Store message caching control (also used for atomics)
+
+ enum LSC_STCC {
+ LSC_STCC_DEFAULT,
+ LSC_STCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
+ LSC_STCC_L1UC_L3WB, // Override to L1 uncached and L3 written back
+ LSC_STCC_L1WT_L3UC, // Override to L1 written through and L3 uncached
+ LSC_STCC_L1WT_L3WB, // Override to L1 written through and L3 written back
+ LSC_STCC_L1S_L3UC, // Override to L1 streaming and L3 uncached
+ LSC_STCC_L1S_L3WB, // Override to L1 streaming and L3 written back
+ LSC_STCC_L1WB_L3WB, // Override to L1 written through and L3 written back
+ };
+
+
+
+///////////////////////////////////////////////////////////////////////
+
+// LSC Loads
+
+///////////////////////////////////////////////////////////////////////
+
+SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uchar_to_uint (const GLOBAL uint8_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D8U32
+SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_ushort_to_uint(const GLOBAL uint16_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D16U32
+SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uint (const GLOBAL uint32_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V1
+SYCL_EXTERNAL /* extern "C" */ sycl::uint2 __builtin_IB_lsc_load_global_uint2 (const GLOBAL sycl::uint2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V2
+SYCL_EXTERNAL /* extern "C" */ sycl::uint3 __builtin_IB_lsc_load_global_uint3 (const GLOBAL sycl::uint3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V3
+SYCL_EXTERNAL /* extern "C" */ sycl::uint4 __builtin_IB_lsc_load_global_uint4 (const GLOBAL sycl::uint4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V4
+SYCL_EXTERNAL /* extern "C" */ sycl::uint8 __builtin_IB_lsc_load_global_uint8 (const GLOBAL sycl::uint8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V8
+SYCL_EXTERNAL /* extern "C" */ uint64_t __builtin_IB_lsc_load_global_ulong (const GLOBAL uint64_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V1
+SYCL_EXTERNAL /* extern "C" */ sycl::ulong2 __builtin_IB_lsc_load_global_ulong2 (const GLOBAL sycl::ulong2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V2
+SYCL_EXTERNAL /* extern "C" */ sycl::ulong3 __builtin_IB_lsc_load_global_ulong3 (const GLOBAL sycl::ulong3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V3
+SYCL_EXTERNAL /* extern "C" */ sycl::ulong4 __builtin_IB_lsc_load_global_ulong4 (const GLOBAL sycl::ulong4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V4
+SYCL_EXTERNAL /* extern "C" */ sycl::ulong8 __builtin_IB_lsc_load_global_ulong8 (const GLOBAL sycl::ulong8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V8
+
+// global address space
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uchar_from_uint (GLOBAL uint8_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D8U32
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ushort_from_uint(GLOBAL uint16_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D16U32
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint (GLOBAL uint32_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D32V1
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint2 (GLOBAL sycl::uint2 *base, int immElemOff, sycl::uint2 val, enum LSC_STCC cacheOpt); //D32V2
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint3 (GLOBAL sycl::uint3 *base, int immElemOff, sycl::uint3 val, enum LSC_STCC cacheOpt); //D32V3
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint4 (GLOBAL sycl::uint4 *base, int immElemOff, sycl::uint4 val, enum LSC_STCC cacheOpt); //D32V4
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint8 (GLOBAL sycl::uint8 *base, int immElemOff, sycl::uint8 val, enum LSC_STCC cacheOpt); //D32V8
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong (GLOBAL uint64_t *base, int immElemOff, uint64_t val, enum LSC_STCC cacheOpt); //D64V1
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong2 (GLOBAL sycl::ulong2 *base, int immElemOff, sycl::ulong2 val, enum LSC_STCC cacheOpt); //D64V2
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong3 (GLOBAL sycl::ulong3 *base, int immElemOff, sycl::ulong3 val, enum LSC_STCC cacheOpt); //D64V3
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong4 (GLOBAL sycl::ulong4 *base, int immElemOff, sycl::ulong4 val, enum LSC_STCC cacheOpt); //D64V4
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong8 (GLOBAL sycl::ulong8 *base, int immElemOff, sycl::ulong8 val, enum LSC_STCC cacheOpt); //D64V8
+
+///////////////////////////////////////////////////////////////////////
+// prefetching
+///////////////////////////////////////////////////////////////////////
+//
+// LSC Pre-Fetch Load functions with CacheControls
+// global address space
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uchar (const GLOBAL uint8_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ushort(const GLOBAL uint16_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint (const GLOBAL uint32_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint2 (const GLOBAL sycl::uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint3 (const GLOBAL sycl::uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint4 (const GLOBAL sycl::uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint8 (const GLOBAL sycl::uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong (const GLOBAL uint64_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong2(const GLOBAL sycl::ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong3(const GLOBAL sycl::ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong4(const GLOBAL sycl::ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
+SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong8(const GLOBAL sycl::ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8
+
+#else
+
+#define GLOBAL
+#define LOCAL
+
+/* dummy functions for host */
+inline int work_group_reduce_add(int x) { return x; }
+inline float work_group_reduce_min(float x) { return x; }
+inline float work_group_reduce_max(float x) { return x; }
+
+inline float atomic_min(volatile float *p, float val) { return val; };
+inline float atomic_max(volatile float *p, float val) { return val; };
+
+inline uint32_t intel_sub_group_ballot(bool valid) { return 0; }
+
+#endif
+
+/* creates a temporary that is enforced to be uniform */
+#define SYCL_UNIFORM_VAR(Ty,tmp,k) \
+ Ty tmp##_data; \
+ Ty* p##tmp##_data = (Ty*) sub_group_broadcast((uint64_t)&tmp##_data,k); \
+ Ty& tmp = *p##tmp##_data;
+
+#if !defined(__forceinline)
+#define __forceinline inline __attribute__((always_inline))
+#endif
+
+#if __SYCL_COMPILER_VERSION < 20210801
+#define all_of_group all_of
+#define any_of_group any_of
+#define none_of_group none_of
+#define group_broadcast broadcast
+#define reduce_over_group reduce
+#define exclusive_scan_over_group exclusive_scan
+#define inclusive_scan_over_group inclusive_scan
+#endif
+
+namespace embree
+{
+ template<typename T>
+ __forceinline T cselect(const bool mask, const T &a, const T &b)
+ {
+ return sycl::select(b,a,(int)mask);
+ }
+
+ template<typename T, typename M>
+ __forceinline T cselect(const M &mask, const T &a, const T &b)
+ {
+ return sycl::select(b,a,mask);
+ }
+
+ __forceinline const sycl::sub_group this_sub_group() {
+ return sycl::ext::oneapi::experimental::this_sub_group();
+ }
+
+ __forceinline const uint32_t get_sub_group_local_id() {
+ return this_sub_group().get_local_id()[0];
+ }
+
+ __forceinline const uint32_t get_sub_group_size() {
+ return this_sub_group().get_max_local_range().size();
+ }
+
+ __forceinline const uint32_t get_sub_group_id() {
+ return this_sub_group().get_group_id()[0];
+ }
+
+ __forceinline const uint32_t get_num_sub_groups() {
+ return this_sub_group().get_group_range().size();
+ }
+
+ __forceinline uint32_t sub_group_ballot(bool pred) {
+ return intel_sub_group_ballot(pred);
+ }
+
+ __forceinline bool sub_group_all_of(bool pred) {
+ return sycl::all_of_group(this_sub_group(),pred);
+ }
+
+ __forceinline bool sub_group_any_of(bool pred) {
+ return sycl::any_of_group(this_sub_group(),pred);
+ }
+
+ __forceinline bool sub_group_none_of(bool pred) {
+ return sycl::none_of_group(this_sub_group(),pred);
+ }
+
+ template <typename T> __forceinline T sub_group_broadcast(T x, sycl::id<1> local_id) {
+ return sycl::group_broadcast<sycl::sub_group>(this_sub_group(),x,local_id);
+ }
+
+ template <typename T> __forceinline T sub_group_make_uniform(T x) {
+ return sub_group_broadcast(x,sycl::ctz(intel_sub_group_ballot(true)));
+ }
+
+ __forceinline void assume_uniform_array(void* ptr) {
+#ifdef __SYCL_DEVICE_ONLY__
+ __builtin_IB_assume_uniform(ptr);
+#endif
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, BinaryOperation binary_op) {
+ return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,binary_op);
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, T init, BinaryOperation binary_op) {
+ return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,init,binary_op);
+ }
+
+ template <typename T> __forceinline T sub_group_reduce_min(T x, T init) {
+ return sub_group_reduce(x, init, sycl::ext::oneapi::minimum<T>());
+ }
+
+ template <typename T> __forceinline T sub_group_reduce_min(T x) {
+ return sub_group_reduce(x, sycl::ext::oneapi::minimum<T>());
+ }
+
+ template <typename T> __forceinline T sub_group_reduce_max(T x) {
+ return sub_group_reduce(x, sycl::ext::oneapi::maximum<T>());
+ }
+
+ template <typename T> __forceinline T sub_group_reduce_add(T x) {
+ return sub_group_reduce(x, sycl::ext::oneapi::plus<T>());
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, BinaryOperation binary_op) {
+ return sycl::exclusive_scan_over_group(this_sub_group(),x,binary_op);
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan_min(T x) {
+ return sub_group_exclusive_scan(x,sycl::ext::oneapi::minimum<T>());
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, T init, BinaryOperation binary_op) {
+ return sycl::exclusive_scan_over_group(this_sub_group(),x,init,binary_op);
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op) {
+ return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op);
+ }
+
+ template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op, T init) {
+ return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
+ }
+
+ template <typename T> __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) {
+ return this_sub_group().shuffle(x, local_id);
+ }
+
+ template <typename T> __forceinline T sub_group_shuffle_down(T x, uint32_t delta) {
+ return this_sub_group().shuffle_down(x, delta);
+ }
+
+ template <typename T> __forceinline T sub_group_shuffle_up(T x, uint32_t delta) {
+ return this_sub_group().shuffle_up(x, delta);
+ }
+
+ template <typename T> __forceinline T sub_group_load(const void* src) {
+ return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
+ }
+
+ template <typename T> __forceinline void sub_group_store(void* dst, const T& x) {
+ this_sub_group().store(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)dst),x);
+ }
+}
+
+#if __SYCL_COMPILER_VERSION < 20210801
+#undef all_of_group
+#undef any_of_group
+#undef none_of_group
+#undef group_broadcast
+#undef reduce_over_group
+#undef exclusive_scan_over_group
+#undef inclusive_scan_over_group
+#endif
diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp
index 7f7a009a1e..d01eab3c9d 100644
--- a/thirdparty/embree/common/sys/sysinfo.cpp
+++ b/thirdparty/embree/common/sys/sysinfo.cpp
@@ -1,9 +1,15 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
+#if defined(__INTEL_LLVM_COMPILER)
+// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wignored-attributes"
+#endif
+
#include "sysinfo.h"
#include "intrinsics.h"
-#include "string.h"
+#include "estring.h"
#include "ref.h"
#if defined(__FREEBSD__)
#include <sys/cpuset.h>
@@ -690,3 +696,6 @@ namespace embree
}
#endif
+#if defined(__INTEL_LLVM_COMPILER)
+#pragma clang diagnostic pop
+#endif
diff --git a/thirdparty/embree/common/sys/thread.cpp b/thirdparty/embree/common/sys/thread.cpp
index 530c3c7810..8b072067e6 100644
--- a/thirdparty/embree/common/sys/thread.cpp
+++ b/thirdparty/embree/common/sys/thread.cpp
@@ -3,7 +3,7 @@
#include "thread.h"
#include "sysinfo.h"
-#include "string.h"
+#include "estring.h"
#include <iostream>
#if defined(__ARM_NEON)
diff --git a/thirdparty/embree/common/sys/vector.h b/thirdparty/embree/common/sys/vector.h
index d05e1deb18..226cd34c21 100644
--- a/thirdparty/embree/common/sys/vector.h
+++ b/thirdparty/embree/common/sys/vector.h
@@ -8,6 +8,8 @@
namespace embree
{
+ class Device;
+
template<typename T, typename allocator>
class vector_t
{
@@ -25,6 +27,12 @@ namespace embree
template<typename M>
__forceinline explicit vector_t (M alloc, size_t sz)
: alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
+
+ __forceinline vector_t (Device* alloc)
+ : vector_t(alloc,0) {}
+
+ __forceinline vector_t(void* data, size_t bytes)
+ : size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {}
__forceinline ~vector_t() {
clear();
@@ -65,6 +73,10 @@ namespace embree
return *this;
}
+ __forceinline allocator& getAlloc() {
+ return alloc;
+ }
+
/********************** Iterators ****************************/
__forceinline iterator begin() { return items; };
@@ -215,6 +227,10 @@ namespace embree
if (new_alloced <= size_alloced)
return size_alloced;
+ /* if current size is 0 allocate exact requested size */
+ if (size_alloced == 0)
+ return new_alloced;
+
/* resize to next power of 2 otherwise */
size_t new_size_alloced = size_alloced;
while (new_size_alloced < new_alloced) {
@@ -237,8 +253,12 @@ namespace embree
/*! vector class that performs aligned allocations */
template<typename T>
using avector = vector_t<T,aligned_allocator<T,std::alignment_of<T>::value> >;
-
+
/*! vector class that performs OS allocations */
template<typename T>
using ovector = vector_t<T,os_allocator<T> >;
+
+ /*! vector class with externally managed data buffer */
+ template<typename T>
+ using evector = vector_t<T,no_allocator<T>>;
}