summaryrefslogtreecommitdiffstats
path: root/thirdparty/embree/kernels/common/device.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/kernels/common/device.cpp')
-rw-r--r--thirdparty/embree/kernels/common/device.cpp228
1 files changed, 199 insertions, 29 deletions
diff --git a/thirdparty/embree/kernels/common/device.cpp b/thirdparty/embree/kernels/common/device.cpp
index 833ec65139..07214532a1 100644
--- a/thirdparty/embree/kernels/common/device.cpp
+++ b/thirdparty/embree/kernels/common/device.cpp
@@ -2,6 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
#include "device.h"
+
+#include "../../common/tasking/taskscheduler.h"
+
#include "../hash.h"
#include "scene_triangle_mesh.h"
#include "scene_user_geometry.h"
@@ -19,9 +22,12 @@
#include "../bvh/bvh4_factory.h"
#include "../bvh/bvh8_factory.h"
-#include "../../common/tasking/taskscheduler.h"
#include "../../common/sys/alloc.h"
+#if defined(EMBREE_SYCL_SUPPORT)
+# include "../level_zero/ze_wrapper.h"
+#endif
+
namespace embree
{
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
@@ -30,13 +36,18 @@ namespace embree
ssize_t Device::debug_int2 = 0;
ssize_t Device::debug_int3 = 0;
- DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
-
static MutexSys g_mutex;
static std::map<Device*,size_t> g_cache_size_map;
static std::map<Device*,size_t> g_num_threads_map;
+
+ struct TaskArena
+ {
+#if USE_TASK_ARENA
+ std::unique_ptr<tbb::task_arena> arena;
+#endif
+ };
- Device::Device (const char* cfg)
+ Device::Device (const char* cfg) : arena(new TaskArena())
{
/* check that CPU supports lowest ISA */
if (!hasISA(ISA)) {
@@ -48,12 +59,12 @@ namespace embree
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
+ case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
+ case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
+ case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
+ case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
+ case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
@@ -66,11 +77,7 @@ namespace embree
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
-#if defined(__APPLE__)
- case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4
-#else
- case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break;
-#endif
+ case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
}
/* initialize global state */
@@ -126,13 +133,6 @@ namespace embree
/* setup tasking system */
initTaskingSystem(numThreads);
-
- /* ray stream SOA to AOS conversion */
-#if defined(EMBREE_RAY_PACKETS)
- RayStreamFilterFuncsType rayStreamFilterFuncs;
- SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(enabled_cpu_features,rayStreamFilterFuncs);
- rayStreamFilters = rayStreamFilterFuncs();
-#endif
}
Device::~Device ()
@@ -174,6 +174,9 @@ namespace embree
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
v += "backfacecullingcurves ";
#endif
+#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
+ v += "backfacecullingspheres ";
+#endif
#if defined(EMBREE_FILTER_FUNCTION)
v += "intersection_filter ";
#endif
@@ -367,7 +370,7 @@ namespace embree
#if USE_TASK_ARENA
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
- arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
+ arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
#endif
}
@@ -386,8 +389,21 @@ namespace embree
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
}
#if USE_TASK_ARENA
- arena.reset();
+ arena->arena.reset();
+#endif
+ }
+
+ void Device::execute(bool join, const std::function<void()>& func)
+ {
+#if USE_TASK_ARENA
+ if (join) {
+ arena->arena->execute(func);
+ }
+ else
#endif
+ {
+ func();
+ }
}
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
@@ -450,12 +466,6 @@ namespace embree
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
#endif
-#if defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0;
-#endif
-
#if defined(EMBREE_RAY_MASK)
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
#else
@@ -474,6 +484,12 @@ namespace embree
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
#endif
+#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
+ case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
+#else
+ case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
+#endif
+
#if defined(EMBREE_COMPACT_POLYS)
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
#else
@@ -557,4 +573,158 @@ namespace embree
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
};
}
+
+ void* Device::malloc(size_t size, size_t align) {
+ return alignedMalloc(size,align);
+ }
+
+ void Device::free(void* ptr) {
+ alignedFree(ptr);
+ }
+
+
+#if defined(EMBREE_SYCL_SUPPORT)
+
+ DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
+ : Device(cfg), gpu_context(sycl_context)
+ {
+ /* initialize ZeWrapper */
+ if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
+
+ /* take first device as default device */
+ auto devices = gpu_context.get_devices();
+ if (devices.size() == 0)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
+ gpu_device = devices[0];
+
+ /* check if RTAS build extension is available */
+ sycl::platform platform = gpu_device.get_platform();
+ ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
+
+ uint32_t count = 0;
+ std::vector<ze_driver_extension_properties_t> extensions;
+ ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
+ if (result != ZE_RESULT_SUCCESS)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
+
+ extensions.resize(count);
+ result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
+ if (result != ZE_RESULT_SUCCESS)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
+
+#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
+ bool ze_rtas_builder = false;
+ for (uint32_t i=0; i<extensions.size(); i++)
+ {
+ if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
+ ze_rtas_builder = true;
+ }
+ if (!ze_rtas_builder)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
+
+ result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
+ if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
+ if (result != ZE_RESULT_SUCCESS)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
+#else
+ ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
+#endif
+
+ if (State::verbosity(1))
+ {
+ if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
+ std::cout << " Internal RTAS Builder" << std::endl;
+ else
+ std::cout << " Level Zero RTAS Builder" << std::endl;
+ }
+
+ /* check if extension library can get loaded */
+ ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
+ result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
+ if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
+ throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
+ if (result == ZE_RESULT_SUCCESS)
+ ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
+
+ gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
+ gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
+
+ if (State::verbosity(1))
+ {
+ sycl::platform platform = gpu_context.get_platform();
+ std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
+ std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
+ std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
+ std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
+ std::cout << std::endl;
+ }
+
+ dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
+ }
+
+ DeviceGPU::~DeviceGPU()
+ {
+ rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
+ }
+
+ void DeviceGPU::enter() {
+ enableUSMAllocEmbree(&gpu_context,&gpu_device);
+ }
+
+ void DeviceGPU::leave() {
+ disableUSMAllocEmbree();
+ }
+
+ void* DeviceGPU::malloc(size_t size, size_t align) {
+ return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
+ }
+
+ void DeviceGPU::free(void* ptr) {
+ alignedSYCLFree(&gpu_context,ptr);
+ }
+
+ void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
+ gpu_device = sycl_device_in;
+ }
+
+#endif
+
+ DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
+ : device((Device*)hdevice)
+ {
+ assert(device);
+ device->refInc();
+ device->enter();
+ }
+
+ DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
+ : device(((Scene*)hscene)->device)
+ {
+ assert(device);
+ device->refInc();
+ device->enter();
+ }
+
+ DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
+ : device(((Geometry*)hgeometry)->device)
+ {
+ assert(device);
+ device->refInc();
+ device->enter();
+ }
+
+ DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
+ : device(((Buffer*)hbuffer)->device)
+ {
+ assert(device);
+ device->refInc();
+ device->enter();
+ }
+
+ DeviceEnterLeave::~DeviceEnterLeave() {
+ device->leave();
+ device->refDec();
+ }
}