diff options
Diffstat (limited to 'thirdparty/embree/kernels/common/device.cpp')
-rw-r--r-- | thirdparty/embree/kernels/common/device.cpp | 228 |
1 files changed, 199 insertions, 29 deletions
diff --git a/thirdparty/embree/kernels/common/device.cpp b/thirdparty/embree/kernels/common/device.cpp index 833ec65139..07214532a1 100644 --- a/thirdparty/embree/kernels/common/device.cpp +++ b/thirdparty/embree/kernels/common/device.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 #include "device.h" + +#include "../../common/tasking/taskscheduler.h" + #include "../hash.h" #include "scene_triangle_mesh.h" #include "scene_user_geometry.h" @@ -19,9 +22,12 @@ #include "../bvh/bvh4_factory.h" #include "../bvh/bvh8_factory.h" -#include "../../common/tasking/taskscheduler.h" #include "../../common/sys/alloc.h" +#if defined(EMBREE_SYCL_SUPPORT) +# include "../level_zero/ze_wrapper.h" +#endif + namespace embree { /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */ @@ -30,13 +36,18 @@ namespace embree ssize_t Device::debug_int2 = 0; ssize_t Device::debug_int3 = 0; - DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs); - static MutexSys g_mutex; static std::map<Device*,size_t> g_cache_size_map; static std::map<Device*,size_t> g_num_threads_map; + + struct TaskArena + { +#if USE_TASK_ARENA + std::unique_ptr<tbb::task_arena> arena; +#endif + }; - Device::Device (const char* cfg) + Device::Device (const char* cfg) : arena(new TaskArena()) { /* check that CPU supports lowest ISA */ if (!hasISA(ISA)) { @@ -48,12 +59,12 @@ namespace embree case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break; case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break; case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break; + case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break; + case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break; + case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break; + case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break; case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break; + case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break; case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break; case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break; case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break; @@ -66,11 +77,7 @@ namespace embree case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break; case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break; case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break; -#if defined(__APPLE__) - case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4 -#else - case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break; -#endif + case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; } /* initialize global state */ @@ -126,13 +133,6 @@ namespace embree /* setup tasking system */ initTaskingSystem(numThreads); - - /* ray stream SOA to AOS conversion */ -#if defined(EMBREE_RAY_PACKETS) - RayStreamFilterFuncsType rayStreamFilterFuncs; - SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(enabled_cpu_features,rayStreamFilterFuncs); - rayStreamFilters = rayStreamFilterFuncs(); -#endif } Device::~Device () @@ -174,6 +174,9 @@ namespace embree #if defined (EMBREE_BACKFACE_CULLING_CURVES) v += "backfacecullingcurves "; #endif +#if defined (EMBREE_BACKFACE_CULLING_SPHERES) + v += "backfacecullingspheres "; +#endif #if defined(EMBREE_FILTER_FUNCTION) v += "intersection_filter "; #endif @@ -367,7 +370,7 @@ namespace embree #if USE_TASK_ARENA const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount()); const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads); - arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads)); + arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads)); #endif } @@ -386,8 +389,21 @@ namespace embree TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads); } #if USE_TASK_ARENA - arena.reset(); + arena->arena.reset(); +#endif + } + + void Device::execute(bool join, const std::function<void()>& func) + { +#if USE_TASK_ARENA + if (join) { + arena->arena->execute(func); + } + else #endif + { + func(); + } } void Device::setProperty(const RTCDeviceProperty prop, ssize_t val) @@ -450,12 +466,6 @@ namespace embree case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0; #endif -#if defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0; -#endif - #if defined(EMBREE_RAY_MASK) case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1; #else @@ -474,6 +484,12 @@ namespace embree case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0; #endif +#if defined(EMBREE_BACKFACE_CULLING_SPHERES) + case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1; +#else + case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0; +#endif + #if defined(EMBREE_COMPACT_POLYS) case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1; #else @@ -557,4 +573,158 @@ namespace embree default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break; }; } + + void* Device::malloc(size_t size, size_t align) { + return alignedMalloc(size,align); + } + + void Device::free(void* ptr) { + alignedFree(ptr); + } + + +#if defined(EMBREE_SYCL_SUPPORT) + + DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg) + : Device(cfg), gpu_context(sycl_context) + { + /* initialize ZeWrapper */ + if (ZeWrapper::init() != ZE_RESULT_SUCCESS) + throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper"); + + /* take first device as default device */ + auto devices = gpu_context.get_devices(); + if (devices.size() == 0) + throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device"); + gpu_device = devices[0]; + + /* check if RTAS build extension is available */ + sycl::platform platform = gpu_device.get_platform(); + ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform); + + uint32_t count = 0; + std::vector<ze_driver_extension_properties_t> extensions; + ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); + if (result != ZE_RESULT_SUCCESS) + throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed"); + + extensions.resize(count); + result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data()); + if (result != ZE_RESULT_SUCCESS) + throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed"); + +#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) + bool ze_rtas_builder = false; + for (uint32_t i=0; i<extensions.size(); i++) + { + if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0) + ze_rtas_builder = true; + } + if (!ze_rtas_builder) + throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found"); + + result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO); + if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) + throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension"); + if (result != ZE_RESULT_SUCCESS) + throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension"); +#else + ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL); +#endif + + if (State::verbosity(1)) + { + if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL) + std::cout << " Internal RTAS Builder" << std::endl; + else + std::cout << " Level Zero RTAS Builder" << std::endl; + } + + /* check if extension library can get loaded */ + ze_rtas_parallel_operation_exp_handle_t hParallelOperation; + result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation); + if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) + throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded"); + if (result == ZE_RESULT_SUCCESS) + ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation); + + gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>(); + gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>(); + + if (State::verbosity(1)) + { + sycl::platform platform = gpu_context.get_platform(); + std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl; + std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl; + std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl; + std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl; + std::cout << std::endl; + } + + dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context); + } + + DeviceGPU::~DeviceGPU() + { + rthwifCleanup(this,dispatchGlobalsPtr,gpu_context); + } + + void DeviceGPU::enter() { + enableUSMAllocEmbree(&gpu_context,&gpu_device); + } + + void DeviceGPU::leave() { + disableUSMAllocEmbree(); + } + + void* DeviceGPU::malloc(size_t size, size_t align) { + return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY); + } + + void DeviceGPU::free(void* ptr) { + alignedSYCLFree(&gpu_context,ptr); + } + + void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) { + gpu_device = sycl_device_in; + } + +#endif + + DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice) + : device((Device*)hdevice) + { + assert(device); + device->refInc(); + device->enter(); + } + + DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene) + : device(((Scene*)hscene)->device) + { + assert(device); + device->refInc(); + device->enter(); + } + + DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry) + : device(((Geometry*)hgeometry)->device) + { + assert(device); + device->refInc(); + device->enter(); + } + + DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer) + : device(((Buffer*)hbuffer)->device) + { + assert(device); + device->refInc(); + device->enter(); + } + + DeviceEnterLeave::~DeviceEnterLeave() { + device->leave(); + device->refDec(); + } } |