diff options
Diffstat (limited to 'thirdparty/basis_universal/encoder/basisu_enc.h')
-rw-r--r-- | thirdparty/basis_universal/encoder/basisu_enc.h | 653 |
1 files changed, 613 insertions, 40 deletions
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.h b/thirdparty/basis_universal/encoder/basisu_enc.h index 0efeaa461f..780605e7b8 100644 --- a/thirdparty/basis_universal/encoder/basisu_enc.h +++ b/thirdparty/basis_universal/encoder/basisu_enc.h @@ -1,5 +1,5 @@ // basisu_enc.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -48,7 +48,8 @@ namespace basisu // Encoder library initialization. // This function MUST be called before encoding anything! - void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); + // Returns false if library initialization fails. + bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); void basisu_encoder_deinit(); // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 @@ -70,6 +71,18 @@ namespace basisu return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + inline int left_shift32(int val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return static_cast<int>(static_cast<uint32_t>(val) << shift); + } + + inline uint32_t left_shift32(uint32_t val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return val << shift; + } + inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) @@ -130,6 +143,31 @@ namespace basisu return bits; } + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + // Closed interval + inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + + inline uint32_t clz(uint32_t x) + { + if (!x) + return 32; + + uint32_t n = 0; + while ((x & 0x80000000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + bool string_begins_with(const std::string& str, const char* pPhrase); // Hashing @@ -268,6 +306,7 @@ namespace basisu public: enum { num_elements = N }; + typedef T scalar_type; inline vec() { } inline vec(eZero) { set_zero(); } @@ -291,6 +330,7 @@ namespace basisu inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + inline void clear() { set_zero(); } template <uint32_t OtherN, typename OtherT> inline vec &set(const vec<OtherN, OtherT> &other) @@ -391,7 +431,7 @@ namespace basisu inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); } inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } - inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } + inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } inline vec &clamp(T l, T h) { @@ -722,7 +762,7 @@ namespace basisu void job_thread(uint32_t index); }; - // Simple 32-bit color class + // Simple 64-bit color class class color_rgba_i16 { @@ -1116,7 +1156,9 @@ namespace basisu { std::string result(s); for (size_t i = 0; i < result.size(); i++) - result[i] = (char)tolower((int)result[i]); + { + result[i] = (char)tolower((uint8_t)(result[i])); + } return result; } @@ -1408,7 +1450,7 @@ namespace basisu size_t get_total_training_vecs() const { return m_training_vecs.size(); } const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } - array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } + array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const { @@ -1437,36 +1479,36 @@ namespace basisu } void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const - { + { uint_vec node_stack; - node_stack.reserve(512); + node_stack.reserve(512); - codebook.resize(0); - codebook.reserve(max_clusters); + codebook.resize(0); + codebook.reserve(max_clusters); - uint32_t node_index = 0; + uint32_t node_index = 0; - while (true) - { - const tsvq_node& cur = m_nodes[node_index]; + while (true) + { + const tsvq_node& cur = m_nodes[node_index]; - if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) - { - codebook.resize(codebook.size() + 1); - codebook.back() = cur.m_training_vecs; + if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) + { + codebook.resize(codebook.size() + 1); + codebook.back() = cur.m_training_vecs; - if (node_stack.empty()) - break; + if (node_stack.empty()) + break; - node_index = node_stack.back(); - node_stack.pop_back(); - continue; - } + node_index = node_stack.back(); + node_stack.pop_back(); + continue; + } - node_stack.push_back(cur.m_right_index); - node_index = cur.m_left_index; - } - } + node_stack.push_back(cur.m_right_index); + node_index = cur.m_left_index; + } + } bool generate(uint32_t max_size) { @@ -2319,6 +2361,14 @@ namespace basisu m_total_bits = 0; } + inline void restart() + { + m_bytes.resize(0); + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + inline const uint8_vec &get_bytes() const { return m_bytes; } inline uint64_t get_total_bits() const { return m_total_bits; } @@ -2920,11 +2970,11 @@ namespace basisu inline const color_rgba *get_ptr() const { return &m_pixels[0]; } inline color_rgba *get_ptr() { return &m_pixels[0]; } - bool has_alpha() const + bool has_alpha(uint32_t channel = 3) const { for (uint32_t y = 0; y < m_height; ++y) for (uint32_t x = 0; x < m_width; ++x) - if ((*this)(x, y).a < 255) + if ((*this)(x, y)[channel] < 255) return true; return false; @@ -3130,6 +3180,31 @@ namespace basisu return *this; } + imagef& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + } + return *this; + } + inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } @@ -3213,19 +3288,128 @@ namespace basisu inline const vec4F *get_ptr() const { return &m_pixels[0]; } inline vec4F *get_ptr() { return &m_pixels[0]; } + + bool clean_astc_hdr_pixels(float highest_mag) + { + bool status = true; + bool nan_msg = false; + bool inf_msg = false; + bool neg_zero_msg = false; + bool neg_msg = false; + bool clamp_msg = false; + + for (uint32_t iy = 0; iy < m_height; iy++) + { + for (uint32_t ix = 0; ix < m_width; ix++) + { + vec4F& c = (*this)(ix, iy); + + for (uint32_t s = 0; s < 4; s++) + { + float &p = c[s]; + union { float f; uint32_t u; } x; x.f = p; + + if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) + { + if (std::isnan(p)) + { + if (!nan_msg) + { + fprintf(stderr, "One or more pixels was NaN, setting to 0.\n"); + nan_msg = true; + } + } + + if (std::isinf(p)) + { + if (!inf_msg) + { + fprintf(stderr, "One or more pixels was INF, setting to 0.\n"); + inf_msg = true; + } + } + + if (x.u == 0x80000000) + { + if (!neg_zero_msg) + { + fprintf(stderr, "One or more pixels was -0, setting them to 0.\n"); + neg_zero_msg = true; + } + } + + p = 0.0f; + status = false; + } + else + { + //const float o = p; + if (p < 0.0f) + { + p = 0.0f; + + if (!neg_msg) + { + fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n"); + neg_msg = true; + } + + status = false; + } + + if (p > highest_mag) + { + p = highest_mag; + + if (!clamp_msg) + { + fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag); + clamp_msg = true; + } + + status = false; + } + } + } + } + } + + return status; + } + + imagef& flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + + return *this; + } private: uint32_t m_width, m_height, m_pitch; // all in pixels vec4F_vec m_pixels; }; + // REC 709 coefficients + const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f; + + inline float get_luminance(const vec4F &c) + { + return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B; + } + + float linear_to_srgb(float l); + float srgb_to_linear(float s); + // Image metrics class image_metrics { public: // TODO: Add ssim - float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + bool m_has_neg, m_hf_mag_overflow, m_any_abnormal; image_metrics() { @@ -3240,10 +3424,17 @@ namespace basisu m_rms = 0; m_psnr = 0; m_ssim = 0; + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; } - void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); } + void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false); + void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); }; @@ -3256,6 +3447,8 @@ namespace basisu bool load_tga(const char* pFilename, image& img); inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + bool load_qoi(const char* pFilename, image& img); + bool load_jpg(const char *pFilename, image& img); inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } @@ -3263,9 +3456,64 @@ namespace basisu bool load_image(const char* pFilename, image& img); inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + // Supports .HDR and most (but not all) .EXR's (see TinyEXR). + bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true); + inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); } + + enum class hdr_image_type + { + cHITRGBAHalfFloat = 0, + cHITRGBAFloat = 1, + cHITPNGImage = 2, + cHITEXRImage = 3, + cHITHDRImage = 4 + }; + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear); + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + struct rgbe_header_info + { + std::string m_program; + + // Note no validation is done, either gamma or exposure may be 0. + double m_gamma; + bool m_has_gamma; + + double m_exposure; // watts/steradian/m^2. + bool m_has_exposure; + + void clear() + { + m_program.clear(); + m_gamma = 1.0f; + m_has_gamma = false; + m_exposure = 1.0f; + m_has_exposure = false; + } + }; + + bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info); + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info); + + bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info); + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info); + + bool read_exr(const char* pFilename, imagef& img, int& n_chans); + bool read_exr(const void* pMem, size_t mem_size, imagef& img); + + enum + { + WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic + WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats + WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block) + }; + + // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images. + bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags); + enum { cImageSaveGrayscale = 1, @@ -3276,19 +3524,22 @@ namespace basisu inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } bool read_file_to_vec(const char* pFilename, uint8_vec& data); - + bool read_file_to_data(const char* pFilename, void *pData, size_t len); + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } - - float linear_to_srgb(float l); - float srgb_to_linear(float s); - + bool image_resample(const image &src, image &dst, bool srgb = false, const char *pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, uint32_t first_comp = 0, uint32_t num_comps = 4); + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + // Timing typedef uint64_t timer_ticks; @@ -3319,6 +3570,8 @@ namespace basisu bool m_started, m_stopped; }; + inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); } + // 2D array template<typename T> @@ -3372,8 +3625,8 @@ namespace basisu inline const T &operator[] (uint32_t i) const { return m_values[i]; } inline T &operator[] (uint32_t i) { return m_values[i]; } - inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } - inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } + inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } + inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } void clear() { @@ -3450,7 +3703,327 @@ namespace basisu } }; typedef basisu::vector<pixel_block> pixel_block_vec; - + + struct pixel_block_hdr + { + vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const vec4F* get_ptr() const { return &m_pixels[0][0]; } + inline vec4F* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec; + + void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure); + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img); + + // Intersection + enum eClear { cClear = 0 }; + enum eInitExpand { cInitExpand = 0 }; + + template<typename vector_type> + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; + + inline ray() { } + inline ray(eClear) { clear(); } + inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { } + + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } + + inline const vector_type& get_origin(void) const { return m_origin; } + inline void set_origin(const vector_type& origin) { m_origin = origin; } + + inline const vector_type& get_direction(void) const { return m_direction; } + inline void set_direction(const vector_type& direction) { m_direction = direction; } + + inline void set_endpoints(const vector_type& start, const vector_type& end) + { + m_origin = start; + + m_direction = end - start; + m_direction.normalize_in_place(); + } + + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } + + private: + vector_type m_origin; + vector_type m_direction; + }; + + typedef ray<vec2F> ray2F; + typedef ray<vec3F> ray3F; + + template<typename T> + class vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; } + inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; } + + inline vec_interval() { } + inline vec_interval(eClear) { clear(); } + inline vec_interval(eInitExpand) { init_expand(); } + + inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } + + inline void init_expand() + { + m_bounds[0].set(1e+30f, 1e+30f, 1e+30f); + m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f); + } + + inline vec_interval expand(const T& p) + { + for (uint32_t c = 0; c < N; c++) + { + if (p[c] < m_bounds[0][c]) + m_bounds[0][c] = p[c]; + + if (p[c] > m_bounds[1][c]) + m_bounds[1][c] = p[c]; + } + + return *this; + } + + inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; } + inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; } + + const T& get_low() const { return m_bounds[0]; } + T& get_low() { return m_bounds[0]; } + + const T& get_high() const { return m_bounds[1]; } + T& get_high() { return m_bounds[1]; } + + scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; } + + bool contains(const T& p) const + { + const T& low = get_low(), high = get_high(); + + for (uint32_t i = 0; i < N; i++) + { + if (p[i] < low[i]) + return false; + + if (p[i] > high[i]) + return false; + } + return true; + } + + private: + T m_bounds[2]; + }; + + typedef vec_interval<vec1F> vec_interval1F; + typedef vec_interval<vec2F> vec_interval2F; + typedef vec_interval<vec3F> vec_interval3F; + typedef vec_interval<vec4F> vec_interval4F; + + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; + + namespace intersection + { + enum result + { + cBackfacing = -1, + cFailure = 0, + cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. + // Algorithm: Graphics Gems 1 + template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type> + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum + { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + else + max_t[i] = -1.0f; + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + if (max_t[which_plane] < max_t[i]) + which_plane = i; + + if (max_t[which_plane] < 0.0f) + return cFailure; + + for (int i = 0; i < cNumDim; i++) + { + if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) + return cFailure; + } + else + { + coord[i] = candidate_plane[i]; + } + + assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type> + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + typename vector_type::T diag_dist = box.diagonal_length() * 1.5f; + ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + return res; + + t = basisu::maximum(0.0f, diag_dist - t); + return cSuccess; + } + + } // intersect + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + // Input cannot be negative, Inf or Nan. + inline basist::half_float float_to_half_non_neg_no_nan_inf(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; + int e = 0, m = 0; + + assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); + + // not zero or denormal + if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((e << 10) | m); + return result; + } + + // Supports positive and denormals only. No NaN or Inf. + inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) + { + assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); + + union fu32 + { + uint32_t u; + float f; + }; + + static const fu32 K = { 0x77800000 }; + + fu32 o; + o.u = h << 13; + o.f *= K.f; + + return o.f; + } + } // namespace basisu |