1 files changed, 613 insertions, 40 deletions
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.h b/thirdparty/basis_universal/encoder/basisu_enc.h
index 0efeaa461f..780605e7b8 100644
--- a/thirdparty/basis_universal/encoder/basisu_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_enc.h
@@ -1,5 +1,5 @@
 // basisu_enc.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -48,7 +48,8 @@ namespace basisu
 
 	// Encoder library initialization.
 	// This function MUST be called before encoding anything!
-	void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
+	// Returns false if library initialization fails.
+	bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
 	void basisu_encoder_deinit();
 
 	// basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1
@@ -70,6 +71,18 @@ namespace basisu
 		return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
 	}
 
+	inline int left_shift32(int val, int shift) // Shifts val's bits left as a uint32_t, then converts the result back to int.
+	{
+		assert((shift >= 0) && (shift < 32));
+		return static_cast<int>(static_cast<uint32_t>(val) << shift);
+	}
+
+	inline uint32_t left_shift32(uint32_t val, int shift) // Unsigned overload; shift must be in [0, 31] (asserted).
+	{
+		assert((shift >= 0) && (shift < 32));
+		return val << shift;
+	}
+
 	inline int32_t clampi(int32_t value, int32_t low, int32_t high) 
 	{ 
 		if (value < low) 
@@ -130,6 +143,31 @@ namespace basisu
 
 		return bits;
 	}
+		
+	// Half-open interval: asserts l <= v < h and returns v unchanged.
+	inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+	inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+
+	// Closed interval: asserts l <= v <= h and returns v unchanged.
+	inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
+	inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
+
+	inline uint32_t clz(uint32_t x) // Returns the number of leading zero bits in x (32 when x is 0).
+	{
+		if (!x)
+			return 32; // handled up front: the loop below would never terminate for x == 0
+
+		uint32_t n = 0;
+		while ((x & 0x80000000) == 0) // shift left until the top bit is set, counting the shifts
+		{
+			x <<= 1u;
+			n++;
+		}
+
+		return n;
+	}
+
+	bool string_begins_with(const std::string& str, const char* pPhrase);
 				
 	// Hashing
 	
@@ -268,6 +306,7 @@ namespace basisu
 
 	public:
 		enum { num_elements = N };
+		typedef T scalar_type;
 
 		inline vec() { }
 		inline vec(eZero) { set_zero();  }
@@ -291,6 +330,7 @@ namespace basisu
 		inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
 
 		inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
+		inline void clear() { set_zero(); }
 
 		template <uint32_t OtherN, typename OtherT>
 		inline vec &set(const vec<OtherN, OtherT> &other)
@@ -391,7 +431,7 @@ namespace basisu
 		inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
 		inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
 
-		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len);	return *this; }
+		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
 
 		inline vec &clamp(T l, T h)
 		{
@@ -722,7 +762,7 @@ namespace basisu
 		void job_thread(uint32_t index);
 	};
 
-	// Simple 32-bit color class
+	// Simple 64-bit color class
 
 	class color_rgba_i16
 	{
@@ -1116,7 +1156,9 @@ namespace basisu
 	{
 		std::string result(s);
 		for (size_t i = 0; i < result.size(); i++)
-			result[i] = (char)tolower((int)result[i]);
+		{
+			result[i] = (char)tolower((uint8_t)(result[i]));
+		}
 		return result;
 	}
 
@@ -1408,7 +1450,7 @@ namespace basisu
 
 		size_t get_total_training_vecs() const { return m_training_vecs.size(); }
 		const array_of_weighted_training_vecs &get_training_vecs() const	{ return m_training_vecs; }
-				array_of_weighted_training_vecs &get_training_vecs()			{ return m_training_vecs; }
+			  array_of_weighted_training_vecs &get_training_vecs()			{ return m_training_vecs; }
 
 		void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
 		{
@@ -1437,36 +1479,36 @@ namespace basisu
 		}
 
 		void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
-      {
+		{
 			uint_vec node_stack;
-         node_stack.reserve(512);
+			node_stack.reserve(512);
 
-         codebook.resize(0);
-         codebook.reserve(max_clusters);
+			codebook.resize(0);
+			codebook.reserve(max_clusters);
 			         
-         uint32_t node_index = 0;
+			uint32_t node_index = 0;
 
-         while (true)
-         {
-            const tsvq_node& cur = m_nodes[node_index];
+			while (true)
+			{
+				const tsvq_node& cur = m_nodes[node_index];
 
-            if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
-            {
-               codebook.resize(codebook.size() + 1);
-               codebook.back() = cur.m_training_vecs;
+				if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
+				{
+					codebook.resize(codebook.size() + 1);
+					codebook.back() = cur.m_training_vecs;
 										
-               if (node_stack.empty())
-                  break;
+					if (node_stack.empty())
+						break;
 
-               node_index = node_stack.back();
-               node_stack.pop_back();
-               continue;
-            }
+					node_index = node_stack.back();
+					node_stack.pop_back();
+					continue;
+				}
 				            
-            node_stack.push_back(cur.m_right_index);
-				node_index = cur.m_left_index;
-         }
-      }
+				node_stack.push_back(cur.m_right_index);
+					node_index = cur.m_left_index;
+			}
+		}
 
 		bool generate(uint32_t max_size)
 		{
@@ -2319,6 +2361,14 @@ namespace basisu
 			m_total_bits = 0;
 		}
 
+		inline void restart() // Resizes the byte buffer to 0 and zeroes the bit buffer, its size, and the total bit count.
+		{
+			m_bytes.resize(0);
+			m_bit_buffer = 0;
+			m_bit_buffer_size = 0;
+			m_total_bits = 0;
+		}
+
 		inline const uint8_vec &get_bytes() const { return m_bytes; }
 
 		inline uint64_t get_total_bits() const { return m_total_bits; }
@@ -2920,11 +2970,11 @@ namespace basisu
 		inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
 		inline color_rgba *get_ptr() { return &m_pixels[0]; }
 
-		bool has_alpha() const
+		bool has_alpha(uint32_t channel = 3) const
 		{
 			for (uint32_t y = 0; y < m_height; ++y)
 				for (uint32_t x = 0; x < m_width; ++x)
-					if ((*this)(x, y).a < 255)
+					if ((*this)(x, y)[channel] < 255)
 						return true;
 
 			return false;
@@ -3130,6 +3180,31 @@ namespace basisu
 			return *this;
 		}
 
+		imagef& crop_dup_borders(uint32_t w, uint32_t h) // Calls crop(w, h), then fills any columns/rows beyond the original size from the nearest original edge pixel. Returns *this.
+		{
+			const uint32_t orig_w = m_width, orig_h = m_height; // dimensions before the crop() call
+
+			crop(w, h);
+
+			if (orig_w && orig_h) // an originally empty image has no edge pixels to duplicate
+			{
+				if (m_width > orig_w)
+				{
+					for (uint32_t x = orig_w; x < m_width; x++) // columns past the original right edge, over all rows
+						for (uint32_t y = 0; y < m_height; y++)
+							set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
+				}
+
+				if (m_height > orig_h)
+				{
+					for (uint32_t y = orig_h; y < m_height; y++) // rows past the original bottom edge, over all columns
+						for (uint32_t x = 0; x < m_width; x++)
+							set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
+				}
+			}
+			return *this;
+		}
+
 		inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
 		inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
 
@@ -3213,19 +3288,128 @@ namespace basisu
 
 		inline const vec4F *get_ptr() const { return &m_pixels[0]; }
 		inline vec4F *get_ptr() { return &m_pixels[0]; }
+
+		bool clean_astc_hdr_pixels(float highest_mag) // Sanitizes every pixel component in place; returns false if any component had to be changed.
+		{
+			bool status = true;
+			bool nan_msg = false; // the *_msg flags ensure each warning is printed to stderr at most once per call
+			bool inf_msg = false;
+			bool neg_zero_msg = false;
+			bool neg_msg = false;
+			bool clamp_msg = false;
+
+			for (uint32_t iy = 0; iy < m_height; iy++)
+			{
+				for (uint32_t ix = 0; ix < m_width; ix++)
+				{
+					vec4F& c = (*this)(ix, iy);
+
+					for (uint32_t s = 0; s < 4; s++)
+					{
+						float &p = c[s];
+						union { float f; uint32_t u; } x; x.f = p; // x.u holds p's raw bits so the -0.0f pattern (0x80000000) can be detected
+						
+						if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) // NaN, Inf and -0 are all replaced with 0
+						{
+							if (std::isnan(p))
+							{
+								if (!nan_msg)
+								{
+									fprintf(stderr, "One or more pixels was NaN, setting to 0.\n");
+									nan_msg = true;
+								}
+							}
+
+							if (std::isinf(p))
+							{
+								if (!inf_msg)
+								{
+									fprintf(stderr, "One or more pixels was INF, setting to 0.\n");
+									inf_msg = true;
+								}
+							}
+
+							if (x.u == 0x80000000)
+							{
+								if (!neg_zero_msg)
+								{
+									fprintf(stderr, "One or more pixels was -0, setting them to 0.\n");
+									neg_zero_msg = true;
+								}
+							}
+
+							p = 0.0f;
+							status = false;
+						}
+						else
+						{
+							//const float o = p;
+							if (p < 0.0f) // finite negative values are clamped up to 0
+							{
+								p = 0.0f;
+
+								if (!neg_msg)
+								{
+									fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
+									neg_msg = true;
+								}
+								
+								status = false;
+							}
+
+							if (p > highest_mag) // finite values above highest_mag are clamped down to it
+							{
+								p = highest_mag;
+								
+								if (!clamp_msg)
+								{
+									fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag);
+									clamp_msg = true;
+								}
+
+								status = false;
+							}
+						}
+					}
+				}
+			}
+
+			return status;
+		}
+
+		imagef& flip_y() // Mirrors the image vertically in place by swapping row y with row (m_height - 1 - y). Returns *this.
+		{
+			for (uint32_t y = 0; y < m_height / 2; ++y) // only the top half is visited; a middle row (odd height) stays put
+				for (uint32_t x = 0; x < m_width; ++x)
+					std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
+
+			return *this;
+		}
 						
 	private:
 		uint32_t m_width, m_height, m_pitch;  // all in pixels
 		vec4F_vec m_pixels;
 	};
 
+	// REC 709 coefficients
+	const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;
+
+	inline float get_luminance(const vec4F &c) // Weighted sum of c[0], c[1], c[2] using the REC_709_R/G/B coefficients; c[3] is not read.
+	{
+		return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;
+	}
+
+	float linear_to_srgb(float l);
+	float srgb_to_linear(float s);
+
 	// Image metrics
 		
 	class image_metrics
 	{
 	public:
 		// TODO: Add ssim
-		float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
+		double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
+		bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;
 
 		image_metrics()
 		{
@@ -3240,10 +3424,17 @@ namespace basisu
 			m_rms = 0;
 			m_psnr = 0;
 			m_ssim = 0;
+			m_has_neg = false;
+			m_hf_mag_overflow = false;
+			m_any_abnormal = false;
 		}
 
-		void print(const char *pPrefix = nullptr)	{ printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr);	}
+		void print(const char *pPrefix = nullptr)	{ printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr);	}
+		void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }
 
+		void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);
+		void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
+		void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
 		void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
 	};
 
@@ -3256,6 +3447,8 @@ namespace basisu
 	bool load_tga(const char* pFilename, image& img);
 	inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
 
+	bool load_qoi(const char* pFilename, image& img);
+
 	bool load_jpg(const char *pFilename, image& img);
 	inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
 	
@@ -3263,9 +3456,64 @@ namespace basisu
 	bool load_image(const char* pFilename, image& img);
 	inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
 
+	// Supports .HDR and most (but not all) .EXR's (see TinyEXR).
+	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true);
+	inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); }
+
+	enum class hdr_image_type
+	{
+		cHITRGBAHalfFloat = 0,
+		cHITRGBAFloat = 1,
+		cHITPNGImage = 2,
+		cHITEXRImage = 3,
+		cHITHDRImage = 4
+	};
+
+	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear);
+
 	uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
 	uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
 		
+	struct rgbe_header_info // Header fields passed to read_rgbe()/write_rgbe(). No constructor: numeric/bool members are uninitialized until clear() is called.
+	{
+		std::string m_program;
+
+		// Note no validation is done, either gamma or exposure may be 0.
+		double m_gamma;
+		bool m_has_gamma; // NOTE(review): presumably true when m_gamma is meaningful -- confirm against read_rgbe()
+
+		double m_exposure; // watts/steradian/m^2.
+		bool m_has_exposure; // NOTE(review): presumably true when m_exposure is meaningful -- confirm against read_rgbe()
+
+		void clear() // Resets to: empty program string, gamma 1.0 and exposure 1.0, both has-flags false.
+		{ 
+			m_program.clear(); 
+			m_gamma = 1.0f; 
+			m_has_gamma = false; 
+			m_exposure = 1.0f; 
+			m_has_exposure = false; 
+		}
+	};
+
+	bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);
+	bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);
+
+	bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);
+	bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);
+
+	bool read_exr(const char* pFilename, imagef& img, int& n_chans);
+	bool read_exr(const void* pMem, size_t mem_size, imagef& img);
+	
+	enum
+	{
+		WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic
+		WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats
+		WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)
+	};
+
+	// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
+	bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags);
+			
 	enum
 	{
 		cImageSaveGrayscale = 1,
@@ -3276,19 +3524,22 @@ namespace basisu
 	inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
 	
 	bool read_file_to_vec(const char* pFilename, uint8_vec& data);
-	
+	bool read_file_to_data(const char* pFilename, void *pData, size_t len);	
+
 	bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
 	
 	inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) {	return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
-
-	float linear_to_srgb(float l);
-	float srgb_to_linear(float s);
-
+		
 	bool image_resample(const image &src, image &dst, bool srgb = false,
 		const char *pFilter = "lanczos4", float filter_scale = 1.0f, 
 		bool wrapping = false,
 		uint32_t first_comp = 0, uint32_t num_comps = 4);
 
+	bool image_resample(const imagef& src, imagef& dst, 
+		const char* pFilter = "lanczos4", float filter_scale = 1.0f,
+		bool wrapping = false,
+		uint32_t first_comp = 0, uint32_t num_comps = 4);
+		
 	// Timing
 			
 	typedef uint64_t timer_ticks;
@@ -3319,6 +3570,8 @@ namespace basisu
 		bool m_started, m_stopped;
 	};
 
+	inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }
+
 	// 2D array
 
 	template<typename T>
@@ -3372,8 +3625,8 @@ namespace basisu
 		inline const T &operator[] (uint32_t i) const { return m_values[i]; }
 		inline T &operator[] (uint32_t i) { return m_values[i]; }
 				
-		inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }		
-		inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
+		inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }		
+		inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
 
 		void clear()
 		{
@@ -3450,7 +3703,327 @@ namespace basisu
 		}
 	};
 	typedef basisu::vector<pixel_block> pixel_block_vec;
-		
+
+	struct pixel_block_hdr // Block of cPixelBlockWidth x cPixelBlockHeight vec4F pixels.
+	{
+		vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
+
+		inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
+		inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
+
+		inline const vec4F* get_ptr() const { return &m_pixels[0][0]; } // pointer to pixel (x=0, y=0); rows follow one another in memory
+		inline vec4F* get_ptr() { return &m_pixels[0][0]; }
+
+		inline void clear() { clear_obj(*this); }
+
+		inline bool operator== (const pixel_block_hdr& rhs) const // Bytewise comparison; rhs must be the same block type so sizeof(m_pixels) is valid for both arrays.
+		{
+			return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
+		}
+	};
+	typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;
+
+	void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure);
+	bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
+	
+	// Intersection
+	enum eClear { cClear = 0 };
+	enum eInitExpand { cInitExpand = 0 };
+
+	template<typename vector_type>
+	class ray // An origin point plus a direction vector, both of type vector_type.
+	{
+	public:
+		typedef vector_type vector_t;
+		typedef typename vector_type::scalar_type scalar_type;
+
+		inline ray() { } // leaves origin and direction to vector_type's default constructor
+		inline ray(eClear) { clear(); }
+		inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { } // direction is stored as given (not normalized)
+
+		inline void clear()
+		{
+			m_origin.clear();
+			m_direction.clear();
+		}
+
+		inline const vector_type& get_origin(void) const { return m_origin; }
+		inline void set_origin(const vector_type& origin) { m_origin = origin; }
+
+		inline const vector_type& get_direction(void) const { return m_direction; }
+		inline void set_direction(const vector_type& direction) { m_direction = direction; }
+
+		inline void set_endpoints(const vector_type& start, const vector_type& end) // origin = start; direction = (end - start), normalized in place
+		{
+			m_origin = start;
+
+			m_direction = end - start;
+			m_direction.normalize_in_place();
+		}
+
+		inline vector_type eval(scalar_type t) const // point at parameter t: origin + direction * t
+		{
+			return m_origin + m_direction * t;
+		}
+
+	private:
+		vector_type m_origin;
+		vector_type m_direction;
+	};
+
+	typedef ray<vec2F> ray2F;
+	typedef ray<vec3F> ray3F;
+
+	template<typename T>
+	class vec_interval // Per-component bounds over vector type T: m_bounds[0] is the low corner, m_bounds[1] the high corner.
+	{
+	public:
+		enum { N = T::num_elements };
+		typedef typename T::scalar_type scalar_type;
+
+		inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }
+		inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }
+
+		inline vec_interval() { }
+		inline vec_interval(eClear) { clear(); }
+		inline vec_interval(eInitExpand) { init_expand(); }
+
+		inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }
+
+		inline void init_expand() // Inverted bounds (low = +1e30, high = -1e30) so the first expand() snaps both corners to its point.
+		{
+			// Set every one of the N components explicitly instead of relying on a 3-argument set().
+			for (uint32_t c = 0; c < N; c++) { m_bounds[0][c] = 1e+30f; m_bounds[1][c] = -1e+30f; }
+		}
+
+		inline vec_interval& expand(const T& p) // Grows the bounds to include p; returns *this by reference (no copy).
+		{
+			for (uint32_t c = 0; c < N; c++)
+			{
+				if (p[c] < m_bounds[0][c])
+					m_bounds[0][c] = p[c];
+
+				if (p[c] > m_bounds[1][c])
+					m_bounds[1][c] = p[c];
+			}
+
+			return *this;
+		}
+
+		inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; } // i == 0: low corner, i == 1: high corner
+		inline       T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }
+
+		const T& get_low() const { return m_bounds[0]; }
+		T& get_low() { return m_bounds[0]; }
+
+		const T& get_high() const { return m_bounds[1]; }
+		T& get_high() { return m_bounds[1]; }
+
+		scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; } // extent (high - low) along axis
+
+		bool contains(const T& p) const // True when low[i] <= p[i] <= high[i] on every axis (bounds are inclusive).
+		{
+			const T &low = get_low(), &high = get_high(); // both bound by reference; no copy of the high corner
+
+			for (uint32_t i = 0; i < N; i++)
+			{
+				if (p[i] < low[i])
+					return false;
+
+				if (p[i] > high[i])
+					return false;
+			}
+			return true;
+		}
+
+	private:
+		T m_bounds[2];
+	};
+
+	typedef vec_interval<vec1F> vec_interval1F;
+	typedef vec_interval<vec2F> vec_interval2F;
+	typedef vec_interval<vec3F> vec_interval3F;
+	typedef vec_interval<vec4F> vec_interval4F;
+
+	typedef vec_interval2F aabb2F;
+	typedef vec_interval3F aabb3F;
+
+	namespace intersection
+	{
+		enum result
+		{
+			cBackfacing = -1,
+			cFailure = 0,
+			cSuccess,
+			cParallel,
+			cInside,
+		};
+
+		// Returns cInside, cSuccess, or cFailure.
+		// Algorithm: Graphics Gems 1
+		template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
+		result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) // On cSuccess/cInside writes the hit point to coord and its ray parameter to t; box[0]/box[1] are the low/high corners.
+		{
+			enum
+			{
+				cNumDim = vector_type::num_elements,
+				cRight = 0,
+				cLeft = 1,
+				cMiddle = 2
+			};
+
+			bool inside = true;
+			int quadrant[cNumDim];
+			scalar_type candidate_plane[cNumDim];
+
+			for (int i = 0; i < cNumDim; i++) // classify the origin per axis and record the box plane on the origin's side
+			{
+				if (ray.get_origin()[i] < box[0][i])
+				{
+					quadrant[i] = cLeft;
+					candidate_plane[i] = box[0][i];
+					inside = false;
+				}
+				else if (ray.get_origin()[i] > box[1][i])
+				{
+					quadrant[i] = cRight;
+					candidate_plane[i] = box[1][i];
+					inside = false;
+				}
+				else
+				{
+					quadrant[i] = cMiddle; // within the box's extent on this axis; candidate_plane[i] is left unset
+				}
+			}
+
+			if (inside) // origin lies within the box on every axis
+			{
+				coord = ray.get_origin();
+				t = 0.0f;
+				return cInside;
+			}
+
+			scalar_type max_t[cNumDim];
+			for (int i = 0; i < cNumDim; i++) // ray parameter at each candidate plane; -1 marks axes with no usable plane
+			{
+				if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))
+					max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];
+				else
+					max_t[i] = -1.0f;
+			}
+
+			int which_plane = 0;
+			for (int i = 1; i < cNumDim; i++) // the plane with the largest parameter is the one tested for the hit
+				if (max_t[which_plane] < max_t[i])
+					which_plane = i;
+
+			if (max_t[which_plane] < 0.0f) // every candidate plane is behind the origin (or there is none)
+				return cFailure;
+
+			for (int i = 0; i < cNumDim; i++) // the point on the chosen plane must lie within the box on all other axes
+			{
+				if (i != which_plane)
+				{
+					coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];
+
+					if ((coord[i] < box[0][i]) || (coord[i] > box[1][i]))
+						return cFailure;
+				}
+				else
+				{
+					coord[i] = candidate_plane[i]; // taken directly from the plane rather than recomputed from t
+				}
+
+				assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);
+			}
+
+			t = max_t[which_plane];
+			return cSuccess;
+		}
+
+		template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
+		result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) // Variant that reports via started_within whether the origin is inside the box, and if so traces a reversed ray back toward it.
+		{
+			if (!box.contains(ray.get_origin()))
+			{
+				started_within = false;
+				return ray_aabb(coord, t, ray, box);
+			}
+
+			started_within = true;
+
+			const scalar_type diag_dist = (box[1] - box[0]).length() * 1.5f; // 1.5x the box diagonal; assumes a unit-length ray direction so this point is outside the box -- TODO confirm
+			ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); // starts beyond the box and points back toward the original origin
+
+			result res(ray_aabb(coord, t, outside_ray, box));
+			if (res != cSuccess)
+				return res;
+
+			t = basisu::maximum(static_cast<scalar_type>(0), diag_dist - t); // convert back to a parameter along the original ray, clamped at 0
+			return cSuccess;
+		}
+
+	} // namespace intersection
+
+	// This float->half conversion matches how "F32TO16" works on Intel GPU's.
+	// Input cannot be negative, Inf or Nan.
+	inline basist::half_float float_to_half_non_neg_no_nan_inf(float val) // Returns (e << 10) | m, with e the 5-bit exponent field and m the 10-bit mantissa field.
+	{
+		union { float f; int32_t i; uint32_t u; } fi = { val };
+		const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; // float's 23-bit mantissa field and 8-bit biased exponent field
+		int e = 0, m = 0;
+
+		assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); // sign bit clear, and exponent field not all ones (not Inf/NaN)
+
+		// not zero or denormal
+		if (flt_e != 0)
+		{
+			int new_exp = flt_e - 127; // unbiased exponent
+			if (new_exp > 15)
+				e = 31; // exponent too large: exponent field saturates to 31, mantissa stays 0
+			else if (new_exp < -14)
+				m = lrintf((1 << 24) * fabsf(fi.f)); // exponent too small: e stays 0 and the mantissa is the value scaled by 2^24, rounded
+			else
+			{
+				e = new_exp + 15; // re-bias the exponent by 15
+				m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // scale the 23-bit mantissa down to 10 bits, with rounding
+			}
+		}
+
+		assert((0 <= m) && (m <= 1024));
+		if (m == 1024) // rounding overflowed the 10-bit mantissa: carry into the exponent
+		{
+			e++;
+			m = 0;
+		}
+
+		assert((e >= 0) && (e <= 31));
+		assert((m >= 0) && (m <= 1023));
+
+		basist::half_float result = (basist::half_float)((e << 10) | m);
+		return result;
+	}
+
+	// Supports positive and denormals only. No NaN or Inf.
+	inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) // Converts by shifting h's bits into float position and rescaling with a single multiply.
+	{
+		assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
+
+		union fu32
+		{
+			uint32_t u;
+			float f;
+		};
+
+		static const fu32 K = { 0x77800000 }; // bit pattern of the float 2^112 (exponent field 239 = 127 + 112, zero mantissa)
+
+		fu32 o;
+		o.u = h << 13; // a 13-bit shift moves the bits just above a 10-bit mantissa to just above a 23-bit one
+		o.f *= K.f; // multiply by 2^112 (112 = 127 - 15) to rescale the shifted value
+
+		return o.f;
+	}
+				
 } // namespace basisu