From fcfc56d5e3cbc7c7b1e904802e853d2462dba18d Mon Sep 17 00:00:00 2001
From: Christophe Riccio <mail@g-truc.net>
Date: Tue, 21 Oct 2014 01:32:45 +0200
Subject: [PATCH] Vectorization of usubBorrow and umulExtended, simplified
 vector relational code

---
 glm/detail/func_integer.hpp           |  22 +++---
 glm/detail/func_integer.inl           | 106 ++++----------------------
 glm/detail/func_vector_relational.hpp |  18 ++---
 glm/detail/func_vector_relational.inl |  50 ++++--------
 4 files changed, 46 insertions(+), 150 deletions(-)
diff --git a/glm/detail/func_integer.hpp b/glm/detail/func_integer.hpp
index 4dfcd0bd..00f4e66f 100644
--- a/glm/detail/func_integer.hpp
+++ b/glm/detail/func_integer.hpp
@@ -69,12 +69,12 @@ namespace glm
 	/// 
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/usubBorrow.xml">GLSL usubBorrow man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genUType>
-	GLM_FUNC_DECL genUType usubBorrow(
-		genUType const & x,
-		genUType const & y,
-		genUType & borrow);
-		
+	template <precision P, template <typename, precision> class vecType>
+	GLM_FUNC_DECL vecType<uint, P> usubBorrow(
+		vecType<uint, P> const & x,
+		vecType<uint, P> const & y,
+		vecType<uint, P> & borrow);
+
 	/// Multiplies 32-bit integers x and y, producing a 64-bit
 	/// result. The 32 least-significant bits are returned in lsb.
 	/// The 32 most-significant bits are returned in msb.
@@ -83,12 +83,12 @@ namespace glm
 	/// 
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/umulExtended.xml">GLSL umulExtended man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genUType>
+	template <precision P, template <typename, precision> class vecType>
 	GLM_FUNC_DECL void umulExtended(
-		genUType const & x,
-		genUType const & y,
-		genUType & msb,
-		genUType & lsb);
+		vecType<uint, P> const & x,
+		vecType<uint, P> const & y,
+		vecType<uint, P> & msb,
+		vecType<uint, P> & lsb);
 		
 	/// Multiplies 32-bit integers x and y, producing a 64-bit
 	/// result. The 32 least-significant bits are returned in lsb.
diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl
index 4b75bbb5..1e8e309d 100644
--- a/glm/detail/func_integer.inl
+++ b/glm/detail/func_integer.inl
@@ -99,13 +99,7 @@ namespace glm
 	}
 
 	// usubBorrow
-	template <>
-	GLM_FUNC_QUALIFIER uint usubBorrow
-	(
-		uint const & x,
-		uint const & y,
-		uint & Borrow
-	)
+	GLM_FUNC_QUALIFIER uint usubBorrow(uint const & x, uint const & y, uint & Borrow)
 	{
 		GLM_STATIC_ASSERT(sizeof(uint) == sizeof(uint32), "uint and uint32 size mismatch");
 
@@ -116,57 +110,17 @@ namespace glm
 			return static_cast<uint32>((static_cast<int64>(1) << static_cast<int64>(32)) + (static_cast<int64>(y) - static_cast<int64>(x)));
 	}
 
-	template <>
-	GLM_FUNC_QUALIFIER uvec2 usubBorrow
-	(
-		uvec2 const & x,
-		uvec2 const & y,
-		uvec2 & Borrow
-	)
-	{
-		return uvec2(
-			usubBorrow(x[0], y[0], Borrow[0]),
-			usubBorrow(x[1], y[1], Borrow[1]));
-	}
-
-	template <>
-	GLM_FUNC_QUALIFIER uvec3 usubBorrow
-	(
-		uvec3 const & x,
-		uvec3 const & y,
-		uvec3 & Borrow
-	)
+	template <precision P, template <typename, precision> class vecType>
+	GLM_FUNC_QUALIFIER vecType<uint, P> usubBorrow(vecType<uint, P> const & x, vecType<uint, P> const & y, vecType<uint, P> & Borrow)
 	{
-		return uvec3(
-			usubBorrow(x[0], y[0], Borrow[0]),
-			usubBorrow(x[1], y[1], Borrow[1]),
-			usubBorrow(x[2], y[2], Borrow[2]));
-	}
-
-	template <>
-	GLM_FUNC_QUALIFIER uvec4 usubBorrow
-	(
-		uvec4 const & x,
-		uvec4 const & y,
-		uvec4 & Borrow
-	)
-	{
-		return uvec4(
-			usubBorrow(x[0], y[0], Borrow[0]),
-			usubBorrow(x[1], y[1], Borrow[1]),
-			usubBorrow(x[2], y[2], Borrow[2]),
-			usubBorrow(x[3], y[3], Borrow[3]));
+		Borrow = mix(vecType<uint, P>(1), vecType<uint, P>(0), greaterThanEqual(x, y)); // bool-selector mix: presumably 0 where x >= y, 1 elsewhere
+		vecType<uint, P> const YgeX(y - x); // used where y >= x. NOTE(review): follows the scalar overload's y - x; GLSL describes x - y -- confirm intent
+		vecType<uint, P> const XgeY(vecType<uint32, P>((static_cast<int64>(1) << static_cast<int64>(32)) + (vecType<int64, P>(y) - vecType<int64, P>(x)))); // 2^32 + (y - x) in int64, narrowed to 32 bits; used where y < x
+		return mix(XgeY, YgeX, greaterThanEqual(y, x)); // select with the component-wise relational function rather than operator>= on vectors
 	}
 
 	// umulExtended
-	template <>
-	GLM_FUNC_QUALIFIER void umulExtended
-	(
-		uint const & x,
-		uint const & y,
-		uint & msb,
-		uint & lsb
-	)
+	GLM_FUNC_QUALIFIER void umulExtended(uint const & x, uint const & y, uint & msb, uint & lsb)
 	{
 		GLM_STATIC_ASSERT(sizeof(uint) == sizeof(uint32), "uint and uint32 size mismatch");
 
@@ -177,46 +131,14 @@ namespace glm
 		lsb = *PointerLSB;
 	}
 
-	template <>
-	GLM_FUNC_QUALIFIER void umulExtended
-	(
-		uvec2 const & x,
-		uvec2 const & y,
-		uvec2 & msb,
-		uvec2 & lsb
-	)
+	template <precision P, template <typename, precision> class vecType>
+	GLM_FUNC_QUALIFIER void umulExtended(vecType<uint, P> const & x, vecType<uint, P> const & y, vecType<uint, P> & msb, vecType<uint, P> & lsb)
 	{
-		umulExtended(x[0], y[0], msb[0], lsb[0]);
-		umulExtended(x[1], y[1], msb[1], lsb[1]);
-	}
-
-	template <>
-	GLM_FUNC_QUALIFIER void umulExtended
-	(
-		uvec3 const & x,
-		uvec3 const & y,
-		uvec3 & msb,
-		uvec3 & lsb
-	)
-	{
-		umulExtended(x[0], y[0], msb[0], lsb[0]);
-		umulExtended(x[1], y[1], msb[1], lsb[1]);
-		umulExtended(x[2], y[2], msb[2], lsb[2]);
-	}
+		GLM_STATIC_ASSERT(sizeof(uint) == sizeof(uint32), "uint and uint32 size mismatch");
 
-	template <>
-	GLM_FUNC_QUALIFIER void umulExtended
-	(
-		uvec4 const & x,
-		uvec4 const & y,
-		uvec4 & msb,
-		uvec4 & lsb
-	)
-	{
-		umulExtended(x[0], y[0], msb[0], lsb[0]);
-		umulExtended(x[1], y[1], msb[1], lsb[1]);
-		umulExtended(x[2], y[2], msb[2], lsb[2]);
-		umulExtended(x[3], y[3], msb[3], lsb[3]);
+		vecType<uint64, P> Value64(vecType<uint64, P>(x) * vecType<uint64, P>(y)); // both operands are widened to uint64 vectors before the multiply
+		msb = vecType<uint32, P>(Value64 >> static_cast<uint64>(32)); // product shifted right by 32, then converted to 32-bit components (upper half)
+		lsb = vecType<uint32, P>(Value64); // NOTE(review): assumes the uint64 -> uint32 vector conversion keeps the low 32 bits of each component -- confirm
 	}
 
 	// imulExtended
diff --git a/glm/detail/func_vector_relational.hpp b/glm/detail/func_vector_relational.hpp
index 94714ab4..075de262 100644
--- a/glm/detail/func_vector_relational.hpp
+++ b/glm/detail/func_vector_relational.hpp
@@ -56,9 +56,8 @@ namespace glm
 	///
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/lessThan.xml">GLSL lessThan man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
-	// TODO: Mismatched 
-	//template <typename T, precision P, template <typename, precision> class vecType>
-	//GLM_FUNC_DECL typename vecType<T, P>::bool_type lessThan(vecType<T, P> const & x, vecType<T, P> const & y);
+	template <typename T, precision P, template <typename, precision> class vecType>
+	GLM_FUNC_DECL vecType<bool, P> lessThan(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns the component-wise comparison of result x <= y.
 	///
@@ -67,7 +66,7 @@ namespace glm
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/lessThanEqual.xml">GLSL lessThanEqual man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL typename vecType<T, P>::bool_type lessThanEqual(vecType<T, P> const & x, vecType<T, P> const & y);
+	GLM_FUNC_DECL vecType<bool, P> lessThanEqual(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns the component-wise comparison of result x > y.
 	///
@@ -76,7 +75,7 @@ namespace glm
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/greaterThan.xml">GLSL greaterThan man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL typename vecType<T, P>::bool_type greaterThan(vecType<T, P> const & x, vecType<T, P> const & y);
+	GLM_FUNC_DECL vecType<bool, P> greaterThan(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns the component-wise comparison of result x >= y.
 	///
@@ -85,7 +84,7 @@ namespace glm
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/greaterThanEqual.xml">GLSL greaterThanEqual man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL typename vecType<T, P>::bool_type greaterThanEqual(vecType<T, P> const & x, vecType<T, P> const & y);
+	GLM_FUNC_DECL vecType<bool, P> greaterThanEqual(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns the component-wise comparison of result x == y.
 	///
@@ -93,9 +92,8 @@ namespace glm
 	/// 
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/equal.xml">GLSL equal man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
-	//TODO: conflicts with definision
-	//template <typename T, precision P, template <typename, precision> class vecType>
-	//GLM_FUNC_DECL typename vecType<T, P>::bool_type equal(vecType<T, P> const & x, vecType<T, P> const & y);
+	template <typename T, precision P, template <typename, precision> class vecType>
+	GLM_FUNC_DECL vecType<bool, P> equal(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns the component-wise comparison of result x != y.
 	/// 
@@ -104,7 +102,7 @@ namespace glm
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/notEqual.xml">GLSL notEqual man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.7 Vector Relational Functions</a>
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL typename vecType<T, P>::bool_type notEqual(vecType<T, P> const & x, vecType<T, P> const & y);
+	GLM_FUNC_DECL vecType<bool, P> notEqual(vecType<T, P> const & x, vecType<T, P> const & y);
 
 	/// Returns true if any component of x is true.
 	///
diff --git a/glm/detail/func_vector_relational.inl b/glm/detail/func_vector_relational.inl
index 4e32a8a9..0c136928 100644
--- a/glm/detail/func_vector_relational.inl
+++ b/glm/detail/func_vector_relational.inl
@@ -31,17 +31,13 @@
 namespace glm
 {
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type lessThan
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> lessThan(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer,
 			"Invalid template instantiation of 'lessThan', GLM vector types required floating-point or integer value types vectors");
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] < y[i];
 
@@ -49,81 +45,61 @@ namespace glm
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type lessThanEqual
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> lessThanEqual(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer,
 			"Invalid template instantiation of 'lessThanEqual', GLM vector types required floating-point or integer value types vectors");
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] <= y[i];
 		return Result;
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type greaterThan
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> greaterThan(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer,
 			"Invalid template instantiation of 'greaterThan', GLM vector types required floating-point or integer value types vectors");
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] > y[i];
 		return Result;
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type greaterThanEqual
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> greaterThanEqual(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer,
 			"Invalid template instantiation of 'greaterThanEqual', GLM vector types required floating-point or integer value types vectors");
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] >= y[i];
 		return Result;
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type equal
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> equal(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] == y[i];
 		return Result;
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER typename vecType<T, P>::bool_type notEqual
-	(
-		vecType<T, P> const & x,
-		vecType<T, P> const & y
-	)
+	GLM_FUNC_QUALIFIER vecType<bool, P> notEqual(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		assert(detail::component_count(x) == detail::component_count(y));
 
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(x); ++i)
 			Result[i] = x[i] != y[i];
 		return Result;
@@ -150,7 +126,7 @@ namespace glm
 	template <precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<bool, P> not_(vecType<bool, P> const & v)
 	{
-		typename vecType<bool, P>::bool_type Result(uninitialize);
+		vecType<bool, P> Result(uninitialize);
 		for(detail::component_count_t i = 0; i < detail::component_count(v); ++i)
 			Result[i] = !v[i];
 		return Result;