Added AVX optimizations and equal tests

10 years ago · 52f8ecd973
parent d69616bbc8
commit 52f8ecd973
5 changed files with 102 additions and 4 deletions
--- a/glm/detail/func_trigonometric.inl
+++ b/glm/detail/func_trigonometric.inl
@ -193,3 +193,8 @@ namespace glm
 		return detail::functor1<T, T, P, vecType>::call(atanh, v);
 	}
 }//namespace glm
+
+#if GLM_ARCH != GLM_ARCH_PURE && GLM_HAS_UNRESTRICTED_UNIONS
+#	include "func_trigonometric_simd.inl"
+#endif
+
--- a/glm/detail/func_trigonometric_simd.inl
+++ b/glm/detail/func_trigonometric_simd.inl
--- a/glm/detail/type_vec4_simd.inl
+++ b/glm/detail/type_vec4_simd.inl
@ -62,6 +62,19 @@ namespace detail
 		}
 	};

+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// Specialization of compute_vec4_add for double components; only compiled
+	// when GLM_ARCH has GLM_ARCH_AVX_BIT set.
+	template <precision P>
+	struct compute_vec4_add<double, P>
+	{
+		// Returns a vector whose data is _mm256_add_pd(a.data, b.data).
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Built with the `uninitialize` tag; data is assigned on the next line.
+			tvec4<double, P> Sum(uninitialize);
+			Sum.data = _mm256_add_pd(a.data, b.data);
+			return Sum;
+		}
+	};
+#	endif
+
 	template <precision P>
 	struct compute_vec4_sub<float, P>
 	{
@ -73,6 +86,19 @@ namespace detail
 		}
 	};

+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// Specialization of compute_vec4_sub for double components; only compiled
+	// when GLM_ARCH has GLM_ARCH_AVX_BIT set.
+	template <precision P>
+	struct compute_vec4_sub<double, P>
+	{
+		// Returns a vector whose data is _mm256_sub_pd(a.data, b.data).
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Built with the `uninitialize` tag; data is assigned on the next line.
+			tvec4<double, P> Difference(uninitialize);
+			Difference.data = _mm256_sub_pd(a.data, b.data);
+			return Difference;
+		}
+	};
+#	endif
+
 	template <precision P>
 	struct compute_vec4_mul<float, P>
 	{
@ -84,6 +110,19 @@ namespace detail
 		}
 	};

+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// Specialization of compute_vec4_mul for double components; only compiled
+	// when GLM_ARCH has GLM_ARCH_AVX_BIT set.
+	template <precision P>
+	struct compute_vec4_mul<double, P>
+	{
+		// Returns a vector whose data is _mm256_mul_pd(a.data, b.data).
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Built with the `uninitialize` tag; data is assigned on the next line.
+			tvec4<double, P> Product(uninitialize);
+			Product.data = _mm256_mul_pd(a.data, b.data);
+			return Product;
+		}
+	};
+#	endif
+
 	template <precision P>
 	struct compute_vec4_div<float, P>
 	{
@ -95,6 +134,19 @@ namespace detail
 		}
 	};

+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	// Specialization of compute_vec4_div for double components; only compiled
+	// when GLM_ARCH has GLM_ARCH_AVX_BIT set.
+	template <precision P>
+	struct compute_vec4_div<double, P>
+	{
+		// Returns a vector whose data is _mm256_div_pd(a.data, b.data).
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			// Built with the `uninitialize` tag; data is assigned on the next line.
+			tvec4<double, P> Result(uninitialize);
+			Result.data = _mm256_div_pd(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
 	template <>
 	struct compute_vec4_div<float, lowp>
 	{
@ -124,7 +176,7 @@ namespace detail
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
 			tvec4<T, P> Result(uninitialize);
-			Result.data = _mm_and_si256(a.data, b.data);
+			Result.data = _mm256_and_si256(a.data, b.data);
 			return Result;
 		}
 	};
@ -148,7 +200,7 @@ namespace detail
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
 			tvec4<T, P> Result(uninitialize);
-			Result.data = _mm_or_si256(a.data, b.data);
+			Result.data = _mm256_or_si256(a.data, b.data);
 			return Result;
 		}
 	};
@ -259,6 +311,15 @@ namespace detail
 		}
 	};

+	// Equality test for int32 vec4 using SSE2 integer compares.
+	template <precision P>
+	struct compute_vec4_equal<int32, P, true, 32>
+	{
+		// Returns true only when every component of v1 equals the matching
+		// component of v2.
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			// _mm_cmpeq_epi32 sets all bits of each equal 32-bit lane, and
+			// _mm_movemask_epi8 collects one bit per byte (16 bits total).
+			// All four lanes match exactly when the mask is 0xFFFF; testing
+			// "!= 0" would report equality as soon as a single lane matches.
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) == 0xFFFF;
+		}
+	};
+
 	template <precision P>
 	struct compute_vec4_nequal<float, P, false, 32>
 	{
@ -267,6 +328,15 @@ namespace detail
 			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
 		}
 	};
+
+	// Inequality test for int32 vec4 using SSE2 integer compares.
+	template <precision P>
+	struct compute_vec4_nequal<int32, P, true, 32>
+	{
+		// Returns true when at least one component of v1 differs from the
+		// matching component of v2.
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			// There is no packed 32-bit "compare not equal" in SSE2, so compare
+			// for equality instead: _mm_movemask_epi8 yields 0xFFFF only when
+			// all four lanes are equal, hence any other mask means "not equal".
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0xFFFF;
+		}
+	};
 }//namespace detail

 #	if !GLM_HAS_DEFAULTED_FUNCTIONS
--- a/test/core/core_func_swizzle.cpp
+++ b/test/core/core_func_swizzle.cpp
@ -83,11 +83,12 @@ int test_vec4_swizzle()
 	glm::vec4 B = A.wzyx();
 	glm::vec4 C = B.wzyx();

-	float f = glm::dot(C.wzyx(), C.xyzw());
-
 	Error += A != B ? 0 : 1;
 	Error += A == C ? 0 : 1;

+	float f = glm::dot(C.wzyx(), C.xyzw());
+	Error += glm::abs(f - 20.f) < 0.01f ? 0 : 1;
+
 	return Error;
 }

--- a/test/core/core_type_vec4.cpp
+++ b/test/core/core_type_vec4.cpp
@ -310,6 +310,27 @@ int test_vec4_operators()
 	return Error;
 }

+// Exercises operator== and operator!= on glm::vec4 and glm::ivec4.
+// Returns the number of failed checks (0 when everything passes).
+int test_vec4_equal()
+{
+	int Error = 0;
+
+	{
+		glm::vec4 const A(1, 2, 3, 4);
+		glm::vec4 const B(1, 2, 3, 4);
+		// C differs from A in the last component only, so a comparison that
+		// accepts "any component equal" instead of "all components equal"
+		// is caught by the checks on C below.
+		glm::vec4 const C(1, 2, 3, 5);
+		Error += A == B ? 0 : 1;
+		Error += A != B ? 1 : 0;
+		Error += A == C ? 1 : 0;
+		Error += A != C ? 0 : 1;
+	}
+
+	{
+		glm::ivec4 const A(1, 2, 3, 4);
+		glm::ivec4 const B(1, 2, 3, 4);
+		// Same partial-match case for the integer vector.
+		glm::ivec4 const C(1, 2, 3, 5);
+		Error += A == B ? 0 : 1;
+		Error += A != B ? 1 : 0;
+		Error += A == C ? 1 : 0;
+		Error += A != C ? 0 : 1;
+	}
+
+	return Error;
+}
+
 int test_vec4_size()
 {
 	int Error = 0;
@ -557,6 +578,7 @@ int main()
 	Error += test_bvec4_ctor();
 	Error += test_vec4_size();
 	Error += test_vec4_operators();
+	Error += test_vec4_equal();
 	Error += test_vec4_swizzle_partial();
 	Error += test_vec4_simd();
 	Error += test_operator_increment();