From 4797ea95409aed7d4ffe0317721a51bda68e682e Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Thu, 26 May 2016 02:47:43 +0200 Subject: [PATCH] Added specialized SSE2 and AVX bool mix --- glm/detail/func_common_simd.inl | 29 +++++++++++++++++++++++++++++ glm/detail/type_vec4.hpp | 6 ++++++ glm/detail/type_vec4_simd.inl | 2 +- glm/gtx/simd_vec4.inl | 6 ------ glm/simd/common.h | 2 ++ test/core/core_func_common.cpp | 4 ++-- 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/glm/detail/func_common_simd.inl b/glm/detail/func_common_simd.inl index c7abee25..9d1106ff 100644 --- a/glm/detail/func_common_simd.inl +++ b/glm/detail/func_common_simd.inl @@ -1,6 +1,33 @@ +/// @ref core +/// @file glm/detail/func_common_simd.inl + +#if GLM_ARCH & GLM_ARCH_SSE2 + +#include "../simd/common.h" + +#include + namespace glm{ namespace detail { + template + struct compute_mix_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & x, tvec4 const & y, tvec4 const & a) + { + __m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x); + __m128 const Mask = _mm_castsi128_ps(Load); + + tvec4 Result(uninitialize); +# if 0 && GLM_ARCH & GLM_ARCH_AVX + Result.data = _mm_blendv_ps(x.data, y.data, Mask); +# else + Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data)); +# endif + return Result; + } + }; + /* static const __m128 GLM_VAR_USED zero = _mm_setzero_ps(); static const __m128 GLM_VAR_USED one = _mm_set_ps1(1.0f); @@ -107,3 +134,5 @@ namespace detail }//namespace detail }//namespace glm + +#endif//GLM_ARCH & GLM_ARCH_SSE2 diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp index 533dc82c..80cb4049 100644 --- a/glm/detail/type_vec4.hpp +++ b/glm/detail/type_vec4.hpp @@ -17,6 +17,12 @@ namespace glm{ namespace detail { + template + struct shuffle_mask + { + enum{value = Value}; + }; + template struct simd_data { diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index 9cc85be5..9c46ac5c 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -1,5 +1,5 @@ /// @ref core -/// @file glm/detail/type_tvec4_sse2.inl +/// @file glm/detail/type_tvec4_simd.inl #if GLM_ARCH & GLM_ARCH_SSE2 diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 3810ed49..efc87c61 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -4,12 +4,6 @@ namespace glm{ namespace detail{ -template -struct shuffle_mask -{ - enum{value = Value}; -}; - ////////////////////////////////////// // Implicit basic constructors diff --git a/glm/simd/common.h b/glm/simd/common.h index dd188a1a..33959b64 100644 --- a/glm/simd/common.h +++ b/glm/simd/common.h @@ -1,6 +1,8 @@ /// @ref simd /// @file glm/simd/common.h +#pragma once + #if(GLM_COMPILER & GLM_COMPILER_VC) #pragma warning(push) #pragma warning(disable : 4510 4512 4610) diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index cf47c197..edd5c72a 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -444,7 +444,7 @@ namespace mix_ entry TestBVec4[] = { - {glm::vec4(0.0f), glm::vec4(1.0f), glm::bvec4(false), glm::vec4(0.0f)}, + {glm::vec4(0.0f, 0.0f, 1.0f, 1.0f), glm::vec4(2.0f, 2.0f, 3.0f, 3.0f), glm::bvec4(false, true, false, true), glm::vec4(0.0f, 2.0f, 1.0f, 3.0f)}, {glm::vec4(0.0f), glm::vec4(1.0f), glm::bvec4(true), glm::vec4(1.0f)}, {glm::vec4(-1.0f), glm::vec4(1.0f), glm::bvec4(false), glm::vec4(-1.0f)}, {glm::vec4(-1.0f), glm::vec4(1.0f), glm::bvec4(true), glm::vec4(1.0f)}, @@ -1243,10 +1243,10 @@ int main() Error += modf_::test(); Error += floatBitsToInt::test(); Error += floatBitsToUint::test(); + Error += mix_::test(); Error += step_::test(); Error += max_::test(); Error += min_::test(); - Error += mix_::test(); Error += round_::test(); Error += roundEven::test(); Error += isnan_::test();