From 2386237528a8aecde635868804ac410acf22a892 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sun, 29 May 2016 17:58:53 +0200 Subject: [PATCH] common function SIMD optimization --- Review notes: min(vec, scalar) asserts the same floating-point-or-integer constraint as the scalar min/max and the clamp overloads in this patch, so integer vectors remain accepted as they were through functor2_vec_sca. NOTE(review): _mm_min_epi32, _mm_max_epi32, _mm_min_epu32 and _mm_max_epu32 are SSE4.1 intrinsics - confirm the enclosing architecture guard, which is outside the hunks shown here. glm/detail/func_common.inl | 76 ++++++++++++------ glm/detail/func_common_simd.inl | 136 +++++++++++++++++++++++++++----- 2 files changed, 169 insertions(+), 43 deletions(-) diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index 283949ec..02468cc1 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -10,6 +10,23 @@ namespace glm { + // min + template + GLM_FUNC_QUALIFIER genType min(genType x, genType y) + { + GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'min' only accept floating-point or integer inputs"); + return x < y ? x : y; + } + + // max + template + GLM_FUNC_QUALIFIER genType max(genType x, genType y) + { + GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'max' only accept floating-point or integer inputs"); + + return x > y ? 
x : y; + } + // abs template <> GLM_FUNC_QUALIFIER int32 abs(int32 x) @@ -239,6 +256,33 @@ namespace detail return a - b * floor(a / b); } }; + + template class vecType> + struct compute_min_vector + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & x, vecType const & y) + { + return detail::functor2::call(min, x, y); + } + }; + + template class vecType> + struct compute_max_vector + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & x, vecType const & y) + { + return detail::functor2::call(max, x, y); + } + }; + + template class vecType> + struct compute_clamp_vector + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & x, vecType const & minVal, vecType const & maxVal) + { + return min(max(x, minVal), maxVal); + } + }; }//namespace detail template @@ -441,45 +485,30 @@ namespace detail //CHAR_BIT - 1))); // min - template - GLM_FUNC_QUALIFIER genType min(genType x, genType y) - { - GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'min' only accept floating-point or integer inputs"); - - return x < y ? x : y; - } - template class vecType> GLM_FUNC_QUALIFIER vecType min(vecType const & a, T b) { - return detail::functor2_vec_sca::call(min, a, b); + GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'min' only accept floating-point or integer inputs"); + return detail::compute_min_vector::call(a, vecType(b)); } template class vecType> GLM_FUNC_QUALIFIER vecType min(vecType const & a, vecType const & b) { - return detail::functor2::call(min, a, b); + return detail::compute_min_vector::call(a, b); } // max - template - GLM_FUNC_QUALIFIER genType max(genType x, genType y) - { - GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'max' only accept floating-point or integer inputs"); - - return x > y ? 
x : y; - } - template class vecType> GLM_FUNC_QUALIFIER vecType max(vecType const & a, T b) { - return detail::functor2_vec_sca::call(max, a, b); + return detail::compute_max_vector::call(a, vecType(b)); } template class vecType> GLM_FUNC_QUALIFIER vecType max(vecType const & a, vecType const & b) { - return detail::functor2::call(max, a, b); + return detail::compute_max_vector::call(a, b); } // clamp @@ -487,7 +516,6 @@ namespace detail GLM_FUNC_QUALIFIER genType clamp(genType x, genType minVal, genType maxVal) { GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'clamp' only accept floating-point or integer inputs"); - return min(max(x, minVal), maxVal); } @@ -495,16 +523,14 @@ namespace detail GLM_FUNC_QUALIFIER vecType clamp(vecType const & x, T minVal, T maxVal) { GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'clamp' only accept floating-point or integer inputs"); - - return min(max(x, minVal), maxVal); + return detail::compute_clamp_vector::call(x, vecType(minVal), vecType(maxVal)); } template class vecType> GLM_FUNC_QUALIFIER vecType clamp(vecType const & x, vecType const & minVal, vecType const & maxVal) { GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, "'clamp' only accept floating-point or integer inputs"); - - return min(max(x, minVal), maxVal); + return detail::compute_clamp_vector::call(x, minVal, maxVal); } template diff --git a/glm/detail/func_common_simd.inl b/glm/detail/func_common_simd.inl index 0bcd6c4a..b2cb7dfc 100644 --- a/glm/detail/func_common_simd.inl +++ b/glm/detail/func_common_simd.inl @@ -32,24 +32,6 @@ namespace detail } }; - template - struct compute_mix_vector - { - GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & x, tvec4 const & y, tvec4 const & a) - { - __m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x); - __m128 const Mask = _mm_castsi128_ps(Load); - - tvec4 Result(uninitialize); -# if 0 && 
GLM_ARCH & GLM_ARCH_AVX - Result.data = _mm_blendv_ps(x.data, y.data, Mask); -# else - Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data)); -# endif - return Result; - } - }; - template struct compute_floor { @@ -105,6 +87,124 @@ namespace detail } }; + template + struct compute_min_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_min_ps(v1.data, v2.data); + return result; + } + }; + + template + struct compute_min_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_min_epi32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_min_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_min_epu32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_max_ps(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_max_epi32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v1, tvec4 const & v2) + { + tvec4 result(uninitialize); + result.data = _mm_max_epu32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_clamp_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & x, tvec4 const & minVal, tvec4 const & maxVal) + { + tvec4 result(uninitialize); + result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_clamp_vector + { + GLM_FUNC_QUALIFIER 
static tvec4 call(tvec4 const & x, tvec4 const & minVal, tvec4 const & maxVal) + { + tvec4 result(uninitialize); + result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_clamp_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & x, tvec4 const & minVal, tvec4 const & maxVal) + { + tvec4 result(uninitialize); + result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_mix_vector + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & x, tvec4 const & y, tvec4 const & a) + { + __m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x); + __m128 const Mask = _mm_castsi128_ps(Load); + + tvec4 Result(uninitialize); +# if 0 && GLM_ARCH & GLM_ARCH_AVX + Result.data = _mm_blendv_ps(x.data, y.data, Mask); +# else + Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data)); +# endif + return Result; + } + }; + + }//namespace detail }//namespace glm