From d77694f8ac08c132afc24c9225af920dc5389c94 Mon Sep 17 00:00:00 2001
From: Christophe Riccio
Date: Tue, 8 Feb 2011 10:37:47 +0000
Subject: [PATCH] Added SIMD sqrt functions support, ticket #2

---
 glm/core/intrinsic_common.inl |  2 +-
 glm/gtx/simd_vec4.hpp         | 22 ++++++++++++++++++++++
 glm/gtx/simd_vec4.inl         | 28 ++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/glm/core/intrinsic_common.inl b/glm/core/intrinsic_common.inl
index b11f000d..fd031ef8 100644
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -258,7 +258,7 @@ inline __m128 sse_inf_ps(__m128 x)
 }
 
 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
-// By Elan Ruskin,
+// By Elan Ruskin, http://assemblyrequired.crashworks.org/
 inline __m128 sse_sqrt_wip_ss(__m128 const & x)
 {
 	__m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp
index 186fd045..028db5e0 100644
--- a/glm/gtx/simd_vec4.hpp
+++ b/glm/gtx/simd_vec4.hpp
@@ -401,6 +401,28 @@ namespace glm
 			detail::fvec4SIMD const & I,
 			detail::fvec4SIMD const & N,
 			float const & eta);
+
+		//! Returns the positive square root of x.
+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
+		detail::fvec4SIMD simdSqrt(
+			detail::fvec4SIMD const & x);
+
+		//! Returns an approximate positive square root of x: accuracy slightly lower than or equal to simdSqrt, but much faster.
+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
+		detail::fvec4SIMD simdFastSqrt(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the reciprocal of the positive square root of x.
+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
+		detail::fvec4SIMD simdInversesqrt(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the reciprocal of the positive square root of x,
+		//! faster than simdInversesqrt but less accurate.
+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
+		detail::fvec4SIMD simdFastInversesqrt(
+			detail::fvec4SIMD const & x);
+
 		}//namespace simd_vec4
 	}//namespace gtx
 }//namespace glm
diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl
index 510a57e6..f74144b8 100644
--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@@ -634,6 +634,34 @@ namespace glm
 		return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
 	}
 
+	inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
+	{
+		return _mm_sqrt_ps(x.Data);
+	}
+
+	inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
+	{
+		return _mm_rcp_ps(_mm_rsqrt_ps(x.Data)); // sqrt(x) ~= 1 / rsqrt(x); both ops are estimates, and x == 0 yields 0 (x * rsqrt(x) would give NaN)
+	}
+
+	// SSE packed reciprocal sqrt using the rsqrt op, plus one Newton-Raphson iteration
+	// By Elan Ruskin, http://assemblyrequired.crashworks.org/
+	inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x)
+	{
+		GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
+		GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; // NOTE(review): __m128 is normally 16-byte aligned; confirm GLM_ALIGN(4) is intended
+
+		__m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
+		__m128 halfrecip = _mm_mul_ps(half, recip);
+		__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
+		return _mm_mul_ps(halfrecip, threeminus_xrr); // refined = 0.5 * r * (3 - x * r * r)
+	}
+
+	inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x)
+	{
+		return _mm_rsqrt_ps(x.Data);
+	}
+
 	}//namespace simd_vec4
 	}//namespace gtx
 }//namespace glm