From 84caa1092f09b964c92ceb7a4c0765197fc3b5f1 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Mon, 30 May 2016 15:38:47 +0200 Subject: [PATCH] Added SIMD integer operations optimizations --- glm/detail/func_integer.inl | 40 ++++++++++++++------------------ glm/detail/func_integer_simd.inl | 37 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl index dfdae80d..8b62896f 100644 --- a/glm/detail/func_integer.inl +++ b/glm/detail/func_integer.inl @@ -21,40 +21,36 @@ namespace detail return Bits >= sizeof(T) * 8 ? ~static_cast(0) : (static_cast(1) << Bits) - static_cast(1); } - template + template class vecType, bool EXEC = false> struct compute_bitfieldReverseStep { - template class vecType> GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T) { return v; } }; - template <> - struct compute_bitfieldReverseStep + template class vecType> + struct compute_bitfieldReverseStep { - template class vecType> GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift) { return (v & Mask) << Shift | (v & (~Mask)) >> Shift; } }; - template + template class vecType, bool EXEC = false> struct compute_bitfieldBitCountStep { - template class vecType> GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T) { return v; } }; - template <> - struct compute_bitfieldBitCountStep + template class vecType> + struct compute_bitfieldBitCountStep { - template class vecType> GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift) { return (v & Mask) + ((v >> Shift) & Mask); @@ -293,12 +289,12 @@ namespace detail GLM_FUNC_QUALIFIER vecType bitfieldReverse(vecType const & v) { vecType x(v); - x = detail::compute_bitfieldReverseStep= 2>::call(x, T(0x5555555555555555ull), static_cast( 1)); - x = detail::compute_bitfieldReverseStep= 4>::call(x, T(0x3333333333333333ull), static_cast( 2)); - x = detail::compute_bitfieldReverseStep= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast( 4)); - x = detail::compute_bitfieldReverseStep= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast( 8)); - x = detail::compute_bitfieldReverseStep= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast(16)); - x = detail::compute_bitfieldReverseStep= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast(32)); + x = detail::compute_bitfieldReverseStep= 2>::call(x, T(0x5555555555555555ull), static_cast( 1)); + x = detail::compute_bitfieldReverseStep= 4>::call(x, T(0x3333333333333333ull), static_cast( 2)); + x = detail::compute_bitfieldReverseStep= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast( 4)); + x = detail::compute_bitfieldReverseStep= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast( 8)); + x = detail::compute_bitfieldReverseStep= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast(16)); + x = detail::compute_bitfieldReverseStep= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast(32)); return x; } @@ -313,12 +309,12 @@ namespace detail GLM_FUNC_QUALIFIER vecType bitCount(vecType const & v) { vecType::type, P> x(*reinterpret_cast::type, P> const *>(&v)); - x = detail::compute_bitfieldBitCountStep= 2>::call(x, typename detail::make_unsigned::type(0x5555555555555555ull), typename detail::make_unsigned::type( 1)); - x = detail::compute_bitfieldBitCountStep= 4>::call(x, typename detail::make_unsigned::type(0x3333333333333333ull), typename detail::make_unsigned::type( 2)); - x = detail::compute_bitfieldBitCountStep= 8>::call(x, typename detail::make_unsigned::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned::type( 4)); - x = detail::compute_bitfieldBitCountStep= 16>::call(x, typename detail::make_unsigned::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned::type( 8)); - x = detail::compute_bitfieldBitCountStep= 32>::call(x, typename detail::make_unsigned::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned::type(16)); - x = detail::compute_bitfieldBitCountStep= 64>::call(x, typename detail::make_unsigned::type(0x00000000FFFFFFFFull), typename detail::make_unsigned::type(32)); + x = detail::compute_bitfieldBitCountStep= 2>::call(x, typename detail::make_unsigned::type(0x5555555555555555ull), typename detail::make_unsigned::type( 1)); + x = detail::compute_bitfieldBitCountStep= 4>::call(x, typename detail::make_unsigned::type(0x3333333333333333ull), typename detail::make_unsigned::type( 2)); + x = detail::compute_bitfieldBitCountStep= 8>::call(x, typename detail::make_unsigned::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned::type( 4)); + x = detail::compute_bitfieldBitCountStep= 16>::call(x, typename detail::make_unsigned::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned::type( 8)); + x = detail::compute_bitfieldBitCountStep= 32>::call(x, typename detail::make_unsigned::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned::type(16)); + x = detail::compute_bitfieldBitCountStep= 64>::call(x, typename detail::make_unsigned::type(0x00000000FFFFFFFFull), typename detail::make_unsigned::type(32)); return vecType(x); } diff --git a/glm/detail/func_integer_simd.inl b/glm/detail/func_integer_simd.inl index fe3a37df..4e5820b4 100644 --- a/glm/detail/func_integer_simd.inl +++ b/glm/detail/func_integer_simd.inl @@ -8,7 +8,44 @@ namespace glm{ namespace detail { + template + struct compute_bitfieldReverseStep + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v, uint32 Mask, uint32 Shift) + { + __m128i const set0 = v.data; + + __m128i const set1 = _mm_set1_epi32(Mask); + __m128i const and1 = _mm_and_si128(set0, set1); + __m128i const sft1 = _mm_slli_epi32(and1, Shift); + + __m128i const set2 = _mm_andnot_si128(set0, _mm_set1_epi32(-1)); + __m128i const and2 = _mm_and_si128(set0, set2); + __m128i const sft2 = _mm_srai_epi32(and2, Shift); + + __m128i const or0 = _mm_or_si128(sft1, sft2); + + return or0; + } + }; + + template + struct compute_bitfieldBitCountStep + { + template + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & v, uint32 Mask, uint32 Shift) + { + __m128i const set0 = v.data; + __m128i const set1 = _mm_set1_epi32(Mask); + __m128i const and0 = _mm_and_si128(set0, set1); + __m128i const sft0 = _mm_slli_epi32(set0, Shift); + __m128i const and1 = _mm_and_si128(sft0, set1); + __m128i const add0 = _mm_add_epi32(and0, and1); + + return add0; + } + }; }//namespace detail # if GLM_ARCH & GLM_ARCH_AVX_BIT