From ae6082db5e8907e6b75c2429758330f4254e4079 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sat, 28 May 2016 00:00:33 +0200 Subject: [PATCH] Added AVX2 bitwise optimization --- glm/detail/type_vec4.hpp | 1 - glm/detail/type_vec4.inl | 62 ++++++++++++--------- glm/detail/type_vec4_simd.inl | 100 ++++++++++++++++++++++++++-------- 3 files changed, 115 insertions(+), 48 deletions(-) diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp index 80cb4049..3af57d5f 100644 --- a/glm/detail/type_vec4.hpp +++ b/glm/detail/type_vec4.hpp @@ -70,7 +70,6 @@ namespace detail typedef __m256i type; }; # endif - }//namespace detail template diff --git a/glm/detail/type_vec4.inl b/glm/detail/type_vec4.inl index 9232cdc6..a6857be7 100644 --- a/glm/detail/type_vec4.inl +++ b/glm/detail/type_vec4.inl @@ -5,19 +5,31 @@ namespace glm{ namespace detail { template - struct is_int32 + struct is_int { enum test {value = 0}; }; template <> - struct is_int32 + struct is_int { enum test {value = ~0}; }; template <> - struct is_int32 + struct is_int + { + enum test {value = ~0}; + }; + + template <> + struct is_int + { + enum test {value = ~0}; + }; + + template <> + struct is_int { enum test {value = ~0}; }; @@ -67,7 +79,7 @@ namespace detail } }; - template + template struct compute_vec4_and { static tvec4 call(tvec4 const & a, tvec4 const & b) @@ -76,7 +88,7 @@ namespace detail } }; - template + template struct compute_vec4_or { static tvec4 call(tvec4 const & a, tvec4 const & b) @@ -85,7 +97,7 @@ namespace detail } }; - template + template struct compute_vec4_xor { static tvec4 call(tvec4 const & a, tvec4 const & b) @@ -94,7 +106,7 @@ namespace detail } }; - template + template struct compute_vec4_shift_left { static tvec4 call(tvec4 const & a, tvec4 const & b) @@ -103,7 +115,7 @@ namespace detail } }; - template + template struct compute_vec4_shift_right { static tvec4 call(tvec4 const & a, tvec4 const & b) @@ -112,7 +124,7 @@ namespace detail } }; - template + template 
struct compute_vec4_logical_not { static tvec4 call(tvec4 const & v) @@ -488,105 +500,105 @@ namespace detail template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator&=(U scalar) { - return (*this = detail::compute_vec4_and::call(*this, tvec4(scalar))); + return (*this = detail::compute_vec4_and::value, sizeof(T) * 8>::call(*this, tvec4(scalar))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator&=(tvec1 const & v) { - return (*this = detail::compute_vec4_and::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_and::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator&=(tvec4 const & v) { - return (*this = detail::compute_vec4_and::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_and::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator|=(U scalar) { - return (*this = detail::compute_vec4_or::call(*this, tvec4(scalar))); + return (*this = detail::compute_vec4_or::value, sizeof(T) * 8>::call(*this, tvec4(scalar))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator|=(tvec1 const & v) { - return (*this = detail::compute_vec4_or::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_or::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator|=(tvec4 const & v) { - return (*this = detail::compute_vec4_or::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_or::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator^=(U scalar) { - return (*this = detail::compute_vec4_xor::call(*this, tvec4(scalar))); + return (*this = detail::compute_vec4_xor::value, sizeof(T) * 8>::call(*this, tvec4(scalar))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator^=(tvec1 const & v) { - return (*this = detail::compute_vec4_xor::call(*this, tvec4(v))); + return (*this = 
detail::compute_vec4_xor::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator^=(tvec4 const & v) { - return (*this = detail::compute_vec4_xor::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_xor::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator<<=(U scalar) { - return (*this = detail::compute_vec4_shift_left::call(*this, tvec4(scalar))); + return (*this = detail::compute_vec4_shift_left::value, sizeof(T) * 8>::call(*this, tvec4(scalar))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator<<=(tvec1 const & v) { - return (*this = detail::compute_vec4_shift_left::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_shift_left::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator<<=(tvec4 const & v) { - return (*this = detail::compute_vec4_shift_left::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_shift_left::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator>>=(U scalar) { - return (*this = detail::compute_vec4_shift_right::call(*this, tvec4(scalar))); + return (*this = detail::compute_vec4_shift_right::value, sizeof(T) * 8>::call(*this, tvec4(scalar))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator>>=(tvec1 const & v) { - return (*this = detail::compute_vec4_shift_right::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_shift_right::value, sizeof(T) * 8>::call(*this, tvec4(v))); } template template GLM_FUNC_QUALIFIER tvec4 & tvec4::operator>>=(tvec4 const & v) { - return (*this = detail::compute_vec4_shift_right::call(*this, tvec4(v))); + return (*this = detail::compute_vec4_shift_right::value, sizeof(T) * 8>::call(*this, tvec4(v))); } // -- Unary constant operators -- @@ -910,7 +922,7 @@ namespace detail template GLM_FUNC_QUALIFIER tvec4 operator~(tvec4 
const & v) { - return detail::compute_vec4_logical_not::value>::call(v); + return detail::compute_vec4_logical_not::value, sizeof(T) * 8>::call(v); } // -- Boolean operators -- diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index c82062a6..3096913c 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -61,52 +61,56 @@ namespace detail } }; - template - struct compute_vec4_and + template + struct compute_vec4_and { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); Result.data = _mm_and_si128(a.data, b.data); return Result; } }; - template - struct compute_vec4_and +# if GLM_ARCH & GLM_ARCH_AVX2 + template + struct compute_vec4_and { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); - Result.data = _mm_and_si128(a.data, b.data); + tvec4 Result(uninitialize); + Result.data = _mm256_and_si256(a.data, b.data); return Result; } }; +# endif - template - struct compute_vec4_or + template + struct compute_vec4_or { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); Result.data = _mm_or_si128(a.data, b.data); return Result; } }; - template - struct compute_vec4_or +# if GLM_ARCH & GLM_ARCH_AVX2 + template + struct compute_vec4_or { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); - Result.data = _mm_or_si128(a.data, b.data); + tvec4 Result(uninitialize); + Result.data = _mm256_or_si256(a.data, b.data); return Result; } }; +# endif template - struct compute_vec4_xor + struct compute_vec4_xor { static tvec4 call(tvec4 const& a, tvec4 const& b) { @@ -116,8 +120,21 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX2 + template + 
struct compute_vec4_xor + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_xor_si256(a.data, b.data); + return Result; + } + }; +# endif + template - struct compute_vec4_shift_left + struct compute_vec4_shift_left { static tvec4 call(tvec4 const& a, tvec4 const& b) { @@ -127,8 +144,21 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX2 template - struct compute_vec4_shift_right + struct compute_vec4_shift_left + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_sllv_epi64(a.data, b.data); + return Result; + } + }; +# endif + + template + struct compute_vec4_shift_right { static tvec4 call(tvec4 const& a, tvec4 const& b) { @@ -138,8 +168,21 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX2 template - struct compute_vec4_logical_not + struct compute_vec4_shift_right + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_srlv_epi64(a.data, b.data); + return Result; + } + }; +# endif + + template + struct compute_vec4_logical_not { static tvec4 call(tvec4 const & v) { @@ -148,6 +191,19 @@ namespace detail return Result; } }; + +# if GLM_ARCH & GLM_ARCH_AVX2 + template + struct compute_vec4_logical_not + { + static tvec4 call(tvec4 const & v) + { + tvec4 Result(uninitialize); + Result.data = _mm256_xor_si256(v.data, _mm256_set1_epi32(-1)); + return Result; + } + }; +# endif }//namespace detail # if !GLM_HAS_DEFAULTED_FUNCTIONS