|
|
|
@ -122,7 +122,7 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & |
|
|
|
|
|
|
|
|
|
// SSE4 STATS: |
|
|
|
|
// 3 shuffle |
|
|
|
|
// 8 mul |
|
|
|
|
// 4 mul |
|
|
|
|
// 4 dpps |
|
|
|
|
|
|
|
|
|
__m128 mul0 = _mm_mul_ps(q1.Data, q2.Data); |
|
|
|
@ -130,18 +130,18 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & |
|
|
|
|
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2))); |
|
|
|
|
__m128 mul3 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); |
|
|
|
|
|
|
|
|
|
# if((GLM_ARCH & GLM_ARCH_SSE4)) |
|
|
|
|
__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff); |
|
|
|
|
__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); |
|
|
|
|
__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); |
|
|
|
|
__m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); |
|
|
|
|
# else |
|
|
|
|
mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)); |
|
|
|
|
mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f)); |
|
|
|
|
mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f)); |
|
|
|
|
mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# if((GLM_ARCH & GLM_ARCH_SSE4)) |
|
|
|
|
__m128 add0 = _mm_dp_ps(mul0, _mm_set1_ps(1.0f), 0xff); |
|
|
|
|
__m128 add1 = _mm_dp_ps(mul1, _mm_set1_ps(1.0f), 0xff); |
|
|
|
|
__m128 add2 = _mm_dp_ps(mul2, _mm_set1_ps(1.0f), 0xff); |
|
|
|
|
__m128 add3 = _mm_dp_ps(mul3, _mm_set1_ps(1.0f), 0xff); |
|
|
|
|
# elif((GLM_ARCH & GLM_ARCH_SSE3)) |
|
|
|
|
# if((GLM_ARCH & GLM_ARCH_SSE3)) |
|
|
|
|
__m128 add0 = _mm_hadd_ps(mul0, mul0); |
|
|
|
|
add0 = _mm_hadd_ps(add0, add0); |
|
|
|
|
__m128 add1 = _mm_hadd_ps(mul1, mul1); |
|
|
|
@ -160,6 +160,7 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & |
|
|
|
|
__m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3)); |
|
|
|
|
add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1)); |
|
|
|
|
# endif |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|