diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl
index 3575f603..82d70b5a 100644
--- a/glm/detail/func_common.inl
+++ b/glm/detail/func_common.inl
@@ -762,3 +762,7 @@ namespace detail
 			ldexp(x.w, exp.w));
 	}
 }//namespace glm
+
+#if (GLM_ARCH & GLM_ARCH_SSE2) && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS // the included file uses SSE2 intrinsics unconditionally
+#	include "func_common_simd.inl"
+#endif
diff --git a/glm/detail/func_common_simd.inl b/glm/detail/func_common_simd.inl
new file mode 100644
index 00000000..308673d4
--- /dev/null
+++ b/glm/detail/func_common_simd.inl
@@ -0,0 +1,183 @@
+namespace glm{
+namespace detail
+{
+#if(GLM_COMPILER & GLM_COMPILER_VC)
+#pragma warning(push)
+#pragma warning(disable : 4510 4512 4610)
+#endif
+
+	union ieee754_QNAN
+	{
+		const float f;
+		struct i
+		{
+			const unsigned int mantissa:23, exp:8, sign:1;
+		};
+
+		ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {} // only f is initialised, so the stored value is 0.0
+	};
+
+#if(GLM_COMPILER & GLM_COMPILER_VC)
+#pragma warning(pop)
+#endif
+
+	static const __m128 GLM_VAR_USED zero = _mm_setzero_ps();
+	static const __m128 GLM_VAR_USED one = _mm_set_ps1(1.0f);
+	static const __m128 GLM_VAR_USED minus_one = _mm_set_ps1(-1.0f);
+	static const __m128 GLM_VAR_USED two = _mm_set_ps1(2.0f);
+	static const __m128 GLM_VAR_USED three = _mm_set_ps1(3.0f);
+
+	static const ieee754_QNAN GLM_VAR_USED absMask; // holds 0.0 (see constructor), so it must not be used as an abs mask
+	static const __m128 GLM_VAR_USED abs4Mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); // every bit set except the sign bit
+	static const __m128 GLM_VAR_USED _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000))); // sign bit only
+	static const __m128 GLM_VAR_USED _ps_2pow23 = _mm_set_ps1(8388608.0f);
+	static const __m128 GLM_VAR_USED _ps_1 = _mm_set_ps1(1.0f);
+
+	GLM_FUNC_QUALIFIER __m128 abs_ps(__m128 x) // per-lane |x|: masks off the sign bit
+	{
+		return _mm_and_ps(abs4Mask, x);
+	}
+
+	//sign: per-lane -1.0 where x < 0, +1.0 where x > 0, 0.0 otherwise
+	GLM_FUNC_QUALIFIER __m128 sgn_ps(__m128 x)
+	{
+		__m128 Neg = _mm_set1_ps(-1.0f);
+		__m128 Pos = _mm_set1_ps(1.0f);
+
+		__m128 Cmp0 = _mm_cmplt_ps(x, zero);
+		__m128 Cmp1 = _mm_cmpgt_ps(x, zero);
+
+		__m128 And0 = _mm_and_ps(Cmp0, Neg);
+		__m128 And1 = _mm_and_ps(Cmp1, Pos);
+
+		return _mm_or_ps(And0, And1);
+	}
+
+	//round: adds then subtracts 2^23 carrying the sign of x, so the addition discards the fractional bits
+	GLM_FUNC_QUALIFIER __m128 rnd_ps(__m128 x)
+	{
+		__m128 and0 = _mm_and_ps(_epi32_sign_mask, x); // sign bit of x
+		__m128 or0 = _mm_or_ps(and0, _ps_2pow23); // 2^23 with the sign of x
+		__m128 add0 = _mm_add_ps(x, or0);
+		__m128 sub0 = _mm_sub_ps(add0, or0);
+		return sub0;
+	}
+
+	//floor: rounds with rnd_ps, then subtracts 1.0 in lanes where x is less than the rounded value
+	GLM_FUNC_QUALIFIER __m128 flr_ps(__m128 x)
+	{
+		__m128 rnd0 = rnd_ps(x);
+		__m128 cmp0 = _mm_cmplt_ps(x, rnd0);
+		__m128 and0 = _mm_and_ps(cmp0, glm::detail::_ps_1);
+		__m128 sub0 = _mm_sub_ps(rnd0, and0);
+		return sub0;
+	}
+
+	//trunc
+	//GLM_FUNC_QUALIFIER __m128 _mm_trc_ps(__m128 v)
+	//{
+	//	return __m128();
+	//}
+
+	//roundEven: same add/subtract 2^23 sequence as rnd_ps
+	GLM_FUNC_QUALIFIER __m128 rde_ps(__m128 x)
+	{
+		__m128 and0 = _mm_and_ps(_epi32_sign_mask, x);
+		__m128 or0 = _mm_or_ps(and0, _ps_2pow23);
+		__m128 add0 = _mm_add_ps(x, or0);
+		__m128 sub0 = _mm_sub_ps(add0, or0);
+		return sub0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 ceil_ps(__m128 x) // rounds with rnd_ps, then adds 1.0 in lanes where x is greater than the rounded value
+	{
+		__m128 rnd0 = rnd_ps(x);
+		__m128 cmp0 = _mm_cmpgt_ps(x, rnd0);
+		__m128 and0 = _mm_and_ps(cmp0, _ps_1);
+		__m128 add0 = _mm_add_ps(rnd0, and0);
+		return add0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 frc_ps(__m128 x) // fractional part: x - floor(x)
+	{
+		__m128 flr0 = flr_ps(x);
+		__m128 sub0 = _mm_sub_ps(x, flr0);
+		return sub0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 mod_ps(__m128 x, __m128 y) // x - y * floor(x / y)
+	{
+		__m128 div0 = _mm_div_ps(x, y);
+		__m128 flr0 = flr_ps(div0);
+		__m128 mul0 = _mm_mul_ps(y, flr0);
+		__m128 sub0 = _mm_sub_ps(x, mul0);
+		return sub0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 clp_ps(__m128 v, __m128 minVal, __m128 maxVal) // clamp: max(min(v, maxVal), minVal)
+	{
+		__m128 min0 = _mm_min_ps(v, maxVal);
+		__m128 max0 = _mm_max_ps(min0, minVal);
+		return max0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 mix_ps(__m128 v1, __m128 v2, __m128 a) // linear blend: v1 * (1 - a) + v2 * a
+	{
+		__m128 sub0 = _mm_sub_ps(one, a);
+		__m128 mul0 = _mm_mul_ps(v1, sub0);
+		__m128 mul1 = _mm_mul_ps(v2, a);
+		__m128 add0 = _mm_add_ps(mul0, mul1);
+		return add0;
+	}
+
+	//step: per-lane 0.0 where x < edge, 1.0 otherwise
+	GLM_FUNC_QUALIFIER __m128 stp_ps(__m128 edge, __m128 x)
+	{
+		// Lane mask: all bits set where !(x < edge), all bits clear where x < edge.
+		__m128 const cmp0 = _mm_cmpnlt_ps(x, edge);
+		// ANDing the mask with 1.0 yields 1.0 or 0.0 independently in each lane.
+		__m128 const and0 = _mm_and_ps(cmp0, one);
+		return and0;
+	}
+
+	// smoothstep: t = clamp((x - edge0) / (edge1 - edge0), 0, 1), result = t * t * (3 - 2 * t)
+	GLM_FUNC_QUALIFIER __m128 ssp_ps(__m128 edge0, __m128 edge1, __m128 x)
+	{
+		__m128 sub0 = _mm_sub_ps(x, edge0);
+		__m128 sub1 = _mm_sub_ps(edge1, edge0);
+		__m128 div0 = _mm_div_ps(sub0, sub1); // normalise x into the edge0..edge1 range
+		__m128 clp0 = clp_ps(div0, zero, one);
+		__m128 mul0 = _mm_mul_ps(two, clp0);
+		__m128 sub2 = _mm_sub_ps(three, mul0);
+		__m128 mul1 = _mm_mul_ps(clp0, clp0);
+		__m128 mul2 = _mm_mul_ps(mul1, sub2);
+		return mul2;
+	}
+
+	/// \todo
+	//GLM_FUNC_QUALIFIER __m128 sse_nan_ps(__m128 x)
+	//{
+	//	__m128 empty;
+	//	return empty;
+	//}
+
+	/// \todo
+	//GLM_FUNC_QUALIFIER __m128 sse_inf_ps(__m128 x)
+	//{
+	//	__m128 empty;
+	//	return empty;
+	//}
+
+	// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
+	// By Elan Ruskin, http://assemblyrequired.crashworks.org/
+	GLM_FUNC_QUALIFIER __m128 sqrt_wip_ss(__m128 x)
+	{
+		__m128 const recip = _mm_rsqrt_ss(x); // "estimate" opcode
+		__m128 const half = _mm_set_ps1(0.5f);
+		__m128 const halfrecip = _mm_mul_ss(half, recip);
+		__m128 const threeminus_xrr = _mm_sub_ss(three, _mm_mul_ss(x, _mm_mul_ss (recip, recip)));
+		return _mm_mul_ss(halfrecip, threeminus_xrr); // 0.5 * r * (3 - x * r * r), computed in the lowest lane
+	}
+
+}//namespace detail
+}//namespace glm
diff --git a/glm/detail/func_geometric.inl b/glm/detail/func_geometric.inl
index 5bc8e1c9..7d168ed0 100644
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -200,10 +200,6 @@ namespace detail
 	}
 }//namespace glm
 
-#if GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
-#	if GLM_ARCH & GLM_ARCH_AVX
-#		include "func_geometric_avx.inl"
-#	elif GLM_ARCH & GLM_ARCH_SSE2
-#		include "func_geometric_sse2.inl"
-#	endif
-#endif//
+#if (GLM_ARCH & GLM_ARCH_SSE2) && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS // the included file uses SSE intrinsics unconditionally
+#	include "func_geometric_simd.inl"
+#endif
diff --git a/glm/detail/func_geometric_avx.inl b/glm/detail/func_geometric_avx.inl
deleted file mode 100644
index e69de29b..00000000
diff
--git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl
new file mode 100644
index 00000000..84fbf06f
--- /dev/null
+++ b/glm/detail/func_geometric_simd.inl
@@ -0,0 +1,119 @@
+namespace glm{
+namespace detail
+{
+#if GLM_ARCH & GLM_ARCH_AVX
+	GLM_FUNC_QUALIFIER __m128 dot_ps(__m128 v1, __m128 v2) // 4-component dot product, result written to every lane
+	{
+		return _mm_dp_ps(v1, v2, 0xff);
+	}
+#else
+	GLM_FUNC_QUALIFIER __m128 dot_ps(__m128 v1, __m128 v2) // 4-component dot product, result written to every lane
+	{
+		__m128 mul0 = _mm_mul_ps(v1, v2);
+		__m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1)); // swap neighbours within each pair
+		__m128 add0 = _mm_add_ps(mul0, swp0);
+		__m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3)); // reverse the lanes
+		__m128 add1 = _mm_add_ps(add0, swp1);
+		return add1;
+	}
+#endif
+
+	GLM_FUNC_QUALIFIER __m128 dot_ss(__m128 v1, __m128 v2) // 4-component dot product, result in the lowest lane only
+	{
+		__m128 mul0 = _mm_mul_ps(v1, v2);
+		__m128 mov0 = _mm_movehl_ps(mul0, mul0);
+		__m128 add0 = _mm_add_ps(mov0, mul0);
+		__m128 swp1 = _mm_shuffle_ps(add0, add0, 1);
+		__m128 add1 = _mm_add_ss(add0, swp1);
+		return add1;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 len_ps(__m128 x) // length: sqrt(dot(x, x)) in every lane
+	{
+		__m128 dot0 = dot_ps(x, x);
+		__m128 sqt0 = _mm_sqrt_ps(dot0);
+		return sqt0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 dst_ps(__m128 p0, __m128 p1) // distance: length(p0 - p1)
+	{
+		__m128 sub0 = _mm_sub_ps(p0, p1);
+		__m128 len0 = len_ps(sub0);
+		return len0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 xpd_ps(__m128 v1, __m128 v2) // cross product of the xyz lanes: v1.yzx * v2.zxy - v1.zxy * v2.yzx
+	{
+		__m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
+		__m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
+		__m128 swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
+		__m128 swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
+		__m128 mul0 = _mm_mul_ps(swp0, swp3);
+		__m128 mul1 = _mm_mul_ps(swp1, swp2);
+		__m128 sub0 = _mm_sub_ps(mul0, mul1);
+		return sub0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 nrm_ps(__m128 v) // normalize: v * rsqrt(dot(v, v)), using the approximate _mm_rsqrt_ps estimate
+	{
+		__m128 dot0 = dot_ps(v, v);
+		__m128 isr0 = _mm_rsqrt_ps(dot0);
+		__m128 mul0 = _mm_mul_ps(v, isr0);
+		return mul0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 ffd_ps(__m128 N, __m128 I, __m128 Nref) // faceforward: N where dot(Nref, I) < 0, otherwise -N
+	{
+		__m128 dot0 = dot_ps(Nref, I);
+		__m128 cmp0 = _mm_cmplt_ps(dot0, glm::detail::zero); // all bits set where N must be kept
+		__m128 flp0 = _mm_andnot_ps(cmp0, glm::detail::_epi32_sign_mask); // sign bit only where N must be negated
+		__m128 xor0 = _mm_xor_ps(N, flp0); // flipping the sign bit negates the lane
+		return xor0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 rfe_ps(__m128 I, __m128 N) // reflect: I - 2 * dot(N, I) * N
+	{
+		__m128 dot0 = dot_ps(N, I);
+		__m128 mul0 = _mm_mul_ps(N, dot0);
+		__m128 mul1 = _mm_mul_ps(mul0, glm::detail::two);
+		__m128 sub0 = _mm_sub_ps(I, mul1);
+		return sub0;
+	}
+
+	GLM_FUNC_QUALIFIER __m128 rfa_ps(__m128 I, __m128 N, __m128 eta) // refract: eta * I - (eta * dot(N, I) + sqrt(k)) * N, or zero when k < 0
+	{
+		__m128 dot0 = dot_ps(N, I);
+		__m128 mul0 = _mm_mul_ps(eta, eta);
+		__m128 mul1 = _mm_mul_ps(dot0, dot0);
+		__m128 sub0 = _mm_sub_ps(glm::detail::one, mul1); // 1 - dot^2
+		__m128 mul2 = _mm_mul_ps(mul0, sub0); // eta^2 * (1 - dot^2)
+		__m128 sub1 = _mm_sub_ps(glm::detail::one, mul2); // k = 1 - eta^2 * (1 - dot^2)
+
+		if(_mm_movemask_ps(_mm_cmplt_ps(sub1, glm::detail::zero)) != 0) // k < 0 in any lane: no refracted vector
+			return glm::detail::zero;
+
+		__m128 sqt0 = _mm_sqrt_ps(sub1);
+		__m128 mul3 = _mm_mul_ps(eta, dot0);
+		__m128 add0 = _mm_add_ps(mul3, sqt0);
+		__m128 mul4 = _mm_mul_ps(add0, N);
+		__m128 mul5 = _mm_mul_ps(eta, I);
+		__m128 sub2 = _mm_sub_ps(mul5, mul4);
+
+		return sub2;
+	}
+
+	template <>
+	struct compute_dot
+	{
+		GLM_FUNC_QUALIFIER static float call(tvec4 const& x, tvec4 const& y) // scalar dot product: extracts the lowest lane of dot_ss
+		{
+			__m128 const dot0 = dot_ss(x.data, y.data);
+
+			float Result = 0;
+			_mm_store_ss(&Result, dot0);
+			return Result;
+		}
+	};
+}//namespace detail
+}//namespace glm
+
diff --git a/glm/detail/func_geometric_sse2.inl b/glm/detail/func_geometric_sse2.inl
deleted file mode 100644
index e0862f3f..00000000
--- a/glm/detail/func_geometric_sse2.inl
+++ /dev/null
@@ -1,38 +0,0 @@
-namespace glm{
-namespace detail
-{
-	GLM_FUNC_QUALIFIER __m128 dot_ps(__m128 v1, __m128 v2)
-	{
-		__m128 mul0 = _mm_mul_ps(v1, v2);
-		__m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
-		__m128 add0 = _mm_add_ps(mul0, swp0);
-		__m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
-		__m128 add1 = _mm_add_ps(add0, swp1);
-		return add1;
-	}
-
-	GLM_FUNC_QUALIFIER __m128 dot_ss(__m128 v1, __m128
v2)
-	{
-		__m128 mul0 = _mm_mul_ps(v1, v2);
-		__m128 mov0 = _mm_movehl_ps(mul0, mul0);
-		__m128 add0 = _mm_add_ps(mov0, mul0);
-		__m128 swp1 = _mm_shuffle_ps(add0, add0, 1);
-		__m128 add1 = _mm_add_ss(add0, swp1);
-		return add1;
-	}
-
-	template <>
-	struct compute_dot
-	{
-		GLM_FUNC_QUALIFIER static float call(tvec4 const& x, tvec4 const& y)
-		{
-			__m128 const dot0 = dot_ss(x.data, y.data);
-
-			float Result = 0;
-			_mm_store_ss(&Result, dot0);
-			return Result;
-		}
-	};
-}//namespace detail
-}//namespace glm
-
diff --git a/glm/detail/func_matrix.inl b/glm/detail/func_matrix.inl
index b0bfc6b6..42b09c83 100644
--- a/glm/detail/func_matrix.inl
+++ b/glm/detail/func_matrix.inl
@@ -308,9 +308,7 @@ namespace detail
 	}
 }//namespace glm
 
-#if GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
-#	if GLM_ARCH & GLM_ARCH_SSE2
-#		include "func_matrix_sse2.inl"
-#	endif
-#endif//
+#if (GLM_ARCH & GLM_ARCH_SSE2) && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS // same SSE2 test that guarded func_matrix_sse2.inl before the rename
+#	include "func_matrix_simd.inl"
+#endif
 
diff --git a/glm/detail/func_matrix_sse2.inl b/glm/detail/func_matrix_simd.inl
similarity index 100%
rename from glm/detail/func_matrix_sse2.inl
rename to glm/detail/func_matrix_simd.inl
diff --git a/glm/detail/intrinsic_common.inl b/glm/detail/intrinsic_common.inl
index bd9774bb..f34bf939 100644
--- a/glm/detail/intrinsic_common.inl
+++ b/glm/detail/intrinsic_common.inl
@@ -27,28 +27,9 @@
 ///////////////////////////////////////////////////////////////////////////////////
 
 namespace glm{
-namespace detail{
-
-#if(GLM_COMPILER & GLM_COMPILER_VC)
-#pragma warning(push)
-#pragma warning(disable : 4510 4512 4610)
-#endif
-
-	union ieee754_QNAN
-	{
-		const float f;
-		struct i
-		{
-			const unsigned int mantissa:23, exp:8, sign:1;
-		};
-
-		ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {}
-	};
-
-#if(GLM_COMPILER & GLM_COMPILER_VC)
-#pragma warning(pop)
-#endif
-
+namespace detail
+{
+/*
 	static const __m128 GLM_VAR_USED zero = _mm_setzero_ps();
 	static const __m128
GLM_VAR_USED one = _mm_set_ps1(1.0f); static const __m128 GLM_VAR_USED minus_one = _mm_set_ps1(-1.0f); @@ -150,164 +131,7 @@ namespace detail{ static const __m128 GLM_VAR_USED _ps_log_q2 = _mm_set_ps1(-7.69691943550460008604e2f); static const __m128 GLM_VAR_USED _ps_log_c0 = _mm_set_ps1(0.693147180559945f); static const __m128 GLM_VAR_USED _ps_log2_c0 = _mm_set_ps1(1.44269504088896340735992f); - -GLM_FUNC_QUALIFIER __m128 sse_abs_ps(__m128 x) -{ - return _mm_and_ps(glm::detail::abs4Mask, x); -} - -GLM_FUNC_QUALIFIER __m128 sse_sgn_ps(__m128 x) -{ - __m128 Neg = _mm_set1_ps(-1.0f); - __m128 Pos = _mm_set1_ps(1.0f); - - __m128 Cmp0 = _mm_cmplt_ps(x, zero); - __m128 Cmp1 = _mm_cmpgt_ps(x, zero); - - __m128 And0 = _mm_and_ps(Cmp0, Neg); - __m128 And1 = _mm_and_ps(Cmp1, Pos); - - return _mm_or_ps(And0, And1); -} - -//floor -GLM_FUNC_QUALIFIER __m128 sse_flr_ps(__m128 x) -{ - __m128 rnd0 = sse_rnd_ps(x); - __m128 cmp0 = _mm_cmplt_ps(x, rnd0); - __m128 and0 = _mm_and_ps(cmp0, glm::detail::_ps_1); - __m128 sub0 = _mm_sub_ps(rnd0, and0); - return sub0; -} - -//trunc -/* -GLM_FUNC_QUALIFIER __m128 _mm_trc_ps(__m128 v) -{ - return __m128(); -} */ -//round -GLM_FUNC_QUALIFIER __m128 sse_rnd_ps(__m128 x) -{ - __m128 and0 = _mm_and_ps(glm::detail::_epi32_sign_mask, x); - __m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23); - __m128 add0 = _mm_add_ps(x, or0); - __m128 sub0 = _mm_sub_ps(add0, or0); - return sub0; -} - -//roundEven -GLM_FUNC_QUALIFIER __m128 sse_rde_ps(__m128 x) -{ - __m128 and0 = _mm_and_ps(glm::detail::_epi32_sign_mask, x); - __m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23); - __m128 add0 = _mm_add_ps(x, or0); - __m128 sub0 = _mm_sub_ps(add0, or0); - return sub0; -} - -GLM_FUNC_QUALIFIER __m128 sse_ceil_ps(__m128 x) -{ - __m128 rnd0 = sse_rnd_ps(x); - __m128 cmp0 = _mm_cmpgt_ps(x, rnd0); - __m128 and0 = _mm_and_ps(cmp0, glm::detail::_ps_1); - __m128 add0 = _mm_add_ps(rnd0, and0); - return add0; -} - -GLM_FUNC_QUALIFIER __m128 sse_frc_ps(__m128 x) -{ - 
__m128 flr0 = sse_flr_ps(x); - __m128 sub0 = _mm_sub_ps(x, flr0); - return sub0; -} - -GLM_FUNC_QUALIFIER __m128 sse_mod_ps(__m128 x, __m128 y) -{ - __m128 div0 = _mm_div_ps(x, y); - __m128 flr0 = sse_flr_ps(div0); - __m128 mul0 = _mm_mul_ps(y, flr0); - __m128 sub0 = _mm_sub_ps(x, mul0); - return sub0; -} - -/// TODO -/* -GLM_FUNC_QUALIFIER __m128 sse_modf_ps(__m128 x, __m128i & i) -{ - __m128 empty; - return empty; -} -*/ - -//GLM_FUNC_QUALIFIER __m128 _mm_min_ps(__m128 x, __m128 y) - -//GLM_FUNC_QUALIFIER __m128 _mm_max_ps(__m128 x, __m128 y) - -GLM_FUNC_QUALIFIER __m128 sse_clp_ps(__m128 v, __m128 minVal, __m128 maxVal) -{ - __m128 min0 = _mm_min_ps(v, maxVal); - __m128 max0 = _mm_max_ps(min0, minVal); - return max0; -} - -GLM_FUNC_QUALIFIER __m128 sse_mix_ps(__m128 v1, __m128 v2, __m128 a) -{ - __m128 sub0 = _mm_sub_ps(glm::detail::one, a); - __m128 mul0 = _mm_mul_ps(v1, sub0); - __m128 mul1 = _mm_mul_ps(v2, a); - __m128 add0 = _mm_add_ps(mul0, mul1); - return add0; -} - -GLM_FUNC_QUALIFIER __m128 sse_stp_ps(__m128 edge, __m128 x) -{ - __m128 cmp = _mm_cmple_ps(x, edge); - if(_mm_movemask_ps(cmp) == 0) - return glm::detail::one; - else - return glm::detail::zero; -} - -GLM_FUNC_QUALIFIER __m128 sse_ssp_ps(__m128 edge0, __m128 edge1, __m128 x) -{ - __m128 sub0 = _mm_sub_ps(x, edge0); - __m128 sub1 = _mm_sub_ps(edge1, edge0); - __m128 div0 = _mm_sub_ps(sub0, sub1); - __m128 clp0 = sse_clp_ps(div0, glm::detail::zero, glm::detail::one); - __m128 mul0 = _mm_mul_ps(glm::detail::two, clp0); - __m128 sub2 = _mm_sub_ps(glm::detail::three, mul0); - __m128 mul1 = _mm_mul_ps(clp0, clp0); - __m128 mul2 = _mm_mul_ps(mul1, sub2); - return mul2; -} - -/// \todo -//GLM_FUNC_QUALIFIER __m128 sse_nan_ps(__m128 x) -//{ -// __m128 empty; -// return empty; -//} - -/// \todo -//GLM_FUNC_QUALIFIER __m128 sse_inf_ps(__m128 x) -//{ -// __m128 empty; -// return empty; -//} - -// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration -// By Elan Ruskin, 
http://assemblyrequired.crashworks.org/ -GLM_FUNC_QUALIFIER __m128 sse_sqrt_wip_ss(__m128 const & x) -{ - __m128 const recip = _mm_rsqrt_ss(x); // "estimate" opcode - __m128 const half = _mm_set_ps1(0.5f); - __m128 const halfrecip = _mm_mul_ss(half, recip); - __m128 const threeminus_xrr = _mm_sub_ss(three, _mm_mul_ss(x, _mm_mul_ss (recip, recip))); - return _mm_mul_ss(halfrecip, threeminus_xrr); -} - }//namespace detail }//namespace glms diff --git a/glm/detail/intrinsic_geometric.inl b/glm/detail/intrinsic_geometric.inl index 3a3e4b08..c6183455 100644 --- a/glm/detail/intrinsic_geometric.inl +++ b/glm/detail/intrinsic_geometric.inl @@ -26,6 +26,7 @@ /// @author Christophe Riccio /////////////////////////////////////////////////////////////////////////////////// +/* namespace glm{ namespace detail{ @@ -48,7 +49,6 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1) //dot GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2) { - # if(GLM_ARCH & GLM_ARCH_AVX) return _mm_dp_ps(v1, v2, 0xff); # else @@ -145,3 +145,5 @@ GLM_FUNC_QUALIFIER __m128 sse_rfa_ps(__m128 I, __m128 N, __m128 eta) }//namespace detail }//namespace glm +*/ + diff --git a/glm/ext.hpp b/glm/ext.hpp index 5f662761..dc8f9d79 100644 --- a/glm/ext.hpp +++ b/glm/ext.hpp @@ -136,8 +136,3 @@ #if GLM_HAS_RANGE_FOR # include "./gtx/range.hpp" #endif - -#if GLM_ARCH & GLM_ARCH_SSE2 -# include "./gtx/simd_vec4.hpp" -# include "./gtx/simd_mat4.hpp" -#endif diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 6271d0cf..188fd41f 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -1,12 +1,3 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////// -// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net) -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Created : 2009-05-07 -// Updated : 2009-05-07 -// Licence : This source is under MIT License -// 
File : glm/gtx/simd_vec4.inl -/////////////////////////////////////////////////////////////////////////////////////////////////// - namespace glm{ namespace detail{ diff --git a/test/gtx/CMakeLists.txt b/test/gtx/CMakeLists.txt index af369c6e..7b40dce2 100644 --- a/test/gtx/CMakeLists.txt +++ b/test/gtx/CMakeLists.txt @@ -40,8 +40,8 @@ glmCreateTestGTC(gtx_rotate_normalized_axis) glmCreateTestGTC(gtx_rotate_vector) glmCreateTestGTC(gtx_scalar_multiplication) glmCreateTestGTC(gtx_scalar_relational) -glmCreateTestGTC(gtx_simd_vec4) -glmCreateTestGTC(gtx_simd_mat4) +#glmCreateTestGTC(gtx_simd_vec4) +#glmCreateTestGTC(gtx_simd_mat4) glmCreateTestGTC(gtx_spline) glmCreateTestGTC(gtx_string_cast) glmCreateTestGTC(gtx_type_aligned)