diff --git a/glm/core/intrinsic_common.hpp b/glm/core/intrinsic_common.hpp
index 8e7c9562..90dcfb56 100644
--- a/glm/core/intrinsic_common.hpp
+++ b/glm/core/intrinsic_common.hpp
@@ -32,6 +32,8 @@ namespace detail{
 	//roundEven
 	__m128 sse_rde_ps(__m128 v);
 
+	__m128 sse_rnd_ps(__m128 x);
+
 	__m128 sse_ceil_ps(__m128 v);
 
 	__m128 sse_frc_ps(__m128 x);
diff --git a/glm/core/intrinsic_common.inl b/glm/core/intrinsic_common.inl
index 05f71ed4..d7c50bf3 100644
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -153,7 +153,7 @@ inline __m128 sse_sgn_ps(__m128 x)
 //floor
 inline __m128 sse_flr_ps(__m128 x)
 {
-	__m128 rnd0 = _mm_rnd_ps(x);
+	__m128 rnd0 = sse_rnd_ps(x);
 	__m128 cmp0 = _mm_cmplt_ps(x, rnd0);
 	__m128 and0 = _mm_and_ps(cmp0, glm::detail::_ps_1);
 	__m128 sub0 = _mm_sub_ps(rnd0, and0);
@@ -185,7 +185,7 @@ inline __m128 sse_rde_ps(__m128 v)
 
 inline __m128 sse_ceil_ps(__m128 x)
 {
-	__m128 rnd0 = _mm_rnd_ps(x);
+	__m128 rnd0 = sse_rnd_ps(x);
 	__m128 cmp0 = _mm_cmpgt_ps(x, rnd0);
 	__m128 and0 = _mm_and_ps(cmp0, glm::detail::_ps_1);
 	__m128 add0 = _mm_add_ps(rnd0, and0);
@@ -194,7 +194,7 @@ inline __m128 sse_ceil_ps(__m128 x)
 
 inline __m128 sse_frc_ps(__m128 x)
 {
-	__m128 flr0 = _mm_flr_ps(x);
+	__m128 flr0 = sse_flr_ps(x);
 	__m128 sub0 = _mm_sub_ps(x, flr0);
 	return sub0;
 }
@@ -202,7 +202,7 @@ inline __m128 sse_frc_ps(__m128 x)
 inline __m128 sse_mod_ps(__m128 x, __m128 y)
 {
 	__m128 div0 = _mm_div_ps(x, y);
-	__m128 flr0 = _mm_flr_ps(div0);
+	__m128 flr0 = sse_flr_ps(div0);
 	__m128 mul0 = _mm_mul_ps(y, flr0);
 	__m128 sub0 = _mm_sub_ps(x, mul0);
 	return sub0;
@@ -247,7 +247,7 @@ inline __m128 sse_ssp_ps(__m128 edge0, __m128 edge1, __m128 x)
 	__m128 sub0 = _mm_sub_ps(x, edge0);
 	__m128 sub1 = _mm_sub_ps(edge1, edge0);
 	__m128 div0 = _mm_sub_ps(sub0, sub1);
-	__m128 clp0 = _mm_clp_ps(div0, glm::detail::zero, glm::detail::one);
+	__m128 clp0 = sse_clp_ps(div0, glm::detail::zero, glm::detail::one);
 	__m128 mul0 = _mm_mul_ps(glm::detail::two, clp0);
 	__m128 sub2 = _mm_sub_ps(glm::detail::three, mul0);
 	__m128 mul1 = _mm_mul_ps(clp0, clp0);
diff --git a/glm/core/intrinsic_geometric.inl b/glm/core/intrinsic_geometric.inl
index a1d80d5f..39f60644 100644
--- a/glm/core/intrinsic_geometric.inl
+++ b/glm/core/intrinsic_geometric.inl
@@ -11,23 +11,23 @@ namespace glm{
 namespace detail{
 
 //length
-inline __m128 _mm_len_ps(__m128 x)
+inline __m128 sse_len_ps(__m128 x)
 {
-	__m128 dot0 = _mm_dot_ps(x, x);
+	__m128 dot0 = sse_dot_ps(x, x);
 	__m128 sqt0 = _mm_sqrt_ps(dot0);
 	return sqt0;
 }
 
 //distance
-inline __m128 _mm_dst_ps(__m128 p0, __m128 p1)
+inline __m128 sse_dst_ps(__m128 p0, __m128 p1)
 {
 	__m128 sub0 = _mm_sub_ps(p0, p1);
-	__m128 len0 = _mm_len_ps(sub0);
+	__m128 len0 = sse_len_ps(sub0);
 	return len0;
 }
 
 //dot
-inline __m128 _mm_dot_ps(__m128 v1, __m128 v2)
+inline __m128 sse_dot_ps(__m128 v1, __m128 v2)
 {
 	__m128 mul0 = _mm_mul_ps(v1, v2);
 	__m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
@@ -38,7 +38,7 @@ inline __m128 _mm_dot_ps(__m128 v1, __m128 v2)
 }
 
 // SSE1
-inline __m128 _mm_dot_ss(__m128 v1, __m128 v2)
+inline __m128 sse_dot_ss(__m128 v1, __m128 v2)
 {
 	__m128 mul0 = _mm_mul_ps(v1, v2);
 	__m128 mov0 = _mm_movehl_ps(mul0, mul0);
@@ -49,7 +49,7 @@ inline __m128 _mm_dot_ss(__m128 v1, __m128 v2)
 }
 
 //cross
-inline __m128 _mm_xpd_ps(__m128 v1, __m128 v2)
+inline __m128 sse_xpd_ps(__m128 v1, __m128 v2)
 {
 	__m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
 	__m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
@@ -64,7 +64,7 @@ inline __m128 _mm_xpd_ps(__m128 v1, __m128 v2)
 //normalize
 inline __m128 _mm_nrm_ps(__m128 v)
 {
-	__m128 dot0 = _mm_dot_ps(v, v);
+	__m128 dot0 = sse_dot_ps(v, v);
 	__m128 isr0 = _mm_rsqrt_ps(dot0);
 	__m128 mul0 = _mm_mul_ps(v, isr0);
 	return mul0;
@@ -79,8 +79,8 @@ inline __m128 _mm_ffd_ps(__m128 N, __m128 I, __m128 Nref)
 	//__m128 mix0 = _mm_mix_ps(N, neg0, sgn0);
 	//return mix0;
 
-	__m128 dot0 = _mm_dot_ps(Nref, I);
-	__m128 sgn0 = _mm_sgn_ps(dot0);
+	__m128 dot0 = sse_dot_ps(Nref, I);
+	__m128 sgn0 = sse_sgn_ps(dot0);
 	__m128 mul0 = _mm_mul_ps(sgn0, glm::detail::minus_one);
 	__m128 mul1 = _mm_mul_ps(N, mul0);
 	return mul1;
@@ -89,7 +89,7 @@ inline __m128 _mm_ffd_ps(__m128 N, __m128 I, __m128 Nref)
 //reflect
 inline __m128 _mm_rfe_ps(__m128 I, __m128 N)
 {
-	__m128 dot0 = _mm_dot_ps(N, I);
+	__m128 dot0 = sse_dot_ps(N, I);
 	__m128 mul0 = _mm_mul_ps(N, dot0);
 	__m128 mul1 = _mm_mul_ps(mul0, glm::detail::two);
 	__m128 sub0 = _mm_sub_ps(I, mul1);
@@ -99,7 +99,7 @@ inline __m128 _mm_rfe_ps(__m128 I, __m128 N)
 //refract
 inline __m128 _mm_rfa_ps(__m128 I, __m128 N, __m128 eta)
 {
-	__m128 dot0 = _mm_dot_ps(N, I);
+	__m128 dot0 = sse_dot_ps(N, I);
 	__m128 mul0 = _mm_mul_ps(eta, eta);
 	__m128 mul1 = _mm_mul_ps(dot0, dot0);
 	__m128 sub0 = _mm_sub_ps(glm::detail::one, mul0);
diff --git a/glm/core/intrinsic_matrix.hpp b/glm/core/intrinsic_matrix.hpp
index 6ac6b743..9a639149 100644
--- a/glm/core/intrinsic_matrix.hpp
+++ b/glm/core/intrinsic_matrix.hpp
@@ -31,6 +31,10 @@ namespace detail
 	void sse_rotate_ps(__m128 const in[4], float Angle, float const v[3], __m128 out[4]);
 
+	__m128 sse_det_ps(__m128 const m[4]);
+
+	__m128 sse_slow_det_ps(__m128 const m[4]);
+
 }//namespace detail
 }//namespace glm
diff --git a/glm/core/intrinsic_matrix.inl b/glm/core/intrinsic_matrix.inl
index d92f6173..d35bb10b 100644
--- a/glm/core/intrinsic_matrix.inl
+++ b/glm/core/intrinsic_matrix.inl
@@ -10,13 +10,11 @@ namespace glm{
 namespace detail{
 
-static const __m128 one = _mm_set_ps1(1.0f);
-static const __m128 pi = _mm_set_ps1(3.141592653589793238462643383279f);
 static const __m128 _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f);
 static const __m128 _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f);
 
 template <typename matType>
-inline matType _mm_comp_mul_ps
+inline matType sse_comp_mul_ps
 (
 	__m128 const in1[4],
 	__m128 const in2[4],
@@ -29,7 +27,7 @@ inline matType _mm_comp_mul_ps
 	out[3] = _mm_mul_ps(in1[3], in2[3]);
 }
 
-inline void _mm_add_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
+inline void sse_add_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
 {
 	{
 		out[0] = _mm_add_ps(in1[0], in2[0]);
@@ -39,7 +37,7 @@ inline void _mm_add_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
 	}
 }
 
-inline void _mm_sub_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
+inline void sse_sub_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
 {
 	{
 		out[0] = _mm_sub_ps(in1[0], in2[0]);
@@ -49,7 +47,7 @@ inline void _mm_sub_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
 	}
 }
 
-inline __m128 _mm_mul_ps(__m128 m[4], __m128 v)
+inline __m128 sse_mul_ps(__m128 m[4], __m128 v)
 {
 	__m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
 	__m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
@@ -68,7 +66,7 @@ inline __m128 _mm_mul_ps(__m128 m[4], __m128 v)
 	return a2;
 }
 
-inline __m128 _mm_mul_ps(__m128 v, __m128 m[4])
+inline __m128 sse_mul_ps(__m128 v, __m128 m[4])
 {
 	__m128 i0 = m[0];
 	__m128 i1 = m[1];
@@ -95,7 +93,7 @@ inline __m128 _mm_mul_ps(__m128 v, __m128 m[4])
 	return f2;
 }
 
-inline void _mm_mul_ps(__m128 const in1[4], __m128 const in2[4], __m128 out[4])
+inline void sse_mul_ps(__m128 const in1[4], __m128 const in2[4], __m128 out[4])
 {
 	{
 		__m128 e0 = _mm_shuffle_ps(in2[0], in2[0], _MM_SHUFFLE(0, 0, 0, 0));
@@ -171,7 +169,7 @@ inline void _mm_mul_ps(__m128 const in1[4], __m128 const in2[4], __m128 out[4])
 	}
 }
 
-inline void _mm_transpose_ps(__m128 const in[4], __m128 out[4])
+inline void sse_transpose_ps(__m128 const in[4], __m128 out[4])
 {
 	__m128 tmp0 = _mm_shuffle_ps(in[0], in[1], 0x44);
 	__m128 tmp2 = _mm_shuffle_ps(in[0], in[1], 0xEE);
@@ -184,7 +182,7 @@ inline void _mm_transpose_ps(__m128 const in[4], __m128 out[4])
 	out[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
 }
 
-inline __m128 _mm_det_ps(__m128 const in[4])
+inline __m128 sse_slow_det_ps(__m128 const in[4])
 {
 	__m128 Fac0;
 	{
@@ -406,18 +404,15 @@ inline __m128 _mm_det_ps(__m128 const in[4])
 	//	+ m[0][1] * Inverse[1][0]
 	//	+ m[0][2] * Inverse[2][0]
 	//	+ m[0][3] * Inverse[3][0];
-	__m128 Det0 = _mm_dot_ps(in[0], Row2);
+	__m128 Det0 = sse_dot_ps(in[0], Row2);
 	return Det0;
 }
 
-template <typename T>
-inline typename detail::tmat4x4<T>::value_type _mm_det2_ps
+inline __m128 sse_det_ps
 (
-	__m128 const & m[4]
+	__m128 const m[4]
 )
 {
-	GLM_STATIC_ASSERT(detail::type<T>::is_float, "'determinant' only accept floating-point inputs");
-
 	//T SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
 	//T SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
 	//T SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
@@ -428,20 +423,20 @@
 	// First 2 columns
 	__m128 Swp2A = _mm_shuffle_ps(m[2], m[2], _MM_SHUFFLE(0, 1, 1, 2));
 	__m128 Swp3A = _mm_shuffle_ps(m[3], m[3], _MM_SHUFFLE(3, 2, 3, 3));
-	__m128 MulA = __mm_mul_ps(Swp2A, Swp3A);
+	__m128 MulA = _mm_mul_ps(Swp2A, Swp3A);
 
 	// Second 2 columns
 	__m128 Swp2B = _mm_shuffle_ps(m[2], m[2], _MM_SHUFFLE(0, 1, 1, 2));
 	__m128 Swp3B = _mm_shuffle_ps(m[3], m[3], _MM_SHUFFLE(3, 2, 3, 3));
-	__m128 MulB = __mm_mul_ps(Swp2A, Swp3A);
+	__m128 MulB = _mm_mul_ps(Swp2A, Swp3A);
 
 	// Columns subtraction
-	__m128 SubAB = __mm_sub_ps(MulA, MulB);
+	__m128 SubAB = _mm_sub_ps(MulA, MulB);
 
 	// Last 2 rows
 	__m128 Swp2C = _mm_shuffle_ps(m[2], m[2], _MM_SHUFFLE(1, 2, 0, 0));
 	__m128 Swp3C = _mm_shuffle_ps(m[3], m[3], _MM_SHUFFLE(0, 0, 1, 2));
-	__m128 MulC = __mm_mul_ps(Swp2C, Swp3C);
+	__m128 MulC = _mm_mul_ps(Swp2C, Swp3C);
 	__m128 SwpD = __mm_hl_ps(MulC);
 
 	//detail::tvec4<T> DetCof(
@@ -472,10 +467,10 @@
 	//	+ m[0][2] * DetCof[2]
 	//	+ m[0][3] * DetCof[3];
 
-	return _mm_dot_ps(m[0], Signed);
+	return sse_dot_ps(m[0], Signed);
 }
 
-inline void _mm_inverse_ps(__m128 const in[4], __m128 out[4])
+inline void sse_inverse_ps(__m128 const in[4], __m128 out[4])
 {
 	__m128 Fac0;
 	{
@@ -708,7 +703,7 @@ inline void _mm_inverse_ps(__m128 const in[4], __m128 out[4])
 	out[3] = _mm_mul_ps(Inv3, Rcp0);
 }
 
-inline void _mm_inverse_fast_ps(__m128 const in[4], __m128 out[4])
+inline void sse_inverse_fast_ps(__m128 const in[4], __m128 out[4])
 {
 	__m128 Fac0;
 	{
@@ -930,7 +925,7 @@ inline void _mm_inverse_fast_ps(__m128 const in[4], __m128 out[4])
 	//	+ m[0][1] * Inverse[1][0]
 	//	+ m[0][2] * Inverse[2][0]
 	//	+ m[0][3] * Inverse[3][0];
-	__m128 Det0 = _mm_dot_ps(in[0], Row2);
+	__m128 Det0 = sse_dot_ps(in[0], Row2);
 	__m128 Rcp0 = _mm_rcp_ps(Det0);
 	//__m128 Rcp0 = _mm_div_ps(one, Det0);
 	// Inverse /= Determinant;
@@ -941,7 +936,7 @@
 }
 
-void _mm_rotate_ps(__m128 const in[4], float Angle, float const v[3], __m128 out[4])
+void sse_rotate_ps(__m128 const in[4], float Angle, float const v[3], __m128 out[4])
 {
 	float a = glm::radians(Angle);
 	float c = cos(a);
@@ -1008,10 +1003,10 @@ void _mm_rotate_ps(__m128 const in[4], float Angle, float const v[3], __m128 out[4])
 	//Result[2] = m[0] * Rotate[2][0] + m[1] * Rotate[2][1] + m[2] * Rotate[2][2];
 	//Result[3] = m[3];
 	//return Result;
-	_mm_mul_ps(in, Result, out);
+	sse_mul_ps(in, Result, out);
 }
 
-void _mm_outer_ps(__m128 const & c, __m128 const & r, __m128 out[4])
+void sse_outer_ps(__m128 const & c, __m128 const & r, __m128 out[4])
 {
 	out[0] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(0, 0, 0, 0));
 	out[1] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(1, 1, 1, 1));
diff --git a/glm/gtx/simd_mat4.inl b/glm/gtx/simd_mat4.inl
index f763a3b8..6897c805 100644
--- a/glm/gtx/simd_mat4.inl
+++ b/glm/gtx/simd_mat4.inl
@@ -234,7 +234,7 @@ namespace simd_mat4
 	inline detail::fmat4x4SIMD simd_transpose(detail::fmat4x4SIMD const & m)
 	{
 		detail::fmat4x4SIMD result;
-		_mm_transpose_ps(&m[0].Data, &result[0].Data);
+		sse_transpose_ps(&m[0].Data, &result[0].Data);
 		return result;
 	}
 
@@ -246,7 +246,7 @@ namespace simd_mat4
 	inline detail::fmat4x4SIMD simd_inverse(detail::fmat4x4SIMD const & m)
 	{
 		detail::fmat4x4SIMD result;
-		_mm_inverse_ps(&m[0].Data, &result[0].Data);
+		sse_inverse_ps(&m[0].Data, &result[0].Data);
 		return result;
 	}
 }//namespace simd_mat4
diff --git a/glm/setup.hpp b/glm/setup.hpp
index 4a0c3daa..06044c0d 100644
--- a/glm/setup.hpp
+++ b/glm/setup.hpp
@@ -108,7 +108,7 @@
 
 #ifdef _MSC_VER
 
-#if defined(_WIN64)
+#if defined(_M_X64)
 #define GLM_MODEL	GLM_MODEL_64
 #else
 #define GLM_MODEL	GLM_MODEL_32
@@ -222,42 +222,80 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Compiler instruction set
 
-#define GLM_INSTRUCTION_SET_NULL	0x00000000	//
+//#define GLM_INSTRUCTION_SET_NULL	0x00000000	//
 #define GLM_INSTRUCTION_SET_PURE	0x00000001	// x86intrin.h
-#define GLM_INSTRUCTION_SET_MMX		0x00000002	// mmintrin.h (MMX)
-#define GLM_INSTRUCTION_SET_3DNOW	0x00000004	// mm3dnow.h (3DNOW!)
-#define GLM_INSTRUCTION_SET_SSE		0x00000008	// xmmintrin.h (SSE + MMX)
+//#define GLM_INSTRUCTION_SET_MMX		0x00000002	// mmintrin.h (MMX)
+//#define GLM_INSTRUCTION_SET_3DNOW	0x00000004	// mm3dnow.h (3DNOW!)
+//#define GLM_INSTRUCTION_SET_SSE		0x00000008	// xmmintrin.h (SSE + MMX)
 #define GLM_INSTRUCTION_SET_SSE2	0x00000010	// emmintrin.h (SSE2 + SSE)
-#define GLM_INSTRUCTION_SET_SSE3	0x00000020	// pmmintrin.h (SSE3 + SSE2 + SSE1)
-#define GLM_INSTRUCTION_SET_SSSE3	0x00000040	// tmmintrin.h (SSSE3 + SSE3 + SSE2 + SSE1)
-#define GLM_INSTRUCTION_SET_POPCNT	0x00000080	// popcntintrin.h
-#define GLM_INSTRUCTION_SET_SSE4A	0x00000100	// ammintrin.h (SSE4A + POPCNT + SSE3 + SSE2 + SSE)
-#define GLM_INSTRUCTION_SET_SSE4_1	0x00000200	// smmintrin.h (SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
-#define GLM_INSTRUCTION_SET_SSE4_2	0x00000400	// nmmintrin.h (SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
-#define GLM_INSTRUCTION_SET_AES		0x00000800	// wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
-#define GLM_INSTRUCTION_SET_PCLMUL	0x00001000	// wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
-#define GLM_INSTRUCTION_SET_AVX		0x00002000	// immintrin.h (AES + PCLMUL + SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
-
-#if(defined(GLM_COMPILER) && (GLM_COMPILER & GLM_COMPILER_GCC))
-#	define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_NULL
-#elif(defined(GLM_COMPILER) && (GLM_COMPILER & GLM_COMPILER_VC))
-#	if(GLM_MODEL == GLM_MODEL_64)
-#		ifdef _M_CEE_PURE
-#			define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
-#		else
-#			define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_MMX | GLM_INSTRUCTION_SET_SSE
-#		endif
+#define GLM_INSTRUCTION_SET_SSE3	0x00000020 | GLM_INSTRUCTION_SET_SSE2	// pmmintrin.h (SSE3 + SSE2 + SSE1)
+//#define GLM_INSTRUCTION_SET_SSSE3	0x00000040	// tmmintrin.h (SSSE3 + SSE3 + SSE2 + SSE1)
+//#define GLM_INSTRUCTION_SET_POPCNT	0x00000080	// popcntintrin.h
+//#define GLM_INSTRUCTION_SET_SSE4A	0x00000100	// ammintrin.h (SSE4A + POPCNT + SSE3 + SSE2 + SSE)
+//#define GLM_INSTRUCTION_SET_SSE4_1	0x00000200	// smmintrin.h (SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+//#define GLM_INSTRUCTION_SET_SSE4_2	0x00000400	// nmmintrin.h (SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+//#define GLM_INSTRUCTION_SET_AES		0x00000800	// wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
+//#define GLM_INSTRUCTION_SET_PCLMUL	0x00001000	// wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
+#define GLM_INSTRUCTION_SET_AVX		0x00002000 | GLM_INSTRUCTION_SET_SSE3	// immintrin.h (AES + PCLMUL + SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+
+/////////////////
+// Platform
+
+#define GLM_SUPPORT_PURE	0
+#define GLM_SUPPORT_SSE2	1
+#define GLM_SUPPORT_SSE3	2
+#define GLM_SUPPORT_AVX		3
+
+#if(GLM_COMPILER & GLM_COMPILER_VC)
+#	if(GLM_COMPILER >= GLM_COMPILER_VC2010)
+#		define GLM_SUPPORT GLM_SUPPORT_SSE3 //GLM_SUPPORT_AVX (Require SP1)
+#	elif(GLM_COMPILER >= GLM_COMPILER_VC2008)
+#		define GLM_SUPPORT GLM_SUPPORT_SSE3
+#	elif(GLM_COMPILER >= GLM_COMPILER_VC2005)
+#		define GLM_SUPPORT GLM_SUPPORT_SSE2
+#	else
+#		define GLM_SUPPORT GLM_SUPPORT_PURE
+#	endif
+#elif(GLM_COMPILER & GLM_COMPILER_GCC)
+#	if(GLM_COMPILER >= GLM_COMPILER_GCC44)
+#		define GLM_SUPPORT GLM_SUPPORT_AVX
+#	elif(GLM_COMPILER >= GLM_COMPILER_GCC40)
+#		define GLM_SUPPORT GLM_SUPPORT_SSE3
 #	else
-#		ifdef _M_CEE_PURE
-#			define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
-#		else
-#			define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_NULL
-#		endif
+#		define GLM_SUPPORT GLM_SUPPORT_PURE
 #	endif
 #else
-#	define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
+#	define GLM_SUPPORT GLM_SUPPORT_PURE
 #endif
 
+#define GLM_PLATFORM_PURE	0
+#define GLM_PLATFORM_SSE2	1
+#define GLM_PLATFORM_SSE3	2
+#define GLM_PLATFORM_AVX	3
+
+#ifdef GLM_INSTRUCTION_SET
+#	if((GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_AVX) && GLM_SUPPORT >= GLM_SUPPORT_AVX)
+#		include <immintrin.h>
+#		define GLM_PLATFORM GLM_PLATFORM_AVX
+#	elif((GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE3) && GLM_SUPPORT >= GLM_SUPPORT_SSE3)
+#		include <pmmintrin.h>
+#		define GLM_PLATFORM GLM_PLATFORM_SSE3
+#	elif((GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE2) && GLM_SUPPORT >= GLM_SUPPORT_SSE2)
+#		include <emmintrin.h>
+#		define GLM_PLATFORM GLM_PLATFORM_SSE2
+#	else
+#		define GLM_PLATFORM GLM_PLATFORM_PURE
+#	endif
+#else
+#	if(GLM_MODEL == GLM_MODEL_64)
+#		include <emmintrin.h>
+#		define GLM_PLATFORM GLM_PLATFORM_SSE2
+#	else
+#		define GLM_PLATFORM GLM_PLATFORM_PURE
+#	endif
+#endif
+
+/*
 #if(GLM_INSTRUCTION_SET != GLM_INSTRUCTION_SET_NULL)
 #	if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_MMX)
 #		include <mmintrin.h>
@@ -278,10 +316,10 @@
 #		include <tmmintrin.h>
 #	endif
 #	if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_POPCNT)
-#		include <popcntintrin.h>
+//#		include <popcntintrin.h>
 #	endif
 #	if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE4A)
-#		include <ammintrin.h>
+//#		include <ammintrin.h>
 #	endif
 #	if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE4_1)
 #		include <smmintrin.h>
@@ -299,7 +337,7 @@
 #		include <immintrin.h>
 #	endif
 #endif
-
+*/
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Swizzle operators
diff --git a/test/gtx/gtx-simd-mat4.cpp b/test/gtx/gtx-simd-mat4.cpp
index 1647016b..90ad4953 100644
--- a/test/gtx/gtx-simd-mat4.cpp
+++ b/test/gtx/gtx-simd-mat4.cpp
@@ -30,8 +30,8 @@ int main(int argc, void* argv[])
 		glm::simd_vec4(0.5f, 3.0f, 0.6f, 0.02f),
 		glm::simd_vec4(0.2f, 0.4f, 2.0f, 0.03f),
 		glm::simd_vec4(4.0f, 3.0f, 2.0f, 1.00f));
-	__m128 DetB = _mm_slow_det_ps(&IdentityB.Data[0].Data);
-	__m128 DetC = _mm_det_ps(&IdentityB.Data[0].Data);
+	__m128 DetB = sse_slow_det_ps(&IdentityB.Data[0].Data);
+	__m128 DetC = sse_det_ps(&IdentityB.Data[0].Data);
 
 	return 0;
 }
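---

For reviewers: a minimal sketch of how the renamed glm::detail entry points can be exercised once this patch is applied. The file name and the direct <glm/glm.hpp> include are illustrative assumptions, not part of the patch; it presumes a configuration where GLM_INSTRUCTION_SET is defined so that GLM_PLATFORM resolves to at least GLM_PLATFORM_SSE2 and the SSE wrappers are compiled in.

// sse_rename_smoke.cpp (hypothetical; modeled on test/gtx/gtx-simd-mat4.cpp)
#include <cstdio>
#include <emmintrin.h>
#include <glm/glm.hpp> // assumed to pull in the glm::detail SSE wrappers on an SSE2 build

int main()
{
	// x and y unit vectors; note _mm_set_ps takes lanes in w, z, y, x order.
	__m128 const X = _mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f);
	__m128 const Y = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f);

	// sse_dot_ps replicates the dot product across all four lanes: expect 0.
	__m128 const Dot = glm::detail::sse_dot_ps(X, Y);

	// sse_xpd_ps is the cross product: expect the z unit vector.
	__m128 const Xpd = glm::detail::sse_xpd_ps(X, Y);

	float DotOut[4], XpdOut[4];
	_mm_storeu_ps(DotOut, Dot);
	_mm_storeu_ps(XpdOut, Xpd);
	std::printf("dot = %f, cross = (%f, %f, %f)\n",
		DotOut[0], XpdOut[0], XpdOut[1], XpdOut[2]);
	return 0;
}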