// Review header for this patch. Everything before the first "diff --git" line
// is descriptive text only; the diff content below is unchanged.
//
// What the patch does, per file:
//  * glm/core/_detail.hpp: replaces the two-argument GLM_ALIGNED(keyword, x)
//    macro with a one-argument GLM_ALIGN(x) in the VC and GCC branches, and
//    replaces GLM_ALIGN_UNION(x) with an empty GLM_ALIGN(x) in the fallback.
//  * glm/core/intrinsic_geometric.inl: renames _mm_nrm_ps, _mm_ffd_ps,
//    _mm_rfe_ps and _mm_rfa_ps to sse_nrm_ps, sse_ffd_ps, sse_rfe_ps and
//    sse_rfa_ps; glm/core/intrinsic_matrix.inl now calls detail::sse_nrm_ps.
//  * glm/gtx/simd_mat4.hpp/.inl: renames the simd_* typedef and functions to
//    simdMat4 / simdMatrixCompMult / simdTranspose / simdDeterminant /
//    simdInverse, replaces the template declarations with fmat4x4SIMD and
//    fvec4SIMD signatures, and adds simdOuterProduct.
//  * glm/gtx/simd_vec4.hpp/.inl: fvec4SIMD changes from a union to a struct,
//    the float Array and the x/y/z/w, r/g/b/a, s/t/q/p members are removed
//    (only __m128 Data remains), value_type changes from float to __m128, a
//    constructor taking a tvec4 is declared, the simd_* declarations and
//    simd_cross are removed, and vec4_cast is added. The simdLength /
//    simdDistance / simdDot bodies swap the _mm_store_ss arguments so the
//    float pointer comes first, and the helpers are now called through
//    detail:: with the member spelled Data instead of data.
//  * test/gtx/gtx-simd-mat4.cpp, test/gtx/gtx-simd-vec4.cpp: follow the
//    renames; the printf calls that read .x/.y/.z/.w are commented out.
//
// NOTE(review): points to confirm against the real sources before merging:
//  - simdOuterProduct is added with an empty body although it is declared to
//    return detail::fmat4x4SIMD; there is no return statement.
//  - simd_vec4.inl defines fvec4SIMD::operator detail::tvec4(), but the only
//    declaration added in simd_vec4.hpp is commented out. The vec4 test's
//    "glm::vec4 F1 = E1;" looks like it depends on that conversion.
//  - The GCC branch spells the attribute as __attribute__(aligned(x)) with a
//    single pair of parentheses; GCC documents __attribute__((...)). Verify.
//  - fvec4SIMD is declared with GLM_ALIGN(4) while its only data member is an
//    __m128, and fmat4x4SIMD uses GLM_ALIGN(16). Confirm 4 is intended, and
//    that the attribute placed before the struct keyword takes effect on GCC.
//  - vec4_cast and the conversion operator store with _mm_store_ps into
//    &Result[0] of a tvec4; that intrinsic expects a 16-byte aligned address
//    and the alignment of tvec4 is not visible here. TODO confirm, or use an
//    unaligned store.
//  - In test_detB/test_detC/test_detD the determinant is computed into d, but
//    "glm::vec4 v;//(d);" never receives it, so Test[i] = v.x does not store
//    the computed value.
//  - Template argument lists and #include targets appear to be missing from
//    this copy (bare "template", "std::vector", "A1.swizzle()"); presumably
//    lost in extraction. Check against the original commit.
diff --git a/glm/core/_detail.hpp b/glm/core/_detail.hpp index 0e4341bc..9c826409 100644 --- a/glm/core/_detail.hpp +++ b/glm/core/_detail.hpp @@ -322,12 +322,12 @@ namespace detail #if((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2005)) # define GLM_DEPRECATED __declspec(deprecated) -# define GLM_ALIGNED(keyword, x) __declspec(align(x)) keyword +# define GLM_ALIGN(x) __declspec(align(x)) # define GLM_RESTRICT __declspec(restrict) # define GLM_RESTRICT_VAR __restrict #elif((GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC31)) # define GLM_DEPRECATED __attribute__((__deprecated__)) -# define GLM_ALIGNED(keyword, x) keyword __attribute__(aligned(x)) +# define GLM_ALIGN(x) __attribute__(aligned(x)) # if(GLM_COMPILER >= GLM_COMPILER_GCC33) # define GLM_RESTRICT __restrict__ # define GLM_RESTRICT_VAR __restrict__ @@ -339,7 +339,7 @@ namespace detail # define GLM_RESTRICT_VAR __restrict__ #else # define GLM_DEPRECATED -# define GLM_ALIGN_UNION(x) union +# define GLM_ALIGN(x) # define GLM_RESTRICT # define GLM_RESTRICT_VAR #endif//GLM_COMPILER diff --git a/glm/core/intrinsic_geometric.inl b/glm/core/intrinsic_geometric.inl index 4a2403c2..46e52ba1 100644 --- a/glm/core/intrinsic_geometric.inl +++ b/glm/core/intrinsic_geometric.inl @@ -62,7 +62,7 @@ inline __m128 sse_xpd_ps(__m128 v1, __m128 v2) } //normalize -inline __m128 _mm_nrm_ps(__m128 v) +inline __m128 sse_nrm_ps(__m128 v) { __m128 dot0 = sse_dot_ps(v, v); __m128 isr0 = _mm_rsqrt_ps(dot0); @@ -71,7 +71,7 @@ inline __m128 _mm_nrm_ps(__m128 v) } //faceforward -inline __m128 _mm_ffd_ps(__m128 N, __m128 I, __m128 Nref) +inline __m128 sse_ffd_ps(__m128 N, __m128 I, __m128 Nref) { //__m128 dot0 = _mm_dot_ps(v, v); //__m128 neg0 = _mm_neg_ps(N); @@ -87,7 +87,7 @@ inline __m128 _mm_ffd_ps(__m128 N, __m128 I, __m128 Nref) } //reflect -inline __m128 _mm_rfe_ps(__m128 I, __m128 N) +inline __m128 sse_rfe_ps(__m128 I, __m128 N) { __m128 dot0 = sse_dot_ps(N, I); __m128 mul0 = 
_mm_mul_ps(N, dot0); @@ -97,7 +97,7 @@ inline __m128 _mm_rfe_ps(__m128 I, __m128 N) } //refract -inline __m128 _mm_rfa_ps(__m128 I, __m128 N, __m128 eta) +inline __m128 sse_rfa_ps(__m128 I, __m128 N, __m128 eta) { __m128 dot0 = sse_dot_ps(N, I); __m128 mul0 = _mm_mul_ps(eta, eta); diff --git a/glm/core/intrinsic_matrix.inl b/glm/core/intrinsic_matrix.inl index a81f50b7..4a4cebc9 100644 --- a/glm/core/intrinsic_matrix.inl +++ b/glm/core/intrinsic_matrix.inl @@ -1014,7 +1014,7 @@ void sse_rotate_ps(__m128 const in[4], float Angle, float const v[3], __m128 out glm::vec4 AxisA(v[0], v[1], v[2], float(0)); __m128 AxisB = _mm_set_ps(AxisA.w, AxisA.z, AxisA.y, AxisA.x); - __m128 AxisC = _mm_nrm_ps(AxisB); + __m128 AxisC = detail::sse_nrm_ps(AxisB); __m128 Cos0 = _mm_set_ss(c); __m128 CosA = _mm_shuffle_ps(Cos0, Cos0, _MM_SHUFFLE(0, 0, 0, 0)); diff --git a/glm/gtx/simd_mat4.hpp b/glm/gtx/simd_mat4.hpp index d44c8c59..254fa636 100644 --- a/glm/gtx/simd_mat4.hpp +++ b/glm/gtx/simd_mat4.hpp @@ -32,7 +32,7 @@ namespace glm { namespace detail { - GLM_ALIGNED(struct, 16) fmat4x4SIMD + GLM_ALIGN(16) struct fmat4x4SIMD { enum ctor{null}; @@ -132,64 +132,37 @@ namespace glm //! GLM_GTX_simd_mat4 extension: SIMD implementation of vec4 type. namespace simd_mat4 { - typedef detail::fmat4x4SIMD simd_mat4; + typedef detail::fmat4x4SIMD simdMat4; - //! Multiply matrix x by matrix y component-wise, i.e., + //! Multiply matrix x by matrix y component-wise, i.e., //! result[i][j] is the scalar product of x[i][j] and y[i][j]. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simd_matrixCompMult( + detail::fmat4x4SIMD simdMatrixCompMult( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y); - //! Returns the transposed matrix of x - //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simd_transpose(detail::fmat4x4SIMD const & m); - - //! Return the determinant of a mat4 matrix. - //! (From GLM_GTX_simd_mat4 extension). 
- float simd_determinant(detail::fmat4x4SIMD const & m); - - //! Return the inverse of a mat4 matrix. - //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simd_inverse(detail::fmat4x4SIMD const & m); - - }//namespace simd_mat4 - namespace simd_mat4 - { - //! Multiply matrix x by matrix y component-wise, i.e., - //! result[i][j] is the scalar product of x[i][j] and y[i][j]. - //! (From GLM_GTX_simd_mat4 extension). - template - matType matrixCompMult( - matType const & x, - matType const & y); - //! Treats the first parameter c as a column vector //! and the second parameter r as a row vector //! and does a linear algebraic matrix multiply c * r. //! (From GLM_GTX_simd_mat4 extension). - template - matType outerProduct( - vecType const & c, - vecType const & r); + detail::fmat4x4SIMD simdOuterProduct( + detail::fvec4SIMD const & c, + detail::fvec4SIMD const & r); //! Returns the transposed matrix of x //! (From GLM_GTX_simd_mat4 extension). - template - typename matType::transpose_type transpose( - matType const & x); + detail::fmat4x4SIMD simdTranspose( + detail::fmat4x4SIMD const & x); //! Return the determinant of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). - template - typename detail::tmat4x4::value_type determinant( - detail::tmat4x4 const & m); + float simdDeterminant( + detail::fmat4x4SIMD const & m); //! Return the inverse of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). 
- template - detail::tmat4x4 inverse( - detail::tmat4x4 const & m); + detail::fmat4x4SIMD simdInverse( + detail::fmat4x4SIMD const & m); }//namespace simd_mat4 }//namespace gtx diff --git a/glm/gtx/simd_mat4.inl b/glm/gtx/simd_mat4.inl index d2e12901..abcb849c 100644 --- a/glm/gtx/simd_mat4.inl +++ b/glm/gtx/simd_mat4.inl @@ -237,7 +237,7 @@ namespace detail namespace gtx{ namespace simd_mat4 { - inline detail::fmat4x4SIMD simd_matrixCompMult + inline detail::fmat4x4SIMD simdMatrixCompMult ( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y @@ -251,21 +251,30 @@ namespace simd_mat4 return result; } - inline detail::fmat4x4SIMD simd_transpose(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD simdOuterProduct + ( + detail::fvec4SIMD const & c, + detail::fvec4SIMD const & r + ) + { + + } + + inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_transpose_ps(&m[0].Data, &result[0].Data); return result; } - inline float simd_determinant(detail::fmat4x4SIMD const & m) + inline float simdDeterminant(detail::fmat4x4SIMD const & m) { float Result; _mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data)); return Result; } - inline detail::fmat4x4SIMD simd_inverse(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_inverse_ps(&m[0].Data, &result[0].Data); diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index fde38997..4a8e5313 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -32,10 +32,10 @@ namespace glm { namespace detail { - GLM_ALIGNED(union, 4) fvec4SIMD + GLM_ALIGN(4) struct fvec4SIMD { enum ctor{null}; - typedef float value_type; + typedef __m128 value_type; typedef std::size_t size_type; static size_type value_size(); @@ -43,10 +43,6 @@ namespace glm typedef tvec4 bool_type; __m128 Data; - float Array[4]; - struct{float x, y, z, w;}; - struct{float r, g, b, a;}; - 
struct{float s, t, q, p;}; ////////////////////////////////////// // Implicit basic constructors @@ -54,7 +50,10 @@ namespace glm fvec4SIMD(); fvec4SIMD(__m128 const & Data); fvec4SIMD(fvec4SIMD const & v); + fvec4SIMD(tvec4 const & v); + //operator detail::tvec4(); + //operator detail::tvec4 const(); ////////////////////////////////////// // Explicit basic constructors @@ -118,43 +117,13 @@ namespace glm //! GLM_GTX_simd_vec4 extension: SIMD implementation of vec4 type. namespace simd_vec4 { - typedef detail::fvec4SIMD simd_vec4; - - detail::fvec4SIMD simd_length( - detail::fvec4SIMD const & v); - - detail::fvec4SIMD simd_cross( - detail::fvec4SIMD const & a, - detail::fvec4SIMD const & b); - - detail::fvec4SIMD simd_distance( - detail::fvec4SIMD const & v); - - detail::fvec4SIMD simd_dot( - detail::fvec4SIMD const & a, - detail::fvec4SIMD const & b); - - detail::fvec4SIMD simd_normalize( - detail::fvec4SIMD const & v); + typedef detail::fvec4SIMD simdVec4; - detail::fvec4SIMD simd_faceforward( - detail::fvec4SIMD const & N, - detail::fvec4SIMD const & I, - detail::fvec4SIMD const & Nref); - - detail::fvec4SIMD simd_reflect( - detail::fvec4SIMD const & I, - detail::fvec4SIMD const & N); - - detail::fvec4SIMD simd_refract( - detail::fvec4SIMD const & I, - detail::fvec4SIMD const & N, - float const & eta); - - }//namespace simd_vec4 + //! Convert a simdVec4 to a vec4. + //! (From GLM_GTX_simd_vec4 extension) + detail::tvec4 vec4_cast( + detail::fvec4SIMD const & x); - namespace simd_vec4 - { //! Returns the length of x, i.e., sqrt(x * x). //! 
(From GLM_GTX_simd_vec4 extension, geometry functions) float simdLength( diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 7425d270..de4f40a4 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -35,6 +35,20 @@ namespace glm Data(_mm_set_ps(v.w, v.z, v.y, v.x)) {} + inline fvec4SIMD::operator detail::tvec4() + { + detail::tvec4 Result; + _mm_store_ps(&Result[0], this->Data); + return Result; + } + + //inline fvec4SIMD::operator detail::tvec4 const() + //{ + // detail::tvec4 Result; + // _mm_store_ps(&Result[0], this->Data); + // return Result; + //} + ////////////////////////////////////// // Explicit basic constructors @@ -162,14 +176,18 @@ namespace glm template inline fvec4SIMD fvec4SIMD::swizzle() const { - __m128 Data = _mm_shuffle_ps(this->Data, this->Data, mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); + __m128 Data = _mm_shuffle_ps( + this->Data, this->Data, + mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); return fvec4SIMD(Data); } template inline fvec4SIMD& fvec4SIMD::swizzle() { - this->Data = _mm_shuffle_ps(this->Data, this->Data, mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); + this->Data = _mm_shuffle_ps( + this->Data, this->Data, + mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); return *this; } @@ -266,36 +284,23 @@ namespace glm namespace gtx{ namespace simd_vec4 { -# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE) - inline detail::fvec4SIMD simd_cross - ( - detail::fvec4SIMD const & a, - detail::fvec4SIMD const & b - ) - { - return detail::sse_xpd_ps(a.Data, b.Data); - } -# else//(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_PURE) - inline detail::fvec4SIMD simd_cross - ( - detail::fvec4SIMD const & a, - detail::fvec4SIMD const & b - ) - { - return detail::sse_xpd_ps(a.Data, b.Data); - } -# endif - }//namespace simd_vec4 + inline detail::tvec4 vec4_cast + ( + detail::fvec4SIMD const & x + ) + { + detail::tvec4 Result; + _mm_store_ps(&Result[0], x.Data); + return Result; + } - 
namespace simd_vec4 - { inline float simdLength ( detail::fvec4SIMD const & x ) { float Result = 0; - _mm_store_ss(sse_len_ps(x.data), &Result); + _mm_store_ss(&Result, detail::sse_len_ps(x.Data)); return Result; } @@ -306,7 +311,7 @@ namespace glm ) { float Result = 0; - _mm_store_ss(sse_dst_ps(p0.data, p1.data), &Result); + _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data)); return Result; } @@ -317,7 +322,7 @@ namespace glm ) { float Result = 0; - _mm_store_ss(sse_dot_ss(x.data, y.data), &Result); + _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data)); return Result; } @@ -327,7 +332,7 @@ namespace glm detail::fvec4SIMD const & y ) { - return sse_xpd_ps(x.data, y.data); + return detail::sse_xpd_ps(x.Data, y.Data); } inline detail::fvec4SIMD simdNormalize @@ -335,7 +340,7 @@ namespace glm detail::fvec4SIMD const & x ) { - return _mm_nrm_ps(x.data); + return detail::sse_nrm_ps(x.Data); } inline detail::fvec4SIMD simdFaceforward @@ -345,7 +350,7 @@ namespace glm detail::fvec4SIMD const & Nref ) { - return _mm_ffd_ps(N.data, I.data, Nref.data); + return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); } inline detail::fvec4SIMD simdReflect @@ -354,7 +359,7 @@ namespace glm detail::fvec4SIMD const & N ) { - return detail::fvec4SIMD(_mm_rfe_ps(I.data, N.data)); + return detail::sse_rfe_ps(I.Data, N.Data); } inline detail::fvec4SIMD simdRefract @@ -364,7 +369,7 @@ namespace glm float const & eta ) { - return detail::fvec4SIMD(_mm_rfa_ps(I.data, N.data, _mm_set1_ps(eta))); + return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); } }//namespace simd_vec4 diff --git a/test/gtx/gtx-simd-mat4.cpp b/test/gtx/gtx-simd-mat4.cpp index 6b5622d5..e9a56aae 100644 --- a/test/gtx/gtx-simd-mat4.cpp +++ b/test/gtx/gtx-simd-mat4.cpp @@ -42,8 +42,10 @@ std::vector test_detB(std::vector const & Data) for(std::size_t i = 0; i < Test.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); - Test[i] = 
glm::simd_vec4(glm::detail::sse_slow_det_ps((__m128 const * const)&m)).x; + glm::simdMat4 m(Data[i]); + glm::simdVec4 d(glm::detail::sse_slow_det_ps((__m128 const * const)&m)); + glm::vec4 v;//(d); + Test[i] = v.x; } std::clock_t TimeEnd = clock(); @@ -61,8 +63,10 @@ std::vector test_detC(std::vector const & Data) for(std::size_t i = 0; i < Test.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); - Test[i] = glm::simd_vec4(glm::detail::sse_det_ps((__m128 const * const)&m)).x; + glm::simdMat4 m(Data[i]); + glm::simdVec4 d(glm::detail::sse_det_ps((__m128 const * const)&m)); + glm::vec4 v;//(d); + Test[i] = v.x; } std::clock_t TimeEnd = clock(); @@ -80,8 +84,10 @@ std::vector test_detD(std::vector const & Data) for(std::size_t i = 0; i < Test.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); - Test[i] = glm::simd_vec4(glm::detail::sse_detd_ps((__m128 const * const)&m)).x; + glm::simdMat4 m(Data[i]); + glm::simdVec4 d(glm::detail::sse_detd_ps((__m128 const * const)&m)); + glm::vec4 v;//(d); + Test[i] = v.x; } std::clock_t TimeEnd = clock(); @@ -116,8 +122,8 @@ void test_invC(std::vector const & Data, std::vector & Out for(std::size_t i = 0; i < Out.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); - glm::simd_mat4 o; + glm::simdMat4 m(Data[i]); + glm::simdMat4 o; glm::detail::sse_inverse_fast_ps((__m128 const * const)&m, (__m128 *)&o); Out[i] = *(glm::mat4*)&o; } @@ -136,8 +142,8 @@ void test_invD(std::vector const & Data, std::vector & Out for(std::size_t i = 0; i < Out.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); - glm::simd_mat4 o; + glm::simdMat4 m(Data[i]); + glm::simdMat4 o; glm::detail::sse_inverse_ps((__m128 const * const)&m, (__m128 *)&o); Out[i] = *(glm::mat4*)&o; } @@ -172,7 +178,7 @@ void test_mulD(std::vector const & Data, std::vector & Out for(std::size_t i = 0; i < 
Out.size() - 1; ++i) { _mm_prefetch((char*)&Data[i + 1], _MM_HINT_T0); - glm::simd_mat4 m(Data[i]); + glm::simdMat4 m(Data[i]); glm::detail::sse_mul_ps((__m128 const * const)&m, (__m128 const * const)&m, (__m128*)&Out[i]); } @@ -267,15 +273,15 @@ int main() } // shuffle test - glm::simd_vec4 A(1.0f, 2.0f, 3.0f, 4.0f); - glm::simd_vec4 B(5.0f, 6.0f, 7.0f, 8.0f); + glm::simdVec4 A(1.0f, 2.0f, 3.0f, 4.0f); + glm::simdVec4 B(5.0f, 6.0f, 7.0f, 8.0f); __m128 C = _mm_shuffle_ps(A.Data, B.Data, _MM_SHUFFLE(1, 0, 1, 0)); Failed += test_compute_glm(); Failed += test_compute_gtx(); - float Det = glm::simd_determinant(glm::simd_mat4(1.0)); - glm::simd_mat4 B = glm::simd_matrixCompMult(glm::simd_mat4(1.0), glm::simd_mat4(1.0)); + float Det = glm::simdDeterminant(glm::simdMat4(1.0)); + glm::simdMat4 D = glm::simdMatrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); system("pause"); diff --git a/test/gtx/gtx-simd-vec4.cpp b/test/gtx/gtx-simd-vec4.cpp index f13ba95f..56e2bd14 100644 --- a/test/gtx/gtx-simd-vec4.cpp +++ b/test/gtx/gtx-simd-vec4.cpp @@ -13,15 +13,18 @@ int main() { - glm::simd_vec4 A1(0.0f, 0.1f, 0.2f, 0.3f); - glm::simd_vec4 B1(0.4f, 0.5f, 0.6f, 0.7f); - glm::simd_vec4 C1 = A1 + B1; - glm::simd_vec4 D1 = A1.swizzle(); + glm::simdVec4 A1(0.0f, 0.1f, 0.2f, 0.3f); + glm::simdVec4 B1(0.4f, 0.5f, 0.6f, 0.7f); + glm::simdVec4 C1 = A1 + B1; + glm::simdVec4 D1 = A1.swizzle(); + glm::simdVec4 E1 = glm::vec4(1.0f); + glm::vec4 F1 = E1; + //glm::vec4 G1(E1); - printf("A1(%2.3f, %2.3f, %2.3f, %2.3f)\n", A1.x, A1.y, A1.z, A1.w); - printf("B1(%2.3f, %2.3f, %2.3f, %2.3f)\n", B1.x, B1.y, B1.z, B1.w); - printf("C1(%2.3f, %2.3f, %2.3f, %2.3f)\n", C1.x, C1.y, C1.z, C1.w); - printf("D1(%2.3f, %2.3f, %2.3f, %2.3f)\n", D1.x, D1.y, D1.z, D1.w); + //printf("A1(%2.3f, %2.3f, %2.3f, %2.3f)\n", A1.x, A1.y, A1.z, A1.w); + //printf("B1(%2.3f, %2.3f, %2.3f, %2.3f)\n", B1.x, B1.y, B1.z, B1.w); + //printf("C1(%2.3f, %2.3f, %2.3f, %2.3f)\n", C1.x, C1.y, C1.z, C1.w); + //printf("D1(%2.3f, %2.3f, 
%2.3f, %2.3f)\n", D1.x, D1.y, D1.z, D1.w); return 0; }