Fixed SIMD code path selection

master
Christophe Riccio 9 years ago
parent fdb985a0eb
commit d33e3df02a
  1. 2
      glm/detail/func_matrix_simd.inl
  2. 26
      glm/detail/setup.hpp
  3. 2
      glm/gtx/simd_mat4.hpp
  4. 2
      glm/gtx/simd_quat.hpp
  5. 2
      glm/gtx/simd_quat.inl
  6. 2
      glm/gtx/simd_vec4.hpp
  7. 2
      glm/simd/geometric.h
  8. 26
      test/core/core_setup_message.cpp
  9. 8
      test/gtc/gtc_bitfield.cpp

@ -8,7 +8,7 @@
namespace glm{ namespace glm{
namespace detail namespace detail
{ {
# if GLM_ARCH & GLM_ARCH_SSE2 # if GLM_ARCH & GLM_ARCH_SSE2_FLAG
template <precision P> template <precision P>
struct compute_inverse<tmat4x4, float, P> struct compute_inverse<tmat4x4, float, P>
{ {

@ -67,25 +67,29 @@
# define GLM_MESSAGE_ARCH_DISPLAYED # define GLM_MESSAGE_ARCH_DISPLAYED
# if(GLM_ARCH == GLM_ARCH_PURE) # if(GLM_ARCH == GLM_ARCH_PURE)
# pragma message("GLM: Platform independent code") # pragma message("GLM: Platform independent code")
# elif(GLM_ARCH & GLM_ARCH_ARM) # elif(GLM_ARCH == GLM_ARCH_AVX2)
# pragma message("GLM: ARM instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX2)
# pragma message("GLM: AVX2 instruction set") # pragma message("GLM: AVX2 instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX) # elif(GLM_ARCH == GLM_ARCH_AVX)
# pragma message("GLM: AVX instruction set") # pragma message("GLM: AVX instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE3) # elif(GLM_ARCH == GLM_ARCH_SSE42)
# pragma message("GLM: SSE4.2 instruction set")
# elif(GLM_ARCH == GLM_ARCH_SSE41)
# pragma message("GLM: SSE4.1 instruction set")
# elif(GLM_ARCH == GLM_ARCH_SSSE3)
# pragma message("GLM: SSSE3 instruction set")
# elif(GLM_ARCH == GLM_ARCH_SSE3)
# pragma message("GLM: SSE3 instruction set") # pragma message("GLM: SSE3 instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE2) # elif(GLM_ARCH == GLM_ARCH_SSE2)
# pragma message("GLM: SSE2 instruction set") # pragma message("GLM: SSE2 instruction set")
# elif(GLM_ARCH & GLM_ARCH_X86) # elif(GLM_ARCH == GLM_ARCH_X86)
# pragma message("GLM: x86 instruction set") # pragma message("GLM: x86 instruction set")
# elif(GLM_ARCH & GLM_ARCH_NEON) # elif(GLM_ARCH == GLM_ARCH_NEON)
# pragma message("GLM: NEON instruction set") # pragma message("GLM: NEON instruction set")
# elif(GLM_ARCH & GLM_ARCH_ARM) # elif(GLM_ARCH == GLM_ARCH_ARM)
# pragma message("GLM: ARM instruction set") # pragma message("GLM: ARM instruction set")
# elif(GLM_ARCH & GLM_ARCH_MIPS) # elif(GLM_ARCH == GLM_ARCH_MIPS)
# pragma message("GLM: MIPS instruction set") # pragma message("GLM: MIPS instruction set")
# elif(GLM_ARCH & GLM_ARCH_PPC) # elif(GLM_ARCH == GLM_ARCH_PPC)
# pragma message("GLM: PowerPC architechture") # pragma message("GLM: PowerPC architechture")
# endif//GLM_ARCH # endif//GLM_ARCH
#endif//GLM_MESSAGE #endif//GLM_MESSAGE

@ -17,7 +17,7 @@
#if(GLM_ARCH != GLM_ARCH_PURE) #if(GLM_ARCH != GLM_ARCH_PURE)
#if(GLM_ARCH & GLM_ARCH_SSE2) #if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
# include "../detail/intrinsic_matrix.hpp" # include "../detail/intrinsic_matrix.hpp"
# include "../gtx/simd_vec4.hpp" # include "../gtx/simd_vec4.hpp"
#else #else

@ -19,7 +19,7 @@
#if(GLM_ARCH != GLM_ARCH_PURE) #if(GLM_ARCH != GLM_ARCH_PURE)
#if(GLM_ARCH & GLM_ARCH_SSE2) #if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
# include "../gtx/simd_mat4.hpp" # include "../gtx/simd_mat4.hpp"
#else #else
# error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics" # error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"

@ -122,7 +122,7 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const &
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
__m128 mul3 = _mm_mul_ps(q1.Data, q2.Data); __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
# if((GLM_ARCH & GLM_ARCH_SSE4)) # if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);
__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);

@ -17,7 +17,7 @@
#if(GLM_ARCH != GLM_ARCH_PURE) #if(GLM_ARCH != GLM_ARCH_PURE)
#if(GLM_ARCH & GLM_ARCH_SSE2) #if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
# include "../detail/intrinsic_common.hpp" # include "../detail/intrinsic_common.hpp"
# include "../detail/intrinsic_geometric.hpp" # include "../detail/intrinsic_geometric.hpp"
# include "../detail/intrinsic_integer.hpp" # include "../detail/intrinsic_integer.hpp"

@ -119,4 +119,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
return sub2; return sub2;
} }
#endif//GLM_ARCH & GLM_ARCH_SSE2 #endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG

@ -176,18 +176,24 @@ int test_instruction_set()
if(GLM_ARCH == GLM_ARCH_PURE) if(GLM_ARCH == GLM_ARCH_PURE)
std::printf("GLM_ARCH_PURE "); std::printf("GLM_ARCH_PURE ");
if(GLM_ARCH & GLM_ARCH_ARM) if(GLM_ARCH & GLM_ARCH_ARM_FLAG)
std::printf("GLM_ARCH_ARM "); std::printf("ARM ");
if(GLM_ARCH & GLM_ARCH_NEON_FLAG)
std::printf("NEON ");
if(GLM_ARCH & GLM_ARCH_AVX2) if(GLM_ARCH & GLM_ARCH_AVX2)
std::printf("GLM_ARCH_AVX2 "); std::printf("AVX2 ");
if(GLM_ARCH & GLM_ARCH_AVX) if(GLM_ARCH & GLM_ARCH_AVX)
std::printf("GLM_ARCH_AVX "); std::printf("AVX ");
if(GLM_ARCH & GLM_ARCH_AVX) if(GLM_ARCH & GLM_ARCH_SSE42_FLAG)
std::printf("GLM_ARCH_SSE4 "); std::printf("SSE4.2 ");
if(GLM_ARCH & GLM_ARCH_SSE3) if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
std::printf("GLM_ARCH_SSE3 "); std::printf("SSE4.1 ");
if(GLM_ARCH & GLM_ARCH_SSE2) if(GLM_ARCH & GLM_ARCH_SSSE3_FLAG)
std::printf("GLM_ARCH_SSE2 "); std::printf("SSSE3 ");
if(GLM_ARCH & GLM_ARCH_SSE3_FLAG)
std::printf("SSE3 ");
if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
std::printf("SSE2 ");
std::printf("\n"); std::printf("\n");

@ -505,7 +505,7 @@ namespace bitfieldInterleave
assert(A == C); assert(A == C);
assert(A == D); assert(A == D);
# if GLM_ARCH & GLM_ARCH_SSE2 # if GLM_ARCH & GLM_ARCH_SSE2_FLAG
glm::uint64 E = sseBitfieldInterleave(x, y); glm::uint64 E = sseBitfieldInterleave(x, y);
glm::uint64 F = sseUnalignedBitfieldInterleave(x, y); glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
assert(A == E); assert(A == E);
@ -515,7 +515,7 @@ namespace bitfieldInterleave
glm::uint64 Result[2]; glm::uint64 Result[2];
_mm_storeu_si128((__m128i*)Result, G); _mm_storeu_si128((__m128i*)Result, G);
assert(A == Result[0]); assert(A == Result[0]);
# endif//GLM_ARCH & GLM_ARCH_SSE2 # endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
} }
} }
@ -629,7 +629,7 @@ namespace bitfieldInterleave
std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time)); std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
} }
# if(GLM_ARCH & GLM_ARCH_SSE2 && !(GLM_COMPILER & GLM_COMPILER_GCC)) # if(GLM_ARCH & GLM_ARCH_SSE2_FLAG && !(GLM_COMPILER & GLM_COMPILER_GCC))
{ {
// SIMD // SIMD
std::vector<__m128i> SimdData; std::vector<__m128i> SimdData;
@ -648,7 +648,7 @@ namespace bitfieldInterleave
std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time)); std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
} }
# endif//GLM_ARCH & GLM_ARCH_SSE2 # endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
return 0; return 0;
} }

Loading…
Cancel
Save