Fixed SIMD build

master
Christophe Riccio 8 years ago
parent 211881abf9
commit 8bcf9b5ae7
  1. 108
      glm/detail/func_common_simd.inl
  2. 4
      glm/detail/func_exponential_simd.inl
  3. 36
      glm/detail/func_geometric_simd.inl
  4. 12
      glm/detail/func_integer_simd.inl
  5. 20
      glm/detail/func_matrix_simd.inl

@ -11,11 +11,11 @@ namespace glm{
namespace detail namespace detail
{ {
template<qualifier Q> template<qualifier Q>
struct compute_abs_vector<4, float, P, true> struct compute_abs_vector<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_abs(v.data); result.data = glm_vec4_abs(v.data);
return result; return result;
} }
@ -33,168 +33,168 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_floor<4, float, P, true> struct compute_floor<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_floor(v.data); result.data = glm_vec4_floor(v.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_ceil<4, float, P, true> struct compute_ceil<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_ceil(v.data); result.data = glm_vec4_ceil(v.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_fract<4, float, P, true> struct compute_fract<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_fract(v.data); result.data = glm_vec4_fract(v.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_round<4, float, P, true> struct compute_round<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_round(v.data); result.data = glm_vec4_round(v.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_mod<4, float, P, true> struct compute_mod<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = glm_vec4_mod(x.data, y.data); result.data = glm_vec4_mod(x.data, y.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_min_vector<4, float, P, true> struct compute_min_vector<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = _mm_min_ps(v1.data, v2.data); result.data = _mm_min_ps(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_min_vector<4, int32, P, true> struct compute_min_vector<4, int32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, P> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
{ {
vec<4, int32, P> result; vec<4, int32, Q> result;
result.data = _mm_min_epi32(v1.data, v2.data); result.data = _mm_min_epi32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_min_vector<4, uint32, P, true> struct compute_min_vector<4, uint32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, P> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2)
{ {
vec<4, uint32, P> result; vec<4, uint32, Q> result;
result.data = _mm_min_epu32(v1.data, v2.data); result.data = _mm_min_epu32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_max_vector<4, float, P, true> struct compute_max_vector<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = _mm_max_ps(v1.data, v2.data); result.data = _mm_max_ps(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_max_vector<4, int32, P, true> struct compute_max_vector<4, int32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, P> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
{ {
vec<4, int32, P> result; vec<4, int32, Q> result;
result.data = _mm_max_epi32(v1.data, v2.data); result.data = _mm_max_epi32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_max_vector<4, uint32, P, true> struct compute_max_vector<4, uint32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, P> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2)
{ {
vec<4, uint32, P> result; vec<4, uint32, Q> result;
result.data = _mm_max_epu32(v1.data, v2.data); result.data = _mm_max_epu32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_clamp_vector<4, float, P, true> struct compute_clamp_vector<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& x, vec<4, float, Q> const& minVal, vec<4, float, Q> const& maxVal) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& minVal, vec<4, float, Q> const& maxVal)
{ {
vec<4, float, P> result; vec<4, float, Q> result;
result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data); result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_clamp_vector<4, int32, P, true> struct compute_clamp_vector<4, int32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, P> call(vec<4, int32, Q> const& x, vec<4, int32, Q> const& minVal, vec<4, int32, Q> const& maxVal) GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& x, vec<4, int32, Q> const& minVal, vec<4, int32, Q> const& maxVal)
{ {
vec<4, int32, P> result; vec<4, int32, Q> result;
result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data); result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_clamp_vector<4, uint32, P, true> struct compute_clamp_vector<4, uint32, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, P> call(vec<4, uint32, Q> const& x, vec<4, uint32, Q> const& minVal, vec<4, uint32, Q> const& maxVal) GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& x, vec<4, uint32, Q> const& minVal, vec<4, uint32, Q> const& maxVal)
{ {
vec<4, uint32, P> result; vec<4, uint32, Q> result;
result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data); result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_mix_vector<4, float, bool, P, true> struct compute_mix_vector<4, float, bool, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y, vec<4, bool, Q> const& a) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y, vec<4, bool, Q> const& a)
{ {
__m128i const Load = _mm_set_epi32(-static_cast<int>(a.w), -static_cast<int>(a.z), -static_cast<int>(a.y), -static_cast<int>(a.x)); __m128i const Load = _mm_set_epi32(-static_cast<int>(a.w), -static_cast<int>(a.z), -static_cast<int>(a.y), -static_cast<int>(a.x));
__m128 const Mask = _mm_castsi128_ps(Load); __m128 const Mask = _mm_castsi128_ps(Load);
vec<4, float, P> Result; vec<4, float, Q> Result;
# if 0 && GLM_ARCH & GLM_ARCH_AVX # if 0 && GLM_ARCH & GLM_ARCH_AVX
Result.data = _mm_blendv_ps(x.data, y.data, Mask); Result.data = _mm_blendv_ps(x.data, y.data, Mask);
# else # else
@ -205,22 +205,22 @@ namespace detail
}; };
/* FIXME /* FIXME
template<qualifier Q> template<qualifier Q>
struct compute_step_vector<float, P, tvec4> struct compute_step_vector<float, Q, tvec4>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& edge, vec<4, float, Q> const& x) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge, vec<4, float, Q> const& x)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
result.data = glm_vec4_step(edge.data, x.data); result.data = glm_vec4_step(edge.data, x.data);
return result; return result;
} }
}; };
*/ */
template<qualifier Q> template<qualifier Q>
struct compute_smoothstep_vector<4, float, P, true> struct compute_smoothstep_vector<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& edge0, vec<4, float, Q> const& edge1, vec<4, float, Q> const& x) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge0, vec<4, float, Q> const& edge1, vec<4, float, Q> const& x)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data); Result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data);
return Result; return Result;
} }

@ -9,9 +9,9 @@ namespace glm{
namespace detail namespace detail
{ {
template<qualifier Q> template<qualifier Q>
struct compute_sqrt<4, float, P, true> struct compute_sqrt<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> Result; vec<4, float, P> Result;
Result.data = _mm_sqrt_ps(v.data); Result.data = _mm_sqrt_ps(v.data);

@ -9,7 +9,7 @@ namespace glm{
namespace detail namespace detail
{ {
template<qualifier Q> template<qualifier Q>
struct compute_length<4, float, P, true> struct compute_length<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
{ {
@ -18,7 +18,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_distance<4, float, P, true> struct compute_distance<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
{ {
@ -27,7 +27,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_dot<vec<4, float, P>, float, true> struct compute_dot<vec<4, float, Q>, float, true>
{ {
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
{ {
@ -36,59 +36,59 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_cross<float, P, true> struct compute_cross<float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<3, float, P> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b) GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b)
{ {
__m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x); __m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x);
__m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x); __m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x);
__m128 const xpd0 = glm_vec4_cross(set0, set1); __m128 const xpd0 = glm_vec4_cross(set0, set1);
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = xpd0; Result.data = xpd0;
return vec<3, float, Q>(Result); return vec<3, float, Q>(Result);
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_normalize<4, float, P, true> struct compute_normalize<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = glm_vec4_normalize(v.data); Result.data = glm_vec4_normalize(v.data);
return Result; return Result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_faceforward<4, float, P, true> struct compute_faceforward<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& N, vec<4, float, Q> const& I, vec<4, float, Q> const& Nref) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& N, vec<4, float, Q> const& I, vec<4, float, Q> const& Nref)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = glm_vec4_faceforward(N.data, I.data, Nref.data); Result.data = glm_vec4_faceforward(N.data, I.data, Nref.data);
return Result; return Result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_reflect<4, float, P, true> struct compute_reflect<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = glm_vec4_reflect(I.data, N.data); Result.data = glm_vec4_reflect(I.data, N.data);
return Result; return Result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_refract<4, float, P, true> struct compute_refract<4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, float, P> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N, float eta) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N, float eta)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = glm_vec4_refract(I.data, N.data, _mm_set1_ps(eta)); Result.data = glm_vec4_refract(I.data, N.data, _mm_set1_ps(eta));
return Result; return Result;
} }

@ -8,10 +8,10 @@
namespace glm{ namespace glm{
namespace detail namespace detail
{ {
template<glm::qualifier P> template<qualifier Q>
struct compute_bitfieldReverseStep<4, uint32, P, true, true> struct compute_bitfieldReverseStep<4, uint32, Q, true, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, P> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift) GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift)
{ {
__m128i const set0 = v.data; __m128i const set0 = v.data;
@ -29,10 +29,10 @@ namespace detail
} }
}; };
template<glm::qualifier P> template<qualifier Q>
struct compute_bitfieldBitCountStep<4, uint32, P, true, true> struct compute_bitfieldBitCountStep<4, uint32, Q, true, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, P> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift) GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift)
{ {
__m128i const set0 = v.data; __m128i const set0 = v.data;

@ -12,13 +12,13 @@ namespace glm{
namespace detail namespace detail
{ {
template<qualifier Q> template<qualifier Q>
struct compute_matrixCompMult<mat, 4, 4, float, P, true> struct compute_matrixCompMult<mat, 4, 4, float, Q, true>
{ {
GLM_STATIC_ASSERT(detail::is_aligned<P>::value, "Specialization requires aligned"); GLM_STATIC_ASSERT(detail::is_aligned<P>::value, "Specialization requires aligned");
GLM_FUNC_QUALIFIER static mat<4, 4, float, P> call(mat<4, 4, float, Q> const& x, mat<4, 4, float, Q> const& y) GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& x, mat<4, 4, float, Q> const& y)
{ {
mat<4, 4, float, P> Result; mat<4, 4, float, Q> Result;
glm_mat4_matrixCompMult( glm_mat4_matrixCompMult(
*static_cast<glm_vec4 const (*)[4]>(&x[0].data), *static_cast<glm_vec4 const (*)[4]>(&x[0].data),
*static_cast<glm_vec4 const (*)[4]>(&y[0].data), *static_cast<glm_vec4 const (*)[4]>(&y[0].data),
@ -28,11 +28,11 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_transpose<mat, 4, 4, float, P, true> struct compute_transpose<mat, 4, 4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static mat<4, 4, float, P> call(mat<4, 4, float, Q> const& m) GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m)
{ {
mat<4, 4, float, P> Result; mat<4, 4, float, Q> Result;
glm_mat4_transpose( glm_mat4_transpose(
*static_cast<glm_vec4 const (*)[4]>(&m[0].data), *static_cast<glm_vec4 const (*)[4]>(&m[0].data),
*static_cast<glm_vec4(*)[4]>(&Result[0].data)); *static_cast<glm_vec4(*)[4]>(&Result[0].data));
@ -41,7 +41,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_determinant<mat, 4, 4, float, P, true> struct compute_determinant<mat, 4, 4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static float call(mat<4, 4, float, Q> const& m) GLM_FUNC_QUALIFIER static float call(mat<4, 4, float, Q> const& m)
{ {
@ -50,11 +50,11 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_inverse<mat, 4, 4, float, P, true> struct compute_inverse<mat, 4, 4, float, Q, true>
{ {
GLM_FUNC_QUALIFIER static mat<4, 4, float, P> call(mat<4, 4, float, Q> const& m) GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m)
{ {
mat<4, 4, float, P> Result; mat<4, 4, float, Q> Result;
glm_mat4_inverse(*reinterpret_cast<__m128 const(*)[4]>(&m[0].data), *reinterpret_cast<__m128(*)[4]>(&Result[0].data)); glm_mat4_inverse(*reinterpret_cast<__m128 const(*)[4]>(&m[0].data), *reinterpret_cast<__m128(*)[4]>(&Result[0].data));
return Result; return Result;
} }

Loading…
Cancel
Save