|
|
|
@ -96,4 +96,71 @@ namespace detail |
|
|
|
|
}//namespace detail |
|
|
|
|
}//namespace glm |
|
|
|
|
|
|
|
|
|
#elif GLM_ARCH & GLM_ARCH_NEON_BIT |
|
|
|
|
namespace glm{ |
|
|
|
|
namespace detail |
|
|
|
|
{ |
|
|
|
|
template<qualifier Q> |
|
|
|
|
struct compute_length<4, float, Q, true> |
|
|
|
|
{ |
|
|
|
|
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) |
|
|
|
|
{ |
|
|
|
|
return compute_dot<vec<4, float, Q>, float, true>::call(v, v); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template<qualifier Q> |
|
|
|
|
struct compute_distance<4, float, Q, true> |
|
|
|
|
{ |
|
|
|
|
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) |
|
|
|
|
{ |
|
|
|
|
return compute_length<4, float, Q, true>::call(p1 - p0); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<qualifier Q> |
|
|
|
|
struct compute_dot<vec<4, float, Q>, float, true> |
|
|
|
|
{ |
|
|
|
|
GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) |
|
|
|
|
{ |
|
|
|
|
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT |
|
|
|
|
float32x4_t v = vmulq_f32(x.data, y.data); |
|
|
|
|
v = vpaddq_f32(v, v); |
|
|
|
|
v = vpaddq_f32(v, v); |
|
|
|
|
return vgetq_lane_f32(v, 0); |
|
|
|
|
#else // Armv7a with Neon |
|
|
|
|
float32x4_t p = vmulq_f32(x.data, y.data); |
|
|
|
|
float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); |
|
|
|
|
v = vpadd_f32(v, v); |
|
|
|
|
return vget_lane_f32(v, 0); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template<qualifier Q> |
|
|
|
|
struct compute_normalize<4, float, Q, true> |
|
|
|
|
{ |
|
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) |
|
|
|
|
{ |
|
|
|
|
float32x4_t p = vmulq_f32(v.data, v.data); |
|
|
|
|
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT |
|
|
|
|
p = vpaddq_f32(p, p); |
|
|
|
|
p = vpaddq_f32(p, p); |
|
|
|
|
#else |
|
|
|
|
float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); |
|
|
|
|
t = vpadd_f32(t, t); |
|
|
|
|
p = vcombine_f32(t, t); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
float32x4_t vd = vrsqrteq_f32(p); |
|
|
|
|
vec<4, float, Q> Result; |
|
|
|
|
Result.data = vmulq_f32(v, vd); |
|
|
|
|
return Result; |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
}//namespace detail |
|
|
|
|
}//namespace glm |
|
|
|
|
|
|
|
|
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT |
|
|
|
|