|
|
|
@@ -54,7 +54,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_add_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			Result.data = _mm_add_ps(a.data, b.data);
 			return Result;
 		}
 	};
@@ -78,7 +78,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_sub_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			Result.data = _mm_sub_ps(a.data, b.data);
 			return Result;
 		}
 	};
@@ -90,7 +90,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_sub_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
+			Result.data = _mm256_sub_pd(a.data, b.data);
 			return Result;
 		}
 	};
@@ -102,7 +102,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_mul_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			Result.data = _mm_mul_ps(a.data, b.data);
 			return Result;
 		}
 	};
@@ -114,7 +114,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_mul_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
+			Result.data = _mm256_mul_pd(a.data, b.data);
 			return Result;
 		}
 	};
@@ -126,19 +126,19 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_div_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			Result.data = _mm_div_ps(a.data, b.data);
 			return Result;
 		}
 	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template<qualifier Q>
 	struct compute_vec4_div<double, Q, true>
 	{
 		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_div_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
+			Result.data = _mm256_div_pd(a.data, b.data);
 			return Result;
 		}
 	};
@@ -150,150 +150,282 @@ namespace detail
 		GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
 		{
 			vec<4, float, aligned_lowp> Result;
-			Result.data = _mm_mul_ps((glm_f32vec4)a.data, _mm_rcp_ps(b.data));
+			Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
 			return Result;
 		}
 	};
 
+	template<qualifier Q>
+	struct compute_vec4_and<int, Q, true, 32, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		{
+			vec<4, int, Q> Result;
+			Result.data = _mm_and_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
-	template<typename T, qualifier Q>
-	struct compute_vec4_and<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_and<uint, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_and_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint, Q> Result;
+			Result.data = _mm_and_si128(a.data, b.data);
 			return Result;
 		}
 	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_and<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_and<int64, Q, true, 64, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		{
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_and_si256(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_and<uint64, Q, true, 64, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_and_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_and_si256(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif
 
-	template<typename T, qualifier Q>
-	struct compute_vec4_or<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_or<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_or_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, int, Q> Result;
+			Result.data = _mm_or_si128(a.data, b.data);
 			return Result;
 		}
 	};
+
+	template<qualifier Q>
+	struct compute_vec4_or<uint, Q, true, 32, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
+		{
+			vec<4, uint, Q> Result;
+			Result.data = _mm_or_si128(a.data, b.data);
+			return Result;
+		}
+	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_or<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_or<int64, Q, true, 64, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		{
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_or_si256(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_or<uint64, Q, true, 64, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_or_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_or_si256(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif
 
-	template<typename T, qualifier Q>
-	struct compute_vec4_xor<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_xor<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_xor_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, int, Q> Result;
+			Result.data = _mm_xor_si128(a.data, b.data);
 			return Result;
 		}
 	};
+
+	template<qualifier Q>
+	struct compute_vec4_xor<uint, Q, true, 32, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
+		{
+			vec<4, uint, Q> Result;
+			Result.data = _mm_xor_si128(a.data, b.data);
+			return Result;
+		}
+	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_xor<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_xor<int64, Q, true, 64, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		{
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_xor_si256(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_xor<uint64, Q, true, 64, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_xor_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_xor_si256(a.data, b.data);
			return Result;
 		}
 	};
 #	endif
 
-	template<typename T, qualifier Q>
-	struct compute_vec4_shift_left<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_shift_left<int, Q, true, 32, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		{
+			vec<4, int, Q> Result;
+			Result.data = _mm_sll_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_shift_left<uint, Q, true, 32, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_sll_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint, Q> Result;
+			Result.data = _mm_sll_epi32(a.data, b.data);
 			return Result;
 		}
 	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_shift_left<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_shift_left<int64, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_sll_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_sll_epi64(a.data, b.data);
 			return Result;
 		}
 	};
+
+	template<qualifier Q>
+	struct compute_vec4_shift_left<uint64, Q, true, 64, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
+		{
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_sll_epi64(a.data, b.data);
+			return Result;
+		}
+	};
 #	endif
 
-	template<typename T, qualifier Q>
-	struct compute_vec4_shift_right<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_shift_right<int, Q, true, 32, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		{
+			vec<4, int, Q> Result;
+			Result.data = _mm_srl_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_shift_right<uint, Q, true, 32, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_srl_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint, Q> Result;
+			Result.data = _mm_srl_epi32(a.data, b.data);
 			return Result;
 		}
 	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_shift_right<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_shift_right<int64, Q, true, 64, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		{
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_srl_epi64(a.data, b.data);
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_shift_right<uint64, Q, true, 64, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_srl_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_srl_epi64(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif
 
-	template<typename T, qualifier Q>
-	struct compute_vec4_bitwise_not<T, Q, true, 32, true>
+	template<qualifier Q>
+	struct compute_vec4_bitwise_not<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
+		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm_xor_si128((glm_f32vec4)v.data, _mm_set1_epi32(-1));
+			vec<4, int, Q> Result;
+			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
 			return Result;
 		}
 	};
+
+	template<qualifier Q>
+	struct compute_vec4_bitwise_not<uint, Q, true, 32, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v)
+		{
+			vec<4, uint, Q> Result;
+			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
+			return Result;
+		}
+	};
 
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<typename T, qualifier Q>
-	struct compute_vec4_bitwise_not<T, Q, true, 64, true>
+	template<qualifier Q>
+	struct compute_vec4_bitwise_not<int64, Q, true, 64, true>
 	{
+		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& v)
+		{
+			vec<4, int64, Q> Result;
+			Result.data = _mm256_xor_si256(v.data, _mm256_set1_epi32(-1));
+			return Result;
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_vec4_bitwise_not<uint64, Q, true, 64, true>
+	{
-		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
+		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& v)
 		{
-			vec<4, T, Q> Result;
-			Result.data = _mm256_xor_si256((glm_f32vec4)v.data, _mm_set1_epi32(-1));
+			vec<4, uint64, Q> Result;
+			Result.data = _mm256_xor_si256(v.data, _mm256_set1_epi32(-1));
 			return Result;
 		}
 	};
@@ -304,7 +436,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
-			return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) == 0;
+			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0;
 		}
 	};
 
@@ -315,7 +447,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
-			__m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
+			__m128i neq = _mm_xor_si128(v1.data, v2.data);
 			return _mm_test_all_zeros(neq, neq) == 0;
 		}
 	};
@@ -326,7 +458,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
-			return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) != 0;
+			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
 		}
 	};
 
@@ -337,7 +469,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
-			__m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
+			__m128i neq = _mm_xor_si128(v1.data, v2.data);
 			return _mm_test_all_zeros(neq, neq) != 0;
 		}
 	};
|
|
|
|
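Below is a minimal usage sketch, not part of the patch, showing the kind of code these specializations serve. It assumes a build with SSE2 available and aligned gentypes enabled; GLM_FORCE_INTRINSICS, GLM_FORCE_ALIGNED_GENTYPES and glm::aligned_ivec4 are GLM's own names, while the values and the printf scaffolding are made up for illustration. On this configuration the bitwise operators on aligned integer vec4s should dispatch to the int/uint compute_vec4_* specializations added above rather than the scalar fallback.

// Hypothetical test program (editor's illustration, not from the patch).
#define GLM_FORCE_INTRINSICS
#define GLM_FORCE_ALIGNED_GENTYPES
#include <glm/glm.hpp>
#include <glm/gtc/type_aligned.hpp>
#include <cstdio>

int main()
{
	glm::aligned_ivec4 const a(0xF0, 0x0F, 0xFF, 0x00);
	glm::aligned_ivec4 const b(0x3C, 0x3C, 0x3C, 0x3C);

	// With SSE2 these should lower to _mm_and_si128 / _mm_or_si128 /
	// _mm_xor_si128 via the specializations in type_vec4_simd.inl.
	glm::aligned_ivec4 const And = a & b;
	glm::aligned_ivec4 const Or  = a | b;
	glm::aligned_ivec4 const Xor = a ^ b;
	glm::aligned_ivec4 const Not = ~a;

	std::printf("%d %d %d %d\n", And.x, Or.y, Xor.z, Not.w);
	return 0;
}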