diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index da2736ed..186fd045 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -122,6 +122,218 @@ namespace glm detail::tvec4 vec4_cast( detail::fvec4SIMD const & x); + //! Returns x if x >= 0; otherwise, it returns -x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD abs(detail::fvec4SIMD const & x); + + //! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD sign(detail::fvec4SIMD const & x); + + //! Returns a value equal to the nearest integer that is less than or equal to x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD floor(detail::fvec4SIMD const & x); + + //! Returns a value equal to the nearest integer to x + //! whose absolute value is not larger than the absolute value of x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD trunc(detail::fvec4SIMD const & x); + + //! Returns a value equal to the nearest integer to x. + //! The fraction 0.5 will round in a direction chosen by the + //! implementation, presumably the direction that is fastest. + //! This includes the possibility that round(x) returns the + //! same value as roundEven(x) for all values of x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD round(detail::fvec4SIMD const & x); + + //! Returns a value equal to the nearest integer to x. + //! A fractional part of 0.5 will round toward the nearest even + //! integer. (Both 3.5 and 4.5 for x will return 4.0.) + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::fvec4SIMD roundEven(detail::fvec4SIMD const & x); + + //! Returns a value equal to the nearest integer + //! that is greater than or equal to x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD ceil(detail::fvec4SIMD const & x); + + //! Return x - floor(x). + //! 
(From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD fract(detail::fvec4SIMD const & x); + + //! Modulus. Returns x - y * floor(x / y) + //! for each component in x using the floating point value y. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD mod( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y); + + //! Modulus. Returns x - y * floor(x / y) + //! for each component in x using the floating point value y. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD mod( + detail::fvec4SIMD const & x, + float const & y); + + //! Returns the fractional part of x and sets i to the integer + //! part (as a whole number floating point value). Both the + //! return value and the output parameter will have the same + //! sign as x. + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::fvec4SIMD modf( + // detail::fvec4SIMD const & x, + // detail::fvec4SIMD & i); + + //! Returns y if y < x; otherwise, it returns x. The overload taking a float compares every component of x against that single value. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD min( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y); + + detail::fvec4SIMD min( + detail::fvec4SIMD const & x, + float const & y); + + //! Returns y if x < y; otherwise, it returns x. The overload taking a float compares every component of x against that single value. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD max( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y); + + detail::fvec4SIMD max( + detail::fvec4SIMD const & x, + float const & y); + + //! Returns min(max(x, minVal), maxVal) for each component in x + //! using the floating-point values minVal and maxVal. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD clamp( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & minVal, + detail::fvec4SIMD const & maxVal); + + detail::fvec4SIMD clamp( + detail::fvec4SIMD const & x, + float const & minVal, + float const & maxVal); + + //! 
\return If genTypeU is a floating scalar or vector: + //! Returns x * (1.0 - a) + y * a, i.e., the linear blend of + //! x and y using the floating-point value a. + //! The value for a is not restricted to the range [0, 1]. + //! + //! \return If genTypeU is a boolean scalar or vector: + //! Selects which vector each returned component comes + //! from. For a component of a that is false, the + //! corresponding component of x is returned. For a + //! component of a that is true, the corresponding + //! component of y is returned. Components of x and y that + //! are not selected are allowed to be invalid floating point + //! values and will have no effect on the results. Thus, this + //! provides different functionality than + //! genType mix(genType x, genType y, genType(a)) + //! where a is a Boolean vector. + //! + //! From GLSL 1.30.08 specification, section 8.3. NOTE(review): the overload declared here takes a as an fvec4SIMD, so only the floating-point form above applies to it. + //! + //! \param[in] x Floating point scalar or vector. + //! \param[in] y Floating point scalar or vector. + //! \param[in] a Floating point or boolean scalar or vector. + //! + // \todo Test when 'a' is a boolean. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD mix( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y, + detail::fvec4SIMD const & a); + + //! Returns 0.0 if x < edge, otherwise it returns 1.0. The overload taking a float uses that single edge for every component of x. + //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD step( + detail::fvec4SIMD const & edge, + detail::fvec4SIMD const & x); + + detail::fvec4SIMD step( + float const & edge, + detail::fvec4SIMD const & x); + + //! Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and + //! performs smooth Hermite interpolation between 0 and 1 + //! when edge0 < x < edge1. This is useful in cases where + //! you would want a threshold function with a smooth + //! transition. This is equivalent to: + //! genType t; + //! t = clamp ((x - edge0) / (edge1 - edge0), 0, 1); + //! return t * t * (3 - 2 * t); + //! Results are undefined if edge0 >= edge1. The overload taking floats uses the same edge0 and edge1 for every component of x. 
+ //! (From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD smoothstep( + detail::fvec4SIMD const & edge0, + detail::fvec4SIMD const & edge1, + detail::fvec4SIMD const & x); + + detail::fvec4SIMD smoothstep( + float const & edge0, + float const & edge1, + detail::fvec4SIMD const & x); + + //! Returns true if x holds a NaN (not a number) + //! representation in the underlying implementation's set of + //! floating point representations. Returns false otherwise, + //! including for implementations with no NaN + //! representations. + //! (From GLM_GTX_simd_vec4 extension, common function) + //bvec4 isnan(detail::fvec4SIMD const & x); + + //! Returns true if x holds a positive infinity or negative + //! infinity representation in the underlying implementation's + //! set of floating point representations. Returns false + //! otherwise, including for implementations with no infinity + //! representations. + //! (From GLM_GTX_simd_vec4 extension, common function) + //bvec4 isinf(detail::fvec4SIMD const & x); + + //! Returns a signed or unsigned integer value representing + //! the encoding of a floating-point value. The floating-point + //! value's bit-level representation is preserved. + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::ivec4SIMD floatBitsToInt(detail::fvec4SIMD const & value); + + //! Returns a floating-point value corresponding to a signed + //! or unsigned integer encoding of a floating-point value. + //! If an inf or NaN is passed in, it will not signal, and the + //! resulting floating point value is unspecified. Otherwise, + //! the bit-level representation is preserved. + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::fvec4SIMD intBitsToFloat(detail::ivec4SIMD const & value); + + //! Computes and returns a * b + c. + //! 
(From GLM_GTX_simd_vec4 extension, common function) + detail::fvec4SIMD fma( + detail::fvec4SIMD const & a, + detail::fvec4SIMD const & b, + detail::fvec4SIMD const & c); + + //! Splits x into a floating-point significand in the range + //! [0.5, 1.0) and an integral exponent of two, such that: + //! x = significand * exp(2, exponent) + //! The significand is returned by the function and the + //! exponent is returned in the parameter exp. For a + //! floating-point value of zero, the significand and exponent + //! are both zero. For a floating-point value that is an + //! infinity or is not a number, the results are undefined. + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::fvec4SIMD frexp(detail::fvec4SIMD const & x, detail::ivec4SIMD & exp); + + //! Builds a floating-point number from x and the + //! corresponding integral exponent of two in exp, returning: + //! significand * exp(2, exponent) + //! If this product is too large to be represented in the + //! floating-point type, the result is undefined. + //! (From GLM_GTX_simd_vec4 extension, common function) + //detail::fvec4SIMD ldexp(detail::fvec4SIMD const & x, detail::ivec4SIMD const & exp); + //! Returns the length of x, i.e., sqrt(x * x). //! 
(From GLM_GTX_simd_vec4 extension, geometry functions) float simdLength( diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index e38d3199..bef92cb2 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -280,6 +280,241 @@ namespace glm return Result; } + inline detail::fvec4SIMD abs // inline like its siblings; a non-inline definition here would be emitted by every translation unit that includes it + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_abs_ps(x.Data); + } + + inline detail::fvec4SIMD sign + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_sgn_ps(x.Data); + } + + inline detail::fvec4SIMD floor + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_flr_ps(x.Data); + } + + inline detail::fvec4SIMD trunc + ( + detail::fvec4SIMD const & x + ) + { + return _mm_or_ps(detail::sse_flr_ps(detail::sse_abs_ps(x.Data)), _mm_and_ps(x.Data, _mm_set1_ps(-0.0f))); // floor(abs(x)) alone drops the sign; OR x's sign bit back in so negative inputs truncate toward zero (assumes sse_abs_ps clears the sign bit - confirm) + } + + inline detail::fvec4SIMD round + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_rnd_ps(x.Data); + } + + //inline detail::fvec4SIMD roundEven + //( + // detail::fvec4SIMD const & x + //) + //{ + + //} + + inline detail::fvec4SIMD ceil + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_ceil_ps(x.Data); + } + + inline detail::fvec4SIMD fract + ( + detail::fvec4SIMD const & x + ) + { + return detail::sse_frc_ps(x.Data); + } + + inline detail::fvec4SIMD mod + ( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y + ) + { + return detail::sse_mod_ps(x.Data, y.Data); + } + + inline detail::fvec4SIMD mod + ( + detail::fvec4SIMD const & x, + float const & y + ) + { + return detail::sse_mod_ps(x.Data, _mm_set1_ps(y)); + } + + //inline detail::fvec4SIMD modf + //( + // detail::fvec4SIMD const & x, + // detail::fvec4SIMD & i + //) + //{ + + //} + + inline detail::fvec4SIMD min + ( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y + ) + { + return _mm_min_ps(x.Data, y.Data); + } + + inline detail::fvec4SIMD min + ( + detail::fvec4SIMD const & x, + float const & y + ) + { + return _mm_min_ps(x.Data, _mm_set1_ps(y)); + } + + inline detail::fvec4SIMD max + ( + detail::fvec4SIMD const & x, + 
detail::fvec4SIMD const & y + ) + { + return _mm_max_ps(x.Data, y.Data); + } + + inline detail::fvec4SIMD max + ( + detail::fvec4SIMD const & x, + float const & y + ) + { + return _mm_max_ps(x.Data, _mm_set1_ps(y)); + } + + inline detail::fvec4SIMD clamp + ( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & minVal, + detail::fvec4SIMD const & maxVal + ) + { + return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data); + } + + inline detail::fvec4SIMD clamp + ( + detail::fvec4SIMD const & x, + float const & minVal, + float const & maxVal + ) + { + return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)); + } + + inline detail::fvec4SIMD mix + ( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y, + detail::fvec4SIMD const & a + ) + { + __m128 Sub0 = _mm_sub_ps(y.Data, x.Data); + __m128 Mul0 = _mm_mul_ps(a.Data, Sub0); + return _mm_add_ps(x.Data, Mul0); // x + a * (y - x), i.e. x * (1 - a) + y * a; the previous multiply returned x * a * (y - x) + } + + inline detail::fvec4SIMD step + ( + detail::fvec4SIMD const & edge, + detail::fvec4SIMD const & x + ) + { + __m128 cmp0 = _mm_cmpnlt_ps(x.Data, edge.Data); // all-ones mask in each lane where !(x < edge) + return _mm_and_ps(cmp0, detail::one); // mask & one: 1.0 where set, 0.0 elsewhere (assumes detail::one holds 1.0f in every lane - confirm) + } + + inline detail::fvec4SIMD step + ( + float const & edge, + detail::fvec4SIMD const & x + ) + { + __m128 cmp0 = _mm_cmpnlt_ps(x.Data, _mm_set1_ps(edge)); // same test with the scalar edge copied to all four lanes + return _mm_and_ps(cmp0, detail::one); // mask & one: 1.0 where set, 0.0 elsewhere (assumes detail::one holds 1.0f in every lane - confirm) + } + + inline detail::fvec4SIMD smoothstep + ( + detail::fvec4SIMD const & edge0, + detail::fvec4SIMD const & edge1, + detail::fvec4SIMD const & x + ) + { + return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data); + } + + inline detail::fvec4SIMD smoothstep + ( + float const & edge0, + float const & edge1, + detail::fvec4SIMD const & x + ) + { + return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data); + } + + //inline bvec4 isnan(detail::fvec4SIMD const & x) + //{ + + //} + + //inline bvec4 isinf(detail::fvec4SIMD const & x) + //{ + + //} + + //inline detail::ivec4SIMD floatBitsToInt + //( + // 
detail::fvec4SIMD const & value + //) + //{ + + //} + + //inline detail::fvec4SIMD intBitsToFloat + //( + // detail::ivec4SIMD const & value + //) + //{ + + //} + + inline detail::fvec4SIMD fma + ( + detail::fvec4SIMD const & a, + detail::fvec4SIMD const & b, + detail::fvec4SIMD const & c + ) + { + return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); // a * b + c per component, done as a separate multiply followed by an add (no fused multiply-add intrinsic is used) + } + + inline float simdLength ( detail::fvec4SIMD const & x