Fixed mod function specialization #281 Fixed bitscan detection

master
Christophe Riccio 11 years ago
parent 9b250cc86f
commit 7e81213fdd
  1. 16
      glm/detail/func_common.inl
  2. 8
      glm/detail/func_integer.inl
  3. 9
      glm/detail/setup.hpp
  4. 40
      glm/gtc/integer.hpp
  5. 35
      glm/gtc/integer.inl
  6. 35
      test/core/core_func_common.cpp
  7. 55
      test/core/core_func_integer.cpp
  8. 63
      test/gtc/gtc_integer.cpp

@ -162,6 +162,16 @@ namespace detail
return (x >> Shift) | y; return (x >> Shift) | y;
} }
}; };
// Primary compute_mod template: evaluates a - b * floor(a / b), where b is either
// a scalar or a vector (genType). The static assert rejects any T that is not an
// IEC 559 floating-point type; its message points at <glm/gtc/integer.hpp> for
// integer inputs.
template <typename T, precision P, template <class, precision> class vecType, typename genType, bool isFloat = true>
struct compute_mod
{
	GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & a, genType const & b)
	{
		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs. Include <glm/gtc/integer.hpp> for integer inputs.");

		// Number of whole multiples of b contained in a, rounded toward -infinity.
		vecType<T, P> const Quotient(floor(a / b));
		return a - b * Quotient;
	}
};
}//namespace detail }//namespace detail
// abs // abs
@ -334,15 +344,13 @@ namespace detail
template <typename T, precision P, template <typename, precision> class vecType> template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, T y) GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, T y)
{ {
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs"); return detail::compute_mod<T, P, vecType, T, std::numeric_limits<T>::is_iec559>::call(x, y);
return x - y * floor(x / y);
} }
template <typename T, precision P, template <typename, precision> class vecType> template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y) GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y)
{ {
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs"); return detail::compute_mod<T, P, vecType, vecType<T, P>, std::numeric_limits<T>::is_iec559>::call(x, y);
return x - y * floor(x / y);
} }
// modf // modf

@ -104,7 +104,7 @@ namespace detail
} }
}; };
# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))) # if GLM_HAS_BITSCAN_WINDOWS
template <typename genIUType> template <typename genIUType>
struct compute_findLSB<genIUType, 32> struct compute_findLSB<genIUType, 32>
{ {
@ -126,7 +126,7 @@ namespace detail
return IsNotNull ? int(Result) : -1; return IsNotNull ? int(Result) : -1;
} }
}; };
# endif//GLM_ARCH != GLM_ARCH_PURE # endif//GLM_HAS_BITSCAN_WINDOWS
template <typename T, glm::precision P, template <class, glm::precision> class vecType, bool EXEC = true> template <typename T, glm::precision P, template <class, glm::precision> class vecType, bool EXEC = true>
struct compute_findMSB_step_vec struct compute_findMSB_step_vec
@ -162,7 +162,7 @@ namespace detail
} }
}; };
# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))) # if GLM_HAS_BITSCAN_WINDOWS
template <typename genIUType> template <typename genIUType>
GLM_FUNC_QUALIFIER int compute_findMSB_32(genIUType Value) GLM_FUNC_QUALIFIER int compute_findMSB_32(genIUType Value)
{ {
@ -196,7 +196,7 @@ namespace detail
return detail::functor1<int, T, P, vecType>::call(compute_findMSB_64, x); return detail::functor1<int, T, P, vecType>::call(compute_findMSB_64, x);
} }
}; };
# endif//GLM_ARCH != GLM_ARCH_PURE # endif//GLM_HAS_BITSCAN_WINDOWS
}//namespace detail }//namespace detail
// uaddCarry // uaddCarry

@ -525,14 +525,19 @@
(GLM_LANG & GLM_LANG_CXX11_FLAG) || \ (GLM_LANG & GLM_LANG_CXX11_FLAG) || \
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC49))) ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC49)))
#define GLM_HAS_TRIVIAL_QUERIES (\ #define GLM_HAS_TRIVIAL_QUERIES ( \
((GLM_LANG & GLM_LANG_CXX11_FLAG) && !(GLM_COMPILER & GLM_COMPILER_GCC)) || \ ((GLM_LANG & GLM_LANG_CXX11_FLAG) && !(GLM_COMPILER & GLM_COMPILER_GCC)) || \
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013))) ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013)))
#define GLM_HAS_MAKE_SIGNED (\ #define GLM_HAS_MAKE_SIGNED ( \
(GLM_LANG & GLM_LANG_CXX11_FLAG) || \ (GLM_LANG & GLM_LANG_CXX11_FLAG) || \
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013))) ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013)))
// Non-zero when the bit-scan intrinsic code paths may be compiled: the build must
// not be GLM_ARCH_PURE, the platform must be Windows, and the compiler must be
// one of VC, LLVM or Intel.
#define GLM_HAS_BITSCAN_WINDOWS ( \
(GLM_ARCH != GLM_ARCH_PURE) && \
(GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && \
(GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)))
// OpenMP // OpenMP
#ifdef _OPENMP #ifdef _OPENMP
# if GLM_COMPILER & GLM_COMPILER_GCC # if GLM_COMPILER & GLM_COMPILER_GCC

@ -45,6 +45,7 @@
// Dependencies // Dependencies
#include "../detail/setup.hpp" #include "../detail/setup.hpp"
#include "../detail/precision.hpp" #include "../detail/precision.hpp"
#include "../detail/func_common.hpp"
#include "../detail/func_integer.hpp" #include "../detail/func_integer.hpp"
#include "../detail/func_exponential.hpp" #include "../detail/func_exponential.hpp"
#include <limits> #include <limits>
@ -58,11 +59,46 @@ namespace glm
/// @addtogroup gtc_integer /// @addtogroup gtc_integer
/// @{ /// @{
/// Returns the log2 of x. Can be reliably using to compute mipmap count from the texture size. /// Returns the log2 of x for integer values. Can be reliably using to compute mipmap count from the texture size.
/// From GLM_GTC_integer extension. /// @see gtc_integer
template <typename genIUType> template <typename genIUType>
GLM_FUNC_DECL genIUType log2(genIUType x); GLM_FUNC_DECL genIUType log2(genIUType x);
/// Modulus. Returns x % y
/// for each component in x using the integer value y.
///
/// @tparam genIUType Integer scalar or vector types.
///
/// @see gtc_integer
/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
template <typename genIUType>
GLM_FUNC_DECL genIUType mod(genIUType x, genIUType y);
/// Modulus. Returns x % y
/// for each component in x using the integer scalar y.
///
/// @tparam T Integer scalar types.
/// @tparam vecType vector types.
///
/// @see gtc_integer
/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> mod(vecType<T, P> const & x, T y);
/// Modulus. Returns x % y
/// for each component in x using the integer vector y.
///
/// @tparam T Integer scalar types.
/// @tparam vecType vector types.
///
/// @see gtc_integer
/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y);
/// @} /// @}
} //namespace glm } //namespace glm

@ -44,24 +44,31 @@ namespace detail
} }
}; };
# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) # if GLM_HAS_BITSCAN_WINDOWS
template <precision P>
template <precision P> struct compute_log2<int, P, tvec4, false>
struct compute_log2<int, P, tvec4, false>
{
GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
{ {
tvec4<int, P> Result(glm::uninitialize); GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
{
tvec4<int, P> Result(glm::uninitialize);
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.x), vec.x);
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.y), vec.y);
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.z), vec.z);
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.w), vec.w);
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.x), vec.x); return Result;
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.y), vec.y); }
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.z), vec.z); };
_BitScanReverse(reinterpret_cast<unsigned long*>(&Result.w), vec.w); # endif//GLM_HAS_BITSCAN_WINDOWS
return Result; template <typename T, precision P, template <class, precision> class vecType, typename genType>
struct compute_mod<T, P, vecType, genType, false>
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & a, genType const & b)
{
return a % b;
} }
}; };
# endif//GLM_ARCH != GLM_ARCH_PURE
}//namespace detail }//namespace detail
}//namespace glm }//namespace glm

@ -162,6 +162,40 @@ namespace modf_
} }
}//namespace modf }//namespace modf
namespace mod_
{
int test()
{
int Error(0);
{
float A(3.0);
float B(2.0f);
float C = glm::mod(A, B);
Error += glm::abs(C - 1.0f) < 0.00001f ? 0 : 1;
}
{
glm::vec4 A(3.0);
float B(2.0f);
glm::vec4 C = glm::mod(A, B);
Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
}
{
glm::vec4 A(3.0);
glm::vec4 B(2.0f);
glm::vec4 C = glm::mod(A, B);
Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
}
return Error;
}
}//namespace mod_
namespace floatBitsToInt namespace floatBitsToInt
{ {
int test() int test()
@ -1109,6 +1143,7 @@ int main()
Error += sign::test(); Error += sign::test();
Error += floor_::test(); Error += floor_::test();
Error += mod_::test();
Error += modf_::test(); Error += modf_::test();
Error += floatBitsToInt::test(); Error += floatBitsToInt::test();
Error += floatBitsToUint::test(); Error += floatBitsToUint::test();

@ -578,6 +578,7 @@ namespace findMSB
genType Return; genType Return;
}; };
# if GLM_HAS_BITSCAN_WINDOWS
template <typename genIUType> template <typename genIUType>
GLM_FUNC_QUALIFIER int findMSB_intrinsic(genIUType Value) GLM_FUNC_QUALIFIER int findMSB_intrinsic(genIUType Value)
{ {
@ -590,6 +591,20 @@ namespace findMSB
_BitScanReverse(&Result, Value); _BitScanReverse(&Result, Value);
return int(Result); return int(Result);
} }
# endif//GLM_HAS_BITSCAN_WINDOWS
# if GLM_ARCH & GLM_ARCH_AVX
// Benchmark candidate for findMSB built on a hardware bit-count intrinsic.
// Returns the index of the most significant set bit of Value, or -1 when Value
// is 0. The index is derived from the leading-zero count of the 32-bit value:
// a trailing-zero count (_tzcnt_u32) would instead give the index of the least
// significant set bit and disagree with the expected findMSB results for any
// input that has more than one bit set.
// NOTE(review): _lzcnt_u32 relies on the LZCNT instruction; the GLM_ARCH_AVX
// guard is assumed to imply a CPU that provides it -- confirm for the targets
// this test is run on.
template <typename genIUType>
GLM_FUNC_QUALIFIER int findMSB_avx(genIUType Value)
{
	GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");

	if(Value == 0)
		return -1;

	// 32-bit input: MSB index = 31 - number of leading zero bits.
	return 31 - int(_lzcnt_u32(static_cast<unsigned int>(Value)));
}
# endif
template <typename genIUType> template <typename genIUType>
GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value) GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value)
@ -698,7 +713,7 @@ namespace findMSB
}; };
int Error(0); int Error(0);
std::size_t const Count(1000000); std::size_t const Count(10000000);
std::clock_t Timestamps0 = std::clock(); std::clock_t Timestamps0 = std::clock();
@ -738,12 +753,14 @@ namespace findMSB
std::clock_t Timestamps4 = std::clock(); std::clock_t Timestamps4 = std::clock();
for(std::size_t k = 0; k < Count; ++k) # if GLM_HAS_BITSCAN_WINDOWS
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i) for(std::size_t k = 0; k < Count; ++k)
{ for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
int Result = findMSB_intrinsic(Data[i].Value); {
Error += Data[i].Return == Result ? 0 : 1; int Result = findMSB_intrinsic(Data[i].Value);
} Error += Data[i].Return == Result ? 0 : 1;
}
# endif//GLM_HAS_BITSCAN_WINDOWS
std::clock_t Timestamps5 = std::clock(); std::clock_t Timestamps5 = std::clock();
@ -756,13 +773,31 @@ namespace findMSB
std::clock_t Timestamps6 = std::clock(); std::clock_t Timestamps6 = std::clock();
# if GLM_ARCH & GLM_ARCH_AVX
for(std::size_t k = 0; k < Count; ++k)
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{
int Result = findMSB_avx(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
}
# endif
std::clock_t Timestamps7 = std::clock();
std::printf("glm::findMSB: %d clocks\n", static_cast<unsigned int>(Timestamps1 - Timestamps0)); std::printf("glm::findMSB: %d clocks\n", static_cast<unsigned int>(Timestamps1 - Timestamps0));
std::printf("findMSB - nlz1: %d clocks\n", static_cast<unsigned int>(Timestamps2 - Timestamps1)); std::printf("findMSB - nlz1: %d clocks\n", static_cast<unsigned int>(Timestamps2 - Timestamps1));
std::printf("findMSB - nlz2: %d clocks\n", static_cast<unsigned int>(Timestamps3 - Timestamps2)); std::printf("findMSB - nlz2: %d clocks\n", static_cast<unsigned int>(Timestamps3 - Timestamps2));
std::printf("findMSB - 0.9.5: %d clocks\n", static_cast<unsigned int>(Timestamps4 - Timestamps3)); std::printf("findMSB - 0.9.5: %d clocks\n", static_cast<unsigned int>(Timestamps4 - Timestamps3));
std::printf("findMSB - intrinsics: %d clocks\n", static_cast<unsigned int>(Timestamps5 - Timestamps4));
# if GLM_HAS_BITSCAN_WINDOWS
std::printf("findMSB - intrinsics: %d clocks\n", static_cast<unsigned int>(Timestamps5 - Timestamps4));
# endif//GLM_HAS_BITSCAN_WINDOWS
std::printf("findMSB - pop: %d clocks\n", static_cast<unsigned int>(Timestamps6 - Timestamps5)); std::printf("findMSB - pop: %d clocks\n", static_cast<unsigned int>(Timestamps6 - Timestamps5));
# if GLM_ARCH & GLM_ARCH_AVX
std::printf("findMSB - avx tzcnt: %d clocks\n", static_cast<unsigned int>(Timestamps7 - Timestamps6));
# endif
return Error; return Error;
} }
@ -888,6 +923,8 @@ namespace findMSB
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i) for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{ {
int Result0 = findMSB_intrinsic(Data[i].Value); int Result0 = findMSB_intrinsic(Data[i].Value);
//unsigned int A = _lzcnt_u32(Data[i].Value);
//unsigned int B = _tzcnt_u32(Data[i].Value);
Error += Data[i].Return == Result0 ? 0 : 1; Error += Data[i].Return == Result0 ? 0 : 1;
} }
@ -1527,6 +1564,8 @@ int main()
Error += ::bitfieldInsert::test(); Error += ::bitfieldInsert::test();
Error += ::bitfieldExtract::test(); Error += ::bitfieldExtract::test();
Error += ::findMSB::perf();
# ifdef GLM_TEST_ENABLE_PERF # ifdef GLM_TEST_ENABLE_PERF
Error += ::bitCount::perf(); Error += ::bitCount::perf();
Error += ::bitfieldReverse::perf(); Error += ::bitfieldReverse::perf();

@ -30,6 +30,7 @@
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
#define GLM_FORCE_INLINE #define GLM_FORCE_INLINE
#include <glm/gtc/epsilon.hpp>
#include <glm/gtc/integer.hpp> #include <glm/gtc/integer.hpp>
#include <glm/gtc/type_precision.hpp> #include <glm/gtc/type_precision.hpp>
#include <glm/gtc/vec1.hpp> #include <glm/gtc/vec1.hpp>
@ -102,7 +103,7 @@ namespace log2_
printf("glm::log2<ivec4>: %d clocks\n", End - Begin); printf("glm::log2<ivec4>: %d clocks\n", End - Begin);
} }
# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) # if GLM_HAS_BITSCAN_WINDOWS
{ {
std::vector<glm::ivec4> Result; std::vector<glm::ivec4> Result;
Result.resize(Count); Result.resize(Count);
@ -163,7 +164,7 @@ namespace log2_
printf("glm::log2<ivec4> reinterpret: %d clocks\n", End - Begin); printf("glm::log2<ivec4> reinterpret: %d clocks\n", End - Begin);
} }
# endif//GLM_ARCH != GLM_ARCH_PURE # endif//GLM_HAS_BITSCAN_WINDOWS
{ {
std::vector<float> Result; std::vector<float> Result;
@ -197,12 +198,70 @@ namespace log2_
} }
}//namespace log2_ }//namespace log2_
namespace mod_
{
int test()
{
int Error(0);
{
float A(3.0);
float B(2.0f);
float C = glm::mod(A, B);
Error += glm::abs(C - 1.0f) < 0.00001f ? 0 : 1;
}
{
glm::vec4 A(3.0);
float B(2.0f);
glm::vec4 C = glm::mod(A, B);
Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
}
{
glm::vec4 A(3.0);
glm::vec4 B(2.0f);
glm::vec4 C = glm::mod(A, B);
Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
}
{
int A(3);
int B(2);
int C = glm::mod(A, B);
Error += C == 1 ? 0 : 1;
}
{
glm::ivec4 A(3);
int B(2);
glm::ivec4 C = glm::mod(A, B);
Error += glm::all(glm::equal(C, glm::ivec4(1))) ? 0 : 1;
}
{
glm::ivec4 A(3);
glm::ivec4 B(2);
glm::ivec4 C = glm::mod(A, B);
Error += glm::all(glm::equal(C, glm::ivec4(1))) ? 0 : 1;
}
return Error;
}
}//namespace mod_
int main() int main()
{ {
int Error(0); int Error(0);
Error += ::log2_::test(); Error += ::log2_::test();
Error += ::mod_::test();
# ifdef GLM_TEST_ENABLE_PERF # ifdef GLM_TEST_ENABLE_PERF
Error += ::log2_::perf(); Error += ::log2_::perf();

Loading…
Cancel
Save