Optimized bitCount function

master
Christophe Riccio 11 years ago
parent c62b6c7324
commit 377d5a4046
  1. 11
      glm/detail/func_integer.hpp
  2. 43
      glm/detail/func_integer.inl
  3. 104
      glm/detail/type_int.hpp
  4. 2
      readme.txt
  5. 78
      test/core/core_func_integer.cpp
  6. 3
      test/core/core_func_integer_bit_count.cpp

@ -158,6 +158,17 @@ namespace glm
template <typename T, precision P, template <typename, precision> class vecType> template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> bitfieldReverse(vecType<T, P> const & v); GLM_FUNC_DECL vecType<T, P> bitfieldReverse(vecType<T, P> const & v);
/// Returns the number of bits set to 1 in the binary representation of v.
///
/// @tparam genType Signed or unsigned integer scalar or vector types.
/// @param v Value whose set bits are counted.
/// @return The number of set bits, as an int.
///
/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/bitCount.xml">GLSL bitCount man page</a>
/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
///
/// @todo Clarify the declaration to specify that scalars are supported.
template <typename genType>
GLM_FUNC_DECL int bitCount(genType v);
/// Returns the number of bits set to 1 in the binary representation of value. /// Returns the number of bits set to 1 in the binary representation of value.
/// ///
/// @tparam T Signed or unsigned integer scalar or vector types. /// @tparam T Signed or unsigned integer scalar or vector types.

@ -66,6 +66,26 @@ namespace detail
return (v & Mask) << Shift | (v & (~Mask)) >> Shift; return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
} }
}; };
/// Pass-through variant of one bit-count reduction step.
///
/// This is the primary template (EXEC defaults to false). Its call()
/// returns the input vector unmodified; the two trailing scalar arguments
/// are accepted but ignored.
template <bool EXEC = false>
struct compute_bitfieldBitCountStep
{
	/// @param Value Vector returned as-is.
	/// @return A copy of Value.
	template <typename T, glm::precision P, template <class, glm::precision> class vecType>
	GLM_FUNC_QUALIFIER static vecType<T, P> call(const vecType<T, P>& Value, T /*Mask*/, T /*Shift*/)
	{
		return Value;
	}
};
/// Active variant of one bit-count reduction step (EXEC == true).
///
/// Adds the bits of each component selected by Mask to the bits selected by
/// the same Mask after the component has been shifted right by Shift.
template <>
struct compute_bitfieldBitCountStep<true>
{
	/// @param Value Vector holding the partial sums.
	/// @param Mask  Bit mask applied to both the unshifted and the shifted value.
	/// @param Shift Right-shift distance applied before the second masking.
	/// @return (Value & Mask) + ((Value >> Shift) & Mask).
	template <typename T, glm::precision P, template <class, glm::precision> class vecType>
	GLM_FUNC_QUALIFIER static vecType<T, P> call(const vecType<T, P>& Value, T Mask, T Shift)
	{
		return
			(Value & Mask) +
			((Value >> Shift) & Mask);
	}
};
}//namespace detail }//namespace detail
// uaddCarry // uaddCarry
@ -207,21 +227,24 @@ namespace detail
} }
// bitCount // bitCount
template <typename genIUType> template <typename genType>
GLM_FUNC_QUALIFIER int bitCount(genIUType x) GLM_FUNC_QUALIFIER int bitCount(genType x)
{ {
return bitCount(tvec1<genIUType>(x)).x; return bitCount(glm::tvec1<genType, glm::defaultp>(x)).x;
} }
template <typename T, precision P, template <typename, precision> class vecType> template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v) GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
{ {
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'bitCount' only accept integer values"); typedef glm::detail::make_unsigned<T>::type U;
vecType<U, P> x(*reinterpret_cast<vecType<U, P> const *>(&v));
vecType<int, P> Count(0); x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 2>::call<U, P, vecType>(x, U(0x5555555555555555ull), static_cast<U>( 1));
for(T i = 0, n = static_cast<T>(sizeof(T) * 8); i < n; ++i) x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 4>::call<U, P, vecType>(x, U(0x3333333333333333ull), static_cast<U>( 2));
Count += vecType<int, P>((v >> i) & static_cast<T>(1)); x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 8>::call<U, P, vecType>(x, U(0x0F0F0F0F0F0F0F0Full), static_cast<U>( 4));
return Count; x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::call<U, P, vecType>(x, U(0x00FF00FF00FF00FFull), static_cast<U>( 8));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::call<U, P, vecType>(x, U(0x0000FFFF0000FFFFull), static_cast<U>(16));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::call<U, P, vecType>(x, U(0x00000000FFFFFFFFull), static_cast<U>(32));
return vecType<int, P>(x);
} }
// findLSB // findLSB

@ -83,6 +83,110 @@ namespace detail
typedef unsigned int lowp_uint_t; typedef unsigned int lowp_uint_t;
typedef unsigned int mediump_uint_t; typedef unsigned int mediump_uint_t;
typedef unsigned int highp_uint_t; typedef unsigned int highp_uint_t;
/// Type trait mapping an integer type to its signed counterpart.
///
/// The primary template has no members, so only the types specialized
/// below expose a nested `type` typedef.
template <typename genType>
struct make_signed {};

// int8 / uint8 -> int8
template <> struct make_signed<int8>   { typedef int8 type; };
template <> struct make_signed<uint8>  { typedef int8 type; };

// int16 / uint16 -> int16
template <> struct make_signed<int16>  { typedef int16 type; };
template <> struct make_signed<uint16> { typedef int16 type; };

// int32 / uint32 -> int32
template <> struct make_signed<int32>  { typedef int32 type; };
template <> struct make_signed<uint32> { typedef int32 type; };

// int64 / uint64 -> int64
template <> struct make_signed<int64>  { typedef int64 type; };
template <> struct make_signed<uint64> { typedef int64 type; };
/// Type trait mapping an integer type to its unsigned counterpart.
///
/// The primary template has no members, so only the types specialized
/// below expose a nested `type` typedef.
template <typename genType>
struct make_unsigned {};

// int8 / uint8 -> uint8
template <> struct make_unsigned<int8>   { typedef uint8 type; };
template <> struct make_unsigned<uint8>  { typedef uint8 type; };

// int16 / uint16 -> uint16
template <> struct make_unsigned<int16>  { typedef uint16 type; };
template <> struct make_unsigned<uint16> { typedef uint16 type; };

// int32 / uint32 -> uint32
template <> struct make_unsigned<int32>  { typedef uint32 type; };
template <> struct make_unsigned<uint32> { typedef uint32 type; };

// int64 / uint64 -> uint64
template <> struct make_unsigned<int64>  { typedef uint64 type; };
template <> struct make_unsigned<uint64> { typedef uint64 type; };
}//namespace detail }//namespace detail
typedef detail::int8 int8; typedef detail::int8 int8;

@ -80,7 +80,7 @@ GLM 0.9.6.0: 2014-XX-XX
- Added GTC_bitfield extension, promoted GTX_bit - Added GTC_bitfield extension, promoted GTX_bit
- Added GTC_integer extension, promoted GTX_bit - Added GTC_integer extension, promoted GTX_bit
- Fixed bad matrix-vector multiple performance with Cuda #257, #258 - Fixed bad matrix-vector multiple performance with Cuda #257, #258
- Optimized bitfieldReverse function - Optimized bitfieldReverse and bitCount functions
================================================================================ ================================================================================
GLM 0.9.5.4: 2014-06-21 GLM 0.9.5.4: 2014-06-21

@ -858,7 +858,7 @@ namespace findMSB
int Error(0); int Error(0);
Error += perf_950(); Error += perf_950();
Error += perf_ops(); //Error += perf_ops();
return Error; return Error;
} }
@ -1190,17 +1190,44 @@ namespace bitCount
return Count; return Count;
} }
template <typename T> template <bool EXEC = false>
inline int bitCount_bits(T v) struct compute_bitfieldBitCountStep
{ {
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'bitCount' only accept integer values"); template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
{
return v;
}
};
int Count(0); template <>
for(T i = 0, n = static_cast<T>(sizeof(T) * 8); i < n; ++i) struct compute_bitfieldBitCountStep<true>
{ {
Count += static_cast<int>((v >> i) & static_cast<T>(1)); template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
{
return (v & Mask) + ((v >> Shift) & Mask);
} }
return Count; };
/// Counts the bits set to 1 in every component of v.
///
/// The components are viewed as the unsigned type U given by
/// glm::detail::make_unsigned<T>, then reduced through up to six
/// mask-and-add steps. A step whose compile-time condition
/// (sizeof(T) * 8 >= 2, 4, 8, 16, 32, 64) is false selects the
/// pass-through variant of compute_bitfieldBitCountStep and leaves x as is.
///
/// @param v Vector whose components are bit-counted.
/// @return The reduced values converted to a vector of int.
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
GLM_FUNC_QUALIFIER vecType<int, P> bitCount_bitfield(vecType<T, P> const & v)
{
	// 'typename' is mandatory here: make_unsigned<T>::type is a dependent name.
	typedef typename glm::detail::make_unsigned<T>::type U;

	// NOTE(review): this reads v through a pointer to a different vector type;
	// it presumably relies on vecType<T, P> and vecType<U, P> sharing a layout - confirm.
	vecType<U, P> x(*reinterpret_cast<vecType<U, P> const *>(&v));

	// '::template' is mandatory: call<> is a member template of a class that
	// depends on T, so without it '<' would be parsed as less-than.
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >=  2>::template call<U, P, vecType>(x, U(0x5555555555555555ull), static_cast<U>( 1));
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >=  4>::template call<U, P, vecType>(x, U(0x3333333333333333ull), static_cast<U>( 2));
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >=  8>::template call<U, P, vecType>(x, U(0x0F0F0F0F0F0F0F0Full), static_cast<U>( 4));
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::template call<U, P, vecType>(x, U(0x00FF00FF00FF00FFull), static_cast<U>( 8));
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::template call<U, P, vecType>(x, U(0x0000FFFF0000FFFFull), static_cast<U>(16));
	x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::template call<U, P, vecType>(x, U(0x00000000FFFFFFFFull), static_cast<U>(32));

	return vecType<int, P>(x);
}
template <typename genType>
GLM_FUNC_QUALIFIER int bitCount_bitfield(genType x)
{
return bitCount_bitfield(glm::tvec1<genType, glm::defaultp>(x)).x;
} }
int perf() int perf()
@ -1249,15 +1276,18 @@ namespace bitCount
std::clock_t TimestampsE = std::clock(); std::clock_t TimestampsE = std::clock();
std::clock_t TimeIf = TimestampsB - TimestampsA; {
std::clock_t TimeVec = TimestampsC - TimestampsB; for(std::size_t i = 0, n = v.size(); i < n; ++i)
std::clock_t TimeDefault = TimestampsD - TimestampsC; v[i] = bitCount_bitfield(static_cast<int>(i));
std::clock_t TimeVec4 = TimestampsE - TimestampsD; }
std::clock_t TimestampsF = std::clock();
std::printf("bitCount - TimeIf %d\n", static_cast<unsigned int>(TimeIf)); std::printf("bitCount - TimeIf %d\n", static_cast<unsigned int>(TimestampsB - TimestampsA));
std::printf("bitCount - TimeVec %d\n", static_cast<unsigned int>(TimeVec)); std::printf("bitCount - TimeVec %d\n", static_cast<unsigned int>(TimestampsC - TimestampsB));
std::printf("bitCount - TimeDefault %d\n", static_cast<unsigned int>(TimeDefault)); std::printf("bitCount - TimeDefault %d\n", static_cast<unsigned int>(TimestampsD - TimestampsC));
std::printf("bitCount - TimeVec4 %d\n", static_cast<unsigned int>(TimeVec4)); std::printf("bitCount - TimeVec4 %d\n", static_cast<unsigned int>(TimestampsE - TimestampsD));
std::printf("bitCount - bitfield %d\n", static_cast<unsigned int>(TimestampsF - TimestampsE));
return Error; return Error;
} }
@ -1268,8 +1298,16 @@ namespace bitCount
for(std::size_t i = 0, n = sizeof(DataI32) / sizeof(type<int>); i < n; ++i) for(std::size_t i = 0, n = sizeof(DataI32) / sizeof(type<int>); i < n; ++i)
{ {
int Result = glm::bitCount(DataI32[i].Value); int ResultA = glm::bitCount(DataI32[i].Value);
Error += DataI32[i].Return == Result ? 0 : 1; int ResultB = bitCount_if(DataI32[i].Value);
int ResultC = bitCount_vec(DataI32[i].Value);
int ResultE = bitCount_bitfield(DataI32[i].Value);
Error += DataI32[i].Return == ResultA ? 0 : 1;
Error += DataI32[i].Return == ResultB ? 0 : 1;
Error += DataI32[i].Return == ResultC ? 0 : 1;
Error += DataI32[i].Return == ResultE ? 0 : 1;
assert(!Error); assert(!Error);
} }
@ -1281,6 +1319,8 @@ int main()
{ {
int Error = 0; int Error = 0;
Error += ::bitCount::test();
Error += ::bitCount::perf();
Error += ::bitfieldReverse::test(); Error += ::bitfieldReverse::test();
Error += ::bitfieldReverse::perf(); Error += ::bitfieldReverse::perf();
Error += ::findMSB::test(); Error += ::findMSB::test();
@ -1292,8 +1332,6 @@ int main()
Error += ::usubBorrow::test(); Error += ::usubBorrow::test();
Error += ::bitfieldInsert::test(); Error += ::bitfieldInsert::test();
Error += ::bitfieldExtract::test(); Error += ::bitfieldExtract::test();
Error += ::bitCount::test();
Error += ::bitCount::perf();
return Error; return Error;
} }

@ -10,7 +10,8 @@ unsigned rotatel(unsigned x, int n) {
return (x << n) | (x >> (32 - n)); return (x << n) | (x >> (32 - n));
} }
int pop0(unsigned x) { int pop0(unsigned x)
{
x = (x & 0x55555555) + ((x >> 1) & 0x55555555); x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);

Loading…
Cancel
Save