|
|
@ -34,7 +34,7 @@ int nlz1a(unsigned x) { |
|
|
|
int n; |
|
|
|
int n; |
|
|
|
|
|
|
|
|
|
|
|
/* if (x == 0) return(32); */ |
|
|
|
/* if (x == 0) return(32); */ |
|
|
|
if ((int)x <= 0) return (~x >> 26) & 32; |
|
|
|
if (static_cast<int>(x) <= 0) return (~x >> 26) & 32; |
|
|
|
n = 1; |
|
|
|
n = 1; |
|
|
|
if ((x >> 16) == 0) {n = n +16; x = x <<16;} |
|
|
|
if ((x >> 16) == 0) {n = n +16; x = x <<16;} |
|
|
|
if ((x >> 24) == 0) {n = n + 8; x = x << 8;} |
|
|
|
if ((x >> 24) == 0) {n = n + 8; x = x << 8;} |
|
|
@ -141,29 +141,31 @@ gcc/AIX, and gcc/NT, at some optimization levels. |
|
|
|
BTW, these programs use the "anonymous union" feature of C++, not |
|
|
|
BTW, these programs use the "anonymous union" feature of C++, not |
|
|
|
available in C. */ |
|
|
|
available in C. */ |
|
|
|
|
|
|
|
|
|
|
|
int nlz6(unsigned k) { |
|
|
|
int nlz6(unsigned k) |
|
|
|
union { |
|
|
|
{ |
|
|
|
unsigned asInt[2]; |
|
|
|
union { |
|
|
|
double asDouble; |
|
|
|
unsigned asInt[2]; |
|
|
|
}; |
|
|
|
double asDouble; |
|
|
|
int n; |
|
|
|
}; |
|
|
|
|
|
|
|
int n; |
|
|
|
asDouble = (double)k + 0.5; |
|
|
|
|
|
|
|
n = 1054 - (asInt[LE] >> 20); |
|
|
|
asDouble = static_cast<double>(k) + 0.5; |
|
|
|
return n; |
|
|
|
n = 1054 - (asInt[LE] >> 20); |
|
|
|
|
|
|
|
return n; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
int nlz7(unsigned k) { |
|
|
|
int nlz7(unsigned k) |
|
|
|
union { |
|
|
|
{ |
|
|
|
unsigned asInt[2]; |
|
|
|
union { |
|
|
|
double asDouble; |
|
|
|
unsigned asInt[2]; |
|
|
|
}; |
|
|
|
double asDouble; |
|
|
|
int n; |
|
|
|
}; |
|
|
|
|
|
|
|
int n; |
|
|
|
asDouble = (double)k; |
|
|
|
|
|
|
|
n = 1054 - (asInt[LE] >> 20); |
|
|
|
asDouble = static_cast<double>(k); |
|
|
|
n = (n & 31) + (n >> 9); |
|
|
|
n = 1054 - (asInt[LE] >> 20); |
|
|
|
return n; |
|
|
|
n = (n & 31) + (n >> 9); |
|
|
|
|
|
|
|
return n; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* In single precision, round-to-nearest mode, the basic method fails for:
|
|
|
|
/* In single precision, round-to-nearest mode, the basic method fails for:
|
|
|
@ -175,17 +177,18 @@ int nlz7(unsigned k) { |
|
|
|
FFFFFF80 <= k <= FFFFFFFF. |
|
|
|
FFFFFF80 <= k <= FFFFFFFF. |
|
|
|
For k = 0 it gives 158, and for the other values it is too low by 1. */ |
|
|
|
For k = 0 it gives 158, and for the other values it is too low by 1. */ |
|
|
|
|
|
|
|
|
|
|
|
/* nlz8: number of leading zeros of k, read from the exponent field of a float.
 *
 * Assumes 32-bit unsigned and IEEE-754 binary32 floats (8-bit exponent with
 * bias 127 sitting above a 23-bit fraction), so the leading bit at position
 * p gives 158 - (127 + p) = 31 - p.
 *
 * k & ~(k >> 1) clears every 1-bit that has a 1-bit immediately to its
 * left.  The most significant 1-bit always survives, while long runs of
 * ones are broken up so that rounding in the int-to-float conversion cannot
 * carry into the next power of two.
 *
 * Adding 0.5f makes k == 0 convert to 0.5f (exponent 126), which yields 32.
 *
 * A named union is used so the function is valid as both C and C++.
 */
int nlz8(unsigned k)
{
    union {
        unsigned asInt;
        float asFloat;
    } pun;

    k = k & ~(k >> 1);              /* Fix problem with rounding. */
    pun.asFloat = (float)k + 0.5f;

    return 158 - (int)(pun.asInt >> 23);
}
|
|
|
|
|
|
|
|
|
|
|
/* The example below shows how to make a macro for nlz. It uses an
|
|
|
|
/* The example below shows how to make a macro for nlz. It uses an
|
|
|
@ -196,18 +199,19 @@ expressions (see "Using and Porting GNU CC", by Richard M. Stallman |
|
|
|
possibility that the macro argument will conflict with one of its local |
|
|
|
possibility that the macro argument will conflict with one of its local |
|
|
|
variables, e.g., NLZ(k). */ |
|
|
|
variables, e.g., NLZ(k). */ |
|
|
|
|
|
|
|
|
|
|
|
/* nlz9: number of leading zeros of k, read from the exponent field of a float.
 *
 * Same idea as the float variant that adds 0.5f, but here k is converted
 * as-is and the k == 0 case is repaired afterwards with integer ops.
 *
 * Assumes 32-bit unsigned and IEEE-754 binary32 floats.  For k != 0 the
 * first step produces a value in 0..31, which the fold (n & 31) + (n >> 6)
 * leaves unchanged.  For k == 0 the float is 0.0f, its bit pattern is 0,
 * n is 158, and the fold gives 30 + 2 = 32.
 *
 * k & ~(k >> 1) clears every 1-bit that has a 1-bit immediately to its
 * left, keeping the most significant 1-bit, so rounding in the conversion
 * cannot carry into the next power of two.
 *
 * A named union is used so the function is valid as both C and C++.
 */
int nlz9(unsigned k)
{
    union {
        unsigned asInt;
        float asFloat;
    } pun;
    int n;

    k = k & ~(k >> 1);              /* Fix problem with rounding. */
    pun.asFloat = (float)k;
    n = 158 - (int)(pun.asInt >> 23);

    /* Fix problem with k = 0: maps 158 to 32, leaves 0..31 alone. */
    return (n & 31) + (n >> 6);
}
|
|
|
|
|
|
|
|
|
|
|
/* Below are three nearly equivalent programs for computing the number
|
|
|
|
/* Below are three nearly equivalent programs for computing the number
|
|
|
@ -229,74 +233,75 @@ multiplication expanded into shifts and adds, but the table size is |
|
|
|
getting a bit large). */ |
|
|
|
getting a bit large). */ |
|
|
|
|
|
|
|
|
|
|
|
/* Filler value for table slots that are never indexed.  Kept at file scope
   because the nlz10a and nlz10b tables are written in terms of it. */
#define u 99

/* nlz10: number of leading zeros of x by table lookup (Harley's algorithm).
 *
 * The leftmost 1-bit is first smeared into every lower position, so x
 * becomes 0 or 2^m - 1: one of only 33 possible values.  Multiplying by
 * 0x06EB14F9 (7 * 255^3) and keeping the top six bits of the 32-bit product
 * sends each of those values to a distinct slot of the 64-entry table.
 *
 * Assumes unsigned is exactly 32 bits wide; the multiplication relies on
 * wrap-around at 2^32 and x >> 26 must stay within 0..63.
 *
 * Returns 32 for x == 0.
 */
int nlz10(unsigned x)
{
    enum { XX = 99 };   /* unused slot, same value as the file-level filler */

    static const char table[64] = {
        32, 31, XX, 16, XX, 30,  3, XX,   15, XX, XX, XX, 29, 10,  2, XX,
        XX, XX, 12, 14, 21, XX, 19, XX,   XX, 28, XX, 25, XX,  9,  1, XX,
        17, XX,  4, XX, XX, XX, 11, XX,   13, 22, 20, XX, 26, XX, XX, 18,
         5, XX, XX, 23, XX, 27, XX,  6,   XX, 24,  7, XX,  8, XX,  0, XX
    };

    x |= x >> 1;        /* Propagate leftmost  */
    x |= x >> 2;        /* 1-bit to the right. */
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;

    x *= 0x06EB14F9;    /* Multiplier is 7*255**3. */

    return table[x >> 26];
}
|
|
|
|
|
|
|
|
|
|
|
/* Harley's algorithm with multiply expanded.
|
|
|
|
/* Harley's algorithm with multiply expanded.
|
|
|
|
19 elementary ops plus an indexed load. */ |
|
|
|
19 elementary ops plus an indexed load. */ |
|
|
|
|
|
|
|
|
|
|
|
/* nlz10a: number of leading zeros of x by table lookup, with the multiply
 * written out as shifts and subtracts.
 *
 * After the leftmost 1-bit has been smeared into every lower position, x is
 * multiplied by 7 and then three times by 255, each step done as
 * (x << s) - x.  That is a multiplication by 7 * 255^3 modulo 2^32; the top
 * six bits of the result index the 64-entry table.
 *
 * Assumes unsigned is exactly 32 bits wide; the arithmetic relies on
 * wrap-around at 2^32 and x >> 26 must stay within 0..63.
 *
 * Returns 32 for x == 0.
 */
int nlz10a(unsigned x)
{
    enum { XX = 99 };   /* table slot that is never indexed */

    static const char table[64] = {
        32, 31, XX, 16, XX, 30,  3, XX,   15, XX, XX, XX, 29, 10,  2, XX,
        XX, XX, 12, 14, 21, XX, 19, XX,   XX, 28, XX, 25, XX,  9,  1, XX,
        17, XX,  4, XX, XX, XX, 11, XX,   13, 22, 20, XX, 26, XX, XX, 18,
         5, XX, XX, 23, XX, 27, XX,  6,   XX, 24,  7, XX,  8, XX,  0, XX
    };

    x |= x >> 1;        /* Propagate leftmost  */
    x |= x >> 2;        /* 1-bit to the right. */
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;

    x = (x << 3) - x;   /* Multiply by 7.   */
    x = (x << 8) - x;   /* Multiply by 255. */
    x = (x << 8) - x;   /* Again.           */
    x = (x << 8) - x;   /* Again.           */

    return table[x >> 26];
}
|
|
|
|
|
|
|
|
|
|
|
/* Julius Goryavsky's version of Harley's algorithm.
|
|
|
|
/* Julius Goryavsky's version of Harley's algorithm.
|
|
|
|
17 elementary ops plus an indexed load, if the machine |
|
|
|
17 elementary ops plus an indexed load, if the machine |
|
|
|
has "and not." */ |
|
|
|
has "and not." */ |
|
|
|
|
|
|
|
|
|
|
|
/* nlz10b: number of leading zeros of x by table lookup (Goryavsky's variant
 * of Harley's algorithm).
 *
 * The leftmost 1-bit is smeared right by 1, 2, 4 and 8 positions only; the
 * last step then clears the bits of x that are also set in x >> 16 instead
 * of OR-ing them in.  The result is multiplied by 0xFD7049FF modulo 2^32
 * and its top six bits index the 64-entry table.
 *
 * The multiplier equals 511 * 2047 * 16383 modulo 2^32, so the single
 * multiply may be replaced by these three shift-and-subtract steps:
 *     x = (x <<  9) - x;      multiply by 511
 *     x = (x << 11) - x;      multiply by 2047
 *     x = (x << 14) - x;      multiply by 16383
 *
 * Assumes unsigned is exactly 32 bits wide; the multiplication relies on
 * wrap-around at 2^32 and x >> 26 must stay within 0..63.
 *
 * Returns 32 for x == 0.
 */
int nlz10b(unsigned x)
{
    enum { XX = 99 };   /* table slot that is never indexed */

    static const char table[64] = {
        32, 20, 19, XX, XX, 18, XX,  7,   10, 17, XX, XX, 14, XX,  6, XX,
        XX,  9, XX, 16, XX, XX,  1, 26,   XX, 13, XX, XX, 24,  5, XX, XX,
        XX, 21, XX,  8, 11, XX, 15, XX,   XX, XX, XX,  2, 27,  0, 25, XX,
        22, XX, 12, XX, XX,  3, 28, XX,   23, XX,  4, 29, XX, XX, 30, 31
    };

    x |= x >> 1;        /* Propagate leftmost  */
    x |= x >> 2;        /* 1-bit to the right. */
    x |= x >> 4;
    x |= x >> 8;
    x &= ~(x >> 16);

    x *= 0xFD7049FF;

    return table[x >> 26];
}
|
|
|
|
|
|
|
|
|
|
|
/* Running count of mismatches reported through error(). */
int errors;

/* error: record one failed check and print it.
 *
 * x is printed as eight hexadecimal digits and y in decimal.
 * Increments the global counter `errors` on every call.
 */
void error(int x, int y)
{
    errors = errors + 1;

    /* %x expects an unsigned argument, so x is converted explicitly;
       the printed text is the same. */
    printf("Error for x = %08x, got %d\n", (unsigned)x, y);
}
|
|
|
|
|
|
|
|
|
|
|
int main() |
|
|
|
int main() |
|
|
|