switch srgb convert to second ryg method, which is a bit faster (8% total speedup on upsampling test);

remove extra table in slowpath
master
Sean Barrett, 11 years ago
parent a12d3dedf0
commit 8024673461
  1. 160
      stb_image_resize.h

@ -22,7 +22,7 @@
input_pixels , in_w , in_h , 0, input_pixels , in_w , in_h , 0,
output_pixels, out_w, out_h, 0, output_pixels, out_w, out_h, 0,
num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP) num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP)
WRAP/REFLECT/ZERO // WRAP/REFLECT/ZERO
FULL API FULL API
See the "header file" section of the source for API documentation. See the "header file" section of the source for API documentation.
@ -629,47 +629,6 @@ static float stbir__srgb_uchar_to_linear_float[256] = {
0.982251f, 0.991102f, 1.0f 0.982251f, 0.991102f, 1.0f
}; };
// sRGB transition values, scaled by 1<<28
// note that if you only scaled by 1<<16, all the values would be 4K smaller,
// so [1] would be ~10, and so that would have around 5% error (10 +- 0.5)
// at the boundary between uint8 0 and 1. This also means that a 64K-entry table
// would have the same 5% error there.
static int stbir__srgb_offset_to_linear_scaled[256] =
{
0, 40738, 122216, 203693, 285170, 366648, 448125, 529603,
611080, 692557, 774035, 855852, 942009, 1033024, 1128971, 1229926,
1335959, 1447142, 1563542, 1685229, 1812268, 1944725, 2082664, 2226148,
2375238, 2529996, 2690481, 2856753, 3028870, 3206888, 3390865, 3580856,
3776916, 3979100, 4187460, 4402049, 4622919, 4850123, 5083710, 5323731,
5570236, 5823273, 6082892, 6349140, 6622065, 6901714, 7188133, 7481369,
7781466, 8088471, 8402427, 8723380, 9051372, 9386448, 9728650, 10078021,
10434603, 10798439, 11169569, 11548036, 11933879, 12327139, 12727857, 13136073,
13551826, 13975156, 14406100, 14844697, 15290987, 15745007, 16206795, 16676389,
17153826, 17639142, 18132374, 18633560, 19142734, 19659934, 20185196, 20718552,
21260042, 21809696, 22367554, 22933648, 23508010, 24090680, 24681686, 25281066,
25888850, 26505076, 27129772, 27762974, 28404716, 29055026, 29713942, 30381490,
31057708, 31742624, 32436272, 33138682, 33849884, 34569912, 35298800, 36036568,
36783260, 37538896, 38303512, 39077136, 39859796, 40651528, 41452360, 42262316,
43081432, 43909732, 44747252, 45594016, 46450052, 47315392, 48190064, 49074096,
49967516, 50870356, 51782636, 52704392, 53635648, 54576432, 55526772, 56486700,
57456236, 58435408, 59424248, 60422780, 61431036, 62449032, 63476804, 64514376,
65561776, 66619028, 67686160, 68763192, 69850160, 70947088, 72053992, 73170912,
74297864, 75434880, 76581976, 77739184, 78906536, 80084040, 81271736, 82469648,
83677792, 84896192, 86124888, 87363888, 88613232, 89872928, 91143016, 92423512,
93714432, 95015816, 96327688, 97650056, 98982952, 100326408, 101680440, 103045072,
104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
};
static float stbir__srgb_to_linear(float f) static float stbir__srgb_to_linear(float f)
{ {
if (f <= 0.04045f) if (f <= 0.04045f)
@ -687,7 +646,6 @@ static float stbir__linear_to_srgb(float f)
} }
#ifndef STBIR_NON_IEEE_FLOAT #ifndef STBIR_NON_IEEE_FLOAT
// From https://gist.github.com/rygorous/2203834 // From https://gist.github.com/rygorous/2203834
typedef union typedef union
@ -702,7 +660,13 @@ typedef union
}; };
} stbir__FP32; } stbir__FP32;
static const stbir_uint32 fp32_to_srgb8_tab3[64] = { static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143, 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240, 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
@ -710,78 +674,90 @@ static const stbir_uint32 fp32_to_srgb8_tab3[64] = {
0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559, 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723, 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
}; };
static stbir_uint8 stbir__linear_to_srgb_uchar(float in) static stbir_uint8 stbir__linear_to_srgb_uchar(float in)
{ {
static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
static const stbir__FP32 lutthresh = { 0x3b800000 }; // 2^(-8) static const stbir__FP32 minval = { (127-13) << 23 };
static const stbir__FP32 linearsc = { 0x454c5d00 }; stbir_uint32 tab,bias,scale,t;
static const stbir__FP32 float2int = { (127 + 23) << 23 };
stbir__FP32 f; stbir__FP32 f;
// Clamp to [0, 1-eps]; these two values map to 0 and 1, respectively. // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
// The tests are carefully written so that NaNs map to 0, same as in the reference // The tests are carefully written so that NaNs map to 0, same as in the reference
// implementation. // implementation.
if (!(in > 0.0f)) // written this way to catch NaNs if (!(in > minval.f)) // written this way to catch NaNs
in = 0.0f; in = minval.f;
if (in > almostone.f) if (in > almostone.f)
in = almostone.f; in = almostone.f;
// Check which region this value falls into // Do the table lookup and unpack bias, scale
f.f = in; f.f = in;
if (f.f < lutthresh.f) // linear region tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
{ bias = (tab >> 16) << 9;
f.f *= linearsc.f; scale = tab & 0xffff;
f.f += float2int.f; // use "magic value" to get float->int with rounding.
return (stbir_uint8)(f.u & 255); // Grab next-highest mantissa bits and perform linear interpolation
} t = (f.u >> 12) & 0xff;
else // non-linear region return (unsigned char) ((bias + scale*t) >> 16);
{
// Unpack bias, scale from table
stbir_uint32 tab = fp32_to_srgb8_tab3[(f.u >> 20) & 63];
stbir_uint32 bias = (tab >> 16) << 9;
stbir_uint32 scale = tab & 0xffff;
// Grab next-highest mantissa bits and perform linear interpolation
stbir_uint32 t = (f.u >> 12) & 0xff;
return (stbir_uint8)((bias + scale*t) >> 16);
}
} }
#else #else
// sRGB transition values, scaled by 1<<28
// Used as a starting point to save time in the binary search in stbir__linear_to_srgb_uchar. static int stbir__srgb_offset_to_linear_scaled[256] =
static stbir_uint8 stbr__linear_uchar_to_srgb_uchar[] = { {
0, 12, 21, 28, 33, 38, 42, 46, 49, 52, 55, 58, 61, 63, 66, 68, 70, 73, 75, 77, 79, 81, 82, 84, 86, 88, 89, 91, 93, 94, 0, 40738, 122216, 203693, 285170, 366648, 448125, 529603,
96, 97, 99, 100, 102, 103, 104, 106, 107, 109, 110, 111, 112, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 611080, 692557, 774035, 855852, 942009, 1033024, 1128971, 1229926,
127, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 142, 143, 144, 145, 146, 147, 148, 149, 150, 1335959, 1447142, 1563542, 1685229, 1812268, 1944725, 2082664, 2226148,
151, 151, 152, 153, 154, 155, 156, 157, 157, 158, 159, 160, 161, 161, 162, 163, 164, 165, 165, 166, 167, 168, 168, 169, 2375238, 2529996, 2690481, 2856753, 3028870, 3206888, 3390865, 3580856,
170, 171, 171, 172, 173, 174, 174, 175, 176, 176, 177, 178, 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, 186, 3776916, 3979100, 4187460, 4402049, 4622919, 4850123, 5083710, 5323731,
187, 187, 188, 189, 189, 190, 191, 191, 192, 193, 193, 194, 194, 195, 196, 196, 197, 197, 198, 199, 199, 200, 201, 201, 5570236, 5823273, 6082892, 6349140, 6622065, 6901714, 7188133, 7481369,
202, 202, 203, 204, 204, 205, 205, 206, 206, 207, 208, 208, 209, 209, 210, 210, 211, 212, 212, 213, 213, 214, 214, 215, 7781466, 8088471, 8402427, 8723380, 9051372, 9386448, 9728650, 10078021,
215, 216, 217, 217, 218, 218, 219, 219, 220, 220, 221, 221, 222, 222, 223, 223, 224, 224, 225, 226, 226, 227, 227, 228, 10434603, 10798439, 11169569, 11548036, 11933879, 12327139, 12727857, 13136073,
228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 236, 237, 237, 237, 238, 238, 239, 239, 13551826, 13975156, 14406100, 14844697, 15290987, 15745007, 16206795, 16676389,
240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 17153826, 17639142, 18132374, 18633560, 19142734, 19659934, 20185196, 20718552,
251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 21260042, 21809696, 22367554, 22933648, 23508010, 24090680, 24681686, 25281066,
25888850, 26505076, 27129772, 27762974, 28404716, 29055026, 29713942, 30381490,
31057708, 31742624, 32436272, 33138682, 33849884, 34569912, 35298800, 36036568,
36783260, 37538896, 38303512, 39077136, 39859796, 40651528, 41452360, 42262316,
43081432, 43909732, 44747252, 45594016, 46450052, 47315392, 48190064, 49074096,
49967516, 50870356, 51782636, 52704392, 53635648, 54576432, 55526772, 56486700,
57456236, 58435408, 59424248, 60422780, 61431036, 62449032, 63476804, 64514376,
65561776, 66619028, 67686160, 68763192, 69850160, 70947088, 72053992, 73170912,
74297864, 75434880, 76581976, 77739184, 78906536, 80084040, 81271736, 82469648,
83677792, 84896192, 86124888, 87363888, 88613232, 89872928, 91143016, 92423512,
93714432, 95015816, 96327688, 97650056, 98982952, 100326408, 101680440, 103045072,
104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
}; };
static stbir_uint8 stbir__linear_to_srgb_uchar(float f) static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
{ {
int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
int v = stbr__linear_uchar_to_srgb_uchar[(int)(f * 255)]; // Make a guess at the value with a table. int v = 0;
int i; int i;
// Refine the guess with a short binary search. // Refine the guess with a short binary search.
i = v + 8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; i = v + 64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; i = v + 32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; i = v + 16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
i = v + 1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
return (stbir_uint8) v; return (stbir_uint8) v;
} }
#endif #endif
static float stbir__filter_trapezoid(float x, float scale) static float stbir__filter_trapezoid(float x, float scale)

Loading…
Cancel
Save