From 61428d4526bccce3e268ac3b1aebce27397b2f42 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 17:22:57 -0800 Subject: [PATCH 1/7] stb_image: Trivial optimizations for filter path when img_n==out_n. --- stb_image.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/stb_image.h b/stb_image.h index 90d0d15..c365e4d 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2526,12 +2526,13 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r prior += out_n; // this is a little gross, so that we don't switch per-pixel or per-component if (img_n == out_n) { + int nk = (x - 1)*img_n; #define CASE(f) \ case f: \ - for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ - for (k=0; k < img_n; ++k) + for (k=0; k < nk; ++k) switch (filter) { - CASE(STBI__F_none) cur[k] = raw[k]; break; + // "none" filter turns into a memcpy here; make that explicit. + case STBI__F_none: memcpy(cur, raw, nk); break; CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-img_n]); break; CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break; @@ -2540,6 +2541,7 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-img_n],0,0)); break; } #undef CASE + raw += nk; } else { STBI_ASSERT(img_n+1 == out_n); #define CASE(f) \ From 8188e842e26236bbf1f33ff20f5bd8c3d6f6e6be Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 17:31:51 -0800 Subject: [PATCH 2/7] stb_image: Add 'static' for some internal funcs, STBIDEF for external ones. --- stb_image.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stb_image.h b/stb_image.h index c365e4d..ea3f2ca 100644 --- a/stb_image.h +++ b/stb_image.h @@ -584,7 +584,7 @@ static unsigned char *stbi_load_main(stbi__context *s, int *x, int *y, int *comp #ifndef STBI_NO_STDIO -FILE *stbi__fopen(char const *filename, char const *mode) +static FILE *stbi__fopen(char const *filename, char const *mode) { FILE *f; #if defined(_MSC_VER) && _MSC_VER >= 1400 @@ -628,7 +628,7 @@ STBIDEF unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int return stbi_load_main(&s,x,y,comp,req_comp); } -unsigned char *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +STBIDEF unsigned char *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); @@ -637,7 +637,7 @@ unsigned char *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *use #ifndef STBI_NO_HDR -float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) { unsigned char *data; #ifndef STBI_NO_HDR @@ -650,14 +650,14 @@ float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); } -float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi_loadf_main(&s,x,y,comp,req_comp); } -float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); @@ -665,7 +665,7 @@ float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int } #ifndef STBI_NO_STDIO -float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) { float *result; FILE *f = stbi__fopen(filename, "rb"); @@ -675,7 +675,7 @@ float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) return result; } -float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_file(&s,f); From 92b9e262b79dbc27e04487c57f0dfc6335bc993a Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 17:58:36 -0800 Subject: [PATCH 3/7] stb_image: Keep zout in a local var during stbi__parse_huffman_block. --- stb_image.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/stb_image.h b/stb_image.h index ea3f2ca..ba9e065 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2146,10 +2146,11 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) return z->value[b]; } -static int stbi__zexpand(stbi__zbuf *z, int n) // need to make room for n bytes +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; int cur, limit; + z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); cur = (int) (z->zout - z->zout_start); limit = (int) (z->zout_end - z->zout_start); @@ -2179,16 +2180,23 @@ static int stbi__zdist_extra[32] = static int stbi__parse_huffman_block(stbi__zbuf *a) { + char *zout = a->zout; for(;;) { int z = stbi__zhuffman_decode(a, &a->z_length); if (z < 256) { if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes - if (a->zout >= a->zout_end) if (!stbi__zexpand(a, 1)) return 0; - *a->zout++ = (char) z; + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; } else { stbi_uc *p; int len,dist; - if (z == 256) return 1; + if (z == 256) { + a->zout = zout; + return 1; + } z -= 257; len = stbi__zlength_base[z]; if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); @@ -2196,11 +2204,14 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); - if (a->zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (a->zout + len > a->zout_end) if (!stbi__zexpand(a, len)) return 0; - p = (stbi_uc *) (a->zout - dist); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); while (len--) - *a->zout++ = *p++; + *zout++ = *p++; } } } @@ -2273,7 +2284,7 @@ static int stbi__parse_uncomperssed_block(stbi__zbuf *a) if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); if (a->zout + len > a->zout_end) - if (!stbi__zexpand(a, len)) return 0; + if (!stbi__zexpand(a, a->zout, len)) return 0; memcpy(a->zout, a->zbuffer, len); a->zbuffer += len; a->zout += len; From cdc230598e795cd53c5b976402901efe0de268e9 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 18:07:00 -0800 Subject: [PATCH 4/7] stb_image: Fast path for matches with dist=1 (runs) in stbi__parse_huffman_block. --- stb_image.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stb_image.h b/stb_image.h index ba9e065..318ea94 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2210,8 +2210,12 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) zout = a->zout; } p = (stbi_uc *) (zout - dist); - while (len--) - *zout++ = *p++; + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + do *zout++ = v; while (--len); + } else { + do *zout++ = *p++; while (--len); + } } } } From 007de5eb6e22f9a239d5fa25ed3e5a1cecf88d4a Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 18:18:36 -0800 Subject: [PATCH 5/7] stb_image: Extract zhuffman_decode slow path into own function. --- stb_image.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/stb_image.h b/stb_image.h index 318ea94..2d955c7 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2119,18 +2119,9 @@ stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) return k; } -stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) { int b,s,k; - if (a->num_bits < 16) stbi__fill_bits(a); - b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; - if (b < 0xffff) { - s = z->size[b]; - a->code_buffer >>= s; - a->num_bits -= s; - return z->value[b]; - } - // not resolved by fast table, so compute it the slow way // use jpeg approach, which requires MSbits at top k = stbi__bit_reverse(a->code_buffer, 16); @@ -2146,6 +2137,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) return z->value[b]; } +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; From 3d6dccf0c40d81f9d637f2b81d7f9274454bee44 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 18:48:37 -0800 Subject: [PATCH 6/7] stb_image: Make 'fast' table contain code size and value directly. --- stb_image.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/stb_image.h b/stb_image.h index 2d955c7..e87f059 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2036,7 +2036,7 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num) // DEFLATE spec for generating codes memset(sizes, 0, sizeof(sizes)); - memset(z->fast, 255, sizeof(z->fast)); + memset(z->fast, 0, sizeof(z->fast)); for (i=0; i < num; ++i) ++sizes[sizelist[i]]; sizes[0] = 0; @@ -2059,12 +2059,13 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num) int s = sizelist[i]; if (s) { int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); z->size [c] = (stbi_uc ) s; z->value[c] = (stbi__uint16) i; if (s <= STBI__ZFAST_BITS) { int k = stbi__bit_reverse(next_code[s],s); while (k < (1 << STBI__ZFAST_BITS)) { - z->fast[k] = (stbi__uint16) c; + z->fast[k] = fastv; k += (1 << s); } } @@ -2142,11 +2143,11 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) stbi__fill_bits(a); b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; - if (b < 0xffff) { - s = z->size[b]; + if (b) { + s = b >> 9; a->code_buffer >>= s; a->num_bits -= s; - return z->value[b]; + return b & 511; } return stbi__zhuffman_decode_slowpath(a, z); } From 1996a019ac13abe20b1010a23c24502a3547b67d Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 13 Dec 2014 19:15:38 -0800 Subject: [PATCH 7/7] stb_image: Guess decoded image size before zlib decode to avoid unnecessary reallocs. --- stb_image.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stb_image.h b/stb_image.h index e87f059..01cc73a 100644 --- a/stb_image.h +++ b/stb_image.h @@ -2845,7 +2845,9 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (scan != SCAN_load) return 1; if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); - z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !is_iphone); + // initial guess for decoded data size to avoid unnecessary reallocs + raw_len = s->img_x * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); if (z->expanded == NULL) return 0; // zlib should set error free(z->idata); z->idata = NULL; if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)