@ -1,4 +1,4 @@
/* stb_image_resize2 - v2.01 - public domain image resizing
/* stb_image_resize2 - v2.04 - public domain image resizing
by Jeff Roberts ( v2 ) and Jorge L Rodriguez
http : //github.com/nothings/stb
@ -328,9 +328,11 @@
Nathan Reed : warning fixes for 1.0
REVISIONS
2.00 ( 2022 - 02 - 20 ) mostly new source : new api , optimizations , simd , vertical - first , etc
( 2 x - 5 x faster without simd , 4 x - 12 x faster with simd )
( in some cases , 20 x to 40 x faster - resizing to very small for example )
2.04 ( 2023 - 11 - 17 ) Fix for rare AVX bug , shadowed symbol ( thanks Nikola Smiljanic ) .
2.03 ( 2023 - 11 - 01 ) ASAN and TSAN warnings fixed , minor tweaks .
2.00 ( 2023 - 10 - 10 ) mostly new source : new api , optimizations , simd , vertical - first , etc
( 2 x - 5 x faster without simd , 4 x - 12 x faster with simd )
( in some cases , 20 x to 40 x faster - resizing to very small for example )
0.96 ( 2019 - 03 - 04 ) fixed warnings
0.95 ( 2017 - 07 - 23 ) fixed warnings
0.94 ( 2017 - 03 - 18 ) fixed warnings
@ -450,25 +452,33 @@ typedef uint64_t stbir_uint64;
// for back compatibility, you can cast the old channel count to an stbir_pixel_layout
// Pixel layouts understood by the resizer. Each enumerator is declared exactly
// once (a repeated enumerator name is a redeclaration error in C); every name
// keeps its original numeric value.
typedef enum
{
  STBIR_1CHANNEL = 1,
  STBIR_2CHANNEL = 2,
  STBIR_RGB      = 3,      // 3-chan, with order specified (for channel flipping)
  STBIR_BGR      = 0,      // 3-chan, with order specified (for channel flipping)
  STBIR_4CHANNEL = 5,

  STBIR_RGBA = 4,          // alpha formats, where alpha is NOT premultiplied into color channels
  STBIR_BGRA = 6,
  STBIR_ARGB = 7,
  STBIR_ABGR = 8,
  STBIR_RA   = 9,
  STBIR_AR   = 10,

  STBIR_RGBA_PM = 11,      // alpha formats, where alpha is premultiplied into color channels
  STBIR_BGRA_PM = 12,
  STBIR_ARGB_PM = 13,
  STBIR_ABGR_PM = 14,
  STBIR_RA_PM   = 15,
  STBIR_AR_PM   = 16,

  // Alpha formats where NO alpha weighting is applied at all. These are plain
  // synonyms for the _PM values above (which also do no alpha weighting); the
  // names just make that intent clearer for some readers.
  STBIR_RGBA_NO_AW = 11,
  STBIR_BGRA_NO_AW = 12,
  STBIR_ARGB_NO_AW = 13,
  STBIR_ABGR_NO_AW = 14,
  STBIR_RA_NO_AW   = 15,
  STBIR_AR_NO_AW   = 16,
} stbir_pixel_layout;
//===============================================================
@ -1172,6 +1182,10 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
# define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win.
# endif
// Minimum number of scanlines a single split should cover when the work is
// divided up for threading. Defaults to 4; because of the #ifndef guard, a
// definition made before this point takes precedence over the default.
# ifndef STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS
# define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
# endif
// restrict pointers for the output pointers
# if defined( _MSC_VER ) && !defined(__clang__)
# define STBIR_STREAMOUT_PTR( star ) star __restrict
@ -1549,7 +1563,6 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
# define stbir__simdf8_0123to2222( out, in ) (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2,2,2,2 )
# define stbir__simdf8_load2( out, ptr ) (out) = _mm256_castsi256_ps(_mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)(ptr)) )) // top values can be random (not denormal or nan for perf)
# define stbir__simdf8_load4b( out, ptr ) (out) = _mm256_broadcast_ps( (__m128 const *)(ptr) )
static __m256i stbir_00112233 = { STBIR__CONST_4d_32i ( 0 , 0 , 1 , 1 ) , STBIR__CONST_4d_32i ( 2 , 2 , 3 , 3 ) } ;
@ -1582,11 +1595,11 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
// 8-wide multiply-add helpers: each assigns (add + mul1 * mul2) to `out`.
//   stbir__simdf8_madd      - both factors are 8-wide registers
//   stbir__simdf8_madd_mem  - second factor is loaded (unaligned) from `ptr`
//   stbir__simdf8_madd_mem4 - factors are 4-wide: `mul` is a 128-bit value and
//                             four floats are loaded (unaligned) from `ptr`
//
// Each macro is defined exactly once per branch. The 4-wide variant widens its
// operands with _mm256_setr_m128( x, _mm_setzero_ps() ) so the upper four lanes
// of the product are explicitly zero; _mm256_castps128_ps256 must not be used
// here because it leaves the upper 128 bits undefined, and those lanes are
// combined with `add`.
#ifdef STBIR_USE_FMA           // not on by default to maintain bit identical simd to non-simd
#define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_fmadd_ps( mul1, mul2, add )
#define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ), add )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( _mm256_setr_m128( mul, _mm_setzero_ps() ), _mm256_setr_m128( _mm_loadu_ps( (float const*)(ptr) ), _mm_setzero_ps() ), add )
#else
#define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul1, mul2 ) )
#define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ) ) )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_setr_m128( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ), _mm_setzero_ps() ) )
#endif
# define stbir__if_simdf8_cast_to_simdf4( val ) _mm256_castps256_ps128( val )
@ -3697,7 +3710,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors*
float * coeffs = coefficents + widest * ( num_contributors - 1 ) ;
// go until no chance of clipping (this is usually less than 8 loops)
while ( ( ( contribs - > n0 + widest * 2 ) > = row_width ) & & ( contribs > = contributors ) )
while ( ( contribs > = contributors ) & & ( ( contribs - > n0 + widest * 2 ) > = row_width ) )
{
// might we clip??
if ( ( contribs - > n0 + widest ) > row_width )
@ -4652,10 +4665,10 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
stbir__simdf8_madd ( tot0 , tot0 , c , d ) ; }
# define stbir__store_output() \
{ stbir__simdf t , c ; \
{ stbir__simdf t , d ; \
stbir__simdf8_add4halves ( t , stbir__if_simdf8_cast_to_simdf4 ( tot0 ) , tot0 ) ; \
stbir__simdf_0123to2301 ( c , t ) ; \
stbir__simdf_add ( t , t , c ) ; \
stbir__simdf_0123to2301 ( d , t ) ; \
stbir__simdf_add ( t , t , d ) ; \
stbir__simdf_store2 ( output , t ) ; \
horizontal_coefficients + = coefficient_width ; \
+ + horizontal_contributors ; \
@ -7389,7 +7402,6 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou
resize - > output_cb = 0 ;
resize - > user_data = resize ;
resize - > samplers = 0 ;
resize - > needs_rebuild = 1 ;
resize - > called_alloc = 0 ;
resize - > horizontal_filter = STBIR_FILTER_DEFAULT ;
resize - > horizontal_filter_kernel = 0 ; resize - > horizontal_filter_support = 0 ;
@ -7403,6 +7415,7 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou
resize - > output_data_type = data_type ;
resize - > input_pixel_layout_public = pixel_layout ;
resize - > output_pixel_layout_public = pixel_layout ;
resize - > needs_rebuild = 1 ;
}
STBIRDEF void stbir_resize_init ( STBIR_RESIZE * resize ,
@ -7428,17 +7441,27 @@ STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_t
{
resize - > input_data_type = input_type ;
resize - > output_data_type = output_type ;
if ( ( resize - > samplers ) & & ( ! resize - > needs_rebuild ) )
stbir__update_info_from_resize ( resize - > samplers , resize ) ;
}
// Sets the input and output pixel callbacks (no callbacks by default).
// The pointers are always stored on `resize`. If samplers already exist and no
// rebuild is pending, they are also written straight into the samplers.
STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb )
{
  int samplers_are_current;

  resize->input_cb = input_cb;
  resize->output_cb = output_cb;

  samplers_are_current = ( resize->samplers != 0 ) && ( resize->needs_rebuild == 0 );
  if ( !samplers_are_current )
    return;

  // existing samplers are still valid, so mirror the new callbacks into them
  resize->samplers->in_pixels_cb = input_cb;
  resize->samplers->out_pixels_cb = output_cb;
}
// Sets the user data pointer (pass back STBIR_RESIZE* by default).
// The pointer is always stored on `resize`. If samplers already exist and no
// rebuild is pending, it is also written straight into the samplers.
STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data )
{
  resize->user_data = user_data;

  // nothing to mirror when there are no samplers yet or a rebuild is pending
  if ( ( !resize->samplers ) || ( resize->needs_rebuild ) )
    return;

  resize->samplers->user_data = user_data;
}
STBIRDEF void stbir_set_buffer_ptrs ( STBIR_RESIZE * resize , const void * input_pixels , int input_stride_in_bytes , void * output_pixels , int output_stride_in_bytes )
@ -7447,6 +7470,8 @@ STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_p
resize - > input_stride_in_bytes = input_stride_in_bytes ;
resize - > output_pixels = output_pixels ;
resize - > output_stride_in_bytes = output_stride_in_bytes ;
if ( ( resize - > samplers ) & & ( ! resize - > needs_rebuild ) )
stbir__update_info_from_resize ( resize - > samplers , resize ) ;
}
@ -7593,9 +7618,9 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
stbir__get_conservative_extents ( & horizontal , & conservative , resize - > user_data ) ;
stbir__set_sampler ( & vertical , resize - > vertical_filter , resize - > horizontal_filter_kernel , resize - > vertical_filter_support , resize - > vertical_edge , & vertical . scale_info , 0 , resize - > user_data ) ;
if ( ( vertical . scale_info . output_sub_size / splits ) < 4 ) // each split should be a minimum of 4 scanlines (handwavey choice)
if ( ( vertical . scale_info . output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice)
{
splits = vertical . scale_info . output_sub_size / 4 ;
splits = vertical . scale_info . output_sub_size / STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ;
if ( splits = = 0 ) splits = 1 ;
}
@ -7612,6 +7637,10 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
# ifdef STBIR_PROFILE
STBIR_MEMCPY ( & out_info - > profile , & profile_infod . profile , sizeof ( out_info - > profile ) ) ;
# endif
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize ( out_info , resize ) ;
return splits ;
}
@ -7680,10 +7709,6 @@ STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize )
STBIR_PROFILE_BUILD_CLEAR ( resize - > samplers ) ;
}
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize ( resize - > samplers , resize ) ;
// do resize
result = stbir__perform_resize ( resize - > samplers , 0 , resize - > splits ) ;
@ -7712,9 +7737,6 @@ STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start
if ( ( split_start > = resize - > splits ) | | ( split_start < 0 ) | | ( ( split_start + split_count ) > resize - > splits ) | | ( split_count < = 0 ) )
return 0 ;
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize ( resize - > samplers , resize ) ;
// do resize
return stbir__perform_resize ( resize - > samplers , split_start , split_count ) ;
}