parent
5736b15f7e
commit
c4bbb6e75f
14 changed files with 15329 additions and 15 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,224 @@ |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
|
||||
#define stop() __debugbreak() |
||||
#include <windows.h> |
||||
#define int64 __int64 |
||||
#pragma warning(disable:4127) |
||||
|
||||
#define get_milliseconds GetTickCount |
||||
|
||||
#else |
||||
|
||||
#define stop() __builtin_trap() |
||||
#define int64 long long |
||||
|
||||
typedef unsigned int U32; |
||||
typedef unsigned long long U64; |
||||
|
||||
#include <time.h> |
||||
static int get_milliseconds() |
||||
{ |
||||
struct timespec ts; |
||||
clock_gettime( CLOCK_MONOTONIC, &ts ); |
||||
return (U32) ( ( ((U64)(U32)ts.tv_sec) * 1000LL ) + (U64)(((U32)ts.tv_nsec+500000)/1000000) ); |
||||
} |
||||
|
||||
#endif |
||||
|
||||
#if defined(TIME_SIMD) |
||||
// default for most platforms
|
||||
#elif defined(TIME_SCALAR) |
||||
#define STBIR_NO_SIMD |
||||
#else |
||||
#error You must define TIME_SIMD or TIME_SCALAR when compiling this file. |
||||
#endif |
||||
|
||||
#define STBIR_PROFILE |
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION |
||||
#define STBIR__V_FIRST_INFO_BUFFER v_info |
||||
#include "stb_image_resize2.h" // new one! |
||||
|
||||
#if defined(TIME_SIMD) && !defined(STBIR_SIMD) |
||||
#error Timing SIMD, but scalar was ON! |
||||
#endif |
||||
|
||||
#if defined(TIME_SCALAR) && defined(STBIR_SIMD) |
||||
#error Timing scalar, but SIMD was ON! |
||||
#endif |
||||
|
||||
#define HEADER 32 |
||||
|
||||
|
||||
// Writes `size` bytes from `buffer` to `filename`, creating or truncating the file in binary mode.
// Returns 1 only when every byte was written and the stream closed cleanly;
// returns 0 if the file could not be opened, the write was short, or the close (flush) failed.
static int file_write( const char *filename, void * buffer, size_t size )
{
  FILE * f;
  int ok;

  f = fopen( filename, "wb" );
  if ( f == NULL )
    return 0;

  ok = ( fwrite( buffer, 1, size, f ) == size );

  // always close, even after a short write, so the handle is never leaked;
  // fclose flushes buffered data, so a failure here also means data was lost
  if ( fclose( f ) != 0 )
    ok = 0;

  return ok;
}
||||
|
||||
int64 nresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt ) |
||||
{ |
||||
STBIR_RESIZE resize; |
||||
int t; |
||||
int64 b; |
||||
|
||||
stbir_resize_init( &resize, i, ix, iy, ip, o, ox, oy, op, buf, type ); |
||||
stbir_set_edgemodes( &resize, edg, edg ); |
||||
stbir_set_filters( &resize, flt, flt ); |
||||
|
||||
stbir_build_samplers_with_splits( &resize, 1 ); |
||||
|
||||
b = 0x7fffffffffffffffULL; |
||||
for( t = 0 ; t < 16 ; t++ ) |
||||
{ |
||||
STBIR_PROFILE_INFO profile; |
||||
int64 v; |
||||
if(!stbir_resize_extended( &resize ) ) |
||||
stop(); |
||||
stbir_resize_extended_profile_info( &profile, &resize ); |
||||
v = profile.clocks[1]+profile.clocks[2]; |
||||
if ( v < b ) |
||||
{ |
||||
b = v; |
||||
t = 0; |
||||
} |
||||
} |
||||
|
||||
stbir_free_samplers( &resize ); |
||||
|
||||
return b; |
||||
} |
||||
|
||||
|
||||
#define INSIZES 5 |
||||
#define TYPESCOUNT 5 |
||||
#define NUM 64 |
||||
|
||||
static const int sizes[INSIZES]={63,126,252,520,772}; |
||||
static const int types[TYPESCOUNT]={STBIR_1CHANNEL,STBIR_2CHANNEL,STBIR_RGB,STBIR_4CHANNEL,STBIR_RGBA}; |
||||
static const int effective[TYPESCOUNT]={1,2,3,4,7}; |
||||
|
||||
int main( int argc, char ** argv ) |
||||
{ |
||||
unsigned char * input; |
||||
unsigned char * output; |
||||
int dimensionx, dimensiony; |
||||
int scalex, scaley; |
||||
int totalms; |
||||
int timing_count; |
||||
int ir; |
||||
int * file; |
||||
int * ts; |
||||
int64 totalcycles; |
||||
|
||||
if ( argc != 6 ) |
||||
{ |
||||
printf("command: dotimings x_samps y_samps x_scale y_scale outfilename\n"); |
||||
exit(1); |
||||
} |
||||
|
||||
input = malloc( 4*1200*1200 ); |
||||
memset( input, 0x80, 4*1200*1200 ); |
||||
output = malloc( 4*10000*10000ULL ); |
||||
|
||||
dimensionx = atoi( argv[1] ); |
||||
dimensiony = atoi( argv[2] ); |
||||
scalex = atoi( argv[3] ); |
||||
scaley = atoi( argv[4] ); |
||||
|
||||
timing_count = dimensionx * dimensiony * INSIZES * TYPESCOUNT; |
||||
|
||||
file = malloc( sizeof(int) * ( 2 * timing_count + HEADER ) ); |
||||
ts = file + HEADER; |
||||
|
||||
totalms = get_milliseconds();
|
||||
totalcycles = STBIR_PROFILE_FUNC(); |
||||
for( ir = 0 ; ir < INSIZES ; ir++ ) |
||||
{ |
||||
int ix, iy, ty; |
||||
ix = iy = sizes[ir]; |
||||
|
||||
for( ty = 0 ; ty < TYPESCOUNT ; ty++ ) |
||||
{ |
||||
int h, hh; |
||||
|
||||
h = 1; |
||||
for( hh = 0 ; hh < dimensiony; hh++ ) |
||||
{ |
||||
int ww, w = 1; |
||||
for( ww = 0 ; ww < dimensionx; ww++ ) |
||||
{ |
||||
int64 VF, HF; |
||||
int good; |
||||
|
||||
v_info.control_v_first = 2; // vertical first
|
||||
VF = nresize( output, w, h, (w*4*1)&~3, input, ix, iy, ix*4*1, types[ty], STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_MITCHELL ); |
||||
v_info.control_v_first = 1; // horizonal first
|
||||
HF = nresize( output, w, h, (w*4*1)&~3, input, ix, iy, ix*4*1, types[ty], STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_MITCHELL ); |
||||
|
||||
good = ( ((HF<=VF) && (!v_info.v_first)) || ((VF<=HF) && (v_info.v_first))); |
||||
|
||||
// printf("\r%d,%d, %d,%d, %d, %I64d,%I64d, // Good: %c(%c-%d) CompEst: %.1f %.1f\n", ix, iy, w, h, ty, VF, HF, good?'y':'n', v_info.v_first?'v':'h', v_info.v_resize_classification, v_info.v_cost,v_info.h_cost );
|
||||
ts[0] = (int)VF; |
||||
ts[1] = (int)HF; |
||||
|
||||
ts += 2; |
||||
|
||||
w += scalex; |
||||
} |
||||
printf("."); |
||||
h += scaley;
|
||||
} |
||||
} |
||||
} |
||||
totalms = get_milliseconds() - totalms;
|
||||
totalcycles = STBIR_PROFILE_FUNC() - totalcycles; |
||||
|
||||
printf("\n"); |
||||
|
||||
file[0] = 'VFT1'; |
||||
|
||||
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ ) |
||||
file[1] = 1; // x64
|
||||
#elif defined( _M_AMD64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__arm__) || defined( _M_ARM ) |
||||
file[1] = 2; // arm
|
||||
#else |
||||
file[1] = 99; // who knows???
|
||||
#endif |
||||
|
||||
#ifdef STBIR_SIMD8 |
||||
file[2] = 2; // simd-8
|
||||
#elif defined( STBIR_SIMD ) |
||||
file[2] = 1; // simd-4
|
||||
#else |
||||
file[2] = 0; // nosimd
|
||||
#endif |
||||
|
||||
file[3] = dimensionx; // dimx
|
||||
file[4] = dimensiony; // dimy
|
||||
file[5] = TYPESCOUNT; // channel types
|
||||
file[ 6] = types[0]; file[7] = types[1]; file[8] = types[2]; file[9] = types[3]; file[10] = types[4]; // buffer_type
|
||||
file[11] = effective[0]; file[12] = effective[1]; file[13] = effective[2]; file[14] = effective[3]; file[15] = effective[4]; // effective channels
|
||||
file[16] = INSIZES; // resizes
|
||||
file[17] = sizes[0]; file[18] = sizes[0]; // input sizes (w x h)
|
||||
file[19] = sizes[1]; file[20] = sizes[1]; |
||||
file[21] = sizes[2]; file[22] = sizes[2]; |
||||
file[23] = sizes[3]; file[24] = sizes[3]; |
||||
file[25] = sizes[4]; file[26] = sizes[4]; |
||||
file[27] = scalex; file[28] = scaley; // scale the dimx and dimy amount ( for(i=0;i<dimx) outputx = 1 + i*scalex; )
|
||||
file[29] = totalms; |
||||
((int64*)(file+30))[0] = totalcycles; |
||||
|
||||
if ( !file_write( argv[5], file, sizeof(int) * ( 2 * timing_count + HEADER ) ) ) |
||||
printf( "Error writing file: %s\n", argv[5] ); |
||||
else |
||||
printf( "Successfully wrote timing file: %s\n", argv[5] ); |
||||
|
||||
return 0; |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,56 @@ |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#define stop() __debugbreak() |
||||
#else |
||||
#define stop() __builtin_trap() |
||||
#endif |
||||
|
||||
//#define HEAVYTM
|
||||
#include "tm.h" |
||||
|
||||
#define STBIR_SATURATE_INT |
||||
#define STB_IMAGE_RESIZE_STATIC |
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION |
||||
#include "old_image_resize.h" |
||||
|
||||
|
||||
static int types[4] = { STBIR_TYPE_UINT8, STBIR_TYPE_UINT8, STBIR_TYPE_UINT16, STBIR_TYPE_FLOAT }; |
||||
static int edges[4] = { STBIR_EDGE_CLAMP, STBIR_EDGE_REFLECT, STBIR_EDGE_ZERO, STBIR_EDGE_WRAP }; |
||||
static int flts[5] = { STBIR_FILTER_BOX, STBIR_FILTER_TRIANGLE, STBIR_FILTER_CUBICBSPLINE, STBIR_FILTER_CATMULLROM, STBIR_FILTER_MITCHELL }; |
||||
static int channels[20] = { 1, 2, 3, 4, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2 };
|
||||
static int alphapos[20] = { -1, -1, -1, -1, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0 };
|
||||
|
||||
|
||||
void oresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt ) |
||||
{ |
||||
int t = types[type]; |
||||
int ic = channels[buf]; |
||||
int alpha = alphapos[buf]; |
||||
int e = edges[edg]; |
||||
int f = flts[flt]; |
||||
int space = ( type == 1 ) ? STBIR_COLORSPACE_SRGB : 0; |
||||
int flags = ( buf >= 16 ) ? STBIR_FLAG_ALPHA_PREMULTIPLIED : ( ( buf >= 12 ) ? STBIR_FLAG_ALPHA_OUT_PREMULTIPLIED : ( ( buf >= 8 ) ? (STBIR_FLAG_ALPHA_PREMULTIPLIED|STBIR_FLAG_ALPHA_OUT_PREMULTIPLIED) : 0 ) ); |
||||
stbir_uint64 start; |
||||
|
||||
ENTER( "Resize (old)" ); |
||||
start = tmGetAccumulationStart( tm_mask ); |
||||
|
||||
if(!stbir_resize( i, ix, iy, ip, o, ox, oy, op, t, ic, alpha, flags, e, e, f, f, space, 0 ) ) |
||||
stop(); |
||||
|
||||
#ifdef STBIR_PROFILE |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.setup, "Setup (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.filters, "Filters (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.looping, "Looping (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.vertical, "Vertical (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.horizontal, "Horizontal (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.decode, "Scanline input (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.encode, "Scanline output (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.alpha, "Alpha weighting (old)" ); |
||||
tmEmitAccumulationZone( 0, 0, (tm_uint64 *)&start, 0, oldprofile.named.unalpha, "Alpha unweighting (old)" ); |
||||
#endif |
||||
|
||||
LEAVE(); |
||||
} |
@ -0,0 +1,992 @@ |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
//#define HEAVYTM
|
||||
#include "tm.h" |
||||
|
||||
#ifdef RADUSETM3 |
||||
tm_api * g_tm_api; |
||||
//#define PROFILE_MODE
|
||||
#endif |
||||
|
||||
#include <math.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#define stop() __debugbreak() |
||||
#include <windows.h> |
||||
#define int64 __int64 |
||||
#define uint64 unsigned __int64 |
||||
#else |
||||
#define stop() __builtin_trap() |
||||
#define int64 long long |
||||
#define uint64 unsigned long long |
||||
#endif |
||||
|
||||
#ifdef _MSC_VER |
||||
#pragma warning(disable:4127) |
||||
#endif |
||||
|
||||
//#define NOCOMP
|
||||
|
||||
|
||||
//#define PROFILE_NEW_ONLY
|
||||
//#define PROFILE_MODE
|
||||
|
||||
|
||||
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ ) |
||||
|
||||
#ifdef _MSC_VER |
||||
|
||||
uint64 __rdtsc(); |
||||
#define __cycles() __rdtsc() |
||||
|
||||
#else // non msvc
|
||||
|
||||
static inline uint64 __cycles()
|
||||
{ |
||||
unsigned int lo, hi; |
||||
asm volatile ("rdtsc" : "=a" (lo), "=d" (hi) ); |
||||
return ( ( (uint64) hi ) << 32 ) | ( (uint64) lo ); |
||||
} |
||||
|
||||
#endif // msvc
|
||||
|
||||
#elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__) |
||||
|
||||
#ifdef _MSC_VER |
||||
|
||||
#define __cycles() _ReadStatusReg(ARM64_CNTVCT) |
||||
|
||||
#else |
||||
|
||||
// Returns the value of the cntvct_el0 counter register (non-MSVC ARM builds).
// Spelled __asm__ __volatile__ because plain `asm` is not a keyword when the compiler
// runs in a strict ISO mode such as -std=c11.
static inline uint64 __cycles()
{
  uint64 tsc;
  __asm__ __volatile__ ( "mrs %0, cntvct_el0" : "=r" (tsc) );
  return tsc;
}
||||
|
||||
#endif |
||||
|
||||
#else // x64, arm
|
||||
|
||||
#error Unknown platform for timing. |
||||
|
||||
#endif // x64 and arm
|
||||
|
||||
|
||||
#ifdef PROFILE_MODE |
||||
|
||||
#define STBIR_ASSERT(cond) |
||||
|
||||
#endif |
||||
|
||||
#ifdef _DEBUG |
||||
#undef STBIR_ASSERT |
||||
#define STBIR_ASSERT(cond) { if (!(cond)) stop(); } |
||||
#endif |
||||
|
||||
|
||||
#define SHRINKBYW 2 |
||||
#define ZOOMBYW 2 |
||||
#define SHRINKBYH 2 |
||||
#define ZOOMBYH 2 |
||||
|
||||
|
||||
int mem_count = 0; |
||||
|
||||
#ifdef TEST_WITH_VALLOC |
||||
|
||||
#define STBIR__SEPARATE_ALLOCATIONS |
||||
|
||||
#if TEST_WITH_LIMIT_AT_FRONT |
||||
|
||||
// Test allocator for the TEST_WITH_LIMIT_AT_FRONT configuration.
// Rounds `size` up to a whole number of pages and returns that many committed,
// read/write pages straight from VirtualAlloc.  This variant adds no guard pages and no
// hidden header, and it does not touch mem_count.
// Returns the VirtualAlloc result, which is 0 on failure.
void * wmalloc(SIZE_T size)
{
  static unsigned int pagesize=0;   // system page size, queried on the first call
  SIZE_T page_mask;
  SIZE_T rounded;

  if ( pagesize == 0 )
  {
    SYSTEM_INFO si;
    GetSystemInfo( &si );
    pagesize = si.dwPageSize;
  }

  // build the mask at SIZE_T width: an unsigned int mask would be zero-extended and
  // would clear the upper bits of a 64-bit size
  page_mask = (SIZE_T) pagesize - 1;
  rounded = ( size + page_mask ) & ~page_mask;

  return VirtualAlloc( 0, rounded, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE );
}
||||
|
||||
void wfree(void * ptr) |
||||
{ |
||||
if (ptr) |
||||
{ |
||||
if ( ((ptrdiff_t)ptr) & 4095 ) stop(); |
||||
if ( VirtualFree(ptr,0,MEM_RELEASE) == 0 ) stop(); |
||||
} |
||||
} |
||||
|
||||
#else |
||||
|
||||
// Guard-page test allocator (TEST_WITH_VALLOC without TEST_WITH_LIMIT_AT_FRONT).
//
// Layout of each allocation:  [no-access page][s usable bytes][no-access page]
// where s is size+16 rounded up to whole pages.  By default the returned pointer is
// placed so that the block ENDS exactly at the trailing no-access page; with
// ERROR_ON_FRONT defined it is placed 16 bytes after the leading no-access page instead.
// The returned address may be unaligned; pre-align the sizes if aligned pointers are needed.
//
// A validation word (0x98765432) and the VirtualAlloc base pointer are stored just in
// front of the returned pointer for wfree, and mem_count is incremented.
// Returns 0 if VirtualAlloc fails.
void * wmalloc(SIZE_T size)
{
  static unsigned int pagesize=0;   // system page size, queried on the first call
  SIZE_T page_mask;
  SIZE_T s;
  char * base;
  char * user;
  DWORD oldprot;

  if ( pagesize == 0 )
  {
    SYSTEM_INFO si;
    GetSystemInfo( &si );
    pagesize = si.dwPageSize;
  }

  // room for the request plus the 16 hidden bytes, rounded up to whole pages; the mask
  // is built at SIZE_T width because an unsigned int mask would be zero-extended and
  // would clear the upper bits of a 64-bit size
  page_mask = (SIZE_T) pagesize - 1;
  s = ( size + 16 + page_mask ) & ~page_mask;

  // usable pages plus one guard page on each side
  base = (char*) VirtualAlloc( 0, (SIZE_T)( s + pagesize + pagesize ), MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE );
  if ( base == 0 )
    return 0;

  // make the first and the final page inaccessible
  VirtualProtect( base, pagesize, PAGE_NOACCESS, &oldprot );
  VirtualProtect( base + s + pagesize, pagesize, PAGE_NOACCESS, &oldprot );

  //#define ERROR_ON_FRONT
#ifdef ERROR_ON_FRONT
  user = base + pagesize + 16;
#else
  user = base + ( s - size ) + pagesize;
#endif

  // hide the validation value and the original pointer (needed for freeing)
  // right behind the returned pointer
  ((unsigned int*)user)[-1] = 0x98765432;
  ((void**)user)[-2] = base;
  ++mem_count;
  //printf("aloc: %p bytes: %d\n",p,(int)size);

  return user;
}
||||
|
||||
void wfree(void * ptr) |
||||
{ |
||||
if (ptr) |
||||
{ |
||||
int err=0; |
||||
|
||||
// is this one of our allocations?
|
||||
if (((((unsigned int*)ptr)[-1])!=0x98765432) || ((((void**)ptr)[-2])==0)) |
||||
{ |
||||
err=1; |
||||
} |
||||
|
||||
if (err) |
||||
{ |
||||
__debugbreak(); |
||||
} |
||||
else |
||||
{ |
||||
|
||||
// back up to find the original pointer
|
||||
void* p=((void**)ptr)[-2]; |
||||
|
||||
// clear the validation value and the original pointer
|
||||
((unsigned int*)ptr)[-1]=0; |
||||
((void**)ptr)[-2]=0; |
||||
|
||||
//printf("free: %p\n",ptr);
|
||||
|
||||
--mem_count; |
||||
|
||||
// now free the pages
|
||||
if (p) |
||||
VirtualFree(p,0,MEM_RELEASE); |
||||
|
||||
} |
||||
} |
||||
} |
||||
|
||||
#endif |
||||
|
||||
#define STBIR_MALLOC(size,user_data) ((void)(user_data), wmalloc(size)) |
||||
#define STBIR_FREE(ptr,user_data) ((void)(user_data), wfree(ptr)) |
||||
|
||||
#endif |
||||
|
||||
#define STBIR_PROFILE |
||||
//#define STBIR_NO_SIMD
|
||||
//#define STBIR_AVX
|
||||
//#define STBIR_AVX2
|
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION |
||||
#include "stb_image_resize2.h" // new one! |
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION |
||||
#include "stb_image_write.h" |
||||
|
||||
int tsizes[5] = { 1, 1, 2, 4, 2 }; |
||||
int ttypes[5] = { STBIR_TYPE_UINT8, STBIR_TYPE_UINT8_SRGB, STBIR_TYPE_UINT16, STBIR_TYPE_FLOAT, STBIR_TYPE_HALF_FLOAT }; |
||||
|
||||
int cedges[4] = { STBIR_EDGE_CLAMP, STBIR_EDGE_REFLECT, STBIR_EDGE_ZERO, STBIR_EDGE_WRAP }; |
||||
int flts[5] = { STBIR_FILTER_BOX, STBIR_FILTER_TRIANGLE, STBIR_FILTER_CUBICBSPLINE, STBIR_FILTER_CATMULLROM, STBIR_FILTER_MITCHELL }; |
||||
int buffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
|
||||
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR, |
||||
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM, |
||||
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR, |
||||
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM, |
||||
}; |
||||
int obuffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
|
||||
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR, |
||||
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM, |
||||
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM, |
||||
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR, |
||||
}; |
||||
|
||||
int bchannels[20] = { 1, 2, 3, 4, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2 };
|
||||
int alphapos[20] = { -1, -1, -1, -1, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0,3,0, 1,0 };
|
||||
|
||||
|
||||
char const * buffstrs[20] = { "1ch", "2ch", "3ch", "4ch", "RGBA", "ARGB", "RA", "AR", "RGBA_both_pre", "ARGB_both_pre", "RA_both_pre", "AR_both_pre", "RGBA_out_pre", "ARGB_out_pre", "RA_out_pre", "AR_out_pre", "RGBA_in_pre", "ARGB_in_pre", "RA_in_pre", "AR_in_pre" }; |
||||
char const * typestrs[5] = { "Bytes", "BytesSRGB", "Shorts", "Floats", "Half Floats"}; |
||||
char const * edgestrs[4] = { "Clamp", "Reflect", "Zero", "Wrap" }; |
||||
char const * fltstrs[5] = { "Box", "Triangle", "Cubic", "Catmullrom", "Mitchell" }; |
||||
|
||||
#ifdef STBIR_PROFILE |
||||
// Emits one accumulation zone for every non-zero entry of profile->clocks,
// labelled with the matching entry of profile->descriptions.
static void do_acc_zones( STBIR_PROFILE_INFO * profile )
{
  stbir_uint64 start = tmGetAccumulationStart( tm_mask );
  stbir_uint32 zone = 0;

  start = start;   // self-assignment, presumably to keep `start` referenced if the tm calls compile away

  while ( zone < profile->count )
  {
    if ( profile->clocks[zone] != 0 )
      tmEmitAccumulationZone( 0, 0, (tm_uint64*)&start, 0, profile->clocks[zone], profile->descriptions[zone] );
    ++zone;
  }
}
||||
#else |
||||
#define do_acc_zones(...) |
||||
#endif |
||||
|
||||
int64 vert; |
||||
|
||||
//#define WINTHREADTEST
|
||||
#ifdef WINTHREADTEST |
||||
|
||||
static STBIR_RESIZE * thread_resize; |
||||
static LONG which; |
||||
static int threads_started = 0; |
||||
static HANDLE threads[32]; |
||||
static HANDLE starts,stops; |
||||
|
||||
// Worker thread body for the WINTHREADTEST configuration; never returns.
// Each pass waits on the `starts` semaphore, claims the next split index by atomically
// bumping `which`, resizes that one split of thread_resize, and then signals `stops`.
// NOTE(review): CreateThread expects a `DWORD WINAPI fn(LPVOID)` start routine; this
// function is declared without WINAPI -- confirm before building for 32-bit x86.
// NOTE(review): every worker stores into the shared global `vert` without synchronization.
static DWORD resize_shim( LPVOID p )
{
  for(;;)
  {
    LONG split;

    WaitForSingleObject( starts, INFINITE );

    // InterlockedAdd returns the incremented value, so subtract one for a 0-based index
    split = InterlockedAdd( &which, 1 ) - 1;

    ENTER( "Split %d", split );
    stbir_resize_split( thread_resize, split, 1 );
#ifdef STBIR_PROFILE
    {
      STBIR_PROFILE_INFO profile;
      stbir_resize_split_profile_info( &profile, thread_resize, split, 1 );
      do_acc_zones( &profile );
      vert = profile.clocks[1];
    }
#endif
    LEAVE();

    ReleaseSemaphore( stops, 1, 0 );
  }
}
||||
|
||||
#endif |
||||
|
||||
void nresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt ) |
||||
{ |
||||
STBIR_RESIZE resize; |
||||
|
||||
stbir_resize_init( &resize, i, ix, iy, ip, o, ox, oy, op, buffers[buf], ttypes[type] ); |
||||
stbir_set_pixel_layouts( &resize, buffers[buf], obuffers[buf] ); |
||||
stbir_set_edgemodes( &resize, cedges[edg], cedges[edg] ); |
||||
stbir_set_filters( &resize, flts[flt], /*STBIR_FILTER_POINT_SAMPLE */ flts[flt] ); |
||||
//stbir_set_input_subrect( &resize, 0.55f,0.333f,0.75f,0.50f);
|
||||
//stbir_set_output_pixel_subrect( &resize, 00, 00, ox/2,oy/2);
|
||||
//stbir_set_pixel_subrect(&resize, 1430,1361,30,30);
|
||||
|
||||
ENTER( "Resize" ); |
||||
|
||||
#ifndef WINTHREADTEST |
||||
|
||||
ENTER( "Filters" ); |
||||
stbir_build_samplers_with_splits( &resize, 1 ); |
||||
#ifdef STBIR_PROFILE |
||||
{ STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); } |
||||
#endif |
||||
LEAVE(); |
||||
|
||||
ENTER( "Resize" ); |
||||
if(!stbir_resize_extended( &resize ) ) |
||||
stop(); |
||||
#ifdef STBIR_PROFILE |
||||
{ STBIR_PROFILE_INFO profile; stbir_resize_extended_profile_info( &profile, &resize ); do_acc_zones( &profile ); vert = profile.clocks[1]; } |
||||
#endif |
||||
LEAVE(); |
||||
|
||||
#else |
||||
{ |
||||
int c, cnt; |
||||
|
||||
ENTER( "Filters" ); |
||||
cnt = stbir_build_samplers_with_splits( &resize, 4 ); |
||||
#ifdef STBIR_PROFILE |
||||
{ STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); } |
||||
#endif |
||||
LEAVE(); |
||||
|
||||
ENTER( "Thread start" ); |
||||
if ( threads_started == 0 ) |
||||
{ |
||||
starts = CreateSemaphore( 0, 0, 32, 0 ); |
||||
stops = CreateSemaphore( 0, 0, 32, 0 ); |
||||
} |
||||
for( c = threads_started ; c < cnt ; c++ ) |
||||
threads[ c ] = CreateThread( 0, 2048*1024, resize_shim, 0, 0, 0 ); |
||||
|
||||
threads_started = cnt; |
||||
thread_resize = &resize; |
||||
which = 0; |
||||
LEAVE(); |
||||
|
||||
// starts the threads
|
||||
ReleaseSemaphore( starts, cnt, 0 ); |
||||
|
||||
ENTER( "Wait" ); |
||||
for( c = 0 ; c < cnt; c++ ) |
||||
WaitForSingleObject( stops, INFINITE ); |
||||
LEAVE(); |
||||
} |
||||
#endif |
||||
|
||||
ENTER( "Free" ); |
||||
stbir_free_samplers( &resize ); |
||||
LEAVE(); |
||||
LEAVE(); |
||||
} |
||||
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION |
||||
#include "stb_image.h" |
||||
|
||||
extern void oresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt ); |
||||
|
||||
|
||||
|
||||
#define TYPESTART 0 |
||||
#define TYPEEND 4 |
||||
|
||||
#define LAYOUTSTART 0 |
||||
#define LAYOUTEND 19 |
||||
|
||||
#define SIZEWSTART 0 |
||||
#define SIZEWEND 2 |
||||
|
||||
#define SIZEHSTART 0 |
||||
#define SIZEHEND 2 |
||||
|
||||
#define EDGESTART 0 |
||||
#define EDGEEND 3 |
||||
|
||||
#define FILTERSTART 0 |
||||
#define FILTEREND 4 |
||||
|
||||
#define HEIGHTSTART 0 |
||||
#define HEIGHTEND 2 |
||||
|
||||
#define WIDTHSTART 0 |
||||
#define WIDTHEND 2 |
||||
|
||||
|
||||
|
||||
|
||||
// Expands an 8-bit image to 16 bits per component by replicating each byte into both
// halves of the result (0x00 -> 0x0000, 0xAB -> 0xABAB, 0xFF -> 0xFFFF).
//   i       : source components, w*h*c bytes
//   w, h, c : width, height and channel count
// Returns a malloc'ed array of w*h*c unsigned shorts that the caller must free, or NULL
// when `i` is NULL, a dimension is negative, the size overflows or the allocation fails.
static void * convert8to16( unsigned char * i, int w, int h, int c )
{
  unsigned short * ret;
  size_t count;
  size_t p;

  if ( ( i == NULL ) || ( w < 0 ) || ( h < 0 ) || ( c < 0 ) )
    return NULL;

  // count in size_t so that large images do not overflow int arithmetic
  count = (size_t) w * (size_t) h * (size_t) c;
  if ( count > ( (size_t) -1 ) / sizeof( *ret ) )
    return NULL;

  ret = malloc( count * sizeof( *ret ) );
  if ( ret == NULL )
    return NULL;

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = (unsigned short) ( ( ( (unsigned int) i[p] ) << 8 ) | i[p] );
  }

  return ret;
}
||||
|
||||
// Converts an 8-bit image to floats by scaling every component by 1/255
// (0 -> 0.0f, 255 -> about 1.0f).
//   i       : source components, w*h*c bytes
//   w, h, c : width, height and channel count
// Returns a malloc'ed array of w*h*c floats that the caller must free, or NULL when
// `i` is NULL, a dimension is negative, the size overflows or the allocation fails.
static void * convert8tof( unsigned char * i, int w, int h, int c )
{
  float * ret;
  size_t count;
  size_t p;

  if ( ( i == NULL ) || ( w < 0 ) || ( h < 0 ) || ( c < 0 ) )
    return NULL;

  // count in size_t so that large images do not overflow int arithmetic
  count = (size_t) w * (size_t) h * (size_t) c;
  if ( count > ( (size_t) -1 ) / sizeof( *ret ) )
    return NULL;

  ret = malloc( count * sizeof( *ret ) );
  if ( ret == NULL )
    return NULL;

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = ( (float) i[p] ) * ( 1.0f / 255.0f );
  }

  return ret;
}
||||
|
||||
static void * convert8tohf( unsigned char * i, int w, int h, int c ) |
||||
{ |
||||
stbir__FP16 * ret; |
||||
int p; |
||||
|
||||
ret = malloc( w*h*c*sizeof(stbir__FP16) ); |
||||
for(p = 0 ; p < (w*h*c) ; p++ ) |
||||
{ |
||||
ret[p]=stbir__float_to_half(((float)i[p])*(1.0f/255.0f)); |
||||
} |
||||
|
||||
return ret; |
||||
} |
||||
|
||||
// Converts an 8-bit image to floats that have been round-tripped through half precision:
// every component is scaled by 1/255, converted with stbir__float_to_half and converted
// back with stbir__half_to_float.
//   i       : source components, w*h*c bytes
//   w, h, c : width, height and channel count
// Returns a malloc'ed array of w*h*c floats that the caller must free, or NULL when
// `i` is NULL, a dimension is negative, the size overflows or the allocation fails.
static void * convert8tohff( unsigned char * i, int w, int h, int c )
{
  float * ret;
  size_t count;
  size_t p;

  if ( ( i == NULL ) || ( w < 0 ) || ( h < 0 ) || ( c < 0 ) )
    return NULL;

  // count in size_t so that large images do not overflow int arithmetic
  count = (size_t) w * (size_t) h * (size_t) c;
  if ( count > ( (size_t) -1 ) / sizeof( *ret ) )
    return NULL;

  ret = malloc( count * sizeof( *ret ) );
  if ( ret == NULL )
    return NULL;

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = stbir__half_to_float( stbir__float_to_half( ( (float) i[p] ) * ( 1.0f / 255.0f ) ) );
  }

  return ret;
}
||||
|
||||
// Returns 1 if v is a prime number and 0 otherwise (including for v <= 1 and negatives).
// After ruling out multiples of 2 and 3, only candidates of the form 6k-1 and 6k+1
// are tried as divisors, up to the square root of v.
static int isprime( int v )
{
  int i;

  if ( v <= 3 )
    return ( v > 1 );
  if ( ( ( v & 1 ) == 0 ) || ( ( v % 3 ) == 0 ) )
    return 0;

  // "i <= v / i" is "i*i <= v" written so the test cannot overflow int for v near INT_MAX
  for( i = 5 ; i <= v / i ; i += 6 )
  {
    if ( ( ( v % i ) == 0 ) || ( ( v % ( i + 2 ) ) == 0 ) )
      return 0;
  }

  return 1;
}
||||
|
||||
// Returns a prime close to v: distances 0, 1, 2, ... below v are searched outwards,
// trying v - dist before v + dist at each step, and the first prime found is returned.
// If no prime turns up before the distance reaches v (which includes every v <= 0),
// v itself is returned unchanged.
static int getprime( int v )
{
  int dist;

  for( dist = 0 ; dist < v ; ++dist )
  {
    if ( isprime( v - dist ) )
      return v - dist;
    if ( isprime( v + dist ) )
      return v + dist;
  }

  return v; // can't find any, just return orig
}
||||
|
||||
|
||||
int main( int argc, char ** argv ) |
||||
{ |
||||
int ix, iy, ic; |
||||
unsigned char * input[6]; |
||||
char * ir1; |
||||
char * ir2; |
||||
int szhs[3]; |
||||
int szws[3]; |
||||
int aw, ah, ac; |
||||
unsigned char * correctalpha; |
||||
int layouts, types, heights, widths, edges, filters; |
||||
|
||||
if ( argc != 2 ) |
||||
{ |
||||
printf("command: stbirtest [imagefile]\n"); |
||||
exit(1); |
||||
} |
||||
|
||||
SetupTM( "127.0.0.1" ); |
||||
|
||||
correctalpha = stbi_load( "correctalpha.png", &aw, &ah, &ac, 0 ); |
||||
|
||||
input[0] = stbi_load( argv[1], &ix, &iy, &ic, 0 ); |
||||
input[1] = input[0]; |
||||
input[2] = convert8to16( input[0], ix, iy, ic ); |
||||
input[3] = convert8tof( input[0], ix, iy, ic ); |
||||
input[4] = convert8tohf( input[0], ix, iy, ic ); |
||||
input[5] = convert8tohff( input[0], ix, iy, ic ); |
||||
|
||||
printf("Input %dx%d (%d channels)\n",ix,iy,ic); |
||||
|
||||
ir1 = malloc( 4 * 4 * 3000 * 3000ULL ); |
||||
ir2 = malloc( 4 * 4 * 3000 * 3000ULL ); |
||||
|
||||
szhs[0] = getprime( iy/SHRINKBYH ); |
||||
szhs[1] = iy; |
||||
szhs[2] = getprime( iy*ZOOMBYH ); |
||||
|
||||
szws[0] = getprime( ix/SHRINKBYW ); |
||||
szws[1] = ix; |
||||
szws[2] = getprime( ix*ZOOMBYW ); |
||||
|
||||
#if 1 |
||||
for( types = TYPESTART ; types <= TYPEEND ; types++ ) |
||||
#else |
||||
for( types = 1 ; types <= 1 ; types++ ) |
||||
#endif |
||||
{ |
||||
ENTER( "Test type: %s",typestrs[types]); |
||||
#if 1 |
||||
for( layouts = LAYOUTSTART ; layouts <= LAYOUTEND ; layouts++ ) |
||||
#else |
||||
for( layouts = 16; layouts <= 16 ; layouts++ ) |
||||
#endif |
||||
{ |
||||
ENTER( "Test layout: %s",buffstrs[layouts]); |
||||
|
||||
#if 0 |
||||
for( heights = HEIGHTSTART ; heights <= HEIGHTEND ; heights++ ) |
||||
{ |
||||
int w, h = szhs[heights]; |
||||
#else |
||||
for( heights = 0 ; heights <= 11 ; heights++ ) |
||||
{ |
||||
static int szhsz[12]={32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 }; |
||||
int w, h = szhsz[heights]; |
||||
#endif |
||||
|
||||
ENTER( "Test height: %d %s %d",iy,(h<iy)?"Down":((h>iy)?"Up":"Same"),h); |
||||
|
||||
#if 0 |
||||
for( widths = WIDTHSTART ; widths <= WIDTHEND ; widths++ ) |
||||
{ |
||||
w = szws[widths]; |
||||
#else |
||||
for( widths = 0 ; widths <= 12 ; widths++ ) |
||||
{ |
||||
static int szwsz[13]={2, 32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 }; |
||||
w = szwsz[widths]; |
||||
#endif |
||||
|
||||
ENTER( "Test width: %d %s %d",ix, (w<ix)?"Down":((w>ix)?"Up":"Same"), w); |
||||
|
||||
#if 0 |
||||
for( edges = EDGESTART ; edges <= EDGEEND ; edges++ ) |
||||
#else |
||||
for( edges = 0 ; edges <= 0 ; edges++ ) |
||||
#endif |
||||
{ |
||||
ENTER( "Test edge: %s",edgestrs[edges]); |
||||
#if 0 |
||||
for( filters = FILTERSTART ; filters <= FILTEREND ; filters++ ) |
||||
#else |
||||
for( filters = 3 ; filters <= 3 ; filters++ ) |
||||
#endif |
||||
{ |
||||
int op, opw, np,npw, c, a; |
||||
#ifdef COMPARE_SAME |
||||
int oldtypes = types; |
||||
#else |
||||
int oldtypes = (types==4)?3:types; |
||||
#endif |
||||
|
||||
ENTER( "Test filter: %s",fltstrs[filters]); |
||||
{ |
||||
c = bchannels[layouts]; |
||||
a = alphapos[layouts]; |
||||
|
||||
op = w*tsizes[oldtypes]*c + 60; |
||||
opw = w*tsizes[oldtypes]*c; |
||||
|
||||
np = w*tsizes[types]*c + 60; |
||||
npw = w*tsizes[types]*c; |
||||
|
||||
printf( "%s:layout: %s w: %d h: %d edge: %s filt: %s\n", typestrs[types],buffstrs[layouts], w, h, edgestrs[edges], fltstrs[filters] ); |
||||
|
||||
|
||||
// clear pixel area to different, right edge to zero
|
||||
#ifndef NOCLEAR |
||||
ENTER( "Test clear padding" ); |
||||
{ |
||||
int d; |
||||
for( d = 0 ; d < h ; d++ ) |
||||
{ |
||||
int oofs = d * op; |
||||
int nofs = d * np; |
||||
memset( ir1 + oofs, 192, opw ); |
||||
memset( ir1 + oofs+opw, 79, op-opw ); |
||||
memset( ir2 + nofs, 255, npw ); |
||||
memset( ir2 + nofs+npw, 79, np-npw ); |
||||
} |
||||
} |
||||
LEAVE(); |
||||
|
||||
#endif |
||||
|
||||
#ifdef COMPARE_SAME |
||||
#define TIMINGS 1 |
||||
#else |
||||
#define TIMINGS 1 |
||||
#endif |
||||
ENTER( "Test both" ); |
||||
{ |
||||
#ifndef PROFILE_NEW_ONLY |
||||
{ |
||||
int ttt, max = 0x7fffffff; |
||||
ENTER( "Test old" ); |
||||
for( ttt = 0 ; ttt < TIMINGS ; ttt++ ) |
||||
{ |
||||
int64 m = __cycles(); |
||||
|
||||
oresize( ir1, w, h, op,
|
||||
#ifdef COMPARE_SAME |
||||
input[types],
|
||||
#else |
||||
input[(types==4)?5:types],
|
||||
#endif |
||||
ix, iy, ix*ic*tsizes[oldtypes], layouts, oldtypes, edges, filters ); |
||||
|
||||
m = __cycles() - m; |
||||
if ( ( (int)m ) < max ) |
||||
max = (int) m; |
||||
} |
||||
LEAVE(); |
||||
printf("old: %d\n", max ); |
||||
} |
||||
#endif |
||||
|
||||
{ |
||||
int ttt, max = 0x7fffffff, maxv = 0x7fffffff; |
||||
ENTER( "Test new" ); |
||||
for( ttt = 0 ; ttt < TIMINGS ; ttt++ ) |
||||
{ |
||||
int64 m = __cycles(); |
||||
|
||||
nresize( ir2, w, h, np, input[types], ix, iy, ix*ic*tsizes[types], layouts, types, edges, filters ); |
||||
|
||||
m = __cycles() - m; |
||||
if ( ( (int)m ) < max ) |
||||
max = (int) m; |
||||
if ( ( (int)vert ) < maxv ) |
||||
maxv = (int) vert; |
||||
} |
||||
LEAVE(); // test new
|
||||
printf("new: %d (v: %d)\n", max, maxv ); |
||||
} |
||||
} |
||||
LEAVE(); // test both
|
||||
|
||||
if ( mem_count!= 0 ) |
||||
stop(); |
||||
|
||||
#ifndef NOCOMP |
||||
ENTER( "Test compare" ); |
||||
{ |
||||
int x,y,ch; |
||||
int nums = 0; |
||||
for( y = 0 ; y < h ; y++ ) |
||||
{ |
||||
for( x = 0 ; x < w ; x++ ) |
||||
{ |
||||
switch(types) |
||||
{ |
||||
case 0: |
||||
case 1: //SRGB
|
||||
{ |
||||
unsigned char * p1 = (unsigned char *)&ir1[y*op+x*c]; |
||||
unsigned char * p2 = (unsigned char *)&ir2[y*np+x*c]; |
||||
for( ch = 0 ; ch < c ; ch++ ) |
||||
{ |
||||
float pp1,pp2,d; |
||||
float av = (a==-1)?1.0f:((float)p1[a]/255.0f); |
||||
|
||||
pp1 = p1[ch]; |
||||
pp2 = p2[ch]; |
||||
|
||||
// compare in premult space
|
||||
#ifndef COMPARE_SAME |
||||
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >=16 ) && ( layouts <= 19 ) ) ) |
||||
{ |
||||
pp1 *= av; |
||||
pp2 *= av; |
||||
} |
||||
#endif |
||||
|
||||
d = pp1 - pp2; |
||||
if ( d < 0 ) d = -d; |
||||
|
||||
#ifdef COMPARE_SAME |
||||
if ( d > 0 )
|
||||
#else |
||||
if ( d > 1 ) |
||||
#endif |
||||
{ |
||||
printf("Error at %d x %d (chan %d) (d: %g a: %g) [%d %d %d %d] [%d %d %d %d]\n",x,y,ch, d,av, p1[0],p1[1],p1[2],p1[3], p2[0],p2[1],p2[2],p2[3]); |
||||
++nums; |
||||
if ( nums > 16 ) goto ex; |
||||
//if (d) exit(1);
|
||||
//goto ex;
|
||||
} |
||||
}
|
||||
} |
||||
break; |
||||
|
||||
case 2: |
||||
{ |
||||
unsigned short * p1 = (unsigned short *)&ir1[y*op+x*c*sizeof(short)]; |
||||
unsigned short * p2 = (unsigned short *)&ir2[y*np+x*c*sizeof(short)]; |
||||
for( ch = 0 ; ch < c ; ch++ ) |
||||
{ |
||||
float thres,pp1,pp2,d; |
||||
float av = (a==-1)?1.0f:((float)p1[a]/65535.0f); |
||||
|
||||
pp1 = p1[ch]; |
||||
pp2 = p2[ch]; |
||||
|
||||
// compare in premult space
|
||||
#ifndef COMPARE_SAME |
||||
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) ) |
||||
{ |
||||
pp1 *= av; |
||||
pp2 *= av; |
||||
} |
||||
#endif |
||||
|
||||
d = pp1 - pp2; |
||||
if ( d < 0 ) d = -d; |
||||
|
||||
thres=((float)p1[ch]*0.007f)+2.0f; |
||||
if (thres<4) thres = 4; |
||||
|
||||
#ifdef COMPARE_SAME |
||||
if ( d > 0 )
|
||||
#else |
||||
if ( d > thres) |
||||
#endif |
||||
{ |
||||
printf("Error at %d x %d (chan %d) %d %d [df: %g th: %g al: %g] (%d %d %d %d) (%d %d %d %d)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]); |
||||
++nums; |
||||
if ( nums > 16 ) goto ex; |
||||
//if (d) exit(1);
|
||||
//goto ex;
|
||||
} |
||||
} |
||||
} |
||||
break; |
||||
|
||||
case 3: |
||||
{ |
||||
float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)]; |
||||
float * p2 = (float *)&ir2[y*np+x*c*sizeof(float)]; |
||||
for( ch = 0 ; ch < c ; ch++ ) |
||||
{ |
||||
float pp1 = p1[ch], pp2 = p2[ch]; |
||||
float av = (a==-1)?1.0f:p1[a]; |
||||
float thres, d; |
||||
|
||||
// clamp
|
||||
if (pp1<=0.0f) pp1 = 0; |
||||
if (pp2<=0.0f) pp2 = 0; |
||||
if (av<=0.0f) av = 0; |
||||
if (pp1>1.0f) pp1 = 1.0f; |
||||
if (pp2>1.0f) pp2 = 1.0f; |
||||
if (av>1.0f) av = 1.0f; |
||||
|
||||
// compare in premult space
|
||||
#ifndef COMPARE_SAME |
||||
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) ) |
||||
{ |
||||
pp1 *= av; |
||||
pp2 *= av; |
||||
} |
||||
#endif |
||||
|
||||
d = pp1 - pp2; |
||||
if ( d < 0 ) d = -d; |
||||
|
||||
thres=(p1[ch]*0.002f)+0.0002f; |
||||
if ( thres < 0 ) thres = -thres; |
||||
|
||||
#ifdef COMPARE_SAME |
||||
if ( d != 0.0f )
|
||||
#else |
||||
if ( d > thres ) |
||||
#endif |
||||
{ |
||||
printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]); |
||||
++nums; |
||||
if ( nums > 16 ) goto ex; |
||||
//if (d) exit(1);
|
||||
//goto ex;
|
||||
} |
||||
} |
||||
} |
||||
break; |
||||
|
||||
case 4: |
||||
{ |
||||
#ifdef COMPARE_SAME |
||||
stbir__FP16 * p1 = (stbir__FP16 *)&ir1[y*op+x*c*sizeof(stbir__FP16)]; |
||||
#else |
||||
float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)]; |
||||
#endif |
||||
stbir__FP16 * p2 = (stbir__FP16 *)&ir2[y*np+x*c*sizeof(stbir__FP16)]; |
||||
for( ch = 0 ; ch < c ; ch++ ) |
||||
{ |
||||
#ifdef COMPARE_SAME |
||||
float pp1 = stbir__half_to_float(p1[ch]); |
||||
float av = (a==-1)?1.0f:stbir__half_to_float(p1[a]); |
||||
#else |
||||
float pp1 = stbir__half_to_float(stbir__float_to_half(p1[ch])); |
||||
float av = (a==-1)?1.0f:stbir__half_to_float(stbir__float_to_half(p1[a])); |
||||
#endif |
||||
float pp2 = stbir__half_to_float(p2[ch]); |
||||
float d, thres; |
||||
|
||||
// clamp
|
||||
if (pp1<=0.0f) pp1 = 0; |
||||
if (pp2<=0.0f) pp2 = 0; |
||||
if (av<=0.0f) av = 0; |
||||
if (pp1>1.0f) pp1 = 1.0f; |
||||
if (pp2>1.0f) pp2 = 1.0f; |
||||
if (av>1.0f) av = 1.0f; |
||||
|
||||
thres=(pp1*0.002f)+0.0002f; |
||||
|
||||
// compare in premult space
|
||||
#ifndef COMPARE_SAME |
||||
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) ) |
||||
{ |
||||
pp1 *= av; |
||||
pp2 *= av; |
||||
} |
||||
#endif |
||||
|
||||
d = pp1 - pp2; |
||||
if ( d < 0 ) d = -d; |
||||
|
||||
|
||||
#ifdef COMPARE_SAME |
||||
if ( d != 0.0f )
|
||||
#else |
||||
if ( d > thres ) |
||||
#endif |
||||
{ |
||||
printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch,
|
||||
#ifdef COMPARE_SAME |
||||
stbir__half_to_float(p1[ch]),
|
||||
#else |
||||
p1[ch], |
||||
#endif |
||||
stbir__half_to_float(p2[ch]),
|
||||
d,thres,av, |
||||
#ifdef COMPARE_SAME |
||||
stbir__half_to_float(p1[0]),stbir__half_to_float(p1[1]),stbir__half_to_float(p1[2]),stbir__half_to_float(p1[3]), |
||||
#else |
||||
p1[0],p1[1],p1[2],p1[3], |
||||
#endif |
||||
stbir__half_to_float(p2[0]),stbir__half_to_float(p2[1]),stbir__half_to_float(p2[2]),stbir__half_to_float(p2[3]) ); |
||||
++nums; |
||||
if ( nums > 16 ) goto ex; |
||||
//if (d) exit(1);
|
||||
//goto ex;
|
||||
} |
||||
} |
||||
} |
||||
break; |
||||
} |
||||
} |
||||
|
||||
for( x = (w*c)*tsizes[oldtypes]; x < op; x++ ) |
||||
{ |
||||
if ( ir1[y*op+x] != 79 ) |
||||
{ |
||||
printf("Margin error at %d x %d %d (should be 79) OLD!\n",x,y,(unsigned char)ir1[y*op+x]); |
||||
goto ex; |
||||
} |
||||
} |
||||
|
||||
for( x = (w*c)*tsizes[types]; x < np; x++ ) |
||||
{ |
||||
if ( ir2[y*np+x] != 79 ) |
||||
{ |
||||
printf("Margin error at %d x %d %d (should be 79) NEW\n",x,y,(unsigned char)ir2[y*np+x]); |
||||
goto ex; |
||||
} |
||||
} |
||||
} |
||||
|
||||
ex: |
||||
ENTER( "OUTPUT IMAGES" ); |
||||
printf(" tot pix: %d, errs: %d\n", w*h*c,nums ); |
||||
|
||||
if (nums) |
||||
{ |
||||
stbi_write_png("old.png", w, h, c, ir1, op); |
||||
stbi_write_png("new.png", w, h, c, ir2, np); |
||||
exit(1); |
||||
} |
||||
|
||||
LEAVE(); // output images
|
||||
} |
||||
LEAVE(); //test compare
|
||||
#endif |
||||
|
||||
|
||||
|
||||
} |
||||
LEAVE(); // test filter
|
||||
} |
||||
LEAVE(); // test edge
|
||||
} |
||||
LEAVE(); // test width
|
||||
} |
||||
LEAVE(); // test height
|
||||
} |
||||
LEAVE(); // test type
|
||||
} |
||||
LEAVE(); // test layout
|
||||
} |
||||
|
||||
CloseTM(); |
||||
return 0; |
||||
} |
@ -0,0 +1,999 @@ |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
#define stop() __debugbreak() |
||||
#include <windows.h> |
||||
#define int64 __int64 |
||||
|
||||
#pragma warning(disable:4127) |
||||
|
||||
#define STBIR__WEIGHT_TABLES |
||||
#define STBIR_PROFILE |
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION |
||||
#include "stb_image_resize2.h" |
||||
|
||||
// Reads a whole file into a freshly malloc'ed int buffer.
//   result[0]   = file size in bytes
//   result[1..] = the raw file contents
// Returns 0 if the file cannot be opened, measured, allocated for, or read
// completely.  On success the caller owns the buffer (release with free()).
static int * file_read( char const * filename )
{
  long len;
  size_t s;
  int * m;
  FILE * f = fopen( filename, "rb" );
  if ( f == 0 ) return 0;

  // measure the file by seeking to its end, then rewind
  if ( ( fseek( f, 0, SEEK_END ) != 0 ) ||
       ( ( len = ftell( f ) ) < 0 ) ||
       ( fseek( f, 0, SEEK_SET ) != 0 ) )
  {
    fclose( f );
    return 0;
  }
  s = (size_t) len;

  // one extra int in front of the data holds the size
  m = malloc( s + sizeof( int ) );
  if ( m == 0 )
  {
    fclose( f );
    return 0;
  }

  m[0] = (int)s;
  if ( fread( m+1, 1, s, f ) != s )
  {
    // short read: don't hand back a partially filled image
    free( m );
    fclose( f );
    return 0;
  }
  fclose( f );

  return( m );
}
||||
|
||||
typedef struct fileinfo |
||||
{ |
||||
int * timings; |
||||
int timing_count; |
||||
int dimensionx, dimensiony; |
||||
int numtypes; |
||||
int * types; |
||||
int * effective; |
||||
int cpu; |
||||
int simd; |
||||
int numinputrects; |
||||
int * inputrects; |
||||
int outputscalex, outputscaley; |
||||
int milliseconds; |
||||
int64 cycles; |
||||
double scale_time; |
||||
int bitmapx, bitmapy; |
||||
char const * filename; |
||||
} fileinfo; |
||||
|
||||
int numfileinfo; |
||||
fileinfo fi[256]; |
||||
unsigned char * bitmap; |
||||
int bitmapw, bitmaph, bitmapp; |
||||
|
||||
static int use_timing_file( char const * filename, int index ) |
||||
{ |
||||
int * base = file_read( filename ); |
||||
int * file = base; |
||||
|
||||
if ( base == 0 ) return 0; |
||||
|
||||
++file; // skip file image size;
|
||||
if ( *file++ != 'VFT1' ) return 0; |
||||
fi[index].cpu = *file++; |
||||
fi[index].simd = *file++; |
||||
fi[index].dimensionx = *file++; |
||||
fi[index].dimensiony = *file++; |
||||
fi[index].numtypes = *file++; |
||||
fi[index].types = file; file += fi[index].numtypes; |
||||
fi[index].effective = file; file += fi[index].numtypes; |
||||
fi[index].numinputrects = *file++; |
||||
fi[index].inputrects = file; file += fi[index].numinputrects * 2; |
||||
fi[index].outputscalex = *file++; |
||||
fi[index].outputscaley = *file++; |
||||
fi[index].milliseconds = *file++; |
||||
fi[index].cycles = ((int64*)file)[0]; file += 2; |
||||
fi[index].filename = filename; |
||||
|
||||
fi[index].timings = file; |
||||
fi[index].timing_count = (int) ( ( base[0] - ( ((char*)file - (char*)base - sizeof(int) ) ) ) / (sizeof(int)*2) ); |
||||
|
||||
fi[index].scale_time = (double)fi[index].milliseconds / (double)fi[index].cycles; |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
// Evaluates stbir__should_do_vertical_first for an ix x iy -> ox x oy resize
// with the given builtin filter, using `weights_table`.  The result is
// returned unchanged (callers treat non-zero as "vertical first"); v_info is
// handed straight through to the library call.
static int vert_first( float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4], int ox, int oy, int ix, int iy, int filter, STBIR__V_FIRST_INFO * v_info )
{
  stbir__support_callback * support = stbir__builtin_supports[filter];
  float scale_x = (float)ox / (float)(ix);
  float scale_y = (float)oy / (float)(iy);
  int width_y = stbir__get_filter_pixel_width( support, scale_y, 0 );
  int width_x = stbir__get_filter_pixel_width( support, scale_x, 0 );
  int gather = 0;

  // the vertical pass counts as a gather when not shrinking vertically,
  // or when the vertical filter is narrow enough
  if ( scale_y >= ( 1.0f - stbir__small_float ) )
    gather = 1;
  else if ( width_y <= STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT )
    gather = 1;

  return stbir__should_do_vertical_first( weights_table, width_x, scale_x, ox, width_y, scale_y, oy, gather, v_info );
}
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION |
||||
#include "stb_image_write.h" |
||||
|
||||
static void alloc_bitmap() |
||||
{ |
||||
int findex; |
||||
int x = 0, y = 0; |
||||
int w = 0, h = 0; |
||||
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
{ |
||||
int nx, ny; |
||||
int thisw, thish; |
||||
|
||||
thisw = ( fi[findex].dimensionx * fi[findex].numtypes ) + ( fi[findex].numtypes - 1 ); |
||||
thish = ( fi[findex].dimensiony * fi[findex].numinputrects ) + ( fi[findex].numinputrects - 1 ); |
||||
|
||||
for(;;) |
||||
{ |
||||
nx = x + ((x)?4:0) + thisw; |
||||
ny = y + ((y)?4:0) + thish; |
||||
if ( ( nx <= 3600 ) || ( x == 0 ) ) |
||||
{
|
||||
fi[findex].bitmapx = x + ((x)?4:0); |
||||
fi[findex].bitmapy = y + ((y)?4:0); |
||||
x = nx; |
||||
if ( x > w ) w = x; |
||||
if ( ny > h ) h = ny; |
||||
break; |
||||
} |
||||
else |
||||
{ |
||||
x = 0; |
||||
y = h; |
||||
} |
||||
} |
||||
} |
||||
|
||||
w = (w+3) & ~3; |
||||
bitmapw = w; |
||||
bitmaph = h; |
||||
bitmapp = w * 3; // RGB
|
||||
bitmap = malloc( bitmapp * bitmaph ); |
||||
|
||||
memset( bitmap, 0, bitmapp * bitmaph ); |
||||
} |
||||
|
||||
static void build_bitmap( float weights[STBIR_RESIZE_CLASSIFICATIONS][4], int do_channel_count_index, int findex ) |
||||
{ |
||||
static int colors[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
STBIR__V_FIRST_INFO v_info = {0}; |
||||
|
||||
int * ts; |
||||
int ir; |
||||
unsigned char * bitm = bitmap + ( fi[findex].bitmapx*3 ) + ( fi[findex].bitmapy*bitmapp) ; |
||||
|
||||
for( ir = 0; ir < STBIR_RESIZE_CLASSIFICATIONS ; ir++ ) colors[ ir ] = 127*ir/STBIR_RESIZE_CLASSIFICATIONS+128; |
||||
|
||||
ts = fi[findex].timings; |
||||
|
||||
for( ir = 0 ; ir < fi[findex].numinputrects ; ir++ ) |
||||
{ |
||||
int ix, iy, chanind; |
||||
ix = fi[findex].inputrects[ir*2]; |
||||
iy = fi[findex].inputrects[ir*2+1]; |
||||
|
||||
for( chanind = 0 ; chanind < fi[findex].numtypes ; chanind++ ) |
||||
{ |
||||
int ofs, h, hh; |
||||
|
||||
// just do the type that we're on
|
||||
if ( chanind != do_channel_count_index ) |
||||
{ |
||||
ts += 2 * fi[findex].dimensionx * fi[findex].dimensiony; |
||||
continue; |
||||
} |
||||
|
||||
// bitmap offset
|
||||
ofs=chanind*(fi[findex].dimensionx+1)*3+ir*(fi[findex].dimensiony+1)*bitmapp; |
||||
|
||||
h = 1; |
||||
for( hh = 0 ; hh < fi[findex].dimensiony; hh++ ) |
||||
{ |
||||
int ww, w = 1; |
||||
for( ww = 0 ; ww < fi[findex].dimensionx; ww++ ) |
||||
{ |
||||
int good, v_first, VF, HF; |
||||
|
||||
VF = ts[0]; |
||||
HF = ts[1]; |
||||
|
||||
v_first = vert_first( weights, w, h, ix, iy, STBIR_FILTER_MITCHELL, &v_info ); |
||||
|
||||
good = ( ((HF<=VF) && (!v_first)) || ((VF<=HF) && (v_first))); |
||||
|
||||
if ( good ) |
||||
{ |
||||
bitm[ofs+2] = 0; |
||||
bitm[ofs+1] = (unsigned char)colors[v_info.v_resize_classification]; |
||||
} |
||||
else |
||||
{ |
||||
double r; |
||||
|
||||
if ( HF < VF ) |
||||
r = (double)(VF-HF)/(double)HF; |
||||
else |
||||
r = (double)(HF-VF)/(double)VF; |
||||
|
||||
if ( r > 0.4f) r = 0.4; |
||||
r *= 1.0f/0.4f;
|
||||
|
||||
bitm[ofs+2] = (char)(255.0f*r); |
||||
bitm[ofs+1] = (char)(((float)colors[v_info.v_resize_classification])*(1.0f-r)); |
||||
} |
||||
bitm[ofs] = 0; |
||||
|
||||
ofs += 3; |
||||
ts += 2; |
||||
w += fi[findex].outputscalex; |
||||
} |
||||
ofs += bitmapp - fi[findex].dimensionx*3; |
||||
h += fi[findex].outputscaley; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void build_comp_bitmap( float weights[STBIR_RESIZE_CLASSIFICATIONS][4], int do_channel_count_index ) |
||||
{ |
||||
int * ts0; |
||||
int * ts1; |
||||
int ir; |
||||
unsigned char * bitm = bitmap + ( fi[0].bitmapx*3 ) + ( fi[0].bitmapy*bitmapp) ; |
||||
|
||||
ts0 = fi[0].timings; |
||||
ts1 = fi[1].timings; |
||||
|
||||
for( ir = 0 ; ir < fi[0].numinputrects ; ir++ ) |
||||
{ |
||||
int ix, iy, chanind; |
||||
ix = fi[0].inputrects[ir*2]; |
||||
iy = fi[0].inputrects[ir*2+1]; |
||||
|
||||
for( chanind = 0 ; chanind < fi[0].numtypes ; chanind++ ) |
||||
{ |
||||
int ofs, h, hh; |
||||
|
||||
// just do the type that we're on
|
||||
if ( chanind != do_channel_count_index ) |
||||
{ |
||||
ts0 += 2 * fi[0].dimensionx * fi[0].dimensiony; |
||||
ts1 += 2 * fi[0].dimensionx * fi[0].dimensiony; |
||||
continue; |
||||
} |
||||
|
||||
// bitmap offset
|
||||
ofs=chanind*(fi[0].dimensionx+1)*3+ir*(fi[0].dimensiony+1)*bitmapp; |
||||
|
||||
h = 1; |
||||
for( hh = 0 ; hh < fi[0].dimensiony; hh++ ) |
||||
{ |
||||
int ww, w = 1; |
||||
for( ww = 0 ; ww < fi[0].dimensionx; ww++ ) |
||||
{ |
||||
int v_first, time0, time1; |
||||
|
||||
v_first = vert_first( weights, w, h, ix, iy, STBIR_FILTER_MITCHELL, 0 ); |
||||
|
||||
time0 = ( v_first ) ? ts0[0] : ts0[1]; |
||||
time1 = ( v_first ) ? ts1[0] : ts1[1]; |
||||
|
||||
if ( time0 < time1 ) |
||||
{ |
||||
double r = (double)(time1-time0)/(double)time0; |
||||
if ( r > 0.4f) r = 0.4; |
||||
r *= 1.0f/0.4f;
|
||||
bitm[ofs+2] = 0; |
||||
bitm[ofs+1] = (char)(255.0f*r); |
||||
bitm[ofs] = (char)(64.0f*(1.0f-r)); |
||||
} |
||||
else |
||||
{ |
||||
double r = (double)(time0-time1)/(double)time1; |
||||
if ( r > 0.4f) r = 0.4; |
||||
r *= 1.0f/0.4f;
|
||||
bitm[ofs+2] = (char)(255.0f*r); |
||||
bitm[ofs+1] = 0; |
||||
bitm[ofs] = (char)(64.0f*(1.0f-r)); |
||||
} |
||||
ofs += 3; |
||||
ts0 += 2; |
||||
ts1 += 2; |
||||
w += fi[0].outputscalex; |
||||
} |
||||
ofs += bitmapp - fi[0].dimensionx*3; |
||||
h += fi[0].outputscaley; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void write_bitmap() |
||||
{ |
||||
stbi_write_png( "results.png", bitmapp / 3, bitmaph, 3|STB_IMAGE_BGR, bitmap, bitmapp ); |
||||
} |
||||
|
||||
|
||||
static void calc_errors( float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4], int * curtot, double * curerr, int do_channel_count_index ) |
||||
{ |
||||
int th, findex; |
||||
STBIR__V_FIRST_INFO v_info = {0}; |
||||
|
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++) |
||||
{ |
||||
curerr[th]=0; |
||||
curtot[th]=0; |
||||
} |
||||
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
{ |
||||
int * ts; |
||||
int ir; |
||||
ts = fi[findex].timings; |
||||
|
||||
for( ir = 0 ; ir < fi[findex].numinputrects ; ir++ ) |
||||
{ |
||||
int ix, iy, chanind; |
||||
ix = fi[findex].inputrects[ir*2]; |
||||
iy = fi[findex].inputrects[ir*2+1]; |
||||
|
||||
for( chanind = 0 ; chanind < fi[findex].numtypes ; chanind++ ) |
||||
{ |
||||
int h, hh; |
||||
|
||||
// just do the type that we're on
|
||||
if ( chanind != do_channel_count_index ) |
||||
{ |
||||
ts += 2 * fi[findex].dimensionx * fi[findex].dimensiony; |
||||
continue; |
||||
} |
||||
|
||||
h = 1; |
||||
for( hh = 0 ; hh < fi[findex].dimensiony; hh++ ) |
||||
{ |
||||
int ww, w = 1; |
||||
for( ww = 0 ; ww < fi[findex].dimensionx; ww++ ) |
||||
{ |
||||
int good, v_first, VF, HF; |
||||
|
||||
VF = ts[0]; |
||||
HF = ts[1]; |
||||
|
||||
v_first = vert_first( weights_table, w, h, ix, iy, STBIR_FILTER_MITCHELL, &v_info ); |
||||
|
||||
good = ( ((HF<=VF) && (!v_first)) || ((VF<=HF) && (v_first))); |
||||
|
||||
if ( !good ) |
||||
{ |
||||
double diff; |
||||
if ( VF < HF ) |
||||
diff = ((double)HF-(double)VF) * fi[findex].scale_time; |
||||
else |
||||
diff = ((double)VF-(double)HF) * fi[findex].scale_time; |
||||
|
||||
curtot[v_info.v_resize_classification] += 1; |
||||
curerr[v_info.v_resize_classification] += diff; |
||||
} |
||||
|
||||
ts += 2; |
||||
w += fi[findex].outputscalex; |
||||
} |
||||
h += fi[findex].outputscaley; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Each of the four weights is tried at TRIESPERWEIGHT+1 evenly spaced values
// (0/32 .. 32/32).  A "range" number encodes one combination as four base
// (TRIESPERWEIGHT+1) digits; MAXRANGE is the largest such number.
#define TRIESPERWEIGHT 32
#define MAXRANGE ((TRIESPERWEIGHT+1) * (TRIESPERWEIGHT+1) * (TRIESPERWEIGHT+1) * (TRIESPERWEIGHT+1) - 1)

// Decodes `range` into four weights: digit k (least significant first)
// divided by TRIESPERWEIGHT is stored in weights[k].
static void expand_to_floats( float * weights, int range )
{
  int const base = TRIESPERWEIGHT + 1;
  int k;

  for( k = 0 ; k < 4 ; k++ )
  {
    weights[k] = (float)( range % base ) / (float)TRIESPERWEIGHT;
    range /= base;
  }
}
||||
|
||||
static char const * expand_to_string( int range ) |
||||
{ |
||||
static char str[128]; |
||||
int w0,w1,w2,w3; |
||||
w0 = range % (TRIESPERWEIGHT+1); |
||||
w1 = range/(TRIESPERWEIGHT+1) % (TRIESPERWEIGHT+1); |
||||
w2 = range/(TRIESPERWEIGHT+1)/(TRIESPERWEIGHT+1) % (TRIESPERWEIGHT+1); |
||||
w3 = range/(TRIESPERWEIGHT+1)/(TRIESPERWEIGHT+1)/(TRIESPERWEIGHT+1) % (TRIESPERWEIGHT+1); |
||||
sprintf( str, "[ %2d/%d %2d/%d %2d/%d %2d/%d ]",w0,TRIESPERWEIGHT,w1,TRIESPERWEIGHT,w2,TRIESPERWEIGHT,w3,TRIESPERWEIGHT ); |
||||
return str; |
||||
} |
||||
|
||||
static void print_weights( float weights[STBIR_RESIZE_CLASSIFICATIONS][4], int channel_count_index, int * tots, double * errs ) |
||||
{ |
||||
int th; |
||||
printf("ChInd: %d Weights:\n",channel_count_index); |
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++) |
||||
{ |
||||
float * w = weights[th]; |
||||
printf(" %d: [%1.5f %1.5f %1.5f %1.5f] (%d %.4f)\n",th, w[0], w[1], w[2], w[3], tots[th], errs[th] ); |
||||
} |
||||
printf("\n"); |
||||
} |
||||
|
||||
static int windowranges[ 16 ]; |
||||
static int windowstatus = 0; |
||||
static DWORD trainstart = 0; |
||||
|
||||
static void opt_channel( float best_output_weights[STBIR_RESIZE_CLASSIFICATIONS][4], int channel_count_index ) |
||||
{ |
||||
int newbest = 0; |
||||
float weights[STBIR_RESIZE_CLASSIFICATIONS][4] = {0}; |
||||
double besterr[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
int besttot[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
int best[STBIR_RESIZE_CLASSIFICATIONS]={0}; |
||||
|
||||
double curerr[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
int curtot[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
int th, range; |
||||
DWORD lasttick = 0; |
||||
|
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++)
|
||||
{ |
||||
besterr[th]=1000000000000.0; |
||||
besttot[th]=0x7fffffff; |
||||
} |
||||
|
||||
newbest = 0; |
||||
|
||||
// try the whole range
|
||||
range = MAXRANGE; |
||||
do |
||||
{ |
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++) |
||||
expand_to_floats( weights[th], range ); |
||||
|
||||
calc_errors( weights, curtot, curerr, channel_count_index ); |
||||
|
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++) |
||||
{ |
||||
if ( curerr[th] < besterr[th] ) |
||||
{ |
||||
besterr[th] = curerr[th]; |
||||
besttot[th] = curtot[th]; |
||||
best[th] = range; |
||||
expand_to_floats( best_output_weights[th], best[th] ); |
||||
newbest = 1; |
||||
} |
||||
} |
||||
|
||||
{ |
||||
DWORD t = GetTickCount(); |
||||
if ( range == 0 ) |
||||
goto do_bitmap; |
||||
|
||||
if ( newbest ) |
||||
{ |
||||
if ( ( GetTickCount() - lasttick ) > 200 ) |
||||
{ |
||||
int findex; |
||||
|
||||
do_bitmap: |
||||
lasttick = t; |
||||
newbest = 0; |
||||
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
build_bitmap( best_output_weights, channel_count_index, findex ); |
||||
|
||||
lasttick = GetTickCount(); |
||||
} |
||||
} |
||||
} |
||||
|
||||
windowranges[ channel_count_index ] = range; |
||||
|
||||
// advance all the weights and loop
|
||||
--range; |
||||
} while( ( range >= 0 ) && ( !windowstatus ) ); |
||||
|
||||
// if we hit here, then we tried all weights for this opt, so save them
|
||||
} |
||||
|
||||
static void print_struct( float weight[5][STBIR_RESIZE_CLASSIFICATIONS][4], char const * name ) |
||||
{ |
||||
printf("\n\nstatic float %s[5][STBIR_RESIZE_CLASSIFICATIONS][4]=\n{", name ); |
||||
{ |
||||
int i; |
||||
for(i=0;i<5;i++)
|
||||
{
|
||||
int th; |
||||
for(th=0;th<STBIR_RESIZE_CLASSIFICATIONS;th++) |
||||
{ |
||||
int j; |
||||
printf("\n ");
|
||||
for(j=0;j<4;j++)
|
||||
printf("%1.5ff, ", weight[i][th][j] );
|
||||
} |
||||
printf("\n"); |
||||
} |
||||
printf("\n};\n"); |
||||
} |
||||
} |
||||
|
||||
static float retrain_weights[5][STBIR_RESIZE_CLASSIFICATIONS][4]; |
||||
|
||||
static DWORD __stdcall retrain_shim( LPVOID p ) |
||||
{ |
||||
int chanind = (int) (size_t)p; |
||||
opt_channel( retrain_weights[chanind], chanind ); |
||||
return 0; |
||||
} |
||||
|
||||
// Formats a duration in milliseconds as "<m>m <s>s" once it reaches a full
// minute, and as "<s>s" below that.  (Previously exactly 60000ms was shown
// as "60s".)  The text lives in a static buffer that the next call
// overwrites; 32 bytes is enough for any int argument.
static char const * gettime( int ms )
{
  static char text[32];
  int seconds = ms / 1000;

  if ( ms >= 60000 )
    sprintf( text, "%dm %ds", ms/60000, seconds%60 );
  else
    sprintf( text, "%ds", seconds );
  return text;
}
||||
|
||||
static BITMAPINFOHEADER bmiHeader; |
||||
static DWORD extrawindoww, extrawindowh; |
||||
static HINSTANCE instance; |
||||
static int curzoom = 1; |
||||
|
||||
static LRESULT WINAPI WindowProc( HWND window, |
||||
UINT message, |
||||
WPARAM wparam, |
||||
LPARAM lparam ) |
||||
{ |
||||
switch( message ) |
||||
{ |
||||
case WM_CHAR: |
||||
if ( wparam != 27 ) |
||||
break; |
||||
// falls through
|
||||
|
||||
case WM_CLOSE: |
||||
{ |
||||
int i; |
||||
int max = 0; |
||||
|
||||
for( i = 0 ; i < fi[0].numtypes ; i++ ) |
||||
if( windowranges[i] > max ) max = windowranges[i]; |
||||
|
||||
if ( ( max == 0 ) || ( MessageBox( window, "Cancel before training is finished?", "Vertical First Training", MB_OKCANCEL|MB_ICONSTOP ) == IDOK ) ) |
||||
{ |
||||
for( i = 0 ; i < fi[0].numtypes ; i++ ) |
||||
if( windowranges[i] > max ) max = windowranges[i]; |
||||
if ( max ) |
||||
windowstatus = 1; |
||||
DestroyWindow( window ); |
||||
} |
||||
} |
||||
return 0; |
||||
|
||||
case WM_PAINT: |
||||
{ |
||||
PAINTSTRUCT ps; |
||||
HDC dc; |
||||
|
||||
dc = BeginPaint( window, &ps ); |
||||
StretchDIBits( dc,
|
||||
0, 0, bitmapw*curzoom, bitmaph*curzoom, |
||||
0, 0, bitmapw, bitmaph, |
||||
bitmap, (BITMAPINFO*)&bmiHeader, DIB_RGB_COLORS, SRCCOPY ); |
||||
|
||||
PatBlt( dc, bitmapw*curzoom, 0, 4096, 4096, WHITENESS ); |
||||
PatBlt( dc, 0, bitmaph*curzoom, 4096, 4096, WHITENESS ); |
||||
|
||||
SetTextColor( dc, RGB(0,0,0) ); |
||||
SetBkColor( dc, RGB(255,255,255) ); |
||||
SetBkMode( dc, OPAQUE ); |
||||
|
||||
{ |
||||
int i, l = 0, max = 0; |
||||
char buf[1024]; |
||||
RECT rc; |
||||
POINT p; |
||||
|
||||
for( i = 0 ; i < fi[0].numtypes ; i++ ) |
||||
{ |
||||
l += sprintf( buf + l, "channels: %d %s\n", fi[0].effective[i], windowranges[i] ? expand_to_string( windowranges[i] ) : "Done." ); |
||||
if ( windowranges[i] > max ) max = windowranges[i]; |
||||
} |
||||
|
||||
rc.left = 32; rc.top = bitmaph*curzoom+10; |
||||
rc.right = 512; rc.bottom = rc.top + 512; |
||||
DrawText( dc, buf, -1, &rc, DT_TOP ); |
||||
|
||||
l = 0; |
||||
if ( max == 0 ) |
||||
{ |
||||
static DWORD traindone = 0; |
||||
if ( traindone == 0 ) traindone = GetTickCount(); |
||||
l = sprintf( buf, "Finished in %s.", gettime( traindone - trainstart ) ); |
||||
} |
||||
else if ( max != MAXRANGE ) |
||||
l = sprintf( buf, "Done in %s...", gettime( (int) ( ( ( (int64)max * ( (int64)GetTickCount() - (int64)trainstart ) ) ) / (int64) ( MAXRANGE - max ) ) ) ); |
||||
|
||||
GetCursorPos( &p ); |
||||
ScreenToClient( window, &p ); |
||||
|
||||
if ( ( p.x >= 0 ) && ( p.y >= 0 ) && ( p.x < (bitmapw*curzoom) ) && ( p.y < (bitmaph*curzoom) ) ) |
||||
{ |
||||
int findex; |
||||
int x, y, w, h, sx, sy, ix, iy, ox, oy; |
||||
int ir, chanind; |
||||
int * ts; |
||||
char badstr[64]; |
||||
STBIR__V_FIRST_INFO v_info={0}; |
||||
|
||||
p.x /= curzoom; |
||||
p.y /= curzoom; |
||||
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
{ |
||||
x = fi[findex].bitmapx; |
||||
y = fi[findex].bitmapy; |
||||
w = x + ( fi[findex].dimensionx + 1 ) * fi[findex].numtypes; |
||||
h = y + ( fi[findex].dimensiony + 1 ) * fi[findex].numinputrects; |
||||
|
||||
if ( ( p.x >= x ) && ( p.y >= y ) && ( p.x < w ) && ( p.y < h ) ) |
||||
goto found; |
||||
} |
||||
goto nope; |
||||
|
||||
found: |
||||
|
||||
ir = ( p.y - y ) / ( fi[findex].dimensiony + 1 ); |
||||
sy = ( p.y - y ) % ( fi[findex].dimensiony + 1 ); |
||||
if ( sy >= fi[findex].dimensiony ) goto nope; |
||||
|
||||
chanind = ( p.x - x ) / ( fi[findex].dimensionx + 1 ); |
||||
sx = ( p.x - x ) % ( fi[findex].dimensionx + 1 ); |
||||
if ( sx >= fi[findex].dimensionx ) goto nope; |
||||
|
||||
ix = fi[findex].inputrects[ir*2]; |
||||
iy = fi[findex].inputrects[ir*2+1]; |
||||
|
||||
ts = fi[findex].timings + ( ( fi[findex].dimensionx * fi[findex].dimensiony * fi[findex].numtypes * ir ) + ( fi[findex].dimensionx * fi[findex].dimensiony * chanind ) + ( fi[findex].dimensionx * sy ) + sx ) * 2; |
||||
|
||||
ox = 1+fi[findex].outputscalex*sx; |
||||
oy = 1+fi[findex].outputscaley*sy; |
||||
|
||||
if ( windowstatus != 2 ) |
||||
{ |
||||
int VF, HF, v_first, good; |
||||
VF = ts[0]; |
||||
HF = ts[1]; |
||||
|
||||
v_first = vert_first( retrain_weights[chanind], ox, oy, ix, iy, STBIR_FILTER_MITCHELL, &v_info ); |
||||
|
||||
good = ( ((HF<=VF) && (!v_first)) || ((VF<=HF) && (v_first))); |
||||
|
||||
if ( good ) |
||||
badstr[0] = 0; |
||||
else |
||||
{ |
||||
double r; |
||||
|
||||
if ( HF < VF ) |
||||
r = (double)(VF-HF)/(double)HF; |
||||
else |
||||
r = (double)(HF-VF)/(double)VF; |
||||
sprintf( badstr, " %.1f%% off", r*100 ); |
||||
} |
||||
sprintf( buf + l, "\n\n%s\nCh: %d Resize: %dx%d to %dx%d\nV: %d H: %d Order: %c (%s%s)\nClass: %d Scale: %.2f %s", fi[findex].filename,fi[findex].effective[chanind], ix,iy,ox,oy, VF, HF, v_first?'V':'H', good?"Good":"Wrong", badstr, v_info.v_resize_classification, (double)oy/(double)iy, v_info.is_gather ? "Gather" : "Scatter" ); |
||||
} |
||||
else |
||||
{ |
||||
int v_first, time0, time1; |
||||
float (* weights)[4] = stbir__compute_weights[chanind]; |
||||
int * ts1; |
||||
char b0[32], b1[32]; |
||||
|
||||
ts1 = fi[1].timings + ( ts - fi[0].timings ); |
||||
|
||||
v_first = vert_first( weights, ox, oy, ix, iy, STBIR_FILTER_MITCHELL, &v_info ); |
||||
|
||||
time0 = ( v_first ) ? ts[0] : ts[1]; |
||||
time1 = ( v_first ) ? ts1[0] : ts1[1]; |
||||
|
||||
b0[0] = b1[0] = 0; |
||||
if ( time0 < time1 ) |
||||
sprintf( b0," (%.f%% better)", ((double)time1-(double)time0)*100.0f/(double)time0); |
||||
else |
||||
sprintf( b1," (%.f%% better)", ((double)time0-(double)time1)*100.0f/(double)time1); |
||||
|
||||
sprintf( buf + l, "\n\n0: %s\n1: %s\nCh: %d Resize: %dx%d to %dx%d\nClass: %d Scale: %.2f %s\nTime0: %d%s\nTime1: %d%s", fi[0].filename, fi[1].filename, fi[0].effective[chanind], ix,iy,ox,oy, v_info.v_resize_classification, (double)oy/(double)iy, v_info.is_gather ? "Gather" : "Scatter", time0, b0, time1, b1 ); |
||||
} |
||||
} |
||||
nope: |
||||
|
||||
rc.left = 32+320; rc.right = 512+320;
|
||||
SetTextColor( dc, RGB(0,0,128) ); |
||||
DrawText( dc, buf, -1, &rc, DT_TOP ); |
||||
|
||||
} |
||||
EndPaint( window, &ps ); |
||||
return 0; |
||||
} |
||||
|
||||
case WM_TIMER: |
||||
InvalidateRect( window, 0, 0 ); |
||||
return 0; |
||||
|
||||
case WM_DESTROY: |
||||
PostQuitMessage( 0 ); |
||||
return 0; |
||||
} |
||||
|
||||
|
||||
return DefWindowProc( window, message, wparam, lparam ); |
||||
} |
||||
|
||||
static void SetHighDPI(void) |
||||
{ |
||||
typedef HRESULT WINAPI setdpitype(int v); |
||||
HMODULE h=LoadLibrary("Shcore.dll"); |
||||
if (h) |
||||
{ |
||||
setdpitype * sd = (setdpitype*)GetProcAddress(h,"SetProcessDpiAwareness"); |
||||
if (sd ) |
||||
sd(1); |
||||
} |
||||
}
|
||||
|
||||
static void draw_window() |
||||
{ |
||||
WNDCLASS wc; |
||||
HWND w; |
||||
MSG msg; |
||||
|
||||
instance = GetModuleHandle(NULL); |
||||
|
||||
wc.style = 0; |
||||
wc.lpfnWndProc = WindowProc; |
||||
wc.cbClsExtra = 0; |
||||
wc.cbWndExtra = 0; |
||||
wc.hInstance = instance; |
||||
wc.hIcon = 0; |
||||
wc.hCursor = LoadCursor(NULL, IDC_ARROW); |
||||
wc.hbrBackground = 0; |
||||
wc.lpszMenuName = 0; |
||||
wc.lpszClassName = "WHTrain"; |
||||
|
||||
if ( !RegisterClass( &wc ) ) |
||||
exit(1); |
||||
|
||||
SetHighDPI(); |
||||
|
||||
bmiHeader.biSize = sizeof(BITMAPINFOHEADER); |
||||
bmiHeader.biWidth = bitmapp/3; |
||||
bmiHeader.biHeight = -bitmaph; |
||||
bmiHeader.biPlanes = 1; |
||||
bmiHeader.biBitCount = 24; |
||||
bmiHeader.biCompression = BI_RGB; |
||||
|
||||
w = CreateWindow( "WHTrain", |
||||
"Vertical First Training", |
||||
WS_CAPTION | WS_POPUP| WS_CLIPCHILDREN | |
||||
WS_SYSMENU | WS_MINIMIZEBOX | WS_SIZEBOX, |
||||
CW_USEDEFAULT,CW_USEDEFAULT, |
||||
CW_USEDEFAULT,CW_USEDEFAULT, |
||||
0, 0, instance, 0 ); |
||||
|
||||
{ |
||||
RECT r, c; |
||||
GetWindowRect( w, &r ); |
||||
GetClientRect( w, &c ); |
||||
extrawindoww = ( r.right - r.left ) - ( c.right - c.left ); |
||||
extrawindowh = ( r.bottom - r.top ) - ( c.bottom - c.top ); |
||||
SetWindowPos( w, 0, 0, 0, bitmapw * curzoom + extrawindoww, bitmaph * curzoom + extrawindowh + 164, SWP_NOMOVE ); |
||||
} |
||||
|
||||
ShowWindow( w, SW_SHOWNORMAL ); |
||||
SetTimer( w, 1, 250, 0 ); |
||||
|
||||
{ |
||||
BOOL ret; |
||||
while( ( ret = GetMessage( &msg, w, 0, 0 ) ) != 0 ) |
||||
{
|
||||
if ( ret == -1 ) |
||||
break; |
||||
TranslateMessage( &msg );
|
||||
DispatchMessage( &msg );
|
||||
} |
||||
} |
||||
} |
||||
|
||||
static void retrain() |
||||
{ |
||||
HANDLE threads[ 16 ]; |
||||
int chanind; |
||||
|
||||
trainstart = GetTickCount(); |
||||
for( chanind = 0 ; chanind < fi[0].numtypes ; chanind++ ) |
||||
threads[ chanind ] = CreateThread( 0, 2048*1024, retrain_shim, (LPVOID)(size_t)chanind, 0, 0 ); |
||||
|
||||
draw_window(); |
||||
|
||||
for( chanind = 0 ; chanind < fi[0].numtypes ; chanind++ ) |
||||
{ |
||||
WaitForSingleObject( threads[ chanind ], INFINITE ); |
||||
CloseHandle( threads[ chanind ] ); |
||||
} |
||||
|
||||
write_bitmap(); |
||||
|
||||
print_struct( retrain_weights, "retained_weights" ); |
||||
if ( windowstatus ) printf( "CANCELLED!\n" ); |
||||
} |
||||
|
||||
static void info() |
||||
{ |
||||
int findex; |
||||
|
||||
// display info about each input file
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
{ |
||||
int i, h,m,s; |
||||
if ( findex ) printf( "\n" ); |
||||
printf( "Timing file: %s\n", fi[findex].filename ); |
||||
printf( "CPU type: %d %s\n", fi[findex].cpu, fi[findex].simd?(fi[findex].simd==2?"SIMD8":"SIMD4"):"Scalar" ); |
||||
h = fi[findex].milliseconds/3600000; |
||||
m = (fi[findex].milliseconds-h*3600000)/60000; |
||||
s = (fi[findex].milliseconds-h*3600000-m*60000)/1000; |
||||
printf( "Total time in test: %dh %dm %ds Cycles/sec: %.f\n", h,m,s, 1000.0/fi[findex].scale_time ); |
||||
printf( "Each tile of samples is %dx%d, and is scaled by %dx%d.\n", fi[findex].dimensionx,fi[findex].dimensiony, fi[findex].outputscalex,fi[findex].outputscaley ); |
||||
printf( "So the x coords are: " ); |
||||
for( i=0; i < fi[findex].dimensionx ; i++ ) printf( "%d ",1+i*fi[findex].outputscalex ); |
||||
printf( "\n" ); |
||||
printf( "And the y coords are: " ); |
||||
for( i=0; i < fi[findex].dimensiony ; i++ ) printf( "%d ",1+i*fi[findex].outputscaley ); |
||||
printf( "\n" ); |
||||
printf( "There are %d channel counts and they are: ", fi[findex].numtypes ); |
||||
for( i=0; i < fi[findex].numtypes ; i++ ) printf( "%d ",fi[findex].effective[i] ); |
||||
printf( "\n" ); |
||||
printf( "There are %d input rect sizes and they are: ", fi[findex].numinputrects ); |
||||
for( i=0; i < fi[findex].numtypes ; i++ ) printf( "%dx%d ",fi[findex].inputrects[i*2],fi[findex].inputrects[i*2+1] ); |
||||
printf( "\n" ); |
||||
} |
||||
} |
||||
|
||||
static void current( int do_win, int do_bitmap ) |
||||
{ |
||||
int i, findex; |
||||
|
||||
trainstart = GetTickCount(); |
||||
|
||||
// clear progress
|
||||
memset( windowranges, 0, sizeof( windowranges ) ); |
||||
// copy in appropriate weights
|
||||
memcpy( retrain_weights, stbir__compute_weights, sizeof( retrain_weights ) ); |
||||
|
||||
// build and print current errors and build current bitmap
|
||||
for( i = 0 ; i < fi[0].numtypes ; i++ ) |
||||
{ |
||||
double curerr[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
int curtot[STBIR_RESIZE_CLASSIFICATIONS]; |
||||
float (* weights)[4] = retrain_weights[i]; |
||||
|
||||
calc_errors( weights, curtot, curerr, i ); |
||||
if ( !do_bitmap ) |
||||
print_weights( weights, i, curtot, curerr ); |
||||
|
||||
for( findex = 0 ; findex < numfileinfo ; findex++ ) |
||||
build_bitmap( weights, i, findex ); |
||||
} |
||||
|
||||
if ( do_win ) |
||||
draw_window(); |
||||
|
||||
if ( do_bitmap ) |
||||
write_bitmap(); |
||||
} |
||||
|
||||
static void compare() |
||||
{ |
||||
int i; |
||||
|
||||
trainstart = GetTickCount(); |
||||
windowstatus = 2; // comp mode
|
||||
|
||||
// clear progress
|
||||
memset( windowranges, 0, sizeof( windowranges ) ); |
||||
|
||||
if ( ( fi[0].numtypes != fi[1].numtypes ) || ( fi[0].numinputrects != fi[1].numinputrects ) || |
||||
( fi[0].dimensionx != fi[1].dimensionx ) || ( fi[0].dimensiony != fi[1].dimensiony ) ||
|
||||
( fi[0].outputscalex != fi[1].outputscalex ) || ( fi[0].outputscaley != fi[1].outputscaley ) ) |
||||
{ |
||||
err: |
||||
printf( "Timing files don't match.\n" ); |
||||
exit(5); |
||||
} |
||||
|
||||
for( i=0; i < fi[0].numtypes ; i++ ) |
||||
{ |
||||
if ( fi[0].effective[i] != fi[1].effective[i] ) goto err; |
||||
if ( fi[0].inputrects[i*2] != fi[1].inputrects[i*2] ) goto err; |
||||
if ( fi[0].inputrects[i*2+1] != fi[1].inputrects[i*2+1] ) goto err; |
||||
} |
||||
|
||||
alloc_bitmap( 1 ); |
||||
|
||||
for( i = 0 ; i < fi[0].numtypes ; i++ ) |
||||
{ |
||||
float (* weights)[4] = stbir__compute_weights[i]; |
||||
build_comp_bitmap( weights, i ); |
||||
} |
||||
|
||||
draw_window(); |
||||
} |
||||
|
||||
static void load_files( char ** args, int count ) |
||||
{ |
||||
int i; |
||||
|
||||
if ( count == 0 ) |
||||
{ |
||||
printf( "No timing files listed!" ); |
||||
exit(3); |
||||
} |
||||
|
||||
for ( i = 0 ; i < count ; i++ ) |
||||
{ |
||||
if ( !use_timing_file( args[i], i ) ) |
||||
{ |
||||
printf( "Bad timing file %s\n", args[i] ); |
||||
exit(2); |
||||
} |
||||
} |
||||
numfileinfo = count; |
||||
}
|
||||
|
||||
int main( int argc, char ** argv ) |
||||
{ |
||||
int check; |
||||
if ( argc < 3 ) |
||||
{ |
||||
err: |
||||
printf( "vf_train retrain [timing_filenames....] - recalcs weights for all the files on the command line.\n"); |
||||
printf( "vf_train info [timing_filenames....] - shows info about each timing file.\n"); |
||||
printf( "vf_train check [timing_filenames...] - show results for the current weights for all files listed.\n"); |
||||
printf( "vf_train compare <timing file1> <timing file2> - compare two timing files (must only be two files and same resolution).\n"); |
||||
printf( "vf_train bitmap [timing_filenames...] - write out results.png, comparing against the current weights for all files listed.\n"); |
||||
exit(1); |
||||
} |
||||
|
||||
check = ( strcmp( argv[1], "check" ) == 0 ); |
||||
if ( ( check ) || ( strcmp( argv[1], "bitmap" ) == 0 ) ) |
||||
{ |
||||
load_files( argv + 2, argc - 2 ); |
||||
alloc_bitmap( numfileinfo ); |
||||
current( check, !check ); |
||||
} |
||||
else if ( strcmp( argv[1], "info" ) == 0 )
|
||||
{ |
||||
load_files( argv + 2, argc - 2 ); |
||||
info(); |
||||
} |
||||
else if ( strcmp( argv[1], "compare" ) == 0 )
|
||||
{ |
||||
if ( argc != 4 ) |
||||
{ |
||||
printf( "You must specify two files to compare.\n" ); |
||||
exit(4); |
||||
} |
||||
|
||||
load_files( argv + 2, argc - 2 ); |
||||
compare(); |
||||
} |
||||
else if ( strcmp( argv[1], "retrain" ) == 0 )
|
||||
{ |
||||
load_files( argv + 2, argc - 2 ); |
||||
alloc_bitmap( numfileinfo ); |
||||
retrain();
|
||||
} |
||||
else |
||||
{ |
||||
goto err; |
||||
} |
||||
|
||||
return 0; |
||||
} |
Loading…
Reference in New Issue