From 3e1754487307572b81a51350efe99f4d31fb52e3 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sat, 4 Mar 2017 20:49:14 -0800 Subject: [PATCH 1/2] stb_image: Give up trying to runtime-detect SSE2 on GCC. We tried but it was nothing but trouble. New rule: with GCC/Clang, if you're compiling with -msse2, you get always-on SSE2 code, otherwise you don't get any. Trying to ship anything with proper runtime dispatch requires both working around certain bugs and some fiddling with build settings, which runs contrary to the intent of a one-file library, so bail on it entirely. Fixes issue #280. Fixes issue #410. --- stb_image.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/stb_image.h b/stb_image.h index 023d247..bcfb5fa 100644 --- a/stb_image.h +++ b/stb_image.h @@ -583,12 +583,14 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #define STBI__X86_TARGET #endif -#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) -// NOTE: not clear do we actually need this for the 64-bit path? +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) // gcc doesn't support sse2 intrinsics unless you compile with -msse2, -// (but compiling with -msse2 allows the compiler to use SSE2 everywhere; -// this is just broken and gcc are jerks for not fixing it properly -// http://www.virtualdub.org/blog/pivot/entry.php?id=363 ) +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. #define STBI_NO_SIMD #endif @@ -646,14 +648,10 @@ static int stbi__sse2_available() static int stbi__sse2_available() { -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later - // GCC 4.8+ has a nice way to do this - return __builtin_cpu_supports("sse2"); -#else - // portable way to do this, preferably without using GCC inline ASM? - // just bail for now. - return 0; -#endif + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. + return 1; } #endif #endif From d9e7c55bd7ddefa31e6b8e44b053083b260d3b65 Mon Sep 17 00:00:00 2001 From: Sean Barrett Date: Sat, 18 Mar 2017 18:35:30 -0700 Subject: [PATCH 2/2] minor docs for last merge --- stb_image.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stb_image.h b/stb_image.h index d38bfcd..f2c8216 100644 --- a/stb_image.h +++ b/stb_image.h @@ -48,7 +48,7 @@ LICENSE RECENT REVISION HISTORY: - 2.15 (2017-03-18) fix png-1,2,4 bug; warnings; all Imagenet JPGs decode + 2.15 (2017-03-18) fix png-1,2,4 bug; all Imagenet JPGs decode 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes @@ -6933,6 +6933,9 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int /* revision history: + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) allocate large structures on the stack