From 6d59653e825f687d28763fd8ff9141b2cff246b0 Mon Sep 17 00:00:00 2001 From: Cloud Wu Date: Wed, 8 May 2019 10:10:20 +0800 Subject: [PATCH 1/4] Unicode: full Unicode Support (6 squashed commits) (#2541, #2538) fix build for WideCharToMultiByte [3181ff1e] Full Unicode Support [6c9e73ac] Fix ImTextCountUtf8BytesFromChar and ImTextCharToUtf8, these APIs assume the input is an unicode code point, not UTF-16 [ba85665b] Add AddInputCharacterUTF16 for windows backend to handle WM_CHAR [fafdcaf0] Use Windows API to convert UTF-16 for ImFileOpen [dc7d5925] Use windows API to convert UTF-16 for clipboard --- examples/imgui_impl_win32.cpp | 3 +- imconfig.h | 3 ++ imgui.cpp | 82 ++++++++++++++++++++++------------- imgui.h | 14 +++++- imgui_draw.cpp | 3 +- 5 files changed, 70 insertions(+), 35 deletions(-) diff --git a/examples/imgui_impl_win32.cpp b/examples/imgui_impl_win32.cpp index 449922f5..8b1c8087 100644 --- a/examples/imgui_impl_win32.cpp +++ b/examples/imgui_impl_win32.cpp @@ -324,7 +324,8 @@ IMGUI_IMPL_API LRESULT ImGui_ImplWin32_WndProcHandler(HWND hwnd, UINT msg, WPARA return 0; case WM_CHAR: // You can also use ToAscii()+GetKeyboardState() to retrieve characters. - io.AddInputCharacter((unsigned int)wParam); + if (wParam > 0 && wParam < 0x10000) + io.AddInputCharacterUTF16((unsigned short)wParam); return 0; case WM_SETCURSOR: if (LOWORD(lParam) == HTCLIENT && ImGui_ImplWin32_UpdateMouseCursor()) diff --git a/imconfig.h b/imconfig.h index 4f629795..4f01b778 100644 --- a/imconfig.h +++ b/imconfig.h @@ -77,6 +77,9 @@ // Read about ImGuiBackendFlags_RendererHasVtxOffset for details. //#define ImDrawIdx unsigned int +//---- Use 32-bit for ImWchar (default is 16-bit) to support full unicode code points. 
+//#define ImWchar ImWchar32 + //---- Override ImDrawCallback signature (will need to modify renderer back-ends accordingly) //struct ImDrawList; //struct ImDrawCmd; diff --git a/imgui.cpp b/imgui.cpp index 2a721923..9e7ac21c 100644 --- a/imgui.cpp +++ b/imgui.cpp @@ -1098,13 +1098,35 @@ void ImGuiIO::AddInputCharacter(unsigned int c) InputQueueCharacters.push_back((ImWchar)c); } +// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate. +void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c) +{ + if (c >= 0xD800 && c <= 0xDBFF) + { + Surrogate = c; + } + else + { + ImWchar cp = c; + if (c >= 0xDC00 && c <= 0xDFFF) + { + if (sizeof(ImWchar) == 2) + cp = IM_UNICODE_CODEPOINT_INVALID; + else + cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000; + Surrogate = 0; + } + InputQueueCharacters.push_back(cp); + } +} + void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars) { while (*utf8_chars != 0) { unsigned int c = 0; utf8_chars += ImTextCharFromUtf8(&c, utf8_chars, NULL); - if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX) + if (c > 0) InputQueueCharacters.push_back((ImWchar)c); } } @@ -1488,13 +1510,13 @@ ImFileHandle ImFileOpen(const char* filename, const char* mode) { #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) // We need a fopen() wrapper because MSVC/Windows fopen doesn't handle UTF-8 filenames. 
- const int filename_wsize = ImTextCountCharsFromUtf8(filename, NULL) + 1; - const int mode_wsize = ImTextCountCharsFromUtf8(mode, NULL) + 1; + const int filename_wsize = ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); + const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); ImVector buf; buf.resize(filename_wsize + mode_wsize); - ImTextStrFromUtf8(&buf[0], filename_wsize, filename, NULL); - ImTextStrFromUtf8(&buf[filename_wsize], mode_wsize, mode, NULL); - return _wfopen((wchar_t*)&buf[0], (wchar_t*)&buf[filename_wsize]); + ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize); + return _wfopen(&buf[0], &buf[filename_wsize]); #else return fopen(filename, mode); #endif @@ -1606,6 +1628,8 @@ int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* c += (*str++ & 0x3f); // utf-8 encodings of values used in surrogate pairs are invalid if ((c & 0xFFFFF800) == 0xD800) return 4; + // If ImWchar is 16bit, use replacement character U+FFFD instead + if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID; *out_char = c; return 4; } @@ -1623,8 +1647,7 @@ int ImTextStrFromUtf8(ImWchar* buf, int buf_size, const char* in_text, const cha in_text += ImTextCharFromUtf8(&c, in_text, in_text_end); if (c == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) // FIXME: Losing characters that don't fit in 2 bytes - *buf_out++ = (ImWchar)c; + *buf_out++ = (ImWchar)c; } *buf_out = 0; if (in_text_remaining) @@ -1641,8 +1664,7 @@ int ImTextCountCharsFromUtf8(const char* in_text, const char* in_text_end) in_text += ImTextCharFromUtf8(&c, in_text, in_text_end); if (c == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) - char_count++; + char_count++; } return char_count; } @@ -1662,11 +1684,15 @@ static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c) buf[1] = (char)(0x80 + (c & 0x3f)); return 2; } - if (c >= 
0xdc00 && c < 0xe000) + if (c < 0x10000) { - return 0; + if (buf_size < 3) return 0; + buf[0] = (char)(0xe0 + (c >> 12)); + buf[1] = (char)(0x80 + ((c>> 6) & 0x3f)); + buf[2] = (char)(0x80 + ((c ) & 0x3f)); + return 3; } - if (c >= 0xd800 && c < 0xdc00) + if (c <= 0x10FFFF) { if (buf_size < 4) return 0; buf[0] = (char)(0xf0 + (c >> 18)); @@ -1675,14 +1701,8 @@ static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c) buf[3] = (char)(0x80 + ((c ) & 0x3f)); return 4; } - //else if (c < 0x10000) - { - if (buf_size < 3) return 0; - buf[0] = (char)(0xe0 + (c >> 12)); - buf[1] = (char)(0x80 + ((c>> 6) & 0x3f)); - buf[2] = (char)(0x80 + ((c ) & 0x3f)); - return 3; - } + // Invalid code point, the max unicode is 0x10FFFF + return 0; } // Not optimal but we very rarely use this function. @@ -1696,8 +1716,8 @@ static inline int ImTextCountUtf8BytesFromChar(unsigned int c) { if (c < 0x80) return 1; if (c < 0x800) return 2; - if (c >= 0xdc00 && c < 0xe000) return 0; - if (c >= 0xd800 && c < 0xdc00) return 4; + if (c < 0x10000) return 3; + if (c <= 0x10FFFF) return 4; return 3; } @@ -9748,6 +9768,7 @@ static void WindowSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandl #else #include #endif +#include #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) // UWP doesn't have Win32 functions #define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS #define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS @@ -9760,6 +9781,7 @@ static void WindowSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandl #ifdef _MSC_VER #pragma comment(lib, "user32") +#pragma comment(lib, "kernel32") #endif // Win32 clipboard implementation @@ -9775,11 +9797,11 @@ static const char* GetClipboardTextFn_DefaultImpl(void*) ::CloseClipboard(); return NULL; } - if (ImWchar* wbuf_global = (ImWchar*)::GlobalLock(wbuf_handle)) + if (const WCHAR* wbuf_global = (const WCHAR*)::GlobalLock(wbuf_handle)) { - int buf_len = ImTextCountUtf8BytesFromStr(wbuf_global, NULL) + 1; + 
int buf_len = ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, NULL, 0, NULL, NULL); buf_local.resize(buf_len); - ImTextStrToUtf8(buf_local.Data, buf_len, wbuf_global, NULL); + ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, buf_local.Data, buf_len, NULL, NULL); } ::GlobalUnlock(wbuf_handle); ::CloseClipboard(); @@ -9790,15 +9812,15 @@ static void SetClipboardTextFn_DefaultImpl(void*, const char* text) { if (!::OpenClipboard(NULL)) return; - const int wbuf_length = ImTextCountCharsFromUtf8(text, NULL) + 1; - HGLOBAL wbuf_handle = ::GlobalAlloc(GMEM_MOVEABLE, (SIZE_T)wbuf_length * sizeof(ImWchar)); + const int wbuf_length = ::MultiByteToWideChar(CP_UTF8, 0, text, -1, NULL, 0); + HGLOBAL wbuf_handle = ::GlobalAlloc(GMEM_MOVEABLE, (SIZE_T)wbuf_length * sizeof(WCHAR)); if (wbuf_handle == NULL) { ::CloseClipboard(); return; } - ImWchar* wbuf_global = (ImWchar*)::GlobalLock(wbuf_handle); - ImTextStrFromUtf8(wbuf_global, wbuf_length, text, NULL); + WCHAR* wbuf_global = (WCHAR*)::GlobalLock(wbuf_handle); + ::MultiByteToWideChar(CP_UTF8, 0, text, -1, wbuf_global, wbuf_length); ::GlobalUnlock(wbuf_handle); ::EmptyClipboard(); if (::SetClipboardData(CF_UNICODETEXT, wbuf_handle) == NULL) diff --git a/imgui.h b/imgui.h index 1e7b3f33..09160d11 100644 --- a/imgui.h +++ b/imgui.h @@ -143,7 +143,11 @@ struct ImGuiTextFilter; // Helper to parse and apply text filters (e typedef void* ImTextureID; // User data to identify a texture (this is whatever to you want it to be! read the FAQ about ImTextureID in imgui.cpp) #endif typedef unsigned int ImGuiID; // Unique ID used by widgets (typically hashed from a stack of string) -typedef unsigned short ImWchar; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. +#ifndef ImWchar +#define ImWchar ImWchar16 +#endif +typedef unsigned short ImWchar16; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. 
+typedef int ImWchar32; // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h . typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type @@ -1465,6 +1469,7 @@ struct ImGuiIO // Functions IMGUI_API void AddInputCharacter(unsigned int c); // Queue new character input + IMGUI_API void AddInputCharacterUTF16(ImWchar16 c); // Queue new character input from an UTF-16 character, it can be a surrogate IMGUI_API void AddInputCharactersUTF8(const char* str); // Queue new characters input from an UTF-8 string IMGUI_API void ClearInputCharacters(); // Clear the text input buffer manually @@ -1507,6 +1512,7 @@ struct ImGuiIO float KeysDownDurationPrev[512]; // Previous duration the key has been down float NavInputsDownDuration[ImGuiNavInput_COUNT]; float NavInputsDownDurationPrev[ImGuiNavInput_COUNT]; + ImWchar16 Surrogate; // For AddInputCharacterUTF16 ImVector InputQueueCharacters; // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper. IMGUI_API ImGuiIO(); @@ -2092,7 +2098,11 @@ struct ImFontGlyphRangesBuilder ImVector UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used) ImFontGlyphRangesBuilder() { Clear(); } - inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX+1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } + inline void Clear() + { + int MaxUnicode = sizeof(ImWchar) == 2 ? 
0x10000 : 0x110000; + UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int)); + } inline bool GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array inline void SetBit(int n) { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array inline void AddChar(ImWchar c) { SetBit(c); } // Add character diff --git a/imgui_draw.cpp b/imgui_draw.cpp index 69d18945..28f77281 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -2550,8 +2550,7 @@ void ImFontGlyphRangesBuilder::AddText(const char* text, const char* text_end) text += c_len; if (c_len == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) - AddChar((ImWchar)c); + AddChar((ImWchar)c); } } From c8ea0a017d0ea851225f159f26816d86799163a8 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Sun, 29 Sep 2019 12:15:13 +0200 Subject: [PATCH 2/4] Unicode: UTF32 support improvements (#2541, #2538, #2815) - Make ImWchar32 unsigned. - Fix Win32 version of ImFileOpen by including windows.h sooner. - Make ImGuiIO::AddInputCharacterUTF16() more robust by disallowing illegal surrogate pairs. - Allow pushing higher plane codepoints through ImGuiIO::AddInputCharacter(). - Minor cleaning up in the high-plane Unicode support. - Fix Clang -Wunreachable-code warning --- imgui.cpp | 57 +++++++++++++++++++++++++++++++------------------- imgui.h | 20 +++++++----------- imgui_draw.cpp | 4 ++-- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/imgui.cpp b/imgui.cpp index 9e7ac21c..8b1bda5c 100644 --- a/imgui.cpp +++ b/imgui.cpp @@ -1094,30 +1094,33 @@ ImGuiIO::ImGuiIO() // - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message void ImGuiIO::AddInputCharacter(unsigned int c) { - if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX) - InputQueueCharacters.push_back((ImWchar)c); + InputQueueCharacters.push_back(c > 0 && c <= IM_UNICODE_CODEPOINT_MAX ? 
(ImWchar)c : IM_UNICODE_CODEPOINT_INVALID); } -// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate. +// UTF16 strings use surrogate pairs to encode codepoints >= 0x10000, so +// we should save the high surrogate. void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c) { - if (c >= 0xD800 && c <= 0xDBFF) + if ((c & 0xFC00) == 0xD800) // High surrogate, must save { - Surrogate = c; + if (InputQueueSurrogate != 0) + InputQueueCharacters.push_back(0xFFFD); + InputQueueSurrogate = c; + return; } - else + + ImWchar cp = c; + if (InputQueueSurrogate != 0) { - ImWchar cp = c; - if (c >= 0xDC00 && c <= 0xDFFF) - { - if (sizeof(ImWchar) == 2) - cp = IM_UNICODE_CODEPOINT_INVALID; - else - cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000; - Surrogate = 0; - } - InputQueueCharacters.push_back(cp); + if ((c & 0xFC00) != 0xDC00) // Invalid low surrogate + InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID); + else if (IM_UNICODE_CODEPOINT_MAX == (0xFFFF)) // Codepoint will not fit in ImWchar (extra parenthesis around 0xFFFF somehow fixes -Wunreachable-code with Clang) + cp = IM_UNICODE_CODEPOINT_INVALID; + else + cp = (ImWchar)(((InputQueueSurrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000); + InputQueueSurrogate = 0; } + InputQueueCharacters.push_back(cp); } void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars) @@ -1506,6 +1509,18 @@ ImU32 ImHashStr(const char* data_p, size_t data_size, ImU32 seed) // Default file functions #ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS + +#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef __MINGW32__ +#include +#else +#include +#endif +#endif + ImFileHandle ImFileOpen(const char* filename, const char* mode) { #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) @@ -1514,9 +1529,9 @@ 
ImFileHandle ImFileOpen(const char* filename, const char* mode) const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); ImVector buf; buf.resize(filename_wsize + mode_wsize); - ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize); - ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize); - return _wfopen(&buf[0], &buf[filename_wsize]); + ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, (wchar_t*)&buf[0], filename_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, (wchar_t*)&buf[filename_wsize], mode_wsize); + return _wfopen((const wchar_t*)&buf[0], (const wchar_t*)&buf[filename_wsize]); #else return fopen(filename, mode); #endif @@ -1628,8 +1643,8 @@ int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* c += (*str++ & 0x3f); // utf-8 encodings of values used in surrogate pairs are invalid if ((c & 0xFFFFF800) == 0xD800) return 4; - // If ImWchar is 16bit, use replacement character U+FFFD instead - if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID; + // If codepoint does not fit in ImWchar, use replacement character U+FFFD instead + if (c > IM_UNICODE_CODEPOINT_MAX) c = IM_UNICODE_CODEPOINT_INVALID; *out_char = c; return 4; } diff --git a/imgui.h b/imgui.h index 09160d11..cbed1010 100644 --- a/imgui.h +++ b/imgui.h @@ -92,7 +92,7 @@ Index of this file: #else #define IM_OFFSETOF(_TYPE,_MEMBER) ((size_t)&(((_TYPE*)0)->_MEMBER)) // Offset of _MEMBER within _TYPE. Old style macro. #endif -#define IM_UNICODE_CODEPOINT_MAX 0xFFFF // Last Unicode code point supported by this build. +#define IM_UNICODE_CODEPOINT_MAX (sizeof(ImWchar) == 2 ? 0xFFFF : 0x10FFFF) // Last Unicode code point supported by this build. #define IM_UNICODE_CODEPOINT_INVALID 0xFFFD // Standard invalid Unicode code point. 
// Warnings @@ -147,7 +147,7 @@ typedef unsigned int ImGuiID; // Unique ID used by widgets (typically hash #define ImWchar ImWchar16 #endif typedef unsigned short ImWchar16; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. -typedef int ImWchar32; // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h . +typedef unsigned int ImWchar32; // A single U32 character for keyboard input/display. Define ImWchar to ImWchar32 to use it. See imconfig.h . typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type @@ -1512,7 +1512,7 @@ struct ImGuiIO float KeysDownDurationPrev[512]; // Previous duration the key has been down float NavInputsDownDuration[ImGuiNavInput_COUNT]; float NavInputsDownDurationPrev[ImGuiNavInput_COUNT]; - ImWchar16 Surrogate; // For AddInputCharacterUTF16 + ImWchar16 InputQueueSurrogate; // For AddInputCharacterUTF16 ImVector InputQueueCharacters; // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper. IMGUI_API ImGuiIO(); @@ -2097,15 +2097,11 @@ struct ImFontGlyphRangesBuilder { ImVector UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used) - ImFontGlyphRangesBuilder() { Clear(); } - inline void Clear() - { - int MaxUnicode = sizeof(ImWchar) == 2 ? 
0x10000 : 0x110000; - UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int)); - } - inline bool GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array - inline void SetBit(int n) { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array - inline void AddChar(ImWchar c) { SetBit(c); } // Add character + ImFontGlyphRangesBuilder() { Clear(); } + inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } + inline bool GetBit(size_t n) const { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array + inline void SetBit(size_t n) { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array + inline void AddChar(ImWchar c) { SetBit(c); } // Add character IMGUI_API void AddText(const char* text, const char* text_end = NULL); // Add string (each character of the UTF-8 string are added) IMGUI_API void AddRanges(const ImWchar* ranges); // Add ranges, e.g. 
builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext IMGUI_API void BuildRanges(ImVector* out_ranges); // Output new ranges diff --git a/imgui_draw.cpp b/imgui_draw.cpp index 28f77281..ae6b9803 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -2724,7 +2724,7 @@ void ImFont::AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst) const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const { - if (c >= IndexLookup.Size) + if (c >= (size_t)IndexLookup.Size) return FallbackGlyph; const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1) @@ -2734,7 +2734,7 @@ const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const { - if (c >= IndexLookup.Size) + if (c >= (size_t)IndexLookup.Size) return NULL; const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1) From 0283a6e566ca0365b3fbadffc9eaa7f780711466 Mon Sep 17 00:00:00 2001 From: omar Date: Tue, 29 Oct 2019 23:48:59 +0100 Subject: [PATCH 3/4] ImFont: Demo, Store Used4kPagesMap[] map in ImFont to facilitate iteration on all codepoints with a large value of IM_UNICODE_CODEPOINT_MAX. (#2815) Demo uses IsGlyphRangeUnused() --- imgui.h | 2 ++ imgui_demo.cpp | 9 +++++++++ imgui_draw.cpp | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/imgui.h b/imgui.h index cbed1010..021c12b7 100644 --- a/imgui.h +++ b/imgui.h @@ -2257,6 +2257,7 @@ struct ImFont float Scale; // 4 // in // = 1.f // Base font scale, multiplied by the per-window font scale which you can adjust with SetWindowFontScale() float Ascent, Descent; // 4+4 // out // // Ascent: distance from top to bottom of e.g. 'A' [0..FontSize] int MetricsTotalSurface;// 4 // out // // Total surface in pixels to get an idea of the font rasterization/texture cost (not exact, we approximate the cost of padding between glyphs) + ImU8 Used4kPagesMap[(IM_UNICODE_CODEPOINT_MAX+1)/4096/8]; // 2 bytes if ImWchar=ImWchar16, 34 bytes if ImWchar==ImWchar32. 
Store 1-bit for each block of 4K codepoints that has one active glyph. This is mainly used to facilitate iterations across all used codepoints. // Methods IMGUI_API ImFont(); @@ -2282,6 +2283,7 @@ struct ImFont IMGUI_API void AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst = true); // Makes 'dst' character/glyph points to 'src' character/glyph. Currently needs to be called AFTER fonts have been built. IMGUI_API void SetGlyphVisible(ImWchar c, bool visible); IMGUI_API void SetFallbackChar(ImWchar c); + IMGUI_API bool IsGlyphRangeUnused(unsigned int c_begin, unsigned int c_last); }; #if defined(__clang__) diff --git a/imgui_demo.cpp b/imgui_demo.cpp index 591d6d99..2664cb14 100644 --- a/imgui_demo.cpp +++ b/imgui_demo.cpp @@ -3443,6 +3443,15 @@ void ImGui::ShowStyleEditor(ImGuiStyle* ref) // Display all glyphs of the fonts in separate pages of 256 characters for (unsigned int base = 0; base <= IM_UNICODE_CODEPOINT_MAX; base += 256) { + // Skip ahead if a large bunch of glyphs are not present in the font (test in chunks of 4k) + // This is only a small optimization to reduce the number of iterations when IM_UNICODE_CODEPOINT_MAX is large. + // (if ImWchar==ImWchar32 we will do at least about 272 queries here) + if (!(base & 4095) && font->IsGlyphRangeUnused(base, base + 4095)) + { + base += 4096 - 256; + continue; + } + int count = 0; for (unsigned int n = 0; n < 256; n++) count += font->FindGlyphNoFallback((ImWchar)(base + n)) ? 
1 : 0; diff --git a/imgui_draw.cpp b/imgui_draw.cpp index ae6b9803..12f06d60 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -2594,6 +2594,7 @@ ImFont::ImFont() Scale = 1.0f; Ascent = Descent = 0.0f; MetricsTotalSurface = 0; + memset(Used4kPagesMap, 0, sizeof(Used4kPagesMap)); } ImFont::~ImFont() @@ -2626,12 +2627,17 @@ void ImFont::BuildLookupTable() IndexAdvanceX.clear(); IndexLookup.clear(); DirtyLookupTables = false; + memset(Used4kPagesMap, 0, sizeof(Used4kPagesMap)); GrowIndex(max_codepoint + 1); for (int i = 0; i < Glyphs.Size; i++) { int codepoint = (int)Glyphs[i].Codepoint; IndexAdvanceX[codepoint] = Glyphs[i].AdvanceX; IndexLookup[codepoint] = (ImWchar)i; + + // Mark 4K page as used + const int page_n = codepoint / 4096; + Used4kPagesMap[page_n >> 3] |= 1 << (page_n & 7); } // Create a glyph to handle TAB @@ -2660,6 +2666,19 @@ void ImFont::BuildLookupTable() IndexAdvanceX[i] = FallbackAdvanceX; } +// API is designed this way to avoid exposing the 4K page size +// e.g. use with IsGlyphRangeUnused(0, 255) +bool ImFont::IsGlyphRangeUnused(unsigned int c_begin, unsigned int c_last) +{ + unsigned int page_begin = (c_begin / 4096); + unsigned int page_last = (c_last / 4096); + for (unsigned int page_n = page_begin; page_n <= page_last; page_n++) + if ((page_n >> 3) < sizeof(Used4kPagesMap)) + if (Used4kPagesMap[page_n >> 3] & (1 << (page_n & 7))) + return false; + return true; +} + void ImFont::SetGlyphVisible(ImWchar c, bool visible) { if (ImFontGlyph* glyph = (ImFontGlyph*)(void*)FindGlyph((ImWchar)c)) From a41332453efed294f21d530c5346887af1213563 Mon Sep 17 00:00:00 2001 From: ocornut Date: Tue, 3 Mar 2020 17:45:06 +0100 Subject: [PATCH 4/4] Unicode: Changelog, comments, minimum CI integration. 
(#2541, #2538, #2815) --- .github/workflows/build.yml | 12 ++++++++++-- docs/CHANGELOG.txt | 11 +++++++++++ examples/imgui_impl_win32.cpp | 1 + imgui.cpp | 1 + imgui.h | 2 +- 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ff83bd3d..ccc5c244 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,7 +54,7 @@ jobs: - name: Build example_null (single file build) shell: bash run: | - echo '#define IMGUI_IMPLEMENTATION' >> example_single_file.cpp + echo '#define IMGUI_IMPLEMENTATION' > example_single_file.cpp echo '#include "misc/single_file/imgui_single_file.h"' >> example_single_file.cpp echo '#include "examples/example_null/main.cpp"' >> example_single_file.cpp g++ -I. -Wall -Wformat -o example_single_file.exe example_single_file.cpp @@ -190,6 +190,14 @@ jobs: - name: Build example_null (single file build) run: | + echo '#define IMGUI_IMPLEMENTATION' > example_single_file.cpp + echo '#include "misc/single_file/imgui_single_file.h"' >> example_single_file.cpp + echo '#include "examples/example_null/main.cpp"' >> example_single_file.cpp + g++ -I. -Wall -Wformat -o example_single_file example_single_file.cpp + + - name: Build example_null (with ImWchar32) + run: | + echo '#define ImWchar ImWchar32' > example_single_file.cpp echo '#define IMGUI_IMPLEMENTATION' >> example_single_file.cpp echo '#include "misc/single_file/imgui_single_file.h"' >> example_single_file.cpp echo '#include "examples/example_null/main.cpp"' >> example_single_file.cpp @@ -226,7 +234,7 @@ jobs: - name: Build example_null (single file build) run: | - echo '#define IMGUI_IMPLEMENTATION' >> example_single_file.cpp + echo '#define IMGUI_IMPLEMENTATION' > example_single_file.cpp echo '#include "misc/single_file/imgui_single_file.h"' >> example_single_file.cpp echo '#include "examples/example_null/main.cpp"' >> example_single_file.cpp clang++ -I. 
-Wall -Wformat -o example_single_file example_single_file.cpp diff --git a/docs/CHANGELOG.txt b/docs/CHANGELOG.txt index 7a7c1cd2..792b8330 100644 --- a/docs/CHANGELOG.txt +++ b/docs/CHANGELOG.txt @@ -45,6 +45,15 @@ Other Changes: when the menu is not open. (#3030) - InputText: Fixed password fields displaying ASCII spaces as blanks instead of using the '*' glyph. (#2149, #515) +- Added optional support for Unicode plane 1-16 (#2538, #2541, #2815) [@cloudwu, @samhocevar] + - Compile-time enable with '#define ImWchar ImWchar32' in imconfig.h. + - Generally more consistent support for unsupported codepoints (0xFFFD), in particular when + using the default, non-fitting characters will be turned into 0xFFFD instead of being ignored. + - Surrogate pairs are supported when submitting UTF-16 data via io.AddInputCharacterUTF16(), + allowing for more complete CJK input. + - sizeof(ImWchar) goes from 2 to 4. IM_UNICODE_CODEPOINT_MAX goes from 0xFFFF to 0x10FFFF. + - Various structures such as ImFont, ImFontGlyphRangesBuilder will use more memory, this + is currently not particularly efficient. - Window: Fixed a bug with child window inheriting ItemFlags from their parent when the child window also manipulate the ItemFlags stack. (#3024) [@Stanbroek] - Font: Fixed non-ASCII space occasionally creating unnecessary empty polygons. @@ -53,6 +62,8 @@ Other Changes: ImGui_ImplWin32_GetDpiScaleForMonitor() helpers functions (backported from the docking branch). Those functions makes it easier for example apps to support hi-dpi features without setting up a manifest. +- Backends: Win32: Calling AddInputCharacterUTF16() from WM_CHAR message handler in order to support + high-plane surrogate pairs. (#2815) [@cloudwu, @samhocevar] - Backends: SDL: Added ImGui_ImplSDL2_InitForMetal() for API consistency (even though the function currently does nothing). - Backends: SDL: Fixed mapping for ImGuiKey_KeyPadEnter. 
(#3031) [@Davido71] diff --git a/examples/imgui_impl_win32.cpp b/examples/imgui_impl_win32.cpp index 8b1c8087..f8868ce6 100644 --- a/examples/imgui_impl_win32.cpp +++ b/examples/imgui_impl_win32.cpp @@ -28,6 +28,7 @@ // CHANGELOG // (minor and older changes stripped away, please see git history for details) +// 2020-03-03: Inputs: Calling AddInputCharacterUTF16() to support surrogate pairs leading to codepoint >= 0x10000 (for more complete CJK inputs) // 2020-02-17: Added ImGui_ImplWin32_EnableDpiAwareness(), ImGui_ImplWin32_GetDpiScaleForHwnd(), ImGui_ImplWin32_GetDpiScaleForMonitor() helper functions. // 2020-01-14: Inputs: Added support for #define IMGUI_IMPL_WIN32_DISABLE_GAMEPAD/IMGUI_IMPL_WIN32_DISABLE_LINKING_XINPUT. // 2019-12-05: Inputs: Added support for ImGuiMouseCursor_NotAllowed mouse cursor. diff --git a/imgui.cpp b/imgui.cpp index 8b1bda5c..75c2fee2 100644 --- a/imgui.cpp +++ b/imgui.cpp @@ -1525,6 +1525,7 @@ ImFileHandle ImFileOpen(const char* filename, const char* mode) { #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) // We need a fopen() wrapper because MSVC/Windows fopen doesn't handle UTF-8 filenames. + // Previously we used ImTextCountCharsFromUtf8/ImTextStrFromUtf8 here but we now need to support ImWchar16 and ImWchar32! 
const int filename_wsize = ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); ImVector buf; diff --git a/imgui.h b/imgui.h index 021c12b7..70798230 100644 --- a/imgui.h +++ b/imgui.h @@ -2101,7 +2101,7 @@ struct ImFontGlyphRangesBuilder inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } inline bool GetBit(size_t n) const { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array inline void SetBit(size_t n) { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array - inline void AddChar(ImWchar c) { SetBit(c); } // Add character + inline void AddChar(ImWchar c) { SetBit(c); } // Add character IMGUI_API void AddText(const char* text, const char* text_end = NULL); // Add string (each character of the UTF-8 string are added) IMGUI_API void AddRanges(const ImWchar* ranges); // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext IMGUI_API void BuildRanges(ImVector* out_ranges); // Output new ranges