overhaul utf8decode() - dmenu - dynamic menu | |
git clone git://git.suckless.org/dmenu | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 51e32d49b56c86cd288c64fccf6cd765547781b9 | |
parent 7be720cc88ed2294338f7182600df10f21c575ce | |
Author: NRK <[email protected]> | |
Date: Thu, 4 Jul 2024 21:25:37 +0000 | |
overhaul utf8decode() | |
This changes the utf8decode() function to: | |
* report when an error occurs (through the new `err` out-parameter) | |
* report how many bytes to advance on error (through the return value) | |
These will be useful in the next commit to render invalid UTF-8 | |
sequences. | |
The new implementation is also shorter and more direct. | |
Diffstat: | |
M drw.c | 76 +++++++++++++----------------… | |
1 file changed, 31 insertions(+), 45 deletions(-) | |
--- | |
diff --git a/drw.c b/drw.c | |
@@ -9,54 +9,40 @@ | |
#include "util.h" | |
#define UTF_INVALID 0xFFFD | |
-#define UTF_SIZ 4 | |
-static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF… | |
-static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF… | |
-static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x100… | |
-static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FF… | |
- | |
-static long | |
-utf8decodebyte(const char c, size_t *i) | |
-{ | |
- for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) | |
- if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) | |
- return (unsigned char)c & ~utfmask[*i]; | |
- return 0; | |
-} | |
- | |
-static size_t | |
-utf8validate(long *u, size_t i) | |
+static int | |
+utf8decode(const char *s_in, long *u, int *err) | |
{ | |
- if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) | |
- *u = UTF_INVALID; | |
- for (i = 1; *u > utfmax[i]; ++i) | |
- ; | |
- return i; | |
-} | |
- | |
-static size_t | |
-utf8decode(const char *c, long *u, size_t clen) | |
-{ | |
- size_t i, j, len, type; | |
- long udecoded; | |
- | |
+ static const unsigned char lens[] = { | |
+ /* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
+ /* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */ | |
+ /* 110XX */ 2, 2, 2, 2, | |
+ /* 1110X */ 3, 3, | |
+ /* 11110 */ 4, | |
+ /* 11111 */ 0, /* invalid */ | |
+ }; | |
+ static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 }; | |
+ static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 }; | |
+ | |
+ const unsigned char *s = (const unsigned char *)s_in; | |
+ int len = lens[*s >> 3]; | |
*u = UTF_INVALID; | |
- if (!clen) | |
- return 0; | |
- udecoded = utf8decodebyte(c[0], &len); | |
- if (!BETWEEN(len, 1, UTF_SIZ)) | |
+ *err = 1; | |
+ if (len == 0) | |
return 1; | |
- for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { | |
- udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); | |
- if (type) | |
- return j; | |
+ | |
+ long cp = s[0] & leading_mask[len - 1]; | |
+ for (int i = 1; i < len; ++i) { | |
+ if (s[i] == '\0' || (s[i] & 0xC0) != 0x80) | |
+ return i; | |
+ cp = (cp << 6) | (s[i] & 0x3F); | |
} | |
- if (j < len) | |
- return 0; | |
- *u = udecoded; | |
- utf8validate(u, len); | |
+ /* out of range, surrogate, overlong encoding */ | |
+ if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1]) | |
+ return len; | |
+ *err = 0; | |
+ *u = cp; | |
return len; | |
} | |
@@ -242,7 +228,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned i… | |
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1; | |
XftDraw *d = NULL; | |
Fnt *usedfont, *curfont, *nextfont; | |
- int utf8strlen, utf8charlen, render = x || y || w || h; | |
+ int utf8strlen, utf8charlen, utf8err, render = x || y || w || h; | |
long utf8codepoint = 0; | |
const char *utf8str; | |
FcCharSet *fccharset; | |
@@ -272,11 +258,11 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned… | |
if (!ellipsis_width && render) | |
ellipsis_width = drw_fontset_getwidth(drw, "..."); | |
while (1) { | |
- ew = ellipsis_len = utf8strlen = 0; | |
+ ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0; | |
utf8str = text; | |
nextfont = NULL; | |
while (*text) { | |
- utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ… | |
+ utf8charlen = utf8decode(text, &utf8codepoint, &utf8er… | |
for (curfont = drw->fonts; curfont; curfont = curfont-… | |
charexists = charexists || XftCharExists(drw->… | |
if (charexists) { |