sync printutf8pad from sfeed - tscrape - twitter scraper | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 573905aec2e99fbe31a1cabe5864853ef9015a41 | |
parent 426522824e719e081c9c5e47ba8771779b0fdc85 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Fri, 20 Mar 2020 12:00:16 +0100 | |
sync printutf8pad from sfeed | |
changes: | |
- util: printutf8pad: proper counting of multiwidth characters, | |
for example the string "\xef\xbc\xb5" (U+FF35, a fullwidth 'U' that occupies two columns). | |
- optimization | |
Diffstat: | |
M util.c | 30 ++++++++++++++++++------------ | |
1 file changed, 18 insertions(+), 12 deletions(-) | |
--- | |
diff --git a/util.c b/util.c | |
@@ -72,32 +72,38 @@ xmlencode(const char *s, FILE *fp) | |
} | |
} | |
-/* print `len' columns of characters. If string is shorter pad the rest | |
- * with characters `pad`. */ | |
+/* print `len' columns of characters. If string is shorter pad the rest with | |
+ * characters `pad`. */ | |
void | |
printutf8pad(FILE *fp, const char *s, size_t len, int pad) | |
{ | |
- wchar_t w; | |
+ wchar_t wc; | |
size_t col = 0, i, slen; | |
- int rl, wc; | |
+ int rl, w; | |
if (!len) | |
return; | |
slen = strlen(s); | |
- for (i = 0; i < slen && col < len + 1; i += rl) { | |
- if ((rl = mbtowc(&w, &s[i], slen - i < 4 ? slen - i : 4)) <= 0) | |
- break; | |
- if ((wc = wcwidth(w)) == -1) | |
- wc = 1; | |
- col += (size_t)wc; | |
- if (col >= len && s[i + rl]) { | |
+ for (i = 0; i < slen; i += rl) { | |
+ rl = w = 1; | |
+ if ((unsigned char)s[i] < 32) | |
+ continue; | |
+ if ((unsigned char)s[i] >= 127) { | |
+ if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i :… | |
+ break; | |
+ if ((w = wcwidth(wc)) == -1) | |
+ continue; | |
+ } | |
+ if (col + w > len || (col + w == len && s[i + rl])) { | |
fputs("\xe2\x80\xa6", fp); | |
+ col++; | |
break; | |
} | |
fwrite(&s[i], 1, rl, fp); | |
+ col += w; | |
} | |
- for (; col < len; col++) | |
+ for (; col < len; ++col) | |
putc(pad, fp); | |
} | |