Introduction
Introduction Statistics Contact Development Disclaimer Help
sync printutf8pad from sfeed - tscrape - twitter scraper
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
commit 573905aec2e99fbe31a1cabe5864853ef9015a41
parent 426522824e719e081c9c5e47ba8771779b0fdc85
Author: Hiltjo Posthuma <[email protected]>
Date: Fri, 20 Mar 2020 12:00:16 +0100
sync printutf8pad from sfeed
changes:
- util: printutf8pad: proper counting of multiwidth characters
for example the string "\xef\xbc\xb5".
- optimization
Diffstat:
M util.c | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
---
diff --git a/util.c b/util.c
@@ -72,32 +72,38 @@ xmlencode(const char *s, FILE *fp)
}
}
-/* print `len' columns of characters. If string is shorter pad the rest
- * with characters `pad`. */
+/* print `len' columns of characters. If string is shorter pad the rest with
+ * characters `pad`. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
- wchar_t w;
+ wchar_t wc;
size_t col = 0, i, slen;
- int rl, wc;
+ int rl, w;
if (!len)
return;
slen = strlen(s);
- for (i = 0; i < slen && col < len + 1; i += rl) {
- if ((rl = mbtowc(&w, &s[i], slen - i < 4 ? slen - i : 4)) <= 0)
- break;
- if ((wc = wcwidth(w)) == -1)
- wc = 1;
- col += (size_t)wc;
- if (col >= len && s[i + rl]) {
+ for (i = 0; i < slen; i += rl) {
+ rl = w = 1;
+ if ((unsigned char)s[i] < 32)
+ continue;
+ if ((unsigned char)s[i] >= 127) {
+ if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4)) <= 0)
+ break;
+ if ((w = wcwidth(wc)) == -1)
+ continue;
+ }
+ if (col + w > len || (col + w == len && s[i + rl])) {
fputs("\xe2\x80\xa6", fp);
+ col++;
break;
}
fwrite(&s[i], 1, rl, fp);
+ col += w;
}
- for (; col < len; col++)
+ for (; col < len; ++col)
putc(pad, fp);
}
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.