Introduction
Introduction Statistics Contact Development Disclaimer Help
utf8pad: improve padded printing and printing invalid unicode characters - sfee…
git clone git://git.codemadness.org/sfeed_curses
Log
Files
Refs
README
LICENSE
---
commit 7f13213a355aba904f12a595b322909ce630fbe1
parent 1c4116d1fa7db2ddf540d05df381cbf58e932981
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 9 Jan 2021 14:57:57 +0100
utf8pad: improve padded printing and printing invalid unicode characters
- Use unicode replacement character (codepoint 0xfffd) when a codepoint is
invalid and proceed printing the rest of the characters.
- When a codepoint is invalid, reset the internal state of mbtowc(3). From the
OpenBSD man page:
" If a call to mbtowc() resulted in an undefined internal state, mbtowc()
must be called with s set to NULL to reset the internal state before it
can safely be used again."
- Make the function return 0 when `len` is 0 (this should not be an error).
Diffstat:
M sfeed_curses.c | 76 +++++++++++++++++++++++------…
1 file changed, 56 insertions(+), 20 deletions(-)
---
diff --git a/sfeed_curses.c b/sfeed_curses.c
@@ -30,6 +30,7 @@
#define PAD_TRUNCATE_SYMBOL "\xe2\x80\xa6" /* symbol: "ellipsis" */
#define SCROLLBAR_SYMBOL_BAR "\xe2\x94\x82" /* symbol: "light vertical" */
#define SCROLLBAR_SYMBOL_TICK " "
+#define UTF_INVALID_SYMBOL "\xef\xbf\xbd" /* symbol: "replacement" */
/* color-theme */
#ifndef SFEED_THEME
@@ -310,15 +311,28 @@ colw(const char *s)
{
wchar_t wc;
size_t col = 0, i, slen;
- int rl, w;
+ int inc, rl, w;
slen = strlen(s);
- for (i = 0; i < slen; i += rl) {
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= …
- break;
- if ((w = wcwidth(wc)) == -1)
+ for (i = 0; i < slen; i += inc) {
+ inc = 1;
+ if ((unsigned char)s[i] < 32) {
continue;
- col += w;
+ } else if ((unsigned char)s[i] >= 127) {
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
+ if (rl < 0) {
+ mbtowc(NULL, NULL, 0); /* reset state */
+ inc = 1; /* next byte */
+ w = 1; /* replacement char is one width */
+ } else if ((w = wcwidth(wc)) == -1) {
+ continue;
+ } else {
+ inc = rl;
+ }
+ col += w;
+ } else {
+ col++;
+ }
}
return col;
}
@@ -330,33 +344,55 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t l…
{
wchar_t wc;
size_t col = 0, i, slen, siz = 0;
- int rl, w;
+ int inc, rl, w;
- if (!len)
+ if (!bufsiz)
return -1;
+ if (!len) {
+ buf[0] = '\0';
+ return 0;
+ }
slen = strlen(s);
- for (i = 0; i < slen; i += rl) {
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= …
- break;
- if ((w = wcwidth(wc)) == -1)
+ for (i = 0; i < slen; i += inc) {
+ inc = 1;
+ if ((unsigned char)s[i] < 32)
+ continue;
+
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
+ if (rl < 0) {
+ mbtowc(NULL, NULL, 0); /* reset state */
+ inc = 1; /* next byte */
+ w = 1; /* replacement char is one width */
+ } else if ((w = wcwidth(wc)) == -1) {
continue;
- if (col + w > len || (col + w == len && s[i + rl])) {
+ } else {
+ inc = rl;
+ }
+
+ if (col + w > len || (col + w == len && s[i + inc])) {
if (siz + 4 >= bufsiz)
return -1;
memcpy(&buf[siz], PAD_TRUNCATE_SYMBOL, sizeof(PAD_TRUN…
siz += sizeof(PAD_TRUNCATE_SYMBOL) - 1;
- if (col + w == len && w > 1)
- buf[siz++] = pad;
buf[siz] = '\0';
- return 0;
+ col++;
+ break;
+ } else if (rl < 0) {
+ if (siz + 4 >= bufsiz)
+ return -1;
+ memcpy(&buf[siz], UTF_INVALID_SYMBOL, sizeof(UTF_INVAL…
+ siz += sizeof(UTF_INVALID_SYMBOL) - 1;
+ buf[siz] = '\0';
+ col++;
+ continue;
}
- if (siz + rl + 1 >= bufsiz)
+ if (siz + inc + 1 >= bufsiz)
return -1;
- memcpy(&buf[siz], &s[i], rl);
- col += w;
- siz += rl;
+ memcpy(&buf[siz], &s[i], inc);
+ siz += inc;
buf[siz] = '\0';
+ col += w;
}
len -= col;
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.