Introduction
Introduction Statistics Contact Development Disclaimer Help
utf8pad: improve padded printing and printing invalid unicode characters - stag…
git clone git://git.codemadness.org/stagit-gopher
Log
Files
Refs
README
LICENSE
---
commit 554a9fe2e9d12defd9d6253871d8261d3f3ef3c6
parent 7b93d02cd8f26ab9a25d967c72c359a22c91eb74
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 9 Jan 2021 14:56:51 +0100
utf8pad: improve padded printing and printing invalid unicode characters
- Use unicode replacement character (codepoint 0xfffd) when a codepoint is
invalid and proceed printing the rest of the characters.
- When a codepoint is invalid reset the internal state of mbtowc(3), from the
OpenBSD man page:
" If a call to mbtowc() resulted in an undefined internal state, mbtowc()
must be called with s set to NULL to reset the internal state before it
can safely be used again."
- Make the function return 0 when `len` is 0 (this should not be an error).
Diffstat:
M stagit-gopher-index.c | 59 ++++++++++++++++++++++-------…
M stagit-gopher.c | 58 ++++++++++++++++++++++-------…
2 files changed, 83 insertions(+), 34 deletions(-)
---
diff --git a/stagit-gopher-index.c b/stagit-gopher-index.c
@@ -10,6 +10,9 @@
#include <git2.h>
+#define PAD_TRUNCATE_SYMBOL "\xe2\x80\xa6" /* symbol: "ellipsis" */
+#define UTF_INVALID_SYMBOL "\xef\xbf\xbd" /* symbol: "replacement" */
+
static git_repository *repo;
static const char *relpath = "";
@@ -17,40 +20,62 @@ static const char *relpath = "";
static char description[255] = "Repositories";
static char *name = "";
-/* format `len' columns of characters. If string is shorter pad the rest
+/* Format `len' columns of characters. If string is shorter pad the rest
* with characters `pad`. */
int
utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad)
{
wchar_t wc;
size_t col = 0, i, slen, siz = 0;
- int rl, w;
+ int inc, rl, w;
- if (!len)
+ if (!bufsiz)
return -1;
+ if (!len) {
+ buf[0] = '\0';
+ return 0;
+ }
slen = strlen(s);
- for (i = 0; i < slen; i += rl) {
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= …
- break;
- if ((w = wcwidth(wc)) == -1)
+ for (i = 0; i < slen; i += inc) {
+ inc = 1;
+ if ((unsigned char)s[i] < 32)
continue;
- if (col + w > len || (col + w == len && s[i + rl])) {
+
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
+ if (rl < 0) {
+ mbtowc(NULL, NULL, 0); /* reset state */
+ inc = 1; /* next byte */
+ w = 1; /* replacement char is one width */
+ } else if ((w = wcwidth(wc)) == -1) {
+ continue;
+ } else {
+ inc = rl;
+ }
+
+ if (col + w > len || (col + w == len && s[i + inc])) {
if (siz + 4 >= bufsiz)
return -1;
- memcpy(&buf[siz], "\xe2\x80\xa6", 3);
- siz += 3;
- if (col + w == len && w > 1)
- buf[siz++] = pad;
+ memcpy(&buf[siz], PAD_TRUNCATE_SYMBOL, sizeof(PAD_TRUN…
+ siz += sizeof(PAD_TRUNCATE_SYMBOL) - 1;
buf[siz] = '\0';
- return 0;
+ col++;
+ break;
+ } else if (rl < 0) {
+ if (siz + 4 >= bufsiz)
+ return -1;
+ memcpy(&buf[siz], UTF_INVALID_SYMBOL, sizeof(UTF_INVAL…
+ siz += sizeof(UTF_INVALID_SYMBOL) - 1;
+ buf[siz] = '\0';
+ col++;
+ continue;
}
- if (siz + rl + 1 >= bufsiz)
+ if (siz + inc + 1 >= bufsiz)
return -1;
- memcpy(&buf[siz], &s[i], rl);
- col += w;
- siz += rl;
+ memcpy(&buf[siz], &s[i], inc);
+ siz += inc;
buf[siz] = '\0';
+ col += w;
}
len -= col;
diff --git a/stagit-gopher.c b/stagit-gopher.c
@@ -19,6 +19,8 @@
#include "compat.h"
#define LEN(s) (sizeof(s)/sizeof(*s))
+#define PAD_TRUNCATE_SYMBOL "\xe2\x80\xa6" /* symbol: "ellipsis" */
+#define UTF_INVALID_SYMBOL "\xef\xbf\xbd" /* symbol: "replacement" */
struct deltainfo {
git_patch *patch;
@@ -80,40 +82,62 @@ static char lastoidstr[GIT_OID_HEXSZ + 2]; /* id + newline …
static FILE *rcachefp, *wcachefp;
static const char *cachefile;
-/* format `len' columns of characters. If string is shorter pad the rest
+/* Format `len' columns of characters. If string is shorter pad the rest
* with characters `pad`. */
int
utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad)
{
wchar_t wc;
size_t col = 0, i, slen, siz = 0;
- int rl, w;
+ int inc, rl, w;
- if (!len)
+ if (!bufsiz)
return -1;
+ if (!len) {
+ buf[0] = '\0';
+ return 0;
+ }
slen = strlen(s);
- for (i = 0; i < slen; i += rl) {
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= …
- break;
- if ((w = wcwidth(wc)) == -1)
+ for (i = 0; i < slen; i += inc) {
+ inc = 1;
+ if ((unsigned char)s[i] < 32)
+ continue;
+
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
+ if (rl < 0) {
+ mbtowc(NULL, NULL, 0); /* reset state */
+ inc = 1; /* next byte */
+ w = 1; /* replacement char is one width */
+ } else if ((w = wcwidth(wc)) == -1) {
continue;
- if (col + w > len || (col + w == len && s[i + rl])) {
+ } else {
+ inc = rl;
+ }
+
+ if (col + w > len || (col + w == len && s[i + inc])) {
if (siz + 4 >= bufsiz)
return -1;
- memcpy(&buf[siz], "\xe2\x80\xa6", 3);
- siz += 3;
- if (col + w == len && w > 1)
- buf[siz++] = pad;
+ memcpy(&buf[siz], PAD_TRUNCATE_SYMBOL, sizeof(PAD_TRUN…
+ siz += sizeof(PAD_TRUNCATE_SYMBOL) - 1;
buf[siz] = '\0';
- return 0;
+ col++;
+ break;
+ } else if (rl < 0) {
+ if (siz + 4 >= bufsiz)
+ return -1;
+ memcpy(&buf[siz], UTF_INVALID_SYMBOL, sizeof(UTF_INVAL…
+ siz += sizeof(UTF_INVALID_SYMBOL) - 1;
+ buf[siz] = '\0';
+ col++;
+ continue;
}
- if (siz + rl + 1 >= bufsiz)
+ if (siz + inc + 1 >= bufsiz)
return -1;
- memcpy(&buf[siz], &s[i], rl);
- col += w;
- siz += rl;
+ memcpy(&buf[siz], &s[i], inc);
+ siz += inc;
buf[siz] = '\0';
+ col += w;
}
len -= col;
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.