support utf8 input and output - sob - simple output bar | |
git clone git://git.codemadness.org/sob | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 517a1cac03d09213419cabd41dc481c08ad16c9d | |
parent d52ae758f565c6a59c5ddb96995b2e364c507c4f | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Fri, 3 Oct 2014 23:49:15 +0000 | |
support utf8 input and output | |
Diffstat: | |
M sob.c | 374 +++++++++++++++++++++--------… | |
1 file changed, 255 insertions(+), 119 deletions(-) | |
--- | |
diff --git a/sob.c b/sob.c | |
@@ -22,9 +22,11 @@ char *argv0; | |
#define MIN(A, B) ((A) < (B) ? (A) : (B)) | |
struct line { | |
- char line[BUFSIZ]; | |
- size_t len; | |
- size_t pos; | |
+ char line[BUFSIZ]; /* static line buffer */ | |
+ size_t bytesiz; /* length in bytes */ | |
+ size_t utflen; /* length in characters */ | |
+ size_t bytepos; /* index position (in bytes) */ | |
+ size_t utfpos; /* pos in characters */ | |
}; | |
static void line_clear(void); | |
@@ -36,14 +38,17 @@ static void line_cursor_next(void); | |
static void line_cursor_prev(void); | |
static void line_cursor_wordprev(void); | |
static void line_cursor_wordnext(void); | |
-static void line_delcharback(void); | |
+static void line_delcharprev(void); | |
static void line_delcharnext(void); | |
static void line_deltoend(void); | |
-static void line_delwordback(void); | |
+static void line_delwordprev(void); | |
static void line_delwordcursor(void); | |
static void line_draw(void); | |
static void line_exit(void); | |
-static void line_getwordpos(size_t *, size_t *); | |
+static void line_getwordpos(size_t, size_t, size_t *, size_t *, size_t *, | |
+ size_t *); | |
+static void line_getwordposprev(size_t, size_t, size_t *, size_t *); | |
+static void line_getwordposnext(size_t, size_t, size_t *, size_t *); | |
static void line_inserttext(const char *); | |
static void line_newline(void); | |
static void line_out(void); | |
@@ -51,7 +56,7 @@ static void line_prompt(void); | |
static int line_promptlen(void); | |
static int line_pipeto(char **); | |
static void line_set(const char *); | |
-static void line_wordpipeto(char **); | |
+static int line_wordpipeto(char **); | |
static int pipe_readline(int, int, char *, char *, size_t); | |
static int pipe_cmd(char *[], char *, char *, size_t); | |
@@ -64,6 +69,12 @@ static void setup(void); | |
static void sighandler(int); | |
static void usage(void); | |
+static int nonspace(int c); | |
+static size_t utf8len(const char *); | |
+static size_t utfprevn(const char *, size_t , size_t); | |
+static size_t utfnextn(const char *, size_t , size_t); | |
+static void utfuntilchar(size_t *, size_t *, int (*)(int), int); | |
+ | |
static struct termios ttystate, ttysave; | |
static struct line line; | |
@@ -74,35 +85,123 @@ static FILE * lineoutfp = NULL; | |
#include "config.h" | |
+static int /* predicate: nonzero when c is not whitespace; passed as the filter f of utfuntilchar() */ | |
+nonspace(int c) | |
+{ | |
+ return !isspace(c); /* logical negation of isspace() */ | |
+} | |
+ | |
+static size_t /* returns the number of characters (not bytes) in the NUL-terminated string s */ | |
+utf8len(const char *s) | |
+{ | |
+ size_t i; /* count of bytes that are not continuation bytes */ | |
+ | |
+ for(i = 0; *s; s++) { | |
+ if((*s & 0xc0) != 0x80) /* 10xxxxxx is a UTF-8 continuation byte: do not count it */ | |
+ i++; | |
+ } | |
+ return i; | |
+} | |
+ | |
+/* returns amount of bytes needed to go to previous utf char | |
+ * p is index in bytes. | |
+ * s is the buffer and n is how many characters to step back. | |
+ * walks backwards from p one byte at a time; a byte that is not a | |
+ * continuation byte (10xxxxxx) is the first byte of a character. | |
+ * returns 0 when p is 0 or when the start of s is reached before n | |
+ * character starts have been seen. */ | |
+static size_t | |
+utfprevn(const char *s, size_t p, size_t n) | |
+{ | |
+ size_t i; /* bytes stepped over so far */ | |
+ | |
+ for(i = 0; p > 0; p--) { | |
+ i++; | |
+ if((s[p - 1] & 0xc0) != 0x80 && !--n) | |
+ return i; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* returns amount of bytes needed to go to next utf char | |
+ * p is index in bytes. | |
+ * s is the NUL-terminated buffer and n is how many characters to | |
+ * step forward. | |
+ * a character ends where the following byte is not a continuation | |
+ * byte (10xxxxxx); the terminating NUL also counts as such a byte. | |
+ * returns 0 when s[p] is NUL or when the string ends before n | |
+ * characters have been passed. */ | |
+static size_t | |
+utfnextn(const char *s, size_t p, size_t n) | |
+{ | |
+ size_t i; /* bytes stepped over so far */ | |
+ | |
+ for(i = 0; s[p]; p++) { | |
+ i++; | |
+ if((s[p + 1] & 0xc0) != 0x80 && !--n) | |
+ return i; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* b is byte start pos, u is utf pos, f is filter function, | |
+ * dir is -1 or +1 for prev or next. | |
+ * moves the two positions through line.line one character at a time | |
+ * and updates them in place. the scan stops as soon as f returns | |
+ * nonzero for the byte at the byte position (dir > 0) or for the byte | |
+ * just before it (otherwise), when the end or start of the line is | |
+ * reached, or when utfnextn()/utfprevn() returns 0. | |
+ * NOTE(review): line.line is a plain char array, so bytes >= 0x80 may | |
+ * reach f as negative values where char is signed; callers pass | |
+ * isspace() as f - confirm whether a cast to unsigned char is needed. */ | |
+static void | |
+utfuntilchar(size_t *b, size_t *u, int (*f)(int), int dir) | |
+{ | |
+ size_t n; /* byte width of the character being stepped over */ | |
+ | |
+ if(dir > 0) { | |
+ while(*u < line.utflen && *b < line.bytesiz) { | |
+ if(f(line.line[*b])) | |
+ break; | |
+ if((n = utfnextn(line.line, *b, 1)) == 0) | |
+ break; | |
+ *b += n; | |
+ (*u)++; | |
+ } | |
+ | |
+ } else { | |
+ while(*u > 0) { | |
+ if(f(line.line[*b - 1])) | |
+ break; | |
+ if((n = utfprevn(line.line, *b, 1)) == 0) | |
+ break; | |
+ *b -= n; | |
+ (*u)--; | |
+ } | |
+ } | |
+} | |
+ | |
static void | |
line_inserttext(const char *s) | |
{ | |
- size_t len; | |
+ size_t siz, len; | |
- len = strlen(s); | |
- if(line.pos + len + 1 > sizeof(line.line)) | |
+ siz = strlen(s); | |
+ if(line.bytepos + siz + 1 > sizeof(line.line)) | |
return; | |
+ len = utf8len(s); | |
/* append */ | |
- if(line.pos == line.len) { | |
- memmove(&line.line[line.pos], s, len); | |
+ if(line.bytepos == line.bytesiz) { | |
+ memmove(&line.line[line.bytepos], s, siz); | |
} else { | |
/* insert */ | |
- memmove(&line.line[line.pos + len], &line.line[line.pos], | |
- line.len - line.pos); | |
- memcpy(&line.line[line.pos], s, len); | |
+ memmove(&line.line[line.bytepos + siz], &line.line[line.bytepo… | |
+ line.bytesiz - line.bytepos); | |
+ memcpy(&line.line[line.bytepos], s, siz); | |
} | |
- line.len += len; | |
- line.pos += len; | |
- line.line[line.len + 1] = '\0'; | |
+ line.bytepos += siz; | |
+ line.bytesiz += siz; | |
+ line.line[line.bytesiz + 1] = '\0'; | |
+ line.utflen = utf8len(line.line); | |
+ line.utfpos += len; | |
line_draw(); | |
} | |
static void | |
line_set(const char *s) | |
{ | |
+ char *p; | |
+ | |
strlcpy(line.line, s, sizeof(line.line)); | |
- line.len = strlen(line.line); | |
- line.pos = line.len; | |
+ /* remove linefeed and return from string */ | |
+ if((p = strpbrk(line.line, "\r\n"))) | |
+ *p = '\0'; | |
+ | |
+ line.bytesiz = strlen(line.line); | |
+ line.bytepos = line.bytesiz; | |
+ line.utflen = utf8len(line.line); | |
+ line.utfpos = line.utflen; | |
} | |
/* like mksh, toggle counting of escape codes in prompt with "\x01" */ | |
@@ -115,7 +214,7 @@ line_promptlen(void) | |
for(i = 0; prompt[i]; i++) { | |
if(prompt[i] == 1) | |
t = !t; | |
- else if(!t) | |
+ else if(!t && (prompt[i] & 0xc0) != 0x80) | |
n++; | |
} | |
return n; | |
@@ -135,16 +234,10 @@ line_prompt(void) | |
static void | |
line_draw(void) | |
{ | |
- size_t n; | |
- | |
- /* clear */ | |
- fprintf(outfp, "\x1b[2J\x1b[H"); | |
- | |
+ fprintf(outfp, "\x1b[2J\x1b[H"); /* clear */ | |
line_prompt(); | |
- for(n = 0; line.line[n] && n < line.len; n++) | |
- fputc(line.line[n], outfp); | |
- | |
- line_cursor_move(line.pos); | |
+ fwrite(line.line, 1, line.bytesiz, outfp); | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
@@ -169,143 +262,158 @@ line_cursor_move(size_t newpos) | |
} | |
fprintf(outfp, "\x1b[%lu;%luH", y + 1, x + 1); | |
fflush(outfp); | |
- line.pos = newpos; | |
} | |
static void | |
line_cursor_wordprev(void) | |
{ | |
- size_t s, e; | |
- | |
- line_getwordpos(&s, &e); | |
- if(s == line.pos) { | |
- while(s > 0 && isspace(line.line[s - 1])) | |
- s--; | |
- } | |
- line_cursor_move(s); | |
+ line_getwordposprev(line.bytepos, line.utfpos, &line.bytepos, &line.ut… | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_cursor_wordnext(void) | |
{ | |
- size_t s, e; | |
- | |
- line_getwordpos(&s, &e); | |
- if(e == line.pos) { | |
- while(e < line.len && line.line[e] && isspace(line.line[e])) | |
- e++; | |
- } | |
- line_cursor_move(e); | |
+ line_getwordposnext(line.bytepos, line.utfpos, &line.bytepos, &line.ut… | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_cursor_begin(void) | |
{ | |
- line_cursor_move(0); | |
+ line.utfpos = 0; | |
+ line.bytepos = 0; | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_cursor_prev(void) | |
{ | |
- if(line.pos > 0) | |
- line_cursor_move(line.pos - 1); | |
+ if(line.utfpos <= 0) | |
+ return; | |
+ | |
+ line.bytepos -= utfprevn(line.line, line.bytepos, 1); | |
+ line.utfpos--; | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_cursor_next(void) | |
{ | |
- if(line.pos < line.len) | |
- line_cursor_move(line.pos + 1); | |
+ if(line.utfpos >= line.utflen) | |
+ return; | |
+ | |
+ line.bytepos += utfnextn(line.line, line.bytepos, 1); | |
+ line.utfpos++; | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_cursor_end(void) | |
{ | |
- line_cursor_move(line.len); | |
+ line.bytepos = line.bytesiz; | |
+ line.utfpos = line.utflen; | |
+ line_cursor_move(line.utfpos); | |
} | |
static void | |
line_clear(void) | |
{ | |
line_cursor_begin(); | |
- line.line[0] = '\0'; | |
- line.len = 0; | |
+ line_set(""); | |
line_draw(); | |
} | |
static void | |
line_delcharnext(void) | |
{ | |
- if(line.pos == line.len || line.len <= 0) | |
+ size_t siz; | |
+ | |
+ if(line.utfpos == line.utflen || line.utflen <= 0) | |
return; | |
- memmove(&line.line[line.pos], &line.line[line.pos + 1], | |
- line.len - line.pos - 1); | |
- line.len--; | |
- line.line[line.len] = '\0'; | |
+ siz = utfnextn(line.line, line.bytepos, 1); | |
+ memmove(&line.line[line.bytepos], &line.line[line.bytepos + siz], | |
+ line.bytesiz - line.bytepos - siz); | |
+ | |
+ line.bytesiz -= siz; | |
+ line.line[line.bytesiz] = '\0'; | |
line_draw(); | |
} | |
static void | |
-line_delcharback(void) | |
+line_delcharprev(void) | |
{ | |
- if(line.pos <= 0 || line.len <= 0) | |
+ size_t siz; | |
+ | |
+ if(line.utfpos <= 0 || line.utflen <= 0) | |
return; | |
- memmove(&line.line[line.pos - 1], &line.line[line.pos], | |
- line.len - line.pos); | |
- line.len--; | |
- line.line[line.len] = '\0'; | |
- line_cursor_prev(); | |
+ siz = utfprevn(line.line, line.bytepos, 1); | |
+ | |
+ memmove(&line.line[line.bytepos - siz], &line.line[line.bytepos], | |
+ line.bytesiz - line.bytepos); | |
+ line.utflen--; | |
+ line.utfpos--; | |
+ line.bytepos -= siz; | |
+ line.bytesiz -= siz; | |
+ line.line[line.bytesiz] = '\0'; | |
line_draw(); | |
} | |
static void | |
line_deltoend(void) | |
{ | |
- line.line[line.pos] = '\0'; | |
- line.len = line.pos; | |
- line_cursor_end(); | |
+ line.line[line.bytepos] = '\0'; | |
+ line.bytesiz = line.bytepos; | |
+ line.utflen = utf8len(line.line); | |
+ line.utfpos = line.utflen; | |
line_draw(); | |
} | |
static void | |
line_delwordcursor(void) | |
{ | |
- size_t len, s, e; | |
+ size_t len, siz, bs, be, us, ue; | |
+ | |
+ line_getwordpos(line.bytepos, line.utfpos, &bs, &be, &us, &ue); | |
+ | |
+ siz = be - bs; | |
+ len = ue - us; | |
- line_getwordpos(&s, &e); | |
+ memmove(&line.line[bs], &line.line[be], line.bytesiz - be); | |
+ | |
+ line.bytesiz -= siz; | |
+ line.bytepos -= siz; | |
+ line.utfpos -= len; | |
+ line.utflen -= len; | |
+ line.line[line.bytesiz] = '\0'; | |
- memmove(&line.line[s], &line.line[e], line.len - e); | |
- len = e - s; | |
- line.len -= len; | |
- line.pos = s; | |
- line.line[line.len] = '\0'; | |
line_draw(); | |
} | |
static void | |
-line_delwordback(void) | |
+line_delwordprev(void) | |
{ | |
- size_t i, len; | |
+ size_t bs, us, siz, len; | |
- if(line.pos <= 0 || line.len <= 0) | |
+ if(line.utfpos <= 0 || line.utflen <= 0) | |
return; | |
- i = line.pos; | |
- while(i > 0 && isspace(line.line[i - 1])) | |
- i--; | |
- while(i > 0 && !isspace(line.line[i - 1])) | |
- i--; | |
- | |
- len = line.len - line.pos; | |
- if(len > 0) | |
- memmove(&line.line[i], &line.line[line.pos], | |
- line.len - line.pos); | |
- len = line.pos - i; | |
- line.pos = i; | |
- line.len -= len; | |
- line.line[line.len] = '\0'; | |
+ line_getwordposprev(line.bytepos, line.utfpos, &bs, &us); | |
+ | |
+ siz = line.bytepos - bs; | |
+ memmove(&line.line[bs], &line.line[line.bytepos], | |
+ line.bytesiz - line.bytepos); | |
+ | |
+ len = line.utfpos - us; | |
+ | |
+ line.bytesiz -= siz; | |
+ line.bytepos -= siz; | |
+ line.utfpos -= len; | |
+ line.utflen -= len; | |
+ line.line[line.bytesiz] = '\0'; | |
+ | |
line_draw(); | |
} | |
@@ -319,38 +427,65 @@ line_newline(void) | |
static void | |
line_exit(void) | |
{ | |
- line_out(); | |
+ fprintf(outfp, "\n"); | |
+ fflush(outfp); | |
isrunning = 0; | |
} | |
static void | |
-line_getwordpos(size_t *start, size_t *end) | |
+line_getwordpos(size_t b, size_t u, size_t *bs, size_t *be, | |
+ size_t *us, size_t *ue) | |
{ | |
- size_t i; | |
+ size_t tb = b, tu = u; | |
- i = line.pos; | |
- while(i > 0 && !isspace(line.line[i - 1])) | |
- i--; | |
- if(start) | |
- *start = i; | |
- i = line.pos; | |
- while(line.line[i] && i < line.len && !isspace(line.line[i])) | |
- i++; | |
- if(end) | |
- *end = i; | |
+ utfuntilchar(&b, &u, isspace, -1); | |
+ if(bs) | |
+ *bs = b; | |
+ if(us) | |
+ *us = u; | |
+ | |
+ /* seek from original specified position */ | |
+ utfuntilchar(&tb, &tu, isspace, +1); | |
+ if(be) | |
+ *be = tb; | |
+ if(ue) | |
+ *ue = tu; | |
+} | |
+ | |
+static void /* from byte pos sb and utf pos su, find the start of the word before the position */ | |
+line_getwordposprev(size_t sb, size_t su, size_t *b, size_t *u) | |
+{ | |
+ utfuntilchar(&sb, &su, nonspace, -1); /* step back over whitespace */ | |
+ utfuntilchar(&sb, &su, isspace, -1); /* then step back over the non-whitespace run */ | |
+ if(b) /* b (byte pos) and u (utf pos) are optional outputs; NULL is skipped */ | |
+ *b = sb; | |
+ if(u) | |
+ *u = su; | |
+} | |
+ | |
+static void /* from byte pos sb and utf pos su, find the end of the word after the position */ | |
+line_getwordposnext(size_t sb, size_t su, size_t *b, size_t *u) | |
+{ | |
+ utfuntilchar(&sb, &su, nonspace, +1); /* step forward over whitespace */ | |
+ utfuntilchar(&sb, &su, isspace, +1); /* then step forward over the non-whitespace run */ | |
+ if(b) /* b (byte pos) and u (utf pos) are optional outputs; NULL is skipped */ | |
+ *b = sb; | |
+ if(u) | |
+ *u = su; | |
+} | |
static void | |
line_copywordcursor(char *buf, size_t bufsiz) | |
{ | |
- size_t s, e, len; | |
+ size_t bs, be, len; | |
+ | |
+ line_getwordpos(line.bytepos, line.utfpos, &bs, &be, NULL, NULL); | |
+ len = be - bs; | |
- line_getwordpos(&s, &e); | |
- len = e - s; | |
/* truncate */ | |
if(len + 1 > bufsiz) | |
len = bufsiz - 1; | |
- memcpy(buf, &line.line[s], len); | |
+ memcpy(buf, &line.line[bs], len); | |
buf[len + 1] = '\0'; | |
} | |
@@ -375,7 +510,7 @@ pipe_readline(int fd_in, int fd_out, char *writestr, char *… | |
} | |
memset(&tv, 0, sizeof(tv)); | |
tv.tv_sec = 0; | |
- tv.tv_usec = 200; | |
+ tv.tv_usec = 50000; /* 50 ms */ | |
if((r = select(maxfd + 1, haswritten ? &fdr : NULL, | |
haswritten ? NULL : &fdw, NULL, &tv)) == -1) | |
@@ -468,21 +603,19 @@ static int | |
line_pipeto(char **cmd) | |
{ | |
char buf[BUFSIZ]; | |
- size_t len; | |
if(pipe_cmd(cmd, line.line, buf, sizeof(buf)) == -1) | |
return -1; | |
if(buf[0] == '\0') | |
return -1; | |
- len = strlcpy(line.line, buf, sizeof(line.line)); | |
- line.len = len; | |
+ line_set(buf); | |
line_cursor_end(); | |
line_draw(); | |
return 0; | |
} | |
/* pipe word under cursor and replace it */ | |
-static void | |
+static int | |
line_wordpipeto(char **cmd) | |
{ | |
char wordbuf[BUFSIZ], outbuf[BUFSIZ]; | |
@@ -493,13 +626,15 @@ line_wordpipeto(char **cmd) | |
if(pipe_cmd((char**)cmd, wordbuf, outbuf, | |
sizeof(outbuf)) == -1) | |
- return; | |
+ return -1; | |
if(outbuf[0] == '\0') | |
- return; | |
+ return -1; | |
line_delwordcursor(); | |
line_inserttext(outbuf); | |
line_draw(); | |
+ | |
+ return 0; | |
} | |
static void | |
@@ -619,7 +754,7 @@ run(void) | |
memset(&tv, 0, sizeof(tv)); | |
tv.tv_sec = 0; | |
- tv.tv_usec = 50000; /* 50 ms */ | |
+ tv.tv_usec = 32000; /* 32 ms */ | |
errno = 0; | |
if((r = select(STDIN_FILENO + 1, &fdr, NULL, NULL, &tv)) == -1… | |
@@ -667,6 +802,7 @@ main(int argc, char **argv) | |
lineoutfp = stdout; | |
outfp = stderr; | |
+ | |
setlocale(LC_ALL, ""); | |
setup(); | |
run(); |