Introduction
Introduction Statistics Contact Development Disclaimer Help
libc/wchar: Fix unicode handling - scc - simple c99 compiler
git clone git://git.simple-cc.org/scc
Log
Files
Refs
Submodules
README
LICENSE
---
commit dee0c6f0c90f7d64fd45cb6e3c48321f4beaf81d
parent 2eaef0900f5ebd9f00bebd8ce899423a0b37b4bb
Author: Roberto E. Vargas Caballero <[email protected]>
Date: Wed, 26 Feb 2025 10:31:51 +0100
libc/wchar: Fix unicode handling
* mbrtowc: validate input, handle 4-byte UTF-8 code points, set errno
* wcrtomb: if a UTF-8 sequence has N bytes, the leading byte has the first
N bits set (with ASCII characters a special case), not the first N-1 bits
* _validutf8: negate condition
Diffstat:
M src/libc/wchar/_validutf8.c | 2 +-
M src/libc/wchar/mbrtowc.c | 26 +++++++++++++++++---------
M src/libc/wchar/wcrtomb.c | 9 +++++++--
3 files changed, 25 insertions(+), 12 deletions(-)
---
diff --git a/src/libc/wchar/_validutf8.c b/src/libc/wchar/_validutf8.c
@@ -23,7 +23,7 @@ _validutf8(wchar_t wc, int *nbytes)
};
struct range *bp;
- for (bp = ranges; bp->begin <= wc && bp->end > wc; ++bp)
+ for (bp = ranges; bp->begin > wc || bp->end <= wc; ++bp)
;
*nbytes = bp->nbytes;
diff --git a/src/libc/wchar/mbrtowc.c b/src/libc/wchar/mbrtowc.c
@@ -1,3 +1,5 @@
+#include <errno.h>
+#include <stdlib.h>
#include <wchar.h>
#include "../libc.h"
@@ -8,37 +10,43 @@ size_t
mbrtowc(wchar_t *restrict pwc, const char *restrict s, size_t n,
mbstate_t *restrict ps)
{
- unsigned char *t = (unsigned char *) s;
+ const unsigned char *t = (const unsigned char *) s;
unsigned long wc;
unsigned c;
int i, len, maxlen;
- if (s == NULL)
+ if (t == NULL)
return 0;
+ if ((wc = *t) == 0)
+ goto return_code;
- wc = c = *t++;
+ c = *t++;
for (len = 0; n > 0 && c & 0x80; --n, ++len)
c <<= 1;
- if (n == 0 || len == 1 || len == 8)
- return -1;
+ if (n == 0 && c & 0x80)
+ return -2;
+ if (len == 1 || len > MB_CUR_MAX)
+ goto return_error;
if (len == 0)
goto return_code;
wc = (c & 0xFF) >> len;
for (i = 0; i < len-1; i++) {
if (((c = *t++) & 0xC0) != 0x80)
- return -1;
+ goto return_error;
wc <<= 6;
wc |= c & 0x3F;
}
if (!_validutf8(wc, &maxlen) || len != maxlen)
- return -1;
+ goto return_error;
return_code:
if (pwc)
*pwc = wc;
- if (*s == '\0')
- return 0;
return t - (unsigned char *) s;
+
+return_error:
+ errno = EILSEQ;
+ return -1;
}
diff --git a/src/libc/wchar/wcrtomb.c b/src/libc/wchar/wcrtomb.c
@@ -14,13 +14,18 @@ wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict p…
if (!s)
return 1;
+ if (c < 0x80) {
+ *s = wc;
+ return 1;
+ }
+
if (!_validutf8(wc, &n)) {
errno = EILSEQ;
return -1;
}
-
n--;
- *s = 0;
+
+ *s = 0x80;
for (i = 0; i < n; i++) {
*s >>= 1;
*s |= 0x80;
You are viewing proxied material from simple-cc.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.