| handle unescaped string and unexpected EOF and improve handling surrogates - js… | |
| git clone git://git.codemadness.org/json2tsv | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| commit 9f4ab639718e4351b02b4bde6035cd588c32b169 | |
| parent f0b7f8935d41162e29c5a01f15273ba225909969 | |
| Author: Hiltjo Posthuma <[email protected]> | |
| Date: Tue, 15 Oct 2019 18:58:56 +0200 | |
| handle unescaped string and unexpected EOF and improve handling of surrogates | |
| .... handle UTF-16 surrogate code-point errors and try to recover gracefully, j… | |
| output the raw bytes. | |
| Diffstat: | |
| M json2tsv.c | 45 ++++++++++++++++++++---------… | |
| 1 file changed, 29 insertions(+), 16 deletions(-) | |
| --- | |
| diff --git a/json2tsv.c b/json2tsv.c | |
| @@ -126,7 +126,7 @@ parsejson(void (*cb)(struct json_node *, size_t, const char… | |
| nodes[depth].type = TYPE_PRIMITIVE; | |
| while ((c = GETNEXT()) != EOF) { | |
| - /* not whitespace or control-character */ | |
| + /* not whitespace or control character */ | |
| if (c <= 0x20 || c == 0x7f) | |
| continue; | |
| @@ -149,14 +149,19 @@ parsejson(void (*cb)(struct json_node *, size_t, const ch… | |
| break; | |
| case '"': | |
| nodes[depth].type = TYPE_STRING; | |
| - for (escape = 0; (c = GETNEXT()) != EOF;) { | |
| - /* 0x7f is not defined as a control-character … | |
| - if (c < 0x20) | |
| - continue; | |
| + escape = 0; | |
| + for (;;) { | |
| + c = GETNEXT(); | |
| +chr: | |
| + if (c < 0x20) { | |
| + /* EOF or control char: 0x7f is not de… | |
| + *errstr = JSON_ERROR_INVALID_CHAR; | |
| + goto end; | |
| + } | |
| if (escape) { | |
| +escchr: | |
| escape = 0; | |
| - | |
| switch (c) { | |
| case '"': /* FALLTHROUGH */ | |
| case '\\': | |
| @@ -167,6 +172,8 @@ parsejson(void (*cb)(struct json_node *, size_t, const char… | |
| case 'r': c = '\r'; break; | |
| case 't': c = '\t'; break; | |
| case 'u': /* hex hex hex hex */ | |
| + if (capacity(&value, &vz, v, 4… | |
| + goto end; | |
| for (i = 12, cp = 0; i >= 0; i… | |
| if ((c = GETNEXT()) ==… | |
| *errstr = JSON… | |
| @@ -175,13 +182,17 @@ parsejson(void (*cb)(struct json_node *, size_t, const ch… | |
| cp |= (hexdigit(c) << … | |
| } | |
| /* See also: | |
| - * RFC7159 - 7. Strings and | |
| + * RFC8259 - 7. Strings and | |
| * https://unicode.org/faq/utf… | |
| * 0xd800 - 0xdb7f - high surr… | |
| if (cp >= 0xd800 && cp <= 0xdb… | |
| - if (GETNEXT() != '\\' … | |
| - *errstr = JSON… | |
| - goto end; | |
| + if ((c = GETNEXT()) !=… | |
| + v += codepoint… | |
| + goto chr; | |
| + } | |
| + if ((c = GETNEXT()) !=… | |
| + v += codepoint… | |
| + goto escchr; | |
| } | |
| for (hi = cp, i = 12, … | |
| if ((c = GETNE… | |
| @@ -191,14 +202,16 @@ parsejson(void (*cb)(struct json_node *, size_t, const ch… | |
| lo |= (hexdigi… | |
| } | |
| /* 0xdc00 - 0xdfff - l… | |
| - if (!(lo >= 0xdc00 && … | |
| - *errstr = JSON… | |
| - goto end; | |
| + if (lo >= 0xdc00 && lo… | |
| + cp = (hi << 10… | |
| + } else { | |
| + v += codepoint… | |
| + if (capacity(&… | |
| + goto e… | |
| + v += codepoint… | |
| + continue; | |
| } | |
| - cp = (hi << 10) + lo -… | |
| } | |
| - if (capacity(&value, &vz, v, 4… | |
| - goto end; | |
| v += codepointtoutf8(cp, &valu… | |
| continue; | |
| default: |