| json.c - json2tsv - JSON to TSV converter | |
| git clone git://git.codemadness.org/json2tsv | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| json.c (8152B) | |
| --- | |
| 1 #include <errno.h> | |
| 2 #include <stdint.h> | |
| 3 #include <stdio.h> | |
| 4 #include <stdlib.h> | |
| 5 #include <string.h> | |
| 6 | |
| 7 #ifndef GETNEXT | |
| 8 #define GETNEXT getchar_unlocked | |
| 9 #endif | |
| 10 | |
| 11 #include "json.h" | |
| 12 | |
| 13 /* ctype-like macros, but always compatible with ASCII / UTF-8 */ | |
/*
 * Non-zero when c is an ASCII decimal digit ('0'..'9').
 * The argument is converted to unsigned before subtracting '0', so any
 * value below '0' (including negative ones such as EOF) wraps around to a
 * large number and fails the "< 10" test.
 * The argument is fully parenthesized so that compound expressions are
 * cast as a whole, not just their first operand.
 */
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
| 15 #define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || ((unsigned)c | 32) - … | |
| 16 | |
/*
 * Encode code point r as a UTF-8 byte sequence stored in s.
 *
 * Returns the number of bytes written (1 to 4), or 0 when r is 0: a NUL
 * code point produces no output at all.
 * s must have room for up to 4 bytes; no terminating NUL byte is written.
 *
 * No range validation is done here: surrogate values are encoded as plain
 * 3-byte sequences, bits above the 21 that fit in a 4-byte sequence are
 * masked off, and negative values take the single-byte path.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte: emit nothing */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 2 bytes: 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
| 46 | |
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15.
 * Any other input also yields 0, so the result alone cannot tell an
 * invalid character apart from '0'.
 */
static int
hexdigit(int c)
{
	if ('0' <= c && c <= '9')
		return c - '0';

	if ('a' <= c && c <= 'f')
		return (c - 'a') + 10;

	if ('A' <= c && c <= 'F')
		return (c - 'A') + 10;

	return 0; /* not a hex digit */
}
| 58 | |
| 59 static int | |
| 60 capacity(char **value, size_t *sz, size_t cur, size_t inc) | |
| 61 { | |
| 62 size_t need, newsiz; | |
| 63 char *newp; | |
| 64 | |
| 65 /* check for addition overflow */ | |
| 66 if (cur > SIZE_MAX - inc) { | |
| 67 errno = ENOMEM; | |
| 68 return -1; | |
| 69 } | |
| 70 need = cur + inc; | |
| 71 | |
| 72 if (need > *sz) { | |
| 73 if (need > SIZE_MAX / 2) { | |
| 74 newsiz = SIZE_MAX; | |
| 75 } else { | |
| 76 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= nee… | |
| 77 ; | |
| 78 } | |
| 79 if (!(newp = realloc(*value, newsiz))) | |
| 80 return -1; /* up to caller to free *value */ | |
| 81 *value = newp; | |
| 82 *sz = newsiz; | |
| 83 } | |
| 84 return 0; | |
| 85 } | |
| 86 | |
| 87 #define EXPECT_VALUE "{[\"-0123456789tfn" | |
| 88 #define EXPECT_STRING "\"" | |
| 89 #define EXPECT_END "}]," | |
| 90 #define EXPECT_OBJECT_STRING EXPECT_STRING "}" | |
| 91 #define EXPECT_OBJECT_KEY ":" | |
| 92 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" | |
| 93 | |
| 94 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } … | |
| 95 | |
| 96 int | |
| 97 parsejson(void (*cb)(struct json_node *, size_t, const char *, size_t)) | |
| 98 { | |
| 99 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; | |
| 100 size_t depth = 0, p = 0, len, sz = 0; | |
| 101 long cp, hi, lo; | |
| 102 char pri[128], *str = NULL; | |
| 103 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; | |
| 104 const char *expect = EXPECT_VALUE; | |
| 105 | |
| 106 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) | |
| 107 goto end; | |
| 108 nodes[0].name[0] = '\0'; | |
| 109 | |
| 110 while (1) { | |
| 111 c = GETNEXT(); | |
| 112 handlechr: | |
| 113 if (c == EOF) | |
| 114 break; | |
| 115 | |
| 116 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ | |
| 117 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') | |
| 118 continue; | |
| 119 | |
| 120 if (!c || !strchr(expect, c)) | |
| 121 JSON_INVALID(); | |
| 122 | |
| 123 switch (c) { | |
| 124 case ':': | |
| 125 iskey = 0; | |
| 126 expect = EXPECT_VALUE; | |
| 127 break; | |
| 128 case '"': | |
| 129 nodes[depth].type = JSON_TYPE_STRING; | |
| 130 escape = 0; | |
| 131 len = 0; | |
| 132 while (1) { | |
| 133 c = GETNEXT(); | |
| 134 chr: | |
| 135 /* EOF or control char: 0x7f is not defi… | |
| 136 if (c < 0x20) | |
| 137 JSON_INVALID(); | |
| 138 | |
| 139 if (escape) { | |
| 140 escchr: | |
| 141 escape = 0; | |
| 142 switch (c) { | |
| 143 case '"': /* FALLTHROUGH */ | |
| 144 case '\\': | |
| 145 case '/': break; | |
| 146 case 'b': c = '\b'; break; | |
| 147 case 'f': c = '\f'; break; | |
| 148 case 'n': c = '\n'; break; | |
| 149 case 'r': c = '\r'; break; | |
| 150 case 't': c = '\t'; break; | |
| 151 case 'u': /* hex hex hex hex */ | |
| 152 if (capacity(&str, &sz, … | |
| 153 goto end; | |
| 154 for (i = 12, cp = 0; i >… | |
| 155 if ((c = GETNEXT… | |
| 156 JSON_INV… | |
| 157 cp |= (hexdigit(… | |
| 158 } | |
| 159 /* RFC 8259 - 7. Strings… | |
| 160 * 0xd800 - 0xdbff - hig… | |
| 161 if (cp >= 0xd800 && cp <… | |
| 162 if ((c = GETNEXT… | |
| 163 len += c… | |
| 164 goto chr; | |
| 165 } | |
| 166 if ((c = GETNEXT… | |
| 167 len += c… | |
| 168 goto esc… | |
| 169 } | |
| 170 for (hi = cp, i … | |
| 171 if ((c =… | |
| 172 … | |
| 173 lo |= (h… | |
| 174 } | |
| 175 /* 0xdc00 - 0xdf… | |
| 176 if (lo >= 0xdc00… | |
| 177 cp = (hi… | |
| 178 } else { | |
| 179 /* handl… | |
| 180 len += c… | |
| 181 if (capa… | |
| 182 … | |
| 183 len += c… | |
| 184 continue; | |
| 185 } | |
| 186 } | |
| 187 len += codepointtoutf8(c… | |
| 188 continue; | |
| 189 default: | |
| 190 JSON_INVALID(); /* inval… | |
| 191 } | |
| 192 if (capacity(&str, &sz, len, 1) … | |
| 193 goto end; | |
| 194 str[len++] = c; | |
| 195 } else if (c == '\\') { | |
| 196 escape = 1; | |
| 197 } else if (c == '"') { | |
| 198 if (capacity(&str, &sz, len, 1) … | |
| 199 goto end; | |
| 200 str[len++] = '\0'; | |
| 201 | |
| 202 if (iskey) { | |
| 203 /* copy string as key, i… | |
| 204 if (capacity(&(nodes[dep… | |
| 205 goto end; | |
| 206 memcpy(nodes[depth].name… | |
| 207 } else { | |
| 208 cb(nodes, depth + 1, str… | |
| 209 } | |
| 210 break; | |
| 211 } else { | |
| 212 if (capacity(&str, &sz, len, 1) … | |
| 213 goto end; | |
| 214 str[len++] = c; | |
| 215 } | |
| 216 } | |
| 217 if (iskey) | |
| 218 expect = EXPECT_OBJECT_KEY; | |
| 219 else | |
| 220 expect = EXPECT_END; | |
| 221 break; | |
| 222 case '[': | |
| 223 case '{': | |
| 224 if (depth + 1 >= JSON_MAX_NODE_DEPTH) | |
| 225 JSON_INVALID(); /* too deep */ | |
| 226 | |
| 227 nodes[depth].index = 0; | |
| 228 if (c == '[') { | |
| 229 nodes[depth].type = JSON_TYPE_ARRAY; | |
| 230 expect = EXPECT_ARRAY_VALUE; | |
| 231 } else if (c == '{') { | |
| 232 iskey = 1; | |
| 233 nodes[depth].type = JSON_TYPE_OBJECT; | |
| 234 expect = EXPECT_OBJECT_STRING; | |
| 235 } | |
| 236 | |
| 237 cb(nodes, depth + 1, "", 0); | |
| 238 | |
| 239 depth++; | |
| 240 nodes[depth].index = 0; | |
| 241 if (capacity(&(nodes[depth].name), &(nodes[depth… | |
| 242 goto end; | |
| 243 nodes[depth].name[0] = '\0'; | |
| 244 break; | |
| 245 case ']': | |
| 246 case '}': | |
| 247 if (!depth || | |
| 248 (c == ']' && nodes[depth - 1].type != JSON_TY… | |
| 249 (c == '}' && nodes[depth - 1].type != JSON_TY… | |
| 250 JSON_INVALID(); /* unbalanced nodes */ | |
| 251 | |
| 252 depth--; | |
| 253 nodes[depth].index++; | |
| 254 expect = EXPECT_END; | |
| 255 break; | |
| 256 case ',': | |
| 257 if (!depth) | |
| 258 JSON_INVALID(); /* unbalanced nodes */ | |
| 259 | |
| 260 nodes[depth - 1].index++; | |
| 261 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { | |
| 262 iskey = 1; | |
| 263 expect = EXPECT_STRING; | |
| 264 } else { | |
| 265 iskey = 0; | |
| 266 expect = EXPECT_VALUE; | |
| 267 } | |
| 268 break; | |
| 269 case 't': /* true */ | |
| 270 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETN… | |
| 271 JSON_INVALID(); | |
| 272 nodes[depth].type = JSON_TYPE_BOOL; | |
| 273 cb(nodes, depth + 1, "true", 4); | |
| 274 expect = EXPECT_END; | |
| 275 break; | |
| 276 case 'f': /* false */ | |
| 277 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETN… | |
| 278 GETNEXT() != 'e') | |
| 279 JSON_INVALID(); | |
| 280 nodes[depth].type = JSON_TYPE_BOOL; | |
| 281 cb(nodes, depth + 1, "false", 5); | |
| 282 expect = EXPECT_END; | |
| 283 break; | |
| 284 case 'n': /* null */ | |
| 285 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETN… | |
| 286 JSON_INVALID(); | |
| 287 nodes[depth].type = JSON_TYPE_NULL; | |
| 288 cb(nodes, depth + 1, "null", 4); | |
| 289 expect = EXPECT_END; | |
| 290 break; | |
| 291 default: /* number */ | |
| 292 nodes[depth].type = JSON_TYPE_NUMBER; | |
| 293 p = 0; | |
| 294 pri[p++] = c; | |
| 295 expect = EXPECT_END; | |
| 296 while (1) { | |
| 297 c = GETNEXT(); | |
| 298 if (c == EOF || | |
| 299 (!ISDIGIT(c) && c != 'e' && c != 'E'… | |
| 300 c != '+' && c != '-' && c != '.') || | |
| 301 p + 1 >= sizeof(pri)) { | |
| 302 pri[p] = '\0'; | |
| 303 cb(nodes, depth + 1, pri, p); | |
| 304 goto handlechr; /* do not read n… | |
| 305 } else { | |
| 306 pri[p++] = c; | |
| 307 } | |
| 308 } | |
| 309 } | |
| 310 } | |
| 311 if (depth) | |
| 312 JSON_INVALID(); /* unbalanced nodes */ | |
| 313 | |
| 314 ret = 0; /* success */ | |
| 315 end: | |
| 316 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth+… | |
| 317 free(nodes[depth].name); | |
| 318 free(str); | |
| 319 | |
| 320 return ret; | |
| 321 } |