json.c - jfconvert - JSON Feed (subset) to sfeed or Atom converter | |
git clone git://git.codemadness.org/jfconvert | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
json.c (8152B) | |
--- | |
1 #include <errno.h> | |
2 #include <stdint.h> | |
3 #include <stdio.h> | |
4 #include <stdlib.h> | |
5 #include <string.h> | |
6 | |
/* GETNEXT is the function called to read the next input character.
 * It defaults to getchar_unlocked, but is only defined here when it has not
 * already been defined before this point (e.g. by the build). */
7 #ifndef GETNEXT | |
8 #define GETNEXT getchar_unlocked | |
9 #endif | |
10 | |
11 #include "json.h" | |
12 | |
13 /* ctype-like macros, but always compatible with ASCII / UTF-8 */ | |
/* ISDIGIT: the argument is converted to unsigned before subtracting '0', so a
 * value below '0' wraps around to a large number and a single "< 10"
 * comparison covers both the lower and the upper bound. */
14 #define ISDIGIT(c) (((unsigned)c) - '0' < 10) | |
/* ISXDIGIT: same digit test, or-ed with a second test that first sets bit 5
 * (| 32), which maps ASCII upper-case letters onto lower-case.
 * NOTE(review): the remainder of this macro is cut off in this listing. */
15 #define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || ((unsigned)c | 32) - … | |
16 | |
/*
 * Encode the Unicode code point `r` as UTF-8 into `s`.
 *
 * `s` must have room for at least 4 bytes; no NUL terminator is written.
 * Returns the number of bytes stored (1-4), or 0 when nothing was written:
 * either `r` is 0 (a NUL byte is deliberately not emitted) or `r` lies
 * outside the encodable range 0..0x10FFFF.
 * Surrogate values (0xD800-0xDFFF) are not rejected here; they are encoded
 * as-is in 3 bytes.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r <= 0 || r > 0x10FFFF) {
		/* NUL byte, or not a code point at all: a negative value
		 * would be stored as a single raw byte and a too large value
		 * would be silently masked to 21 bits, both of which produce
		 * malformed UTF-8, so write nothing instead */
		return 0;
	} else if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	} else if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
		s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
		return 2;
	} else if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
		s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
		s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
		return 3;
	} else {
		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
		s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
		s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
		return 4;
	}
}
46 | |
/*
 * Numeric value (0-15) of the ASCII hexadecimal digit `c`, accepting both
 * letter cases. Any other input yields 0, so callers that need to tell an
 * invalid character apart from '0' have to validate `c` themselves first.
 */
static int
hexdigit(int c)
{
	/* unsigned wrap-around turns each range test into one comparison */
	unsigned int decimal = (unsigned int)c - '0';
	/* setting bit 5 folds 'A'-'F' onto 'a'-'f' */
	unsigned int letter = ((unsigned int)c | 0x20) - 'a';

	if (decimal < 10)
		return (int)decimal;
	if (letter < 6)
		return (int)letter + 10;
	return 0;
}
58 | |
/*
 * Make sure the buffer *value, whose allocated size is *sz, can hold
 * cur + inc bytes.
 *
 * When the buffer is too small it is grown with realloc() and both *value
 * and *sz are updated; otherwise nothing is changed.
 * Returns 0 on success and -1 on failure. On failure *value and *sz are left
 * untouched, so the caller still owns (and must free) the old buffer.
 */
59 static int | |
60 capacity(char **value, size_t *sz, size_t cur, size_t inc) | |
61 { | |
62 size_t need, newsiz; | |
63 char *newp; | |
64 | |
65 /* check for addition overflow */ | |
66 if (cur > SIZE_MAX - inc) { | |
67 errno = ENOMEM; | |
68 return -1; | |
69 } | |
70 need = cur + inc; | |
71 | |
72 if (need > *sz) { | |
/* past half of SIZE_MAX the size is pinned to SIZE_MAX instead of being
 * computed by the growth loop below */
73 if (need > SIZE_MAX / 2) { | |
74 newsiz = SIZE_MAX; | |
75 } else { | |
/* growth starts at the current size, but never below 64 bytes.
 * NOTE(review): the loop header is cut off in this listing; presumably it
 * keeps enlarging newsiz until it exceeds need -- confirm against the
 * real file. */
76 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= nee… | |
77 ; | |
78 } | |
79 if (!(newp = realloc(*value, newsiz))) | |
80 return -1; /* up to caller to free *value */ | |
81 *value = newp; | |
82 *sz = newsiz; | |
83 } | |
84 return 0; | |
85 } | |
86 | |
/* Each EXPECT_* string is a set of characters: parsejson() points `expect`
 * at one of them and rejects the next non-whitespace character when
 * strchr() does not find it in the set. */
/* first character of any JSON value: object, array, string, number,
 * true, false or null */
87 #define EXPECT_VALUE "{[\"-0123456789tfn" | |
88 #define EXPECT_STRING "\"" | |
89 #define EXPECT_END "}]," | |
/* after '{': either a key string or an immediate '}' */
90 #define EXPECT_OBJECT_STRING EXPECT_STRING "}" | |
/* after an object key: the ':' separator */
91 #define EXPECT_OBJECT_KEY ":" | |
/* after '[': either a value or an immediate ']' */
92 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" | |
93 | |
/* Bail out of parsejson(): sets its local `ret` to JSON_ERROR_INVALID and
 * jumps to its `end` label, so it is only usable inside that function.
 * NOTE(review): the tail of the macro is cut off in this listing. */
94 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } … | |
95 | |
/*
 * Streaming JSON parser: reads characters with GETNEXT() until EOF and
 * reports values through the callback `cb`.
 *
 * `nodes` is a stack describing the path from the root to the current value;
 * it holds at most JSON_MAX_NODE_DEPTH entries. The callback is invoked as
 * cb(nodes, depth + 1, value, length):
 *   - when an array or object is opened, with "" and 0,
 *   - for true / false / null, with the literal text,
 *   - for numbers, with the collected characters,
 *   - for strings that are not object keys (that call is truncated in this
 *     listing; presumably it passes the decoded string and its length).
 * Object keys are not reported on their own but copied into the node name.
 *
 * Returns 0 on success, JSON_ERROR_INVALID when JSON_INVALID() is hit
 * (unexpected character, nesting too deep, unbalanced nodes, bad escape, ...)
 * and JSON_ERROR_MEM when capacity() fails. All node names and the string
 * buffer are freed before returning.
 */
96 int | |
97 parsejson(void (*cb)(struct json_node *, size_t, const char *, size_t)) | |
98 { | |
99 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; | |
100 size_t depth = 0, p = 0, len, sz = 0; | |
101 long cp, hi, lo; | |
102 char pri[128], *str = NULL; | |
/* ret starts out as the memory error so that every "goto end" taken after a
 * failed capacity() call reports JSON_ERROR_MEM without setting it again */
103 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; | |
104 const char *expect = EXPECT_VALUE; | |
105 | |
/* the root node gets an empty name */
106 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) | |
107 goto end; | |
108 nodes[0].name[0] = '\0'; | |
109 | |
110 while (1) { | |
111 c = GETNEXT(); | |
/* re-entry point: the number scanner jumps back here with the character
 * that ended the number, so that character is dispatched without reading
 * a new one */
112 handlechr: | |
113 if (c == EOF) | |
114 break; | |
115 | |
116 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ | |
117 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') | |
118 continue; | |
119 | |
/* NUL is rejected explicitly: strchr() would otherwise match the string
 * terminator of `expect` */
120 if (!c || !strchr(expect, c)) | |
121 JSON_INVALID(); | |
122 | |
123 switch (c) { | |
124 case ':': | |
125 iskey = 0; | |
126 expect = EXPECT_VALUE; | |
127 break; | |
/* string: decoded byte by byte into the growable buffer `str`; used for
 * both object keys and string values */
128 case '"': | |
129 nodes[depth].type = JSON_TYPE_STRING; | |
130 escape = 0; | |
131 len = 0; | |
132 while (1) { | |
133 c = GETNEXT(); | |
134 chr: | |
135 /* EOF or control char: 0x7f is not defi… | |
/* EOF is covered by this test as well, as the comment above says */
136 if (c < 0x20) | |
137 JSON_INVALID(); | |
138 | |
139 if (escape) { | |
140 escchr: | |
141 escape = 0; | |
142 switch (c) { | |
143 case '"': /* FALLTHROUGH */ | |
144 case '\\': | |
145 case '/': break; | |
146 case 'b': c = '\b'; break; | |
147 case 'f': c = '\f'; break; | |
148 case 'n': c = '\n'; break; | |
149 case 'r': c = '\r'; break; | |
150 case 't': c = '\t'; break; | |
/* \uXXXX escape.
 * NOTE(review): most of the following lines are cut off in this listing.
 * From what is visible: four hex digits are accumulated into cp, a value in
 * the high surrogate range triggers a look-ahead for a following escape
 * whose value goes into lo, and the result is appended through
 * codepointtoutf8(). The exact handling of unpaired surrogates cannot be
 * read from here -- confirm against the real file. */
151 case 'u': /* hex hex hex hex */ | |
152 if (capacity(&str, &sz, … | |
153 goto end; | |
154 for (i = 12, cp = 0; i >… | |
155 if ((c = GETNEXT… | |
156 JSON_INV… | |
157 cp |= (hexdigit(… | |
158 } | |
159 /* RFC 8259 - 7. Strings… | |
160 * 0xd800 - 0xdbff - hig… | |
161 if (cp >= 0xd800 && cp <… | |
162 if ((c = GETNEXT… | |
163 len += c… | |
164 goto chr; | |
165 } | |
166 if ((c = GETNEXT… | |
167 len += c… | |
168 goto esc… | |
169 } | |
170 for (hi = cp, i … | |
171 if ((c =… | |
172 … | |
173 lo |= (h… | |
174 } | |
175 /* 0xdc00 - 0xdf… | |
176 if (lo >= 0xdc00… | |
177 cp = (hi… | |
178 } else { | |
179 /* handl… | |
180 len += c… | |
181 if (capa… | |
182 … | |
183 len += c… | |
184 continue; | |
185 } | |
186 } | |
187 len += codepointtoutf8(c… | |
/* the \u case has already appended its bytes, so skip the single-byte
 * append below */
188 continue; | |
189 default: | |
190 JSON_INVALID(); /* inval… | |
191 } | |
/* simple escapes fall out of the switch with c replaced by the byte to
 * store */
192 if (capacity(&str, &sz, len, 1) … | |
193 goto end; | |
194 str[len++] = c; | |
195 } else if (c == '\\') { | |
196 escape = 1; | |
/* closing quote: terminate the buffer, then either remember the string as
 * the key of the current node or hand it to the callback */
197 } else if (c == '"') { | |
198 if (capacity(&str, &sz, len, 1) … | |
199 goto end; | |
200 str[len++] = '\0'; | |
201 | |
202 if (iskey) { | |
203 /* copy string as key, i… | |
204 if (capacity(&(nodes[dep… | |
205 goto end; | |
206 memcpy(nodes[depth].name… | |
207 } else { | |
208 cb(nodes, depth + 1, str… | |
209 } | |
210 break; | |
211 } else { | |
212 if (capacity(&str, &sz, len, 1) … | |
213 goto end; | |
214 str[len++] = c; | |
215 } | |
216 } | |
/* a key must be followed by ':', a value by ',' or a closing bracket */
217 if (iskey) | |
218 expect = EXPECT_OBJECT_KEY; | |
219 else | |
220 expect = EXPECT_END; | |
221 break; | |
222 case '[': | |
223 case '{': | |
/* opening a container uses one more slot of nodes[] */
224 if (depth + 1 >= JSON_MAX_NODE_DEPTH) | |
225 JSON_INVALID(); /* too deep */ | |
226 | |
227 nodes[depth].index = 0; | |
228 if (c == '[') { | |
229 nodes[depth].type = JSON_TYPE_ARRAY; | |
230 expect = EXPECT_ARRAY_VALUE; | |
231 } else if (c == '{') { | |
232 iskey = 1; | |
233 nodes[depth].type = JSON_TYPE_OBJECT; | |
234 expect = EXPECT_OBJECT_STRING; | |
235 } | |
236 | |
/* the container itself is reported (with an empty value) before
 * descending into it */
237 cb(nodes, depth + 1, "", 0); | |
238 | |
239 depth++; | |
240 nodes[depth].index = 0; | |
/* reset the name of the child slot to the empty string */
241 if (capacity(&(nodes[depth].name), &(nodes[depth… | |
242 goto end; | |
243 nodes[depth].name[0] = '\0'; | |
244 break; | |
245 case ']': | |
246 case '}': | |
/* the closing bracket is checked against the type of the parent node
 * (remainder of both comparisons is cut off in this listing) */
247 if (!depth || | |
248 (c == ']' && nodes[depth - 1].type != JSON_TY… | |
249 (c == '}' && nodes[depth - 1].type != JSON_TY… | |
250 JSON_INVALID(); /* unbalanced nodes */ | |
251 | |
252 depth--; | |
253 nodes[depth].index++; | |
254 expect = EXPECT_END; | |
255 break; | |
256 case ',': | |
257 if (!depth) | |
258 JSON_INVALID(); /* unbalanced nodes */ | |
259 | |
/* advance the element counter of the enclosing container; inside an object
 * the next token must be a key string, inside an array any value */
260 nodes[depth - 1].index++; | |
261 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { | |
262 iskey = 1; | |
263 expect = EXPECT_STRING; | |
264 } else { | |
265 iskey = 0; | |
266 expect = EXPECT_VALUE; | |
267 } | |
268 break; | |
/* literals: the first character has already been matched by the switch,
 * the remaining ones are read and compared one at a time */
269 case 't': /* true */ | |
270 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETN… | |
271 JSON_INVALID(); | |
272 nodes[depth].type = JSON_TYPE_BOOL; | |
273 cb(nodes, depth + 1, "true", 4); | |
274 expect = EXPECT_END; | |
275 break; | |
276 case 'f': /* false */ | |
277 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETN… | |
278 GETNEXT() != 'e') | |
279 JSON_INVALID(); | |
280 nodes[depth].type = JSON_TYPE_BOOL; | |
281 cb(nodes, depth + 1, "false", 5); | |
282 expect = EXPECT_END; | |
283 break; | |
284 case 'n': /* null */ | |
285 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETN… | |
286 JSON_INVALID(); | |
287 nodes[depth].type = JSON_TYPE_NULL; | |
288 cb(nodes, depth + 1, "null", 4); | |
289 expect = EXPECT_END; | |
290 break; | |
/* number: the text is collected into the fixed buffer `pri` and passed to
 * the callback as a string; the visible code only filters on the set of
 * allowed characters (digits, e, E, +, -, .) and does not convert it */
291 default: /* number */ | |
292 nodes[depth].type = JSON_TYPE_NUMBER; | |
293 p = 0; | |
294 pri[p++] = c; | |
295 expect = EXPECT_END; | |
296 while (1) { | |
297 c = GETNEXT(); | |
/* stop at EOF, at the first character that cannot be part of a number, or
 * when `pri` is full (one byte is kept for the terminator); the character
 * that stopped the loop is not stored but re-dispatched via handlechr */
298 if (c == EOF || | |
299 (!ISDIGIT(c) && c != 'e' && c != 'E'… | |
300 c != '+' && c != '-' && c != '.') || | |
301 p + 1 >= sizeof(pri)) { | |
302 pri[p] = '\0'; | |
303 cb(nodes, depth + 1, pri, p); | |
304 goto handlechr; /* do not read n… | |
305 } else { | |
306 pri[p++] = c; | |
307 } | |
308 } | |
309 } | |
310 } | |
/* EOF reached: every opened array/object must have been closed */
311 if (depth) | |
312 JSON_INVALID(); /* unbalanced nodes */ | |
313 | |
314 ret = 0; /* success */ | |
/* common exit: release every node name (free() of a never-allocated NULL
 * name is a no-op) and the string buffer */
315 end: | |
316 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth+… | |
317 free(nodes[depth].name); | |
318 free(str); | |
319 | |
320 return ret; | |
321 } |