json.c - tscrape - twitter scraper (not working anymore) | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
json.c (7806B) | |
--- | |
1 #include <ctype.h> | |
2 #include <errno.h> | |
3 #include <stdint.h> | |
4 #include <stdio.h> | |
5 #include <stdlib.h> | |
6 #include <string.h> | |
7 | |
8 #define GETNEXT getchar | |
9 | |
10 #include "json.h" | |
11 | |
/*
 * Encode the Unicode code point `r` as UTF-8 into the buffer `s`.
 *
 * `s` must have room for up to 4 bytes; no NUL terminator is written.
 *
 * Returns the number of bytes written (1 to 4), or 0 when nothing is
 * written. Nothing is written for r == 0 (a NUL byte is never emitted) and
 * for values outside 0..0x10FFFF: negative values and values above the
 * Unicode range have no valid UTF-8 encoding, and masking them into a
 * sequence would silently produce a different character or invalid bytes.
 *
 * Surrogate values (0xD800-0xDFFF) are not filtered here and are still
 * encoded as 3 bytes, matching the previous behaviour.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r <= 0 || r > 0x10FFFF)
		return 0; /* NUL byte or not a Unicode code point */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	}
	if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 |  (r & 0x00003F));       /* 10bbbbbb */
		return 2;
	}
	if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10bbbbbb */
		s[2] = (char)(0x80 |  (r & 0x00003F));        /* 10cccccc */
		return 3;
	}

	/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
	s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
	s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
	s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10cccccc */
	s[3] = (char)(0x80 |  (r & 0x00003F));        /* 10dddddd */
	return 4;
}
41 | |
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * '0'-'9', 'a'-'f' and 'A'-'F' map to 0-15. Any other input maps to 0, so
 * a caller that needs to tell '0' apart from a non-hex character has to
 * validate the character itself.
 */
static int
hexdigit(int c)
{
	int value = 0;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = c - 'a' + 10;
	else if ('A' <= c && c <= 'F')
		value = c - 'A' + 10;

	return value;
}
53 | |
/*
 * Make sure the heap buffer (*value), whose current size is (*sz), can hold
 * `cur` + `inc` bytes, growing it with realloc() when it is too small.
 *
 * value: in/out pointer to the buffer; updated when the buffer is moved.
 * sz:    in/out allocated size of the buffer; updated on growth.
 * cur:   number of bytes already in use.
 * inc:   number of additional bytes the caller wants to write.
 *
 * Returns 0 on success. Returns -1 when `cur` + `inc` would overflow size_t
 * (errno is set to EOVERFLOW) or when realloc() fails; in the realloc case
 * the old buffer is left untouched and the caller remains responsible for
 * freeing it.
 *
 * When growth is needed the new size is SIZE_MAX if the request exceeds
 * SIZE_MAX / 2, otherwise it is found by a loop that starts at the larger of
 * 64 and the current size.
 * NOTE(review): the rest of that loop header is cut off in this listing;
 * presumably it keeps enlarging newsiz until it exceeds `need` - confirm
 * against the real file.
 */
54 static int | |
55 capacity(char **value, size_t *sz, size_t cur, size_t inc) | |
56 { | |
57 size_t need, newsiz; | |
58 char *newp; | |
59 | |
60 /* check for addition overflow */ | |
61 if (cur > SIZE_MAX - inc) { | |
62 errno = EOVERFLOW; | |
63 return -1; | |
64 } | |
65 need = cur + inc; | |
66 | |
67 if (need > *sz) { | |
68 if (need > SIZE_MAX / 2) { | |
69 newsiz = SIZE_MAX; | |
70 } else { | |
71 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= nee… | |
72 ; | |
73 } | |
74 if (!(newp = realloc(*value, newsiz))) | |
75 return -1; /* up to caller to free *value */ | |
76 *value = newp; | |
77 *sz = newsiz; | |
78 } | |
79 return 0; | |
80 } | |
81 | |
/*
 * Sets of characters that parsejson() accepts as the next non-whitespace
 * character; the parser tests the input with strchr(expect, c).
 *
 * EXPECT_VALUE:         first character of any value: object, array,
 *                       string, number ('-' or a digit), true, false, null.
 * EXPECT_STRING:        a string opener only.
 * EXPECT_END:           what may follow a value: a closer or a comma.
 * EXPECT_OBJECT_STRING: selected right after '{': a key string or '}'.
 * EXPECT_OBJECT_KEY:    selected after a key string: the ':' separator.
 * EXPECT_ARRAY_VALUE:   selected right after '[': any value or ']'.
 */
82 #define EXPECT_VALUE "{[\"-0123456789tfn" | |
83 #define EXPECT_STRING "\"" | |
84 #define EXPECT_OBJECT_STRING EXPECT_STRING "}" | |
86 #define EXPECT_OBJECT_KEY ":" | |
87 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" | |
88 | |
/*
 * Abort parsing: store JSON_ERROR_INVALID in the local `ret` and jump to the
 * local `end` label, so it is only usable inside a function providing both.
 * NOTE(review): the tail of this macro is cut off in this listing.
 */
89 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } … | |
90 | |
/*
 * Streaming JSON parser: reads characters with GETNEXT() until EOF and
 * reports each value through the callback `cb`.
 *
 * cb is invoked as cb(nodes, depth + 1, value), where `nodes` is the stack
 * of nodes from the root down to the current one and `value` is:
 *   - ""                       when an array or object is opened,
 *   - "true", "false", "null"  for the three literals,
 *   - the raw number text      for numbers; it is collected in the local
 *                              buffer pri[], so it is cut at
 *                              sizeof(pri) - 1 characters,
 *   - the string contents      for strings that are not object keys.
 * A string read while `iskey` is set is not reported; it is copied into
 * nodes[depth].name instead.
 *
 * Parsing rules visible in this function:
 *   - `expect` holds the set of characters allowed as the next token; a NUL
 *     byte or any character outside that set is an error.
 *   - Space, tab, newline and carriage return between tokens are skipped.
 *   - Inside a string, EOF or any character below 0x20 is an error.
 *   - A number ends at the first character that is not one of
 *     "0123456789eE+-." (or at EOF, or when pri[] is full); that character
 *     is then handled as the next token instead of reading a new one.
 *   - Opening a container when depth + 1 would reach JSON_MAX_NODE_DEPTH,
 *     a closer or comma at depth 0, a mismatched closer, or reaching EOF
 *     with containers still open are errors.
 *
 * Returns 0 on success, JSON_ERROR_INVALID for the errors above, and
 * JSON_ERROR_MEM when capacity() fails (ret starts out as JSON_ERROR_MEM
 * and the allocation failure paths jump straight to `end`).
 * Every nodes[].name buffer and the string buffer are freed before return,
 * on both the success and the error path.
 *
 * NOTE(review): lines ending in "…" are cut off in this listing. The \u
 * escape and surrogate-pair handling is only partly visible and is
 * deliberately not described here; check the real file before relying on it.
 */
91 int | |
92 parsejson(void (*cb)(struct json_node *, size_t, const char *)) | |
93 { | |
94 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { 0 }; | |
95 size_t depth = 0, p = 0, len, sz = 0; | |
96 long cp, hi, lo; | |
97 char pri[128], *str = NULL; | |
98 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; | |
99 const char *expect = EXPECT_VALUE; | |
100 | |
101 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) | |
102 goto end; | |
103 nodes[0].name[0] = '\0'; | |
104 | |
105 while (1) { | |
106 c = GETNEXT(); | |
107 handlechr: | |
108 if (c == EOF) | |
109 break; | |
110 | |
111 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ | |
112 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') | |
113 continue; | |
114 | |
115 if (!c || !strchr(expect, c)) | |
116 JSON_INVALID(); | |
117 | |
118 switch (c) { | |
119 case ':': | |
120 iskey = 0; | |
121 expect = EXPECT_VALUE; | |
122 break; | |
123 case '"': | |
124 nodes[depth].type = JSON_TYPE_STRING; | |
125 escape = 0; | |
126 len = 0; | |
127 while (1) { | |
128 c = GETNEXT(); | |
129 chr: | |
130 /* EOF or control char: 0x7f is not defi… | |
131 if (c < 0x20) | |
132 JSON_INVALID(); | |
133 | |
134 if (escape) { | |
135 escchr: | |
136 escape = 0; | |
137 switch (c) { | |
138 case '"': /* FALLTHROUGH */ | |
139 case '\\': | |
140 case '/': break; | |
141 case 'b': c = '\b'; break; | |
142 case 'f': c = '\f'; break; | |
143 case 'n': c = '\n'; break; | |
144 case 'r': c = '\r'; break; | |
145 case 't': c = '\t'; break; | |
146 case 'u': /* hex hex hex hex */ | |
147 if (capacity(&str, &sz, … | |
148 goto end; | |
149 for (i = 12, cp = 0; i >… | |
150 if ((c = GETNEXT… | |
151 JSON_INV… | |
152 cp |= (hexdigit(… | |
153 } | |
154 /* RFC8259 - 7. Strings … | |
155 * 0xd800 - 0xdb7f - hig… | |
156 if (cp >= 0xd800 && cp <… | |
157 if ((c = GETNEXT… | |
158 len += c… | |
159 goto chr; | |
160 } | |
161 if ((c = GETNEXT… | |
162 len += c… | |
163 goto esc… | |
164 } | |
165 for (hi = cp, i … | |
166 if ((c =… | |
167 … | |
168 lo |= (h… | |
169 } | |
170 /* 0xdc00 - 0xdf… | |
171 if (lo >= 0xdc00… | |
172 cp = (hi… | |
173 } else { | |
174 /* handl… | |
175 len += c… | |
176 if (capa… | |
177 … | |
178 len += c… | |
179 continue; | |
180 } | |
181 } | |
182 len += codepointtoutf8(c… | |
183 continue; | |
184 default: | |
185 JSON_INVALID(); /* inval… | |
186 } | |
187 if (capacity(&str, &sz, len, 1) … | |
188 goto end; | |
189 str[len++] = c; | |
190 } else if (c == '\\') { | |
191 escape = 1; | |
192 } else if (c == '"') { | |
193 if (capacity(&str, &sz, len, 1) … | |
194 goto end; | |
195 str[len++] = '\0'; | |
196 | |
197 if (iskey) { | |
198 /* copy string as key, i… | |
199 if (capacity(&(nodes[dep… | |
200 goto end; | |
201 memcpy(nodes[depth].name… | |
202 } else { | |
203 cb(nodes, depth + 1, str… | |
204 } | |
205 break; | |
206 } else { | |
207 if (capacity(&str, &sz, len, 1) … | |
208 goto end; | |
209 str[len++] = c; | |
210 } | |
211 } | |
212 if (iskey) | |
213 expect = EXPECT_OBJECT_KEY; | |
214 else | |
215 expect = EXPECT_END; | |
216 break; | |
217 case '[': | |
218 case '{': | |
219 if (depth + 1 >= JSON_MAX_NODE_DEPTH) | |
220 JSON_INVALID(); /* too deep */ | |
221 | |
222 nodes[depth].index = 0; | |
223 if (c == '[') { | |
224 nodes[depth].type = JSON_TYPE_ARRAY; | |
225 expect = EXPECT_ARRAY_VALUE; | |
226 } else if (c == '{') { | |
227 iskey = 1; | |
228 nodes[depth].type = JSON_TYPE_OBJECT; | |
229 expect = EXPECT_OBJECT_STRING; | |
230 } | |
231 | |
232 cb(nodes, depth + 1, ""); | |
233 | |
234 depth++; | |
235 nodes[depth].index = 0; | |
236 if (capacity(&(nodes[depth].name), &(nodes[depth… | |
237 goto end; | |
238 nodes[depth].name[0] = '\0'; | |
239 break; | |
240 case ']': | |
241 case '}': | |
242 if (!depth || | |
243 (c == ']' && nodes[depth - 1].type != JSON_TY… | |
244 (c == '}' && nodes[depth - 1].type != JSON_TY… | |
245 JSON_INVALID(); /* unbalanced nodes */ | |
246 | |
247 nodes[--depth].index++; | |
248 expect = EXPECT_END; | |
249 break; | |
250 case ',': | |
251 if (!depth) | |
252 JSON_INVALID(); /* unbalanced nodes */ | |
253 | |
254 nodes[depth - 1].index++; | |
255 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { | |
256 iskey = 1; | |
257 expect = EXPECT_STRING; | |
258 } else { | |
259 expect = EXPECT_VALUE; | |
260 } | |
261 break; | |
262 case 't': /* true */ | |
263 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETN… | |
264 JSON_INVALID(); | |
265 nodes[depth].type = JSON_TYPE_BOOL; | |
266 cb(nodes, depth + 1, "true"); | |
267 expect = EXPECT_END; | |
268 break; | |
269 case 'f': /* false */ | |
270 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETN… | |
271 GETNEXT() != 'e') | |
272 JSON_INVALID(); | |
273 nodes[depth].type = JSON_TYPE_BOOL; | |
274 cb(nodes, depth + 1, "false"); | |
275 expect = EXPECT_END; | |
276 break; | |
277 case 'n': /* null */ | |
278 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETN… | |
279 JSON_INVALID(); | |
280 nodes[depth].type = JSON_TYPE_NULL; | |
281 cb(nodes, depth + 1, "null"); | |
282 expect = EXPECT_END; | |
283 break; | |
284 default: /* number */ | |
285 nodes[depth].type = JSON_TYPE_NUMBER; | |
286 p = 0; | |
287 pri[p++] = c; | |
288 expect = EXPECT_END; | |
289 while (1) { | |
290 c = GETNEXT(); | |
291 if (c == EOF || | |
292 !c || !strchr("0123456789eE+-.", c) … | |
293 p + 1 >= sizeof(pri)) { | |
294 pri[p] = '\0'; | |
295 cb(nodes, depth + 1, pri); | |
296 goto handlechr; /* do not read n… | |
297 } else { | |
298 pri[p++] = c; | |
299 } | |
300 } | |
301 } | |
302 } | |
303 if (depth) | |
304 JSON_INVALID(); /* unbalanced nodes */ | |
305 | |
306 ret = 0; /* success */ | |
307 end: | |
308 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth+… | |
309 free(nodes[depth].name); | |
310 free(str); | |
311 | |
312 return ret; | |
313 } |