Introduction
Introduction Statistics Contact Development Disclaimer Help
json.c - tscrape - twitter scraper (not working anymore)
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
json.c (7806B)
---
1 #include <ctype.h>
2 #include <errno.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #define GETNEXT getchar
9
10 #include "json.h"
11
/*
 * Encode the Unicode codepoint r as UTF-8 into s.
 *
 * Returns the number of bytes stored in s (1 to 4). A codepoint of 0 stores
 * nothing and returns 0. No NUL terminator is appended, so s needs room for
 * up to 4 bytes.
 */
static int
codepointtoutf8(long r, char *s)
{
	long lead, mask;
	int n, i;

	if (r == 0)
		return 0; /* NUL byte: emit nothing */

	if (r <= 0x7F) {
		s[0] = r; /* 1 byte: 0aaaaaaa */
		return 1;
	}

	/* pick the sequence length, the lead-byte marker and the mask for
	 * the payload bits carried by the lead byte */
	if (r <= 0x07FF) {
		n = 2;
		lead = 0xC0; /* 110aaaaa */
		mask = 0x1F;
	} else if (r <= 0xFFFF) {
		n = 3;
		lead = 0xE0; /* 1110aaaa */
		mask = 0x0F;
	} else {
		n = 4;
		lead = 0xF0; /* 11110aaa */
		mask = 0x07;
	}

	/* continuation bytes (10xxxxxx) are filled from the last one
	 * backwards, each taking the next 6 low bits of the codepoint */
	for (i = n - 1; i > 0; i--) {
		s[i] = 0x80 | (r & 0x3F);
		r >>= 6;
	}
	s[0] = lead | (r & mask);

	return n;
}
41
/*
 * Convert one hexadecimal digit character to its numeric value (0 to 15).
 * Upper- and lowercase letters are both accepted. Any other character
 * yields 0, so callers must validate the character themselves if they need
 * to tell an invalid digit apart from the digit zero.
 */
static int
hexdigit(int c)
{
	int value = 0; /* result for anything that is not a hex digit */

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = (c - 'a') + 10;
	else if ('A' <= c && c <= 'F')
		value = (c - 'A') + 10;

	return value;
}
53
/*
 * Make sure the heap buffer *value, whose current allocation size is *sz,
 * can hold at least cur + inc bytes.
 *
 * value: address of the buffer pointer; replaced when the buffer is grown.
 * sz:    address of the current allocation size; updated on growth.
 * cur:   number of bytes already in use.
 * inc:   number of additional bytes about to be stored.
 *
 * Returns 0 when the buffer is (now) large enough. Returns -1 when
 * cur + inc would overflow size_t (errno is set to EOVERFLOW) or when
 * realloc() fails; in the latter case *value and *sz are left untouched
 * and the old buffer still has to be freed by the caller.
 */
54 static int
55 capacity(char **value, size_t *sz, size_t cur, size_t inc)
56 {
57 size_t need, newsiz;
58 char *newp;
59
60 /* check for addition overflow */
61 if (cur > SIZE_MAX - inc) {
62 errno = EOVERFLOW;
63 return -1;
64 }
65 need = cur + inc;
66
/* only reallocate when the request does not fit the current allocation */
67 if (need > *sz) {
/* above half the address range the size cannot be grown stepwise
 * without risking wrap-around, so ask for the maximum instead */
68 if (need > SIZE_MAX / 2) {
69 newsiz = SIZE_MAX;
70 } else {
/* start from the current size (at least 64) and grow until the new
 * size is strictly larger than need.
 * NOTE(review): the step expression of this loop is cut off in this
 * view; presumably it grows newsiz geometrically - confirm against
 * the complete file. */
71 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= nee…
72 ;
73 }
74 if (!(newp = realloc(*value, newsiz)))
75 return -1; /* up to caller to free *value */
76 *value = newp;
77 *sz = newsiz;
78 }
79 return 0;
80 }
81
/*
 * Parser state sets: each macro is the set of characters accepted as the
 * next non-whitespace input character. parsejson() keeps the active set in
 * its expect variable and tests every character against it with strchr().
 */
/* first character of any value: object, array, string, number or literal */
82 #define EXPECT_VALUE "{[\"-0123456789tfn"
/* opening quote of a string */
83 #define EXPECT_STRING "\""
/* after a complete value: close the enclosing container or a comma */
84 #define EXPECT_END "}],"
/* right after an opening brace: a key string or the closing brace */
85 #define EXPECT_OBJECT_STRING EXPECT_STRING "}"
/* after an object key: the colon separating key and value */
86 #define EXPECT_OBJECT_KEY ":"
/* right after an opening bracket: a value or the closing bracket */
87 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]"
88
/* abort parsing with JSON_ERROR_INVALID; relies on a local variable ret and
 * a label end being present in the function that uses it */
89 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } …
90
/*
 * Streaming JSON parser.
 *
 * Reads input one character at a time through GETNEXT() until EOF and calls
 * cb(nodes, depth + 1, value) for every value it encounters:
 *   - an empty string when an array or object is opened,
 *   - the decoded text of a string that is not an object key,
 *   - the literal text true, false or null,
 *   - the raw text of a number.
 * nodes is the path from the root to the current value; the second argument
 * is the number of valid entries in it. Each entry carries the node type,
 * an index and a name buffer; object keys are copied into the name of the
 * node at the current depth instead of being reported through cb.
 *
 * The set of characters allowed next is tracked in expect (one of the
 * EXPECT_* sets) and every non-whitespace character is checked against it.
 *
 * Returns 0 on success, JSON_ERROR_INVALID when the input is rejected
 * (unexpected character, control character or EOF inside a string, invalid
 * escape, nesting of JSON_MAX_NODE_DEPTH or deeper, unbalanced brackets) and
 * JSON_ERROR_MEM (the initial value of ret) when growing a buffer fails.
 * All name buffers and the string buffer are freed before returning.
 */
91 int
92 parsejson(void (*cb)(struct json_node *, size_t, const char *))
93 {
94 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { 0 };
95 size_t depth = 0, p = 0, len, sz = 0;
96 long cp, hi, lo;
97 char pri[128], *str = NULL;
98 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
99 const char *expect = EXPECT_VALUE;
100
/* the root node gets an empty, NUL-terminated name */
101 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
102 goto end;
103 nodes[0].name[0] = '\0';
104
105 while (1) {
106 c = GETNEXT();
/* re-entry point: the number scanner jumps here with the character that
 * ended the number already in c, so that it is not read twice */
107 handlechr:
108 if (c == EOF)
109 break;
110
111 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
112 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
113 continue;
114
/* reject NUL bytes and anything not allowed in the current state */
115 if (!c || !strchr(expect, c))
116 JSON_INVALID();
117
118 switch (c) {
/* key/value separator: the key is complete, a value must follow */
119 case ':':
120 iskey = 0;
121 expect = EXPECT_VALUE;
122 break;
/* string: decoded bytes are collected in str (len bytes used so far)
 * until the closing quote; escape is set after a backslash */
123 case '"':
124 nodes[depth].type = JSON_TYPE_STRING;
125 escape = 0;
126 len = 0;
127 while (1) {
128 c = GETNEXT();
129 chr:
130 /* EOF or control char: 0x7f is not defi…
131 if (c < 0x20)
132 JSON_INVALID();
133
134 if (escape) {
135 escchr:
136 escape = 0;
137 switch (c) {
138 case '"': /* FALLTHROUGH */
139 case '\\':
140 case '/': break;
141 case 'b': c = '\b'; break;
142 case 'f': c = '\f'; break;
143 case 'n': c = '\n'; break;
144 case 'r': c = '\r'; break;
145 case 't': c = '\t'; break;
146 case 'u': /* hex hex hex hex */
147 if (capacity(&str, &sz, …
148 goto end;
149 for (i = 12, cp = 0; i >…
150 if ((c = GETNEXT…
151 JSON_INV…
152 cp |= (hexdigit(…
153 }
154 /* RFC8259 - 7. Strings …
155 * 0xd800 - 0xdb7f - hig…
156 if (cp >= 0xd800 && cp <…
157 if ((c = GETNEXT…
158 len += c…
159 goto chr;
160 }
161 if ((c = GETNEXT…
162 len += c…
163 goto esc…
164 }
165 for (hi = cp, i …
166 if ((c =…
167 …
168 lo |= (h…
169 }
170 /* 0xdc00 - 0xdf…
171 if (lo >= 0xdc00…
172 cp = (hi…
173 } else {
174 /* handl…
175 len += c…
176 if (capa…
177 …
178 len += c…
179 continue;
180 }
181 }
182 len += codepointtoutf8(c…
183 continue;
184 default:
185 JSON_INVALID(); /* inval…
186 }
187 if (capacity(&str, &sz, len, 1) …
188 goto end;
189 str[len++] = c;
190 } else if (c == '\\') {
191 escape = 1;
192 } else if (c == '"') {
193 if (capacity(&str, &sz, len, 1) …
194 goto end;
195 str[len++] = '\0';
196
197 if (iskey) {
198 /* copy string as key, i…
199 if (capacity(&(nodes[dep…
200 goto end;
201 memcpy(nodes[depth].name…
202 } else {
203 cb(nodes, depth + 1, str…
204 }
205 break;
206 } else {
207 if (capacity(&str, &sz, len, 1) …
208 goto end;
209 str[len++] = c;
210 }
211 }
212 if (iskey)
213 expect = EXPECT_OBJECT_KEY;
214 else
215 expect = EXPECT_END;
216 break;
217 case '[':
218 case '{':
219 if (depth + 1 >= JSON_MAX_NODE_DEPTH)
220 JSON_INVALID(); /* too deep */
221
222 nodes[depth].index = 0;
223 if (c == '[') {
224 nodes[depth].type = JSON_TYPE_ARRAY;
225 expect = EXPECT_ARRAY_VALUE;
226 } else if (c == '{') {
227 iskey = 1;
228 nodes[depth].type = JSON_TYPE_OBJECT;
229 expect = EXPECT_OBJECT_STRING;
230 }
231
/* report the container itself, with an empty value, before descending */
232 cb(nodes, depth + 1, "");
233
/* descend: the child level starts at index 0 with an empty name */
234 depth++;
235 nodes[depth].index = 0;
236 if (capacity(&(nodes[depth].name), &(nodes[depth…
237 goto end;
238 nodes[depth].name[0] = '\0';
239 break;
240 case ']':
241 case '}':
242 if (!depth ||
243 (c == ']' && nodes[depth - 1].type != JSON_TY…
244 (c == '}' && nodes[depth - 1].type != JSON_TY…
245 JSON_INVALID(); /* unbalanced nodes */
246
/* go back up one level and increment the index of the slot returned to */
247 nodes[--depth].index++;
248 expect = EXPECT_END;
249 break;
250 case ',':
251 if (!depth)
252 JSON_INVALID(); /* unbalanced nodes */
253
/* advance the index kept in the slot of the enclosing container; inside
 * an object the next token has to be a key string, otherwise a value */
254 nodes[depth - 1].index++;
255 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
256 iskey = 1;
257 expect = EXPECT_STRING;
258 } else {
259 expect = EXPECT_VALUE;
260 }
261 break;
262 case 't': /* true */
263 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETN…
264 JSON_INVALID();
265 nodes[depth].type = JSON_TYPE_BOOL;
266 cb(nodes, depth + 1, "true");
267 expect = EXPECT_END;
268 break;
269 case 'f': /* false */
270 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETN…
271 GETNEXT() != 'e')
272 JSON_INVALID();
273 nodes[depth].type = JSON_TYPE_BOOL;
274 cb(nodes, depth + 1, "false");
275 expect = EXPECT_END;
276 break;
277 case 'n': /* null */
278 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETN…
279 JSON_INVALID();
280 nodes[depth].type = JSON_TYPE_NULL;
281 cb(nodes, depth + 1, "null");
282 expect = EXPECT_END;
283 break;
284 default: /* number */
285 nodes[depth].type = JSON_TYPE_NUMBER;
286 p = 0;
287 pri[p++] = c;
288 expect = EXPECT_END;
/* copy the raw number text into pri; scanning stops at EOF, at a
 * character outside the listed number characters, or when pri is full,
 * and the collected text is then reported through cb */
289 while (1) {
290 c = GETNEXT();
291 if (c == EOF ||
292 !c || !strchr("0123456789eE+-.", c) …
293 p + 1 >= sizeof(pri)) {
294 pri[p] = '\0';
295 cb(nodes, depth + 1, pri);
296 goto handlechr; /* do not read n…
297 } else {
298 pri[p++] = c;
299 }
300 }
301 }
302 }
303 if (depth)
304 JSON_INVALID(); /* unbalanced nodes */
305
306 ret = 0; /* success */
307 end:
/* release the name buffer of every level; nodes was zero-initialised, so
 * levels that were never entered hold NULL, which free() accepts */
308 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth+…
309 free(nodes[depth].name);
310 free(str);
311
312 return ret;
313 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.