xml.c - frontends - front-ends for some sites (experiment) | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
xml.c (11454B) | |
--- | |
1 #include <errno.h> | |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 | |
6 #include "xml.h" | |
7 | |
/*
 * Locale-independent ASCII classifiers for int values such as those returned
 * by the parser's byte reader.
 *
 * Both rely on unsigned wrap-around: subtracting the lower bound from a value
 * below it wraps to a huge number, so a single "<" tests the whole range.
 * The argument is fully parenthesized inside the cast so that compound
 * arguments (e.g. a conditional expression) are converted as a whole;
 * otherwise the cast would bind to the first operand only and the range
 * test would silently run in signed arithmetic.
 *
 * ISALPHA: non-zero for 'A'-'Z' and 'a'-'z' (OR-ing 32 folds case).
 * ISSPACE: non-zero for ' ' and the five bytes '\t' through '\r'.
 * NOTE: ISSPACE evaluates its argument twice; do not pass expressions with
 * side effects.
 */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
10 | |
/* data buffer, its size and the read offset used for parsing XML,
 * see getnext() */
static const unsigned char *xml_data_buf;
static size_t xml_data_size;
static size_t xml_data_off;

/*
 * Select the in-memory buffer the parser reads from and rewind to its start.
 *
 * s:   buffer to parse; it is not copied, so it must stay valid while parsing.
 * len: number of bytes available in s.
 *
 * A NULL buffer is treated as empty regardless of len, so the reader reports
 * end of input instead of dereferencing NULL.
 */
void
setxmldata(const char *s, size_t len)
{
	xml_data_off = 0;
	xml_data_size = s ? len : 0;
	/* keep the const qualifier: the parser only ever reads the data */
	xml_data_buf = (const unsigned char *)s;
}
23 | |
24 static int | |
25 getnext(void) | |
26 { | |
27 if (xml_data_off >= xml_data_size) | |
28 return EOF; | |
29 return xml_data_buf[xml_data_off++]; | |
30 } | |
31 | |
/*
 * Parse the attribute list of the tag currently held in x->tag.
 *
 * Bytes are read with GETNEXT() until a '>' is seen or the input ends.
 * Attribute names are collected in x->name and values in x->data; the
 * callbacks x->xmlattrstart, x->xmlattr and x->xmlattrend are invoked when
 * they are set. A value is delimited by a single or double quote, or by
 * whitespace when it is unquoted. Text starting with '&' inside a value is
 * buffered separately from the surrounding text.
 *
 * NOTE(review): several lines of this listing are truncated ("…"), so the
 * exact conditions and callback arguments cannot be confirmed from here.
 */
32 static void | |
33 xml_parseattrs(XMLParser *x) | |
34 { | |
35 size_t namelen = 0, valuelen; | |
36 int c, endsep, endname = 0, valuestart = 0; | |
37 | |
/* consume bytes until '>' (see the break near the end of the loop) or EOF */
38 while ((c = GETNEXT()) != EOF) { | |
39 if (ISSPACE(c)) { | |
40 if (namelen) | |
41 endname = 1; | |
42 continue; | |
43 } else if (c == '?') | |
44 ; /* ignore */ | |
45 else if (c == '=') { | |
46 x->name[namelen] = '\0'; | |
47 valuestart = 1; | |
48 endname = 1; | |
49 } else if (namelen && ((endname && !valuestart && ISALPH… | |
50 /* attribute without value */ | |
51 x->name[namelen] = '\0'; | |
52 if (x->xmlattrstart) | |
53 x->xmlattrstart(x, x->tag, x->taglen, x-… | |
54 if (x->xmlattr) | |
55 x->xmlattr(x, x->tag, x->taglen, x->name… | |
56 if (x->xmlattrend) | |
57 x->xmlattrend(x, x->tag, x->taglen, x->n… | |
58 endname = 0; | |
59 x->name[0] = c; | |
60 namelen = 1; | |
61 } else if (namelen && valuestart) { | |
62 /* attribute with value */ | |
63 if (x->xmlattrstart) | |
64 x->xmlattrstart(x, x->tag, x->taglen, x-… | |
65 | |
66 valuelen = 0; | |
67 if (c == '\'' || c == '"') { | |
68 endsep = c; | |
69 } else { | |
70 endsep = ' '; /* ISSPACE() */ | |
71 goto startvalue; | |
72 } | |
73 | |
74 while ((c = GETNEXT()) != EOF) { | |
75 startvalue: | |
76 if (c == '&') { /* entities */ | |
77 x->data[valuelen] = '\0'; | |
78 /* call data function with data … | |
79 if (valuelen && x->xmlattr) | |
80 x->xmlattr(x, x->tag, x-… | |
81 x->data[0] = c; | |
82 valuelen = 1; | |
83 while ((c = GETNEXT()) != EOF) { | |
84 if (c == endsep || (ends… | |
85 break; | |
86 if (valuelen < sizeof(x-… | |
87 x->data[valuelen… | |
88 else { | |
89 /* entity too lo… | |
90 x->data[valuelen… | |
91 if (x->xmlattr) | |
92 x->xmlat… | |
93 x->data[0] = c; | |
94 valuelen = 1; | |
95 break; | |
96 } | |
97 if (c == ';') { | |
98 x->data[valuelen… | |
99 if (x->xmlattren… | |
100 x->xmlat… | |
101 valuelen = 0; | |
102 break; | |
103 } | |
104 } | |
105 } else if (c != endsep && !(endsep == ' … | |
106 if (valuelen < sizeof(x->data) -… | |
107 x->data[valuelen++] = c; | |
108 } else { | |
109 x->data[valuelen] = '\0'; | |
110 if (x->xmlattr) | |
111 x->xmlattr(x, x-… | |
112 x->data[0] = c; | |
113 valuelen = 1; | |
114 } | |
115 } | |
116 if (c == endsep || (endsep == ' ' && (c … | |
117 x->data[valuelen] = '\0'; | |
118 if (x->xmlattr) | |
119 x->xmlattr(x, x->tag, x-… | |
120 if (x->xmlattrend) | |
121 x->xmlattrend(x, x->tag,… | |
122 break; | |
123 } | |
124 } | |
125 namelen = endname = valuestart = 0; | |
126 } else if (namelen < sizeof(x->name) - 1) { | |
127 x->name[namelen++] = c; | |
128 } | |
/* '>' ends the attribute list; '/' flags a short tag and drops the name */
129 if (c == '>') { | |
130 break; | |
131 } else if (c == '/') { | |
132 x->isshorttag = 1; | |
133 x->name[0] = '\0'; | |
134 namelen = 0; | |
135 } | |
136 } | |
137 } | |
138 | |
139 static void | |
140 xml_parsecomment(XMLParser *x) | |
141 { | |
142 size_t datalen = 0, i = 0; | |
143 int c; | |
144 | |
145 if (x->xmlcommentstart) | |
146 x->xmlcommentstart(x); | |
147 while ((c = GETNEXT()) != EOF) { | |
148 if (c == '-' || c == '>') { | |
149 if (x->xmlcomment && datalen) { | |
150 x->data[datalen] = '\0'; | |
151 x->xmlcomment(x, x->data, datalen); | |
152 datalen = 0; | |
153 } | |
154 } | |
155 | |
156 if (c == '-') { | |
157 if (++i > 2) { | |
158 if (x->xmlcomment) | |
159 for (; i > 2; i--) | |
160 x->xmlcomment(x, "-", 1); | |
161 i = 2; | |
162 } | |
163 continue; | |
164 } else if (c == '>' && i == 2) { | |
165 if (x->xmlcommentend) | |
166 x->xmlcommentend(x); | |
167 return; | |
168 } else if (i) { | |
169 if (x->xmlcomment) { | |
170 for (; i > 0; i--) | |
171 x->xmlcomment(x, "-", 1); | |
172 } | |
173 i = 0; | |
174 } | |
175 | |
176 if (datalen < sizeof(x->data) - 1) { | |
177 x->data[datalen++] = c; | |
178 } else { | |
179 x->data[datalen] = '\0'; | |
180 if (x->xmlcomment) | |
181 x->xmlcomment(x, x->data, datalen); | |
182 x->data[0] = c; | |
183 datalen = 1; | |
184 } | |
185 } | |
186 } | |
187 | |
188 static void | |
189 xml_parsecdata(XMLParser *x) | |
190 { | |
191 size_t datalen = 0, i = 0; | |
192 int c; | |
193 | |
194 if (x->xmlcdatastart) | |
195 x->xmlcdatastart(x); | |
196 while ((c = GETNEXT()) != EOF) { | |
197 if (c == ']' || c == '>') { | |
198 if (x->xmlcdata && datalen) { | |
199 x->data[datalen] = '\0'; | |
200 x->xmlcdata(x, x->data, datalen); | |
201 datalen = 0; | |
202 } | |
203 } | |
204 | |
205 if (c == ']') { | |
206 if (++i > 2) { | |
207 if (x->xmlcdata) | |
208 for (; i > 2; i--) | |
209 x->xmlcdata(x, "]", 1); | |
210 i = 2; | |
211 } | |
212 continue; | |
213 } else if (c == '>' && i == 2) { | |
214 if (x->xmlcdataend) | |
215 x->xmlcdataend(x); | |
216 return; | |
217 } else if (i) { | |
218 if (x->xmlcdata) | |
219 for (; i > 0; i--) | |
220 x->xmlcdata(x, "]", 1); | |
221 i = 0; | |
222 } | |
223 | |
224 if (datalen < sizeof(x->data) - 1) { | |
225 x->data[datalen++] = c; | |
226 } else { | |
227 x->data[datalen] = '\0'; | |
228 if (x->xmlcdata) | |
229 x->xmlcdata(x, x->data, datalen); | |
230 x->data[0] = c; | |
231 datalen = 1; | |
232 } | |
233 } | |
234 } | |
235 | |
/*
 * Encode the Unicode code point r as UTF-8 into s.
 *
 * s must have room for at least 4 bytes; no NUL terminator is written.
 * Returns the number of bytes stored (1-4). Returns 0 and stores nothing for
 * r == 0 (NUL byte) and for values that are not Unicode scalar values:
 * negative numbers, values above 0x10FFFF and the surrogate range
 * 0xD800-0xDFFF. Without that check such values would be emitted as
 * truncated or ill-formed byte sequences.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r <= 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
		return 0; /* NUL byte or not encodable */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	} else if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 | (r & 0x00003F)); /* 10bbbbbb */
		return 2;
	} else if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6)); /* 10bbbbbb */
		s[2] = (char)(0x80 | (r & 0x00003F)); /* 10cccccc */
		return 3;
	} else {
		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
		s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
		s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
		s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6)); /* 10cccccc */
		s[3] = (char)(0x80 | (r & 0x00003F)); /* 10dddddd */
		return 4;
	}
}
265 | |
/*
 * Translate a named entity into its one-character replacement.
 *
 * e:      entity name without the leading '&' but including the trailing
 *         ';' (e.g. "amp;"). Only the five predefined XML entities are
 *         known, each in all-lowercase and all-uppercase spelling.
 * buf:    receives the replacement character followed by a NUL byte.
 * bufsiz: size of buf; must be at least 2.
 *
 * Returns 1 (the length of the replacement) on success, or -1 when the
 * buffer is too small or the name is not known.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const struct {
		const char *name;
		int ch;
	} table[] = {
		{ "amp;",  '&'  },
		{ "lt;",   '<'  },
		{ "gt;",   '>'  },
		{ "apos;", '\'' },
		{ "quot;", '"'  },
		{ "AMP;",  '&'  },
		{ "LT;",   '<'  },
		{ "GT;",   '>'  },
		{ "APOS;", '\'' },
		{ "QUOT;", '"'  }
	};
	const size_t count = sizeof(table) / sizeof(table[0]);
	size_t n;

	/* need room for one character plus the terminator */
	if (bufsiz < 2)
		return -1;

	for (n = 0; n < count; n++) {
		if (strcmp(e, table[n].name) != 0)
			continue;
		buf[0] = table[n].ch;
		buf[1] = '\0';
		return 1;
	}

	return -1;
}
299 | |
/*
 * Convert a numeric character reference to a NUL-terminated UTF-8 string.
 *
 * e:      reference text after the leading "&#": decimal digits, or 'x'
 *         followed by hexadecimal digits, terminated by ';'
 *         (e.g. "65;" or "x41;").
 * buf:    receives the UTF-8 bytes followed by a NUL byte.
 * bufsiz: size of buf; must be at least 5 (4 bytes of UTF-8 + terminator).
 *
 * Returns the number of bytes written excluding the terminator (0 for the
 * code point 0), or -1 when the buffer is too small, the reference is not
 * well-formed, or the value is not a valid code point.
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	long l;
	int len, base = 10, isdigit1;
	char *end;

	/* buffer is too small */
	if (bufsiz < 5)
		return -1;

	/* hex (16) or decimal (10) */
	if (*e == 'x') {
		base = 16;
		e++;
	}

	/* strtol() on its own skips leading whitespace and accepts a sign and,
	 * in base 16, a "0x"/"0X" prefix; none of these are part of a
	 * well-formed reference, so the number must start with a digit of the
	 * chosen base and must not carry such a prefix */
	isdigit1 = (*e >= '0' && *e <= '9');
	if (base == 16) {
		if (!isdigit1)
			isdigit1 = (*e >= 'a' && *e <= 'f') ||
			           (*e >= 'A' && *e <= 'F');
		if (e[0] == '0' && (e[1] == 'x' || e[1] == 'X'))
			return -1;
	}
	if (!isdigit1)
		return -1;

	errno = 0;
	l = strtol(e, &end, base);
	/* invalid value or not a well-formed entity or invalid code point */
	if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
	    (l >= 0xd800 && l <= 0xdfff))
		return -1;
	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	return len;
}
326 | |
/*
 * Convert a named or numeric entity string, including its leading '&', to
 * the string it stands for, stored NUL-terminated in buf (bufsiz bytes).
 * A '#' right after the '&' selects the numeric form; anything else is
 * looked up as a named entity.
 * Returns the byte length of the resulting string, or -1 on failure.
 */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* every entity starts with '&' */
	if (*e != '&')
		return -1;

	return (e[1] == '#')
	    ? numericentitytostr(e + 2, buf, bufsiz) /* numeric entity */
	    : namedentitytostr(e + 1, buf, bufsiz);  /* named entity */
}
341 | |
/*
 * Parse the whole input obtained through GETNEXT() and report what is found
 * through the optional callbacks stored in x.
 *
 * Input before the first '<' is skipped. After that the main loop
 * alternates between two states until the input ends:
 *  - at '<': a tag is parsed. "<!" leads to xml_parsecomment() or
 *    xml_parsecdata(); anything else is handled as a normal tag whose name
 *    is collected in x->tag / x->taglen, with attributes handled by
 *    xml_parseattrs(). Tags starting with '?' or containing '/' are flagged
 *    in x->isshorttag.
 *  - otherwise: character data up to the next '<' is buffered in x->data and
 *    reported between x->xmldatastart and x->xmldataend. Text starting with
 *    '&' is buffered separately from the surrounding text.
 *
 * NOTE(review): several lines of this listing are truncated ("…"), so the
 * exact conditions and callback arguments cannot be confirmed from here.
 */
342 void | |
343 xml_parse(XMLParser *x) | |
344 { | |
345 size_t datalen, tagdatalen; | |
346 int c, isend; | |
347 | |
348 while ((c = GETNEXT()) != EOF && c != '<') | |
349 ; /* skip until < */ | |
350 | |
351 while (c != EOF) { | |
352 if (c == '<') { /* parse tag */ | |
353 if ((c = GETNEXT()) == EOF) | |
354 return; | |
355 | |
356 if (c == '!') { /* cdata and comments */ | |
357 for (tagdatalen = 0; (c = GETNEXT()) != … | |
358 /* NOTE: sizeof(x->data) must be… | |
359 if (tagdatalen <= sizeof("[CDATA… | |
360 x->data[tagdatalen++] = … | |
361 if (c == '>') | |
362 break; | |
363 else if (c == '-' && tagdatalen … | |
364 (x->data[0] == '… | |
365 xml_parsecomment(x); | |
366 break; | |
367 } else if (c == '[') { | |
368 if (tagdatalen == sizeof… | |
369 !strncmp(x->data, "[… | |
370 xml_parsecdata(x… | |
371 break; | |
372 } | |
373 } | |
374 } | |
375 } else { | |
376 /* normal tag (open, short open, close),… | |
377 x->tag[0] = c; | |
378 x->taglen = 1; | |
379 x->isshorttag = isend = 0; | |
380 | |
381 /* treat processing instruction as short… | |
382 if (c == '?') { | |
383 x->isshorttag = 1; | |
384 } else if (c == '/') { | |
/* end tag: the name starts with the byte after the '/' */
385 if ((c = GETNEXT()) == EOF) | |
386 return; | |
387 x->tag[0] = c; | |
388 isend = 1; | |
389 } | |
390 | |
391 while ((c = GETNEXT()) != EOF) { | |
392 if (c == '/') | |
393 x->isshorttag = 1; /* sh… | |
394 else if (c == '>' || ISSPACE(c))… | |
395 x->tag[x->taglen] = '\0'; | |
396 if (isend) { /* end tag,… | |
397 while (c != '>' … | |
398 c = GETN… | |
399 if (x->xmltagend) | |
400 x->xmlta… | |
401 x->tag[0] = '\0'; | |
402 x->taglen = 0; | |
403 } else { | |
404 /* start tag */ | |
405 if (x->xmltagsta… | |
406 x->xmlta… | |
407 if (ISSPACE(c)) | |
408 xml_pars… | |
409 if (x->xmltagsta… | |
410 x->xmlta… | |
411 } | |
412 /* call tagend for short… | |
413 if (x->isshorttag) { | |
414 if (x->xmltagend) | |
415 x->xmlta… | |
416 x->tag[0] = '\0'; | |
417 x->taglen = 0; | |
418 } | |
419 break; | |
420 } else if (x->taglen < sizeof(x-… | |
421 x->tag[x->taglen++] = c;… | |
422 } | |
423 } | |
424 } else { | |
425 /* parse tag data */ | |
426 datalen = 0; | |
427 if (x->xmldatastart) | |
428 x->xmldatastart(x); | |
429 while ((c = GETNEXT()) != EOF) { | |
430 if (c == '&') { | |
/* flush text collected so far, then buffer the '&' sequence on its own */
431 if (datalen) { | |
432 x->data[datalen] = '\0'; | |
433 if (x->xmldata) | |
434 x->xmldata(x, x-… | |
435 } | |
436 x->data[0] = c; | |
437 datalen = 1; | |
438 while ((c = GETNEXT()) != EOF) { | |
439 if (c == '<') | |
440 break; | |
441 if (datalen < sizeof(x->… | |
442 x->data[datalen+… | |
443 else { | |
444 /* entity too lo… | |
445 x->data[datalen]… | |
446 if (x->xmldata) | |
447 x->xmlda… | |
448 x->data[0] = c; | |
449 datalen = 1; | |
450 break; | |
451 } | |
452 if (c == ';') { | |
453 x->data[datalen]… | |
454 if (x->xmldataen… | |
455 x->xmlda… | |
456 datalen = 0; | |
457 break; | |
458 } | |
459 } | |
460 } else if (c != '<') { | |
461 if (datalen < sizeof(x->data) - … | |
462 x->data[datalen++] = c; | |
463 } else { | |
464 x->data[datalen] = '\0'; | |
465 if (x->xmldata) | |
466 x->xmldata(x, x-… | |
467 x->data[0] = c; | |
468 datalen = 1; | |
469 } | |
470 } | |
/* '<' ends the data run; the outer loop then parses the tag */
471 if (c == '<') { | |
472 x->data[datalen] = '\0'; | |
473 if (x->xmldata && datalen) | |
474 x->xmldata(x, x->data, d… | |
475 if (x->xmldataend) | |
476 x->xmldataend(x); | |
477 break; | |
478 } | |
479 } | |
480 } | |
481 } | |
482 } | |