Introduction
Introduction Statistics Contact Development Disclaimer Help
xml.c - frontends - front-ends for some sites (experiment)
Log
Files
Refs
README
LICENSE
---
xml.c (11454B)
---
1 #include <errno.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "xml.h"
7
/*
 * ASCII-only character class tests that do not depend on the locale.
 *
 * ISALPHA(c): true for 'a'-'z' and 'A'-'Z'. OR-ing with 32 folds upper case
 * onto lower case; the unsigned subtraction makes anything below 'a' wrap to
 * a large value so a single comparison covers both bounds.
 * ISSPACE(c): true for ' ' and the five bytes '\t', '\n', '\v', '\f', '\r'.
 *
 * The argument is fully parenthesized so that expressions such as
 * `cond ? a : b` are cast as a whole. ISSPACE() evaluates its argument
 * twice, so do not pass an expression with side effects.
 */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
10
/*
 * In-memory input for the parser: the buffer being read, its size in bytes
 * and the offset of the next byte to hand out, see getnext().
 */
static const unsigned char *xml_data_buf;
static size_t xml_data_size;
static size_t xml_data_off;

/*
 * Select the buffer that getnext() reads from and rewind to its start.
 *
 * s:   data to parse; only the pointer is stored, the bytes are not copied.
 * len: number of readable bytes at s.
 *
 * A NULL buffer is treated as empty regardless of len, so that getnext()
 * reports EOF instead of dereferencing a NULL pointer.
 */
void
setxmldata(const char *s, size_t len)
{
	xml_data_off = 0;
	xml_data_size = s ? len : 0;
	xml_data_buf = (const unsigned char *)s;
}

/*
 * Return the next byte of the buffer as a value in the range 0..255 and
 * advance the read offset, or return EOF once all bytes have been consumed.
 */
static int
getnext(void)
{
	if (xml_data_off >= xml_data_size)
		return EOF;
	return xml_data_buf[xml_data_off++];
}
31
/*
 * Parse the attributes of the tag that is currently being read.
 *
 * Bytes are read with GETNEXT() until a '>' or EOF is seen. The attribute
 * name is collected in x->name and the value in x->data; the xmlattrstart,
 * xmlattr and xmlattrend callbacks are called when they are set. A '/' marks
 * the tag as a short tag (x->isshorttag = 1) and discards the name collected
 * so far.
 *
 * NOTE(review): several lines of this listing are cut off (marked with an
 * ellipsis), so the full callback argument lists and some conditions are not
 * visible here; verify against the complete source before relying on details.
 */
32 static void
33 xml_parseattrs(XMLParser *x)
34 {
35 size_t namelen = 0, valuelen;
36 int c, endsep, endname = 0, valuestart = 0;
37
38 while ((c = GETNEXT()) != EOF) {
39 if (ISSPACE(c)) {
/* whitespace ends a name only if one has been started */
40 if (namelen)
41 endname = 1;
42 continue;
43 } else if (c == '?')
44 ; /* ignore */
45 else if (c == '=') {
/* the name is complete and a value is expected next */
46 x->name[namelen] = '\0';
47 valuestart = 1;
48 endname = 1;
49 } else if (namelen && ((endname && !valuestart && ISALPH…
50 /* attribute without value */
51 x->name[namelen] = '\0';
52 if (x->xmlattrstart)
53 x->xmlattrstart(x, x->tag, x->taglen, x-…
54 if (x->xmlattr)
55 x->xmlattr(x, x->tag, x->taglen, x->name…
56 if (x->xmlattrend)
57 x->xmlattrend(x, x->tag, x->taglen, x->n…
58 endname = 0;
/* the current byte starts the next attribute name */
59 x->name[0] = c;
60 namelen = 1;
61 } else if (namelen && valuestart) {
62 /* attribute with value */
63 if (x->xmlattrstart)
64 x->xmlattrstart(x, x->tag, x->taglen, x-…
65
66 valuelen = 0;
/* a quoted value ends at the same quote character; for an unquoted value
 * the current byte already belongs to the value, so jump into the loop
 * body without reading another byte first */
67 if (c == '\'' || c == '"') {
68 endsep = c;
69 } else {
70 endsep = ' '; /* ISSPACE() */
71 goto startvalue;
72 }
73
74 while ((c = GETNEXT()) != EOF) {
75 startvalue:
76 if (c == '&') { /* entities */
77 x->data[valuelen] = '\0';
78 /* call data function with data …
79 if (valuelen && x->xmlattr)
80 x->xmlattr(x, x->tag, x-…
81 x->data[0] = c;
82 valuelen = 1;
83 while ((c = GETNEXT()) != EOF) {
84 if (c == endsep || (ends…
85 break;
86 if (valuelen < sizeof(x-…
87 x->data[valuelen…
88 else {
89 /* entity too lo…
90 x->data[valuelen…
91 if (x->xmlattr)
92 x->xmlat…
93 x->data[0] = c;
94 valuelen = 1;
95 break;
96 }
97 if (c == ';') {
98 x->data[valuelen…
99 if (x->xmlattren…
100 x->xmlat…
101 valuelen = 0;
102 break;
103 }
104 }
105 } else if (c != endsep && !(endsep == ' …
106 if (valuelen < sizeof(x->data) -…
107 x->data[valuelen++] = c;
108 } else {
109 x->data[valuelen] = '\0';
110 if (x->xmlattr)
111 x->xmlattr(x, x-…
112 x->data[0] = c;
113 valuelen = 1;
114 }
115 }
116 if (c == endsep || (endsep == ' ' && (c …
117 x->data[valuelen] = '\0';
118 if (x->xmlattr)
119 x->xmlattr(x, x->tag, x-…
120 if (x->xmlattrend)
121 x->xmlattrend(x, x->tag,…
122 break;
123 }
124 }
125 namelen = endname = valuestart = 0;
126 } else if (namelen < sizeof(x->name) - 1) {
127 x->name[namelen++] = c;
128 }
129 if (c == '>') {
130 break;
131 } else if (c == '/') {
132 x->isshorttag = 1;
133 x->name[0] = '\0';
134 namelen = 0;
135 }
136 }
137 }
138
139 static void
140 xml_parsecomment(XMLParser *x)
141 {
142 size_t datalen = 0, i = 0;
143 int c;
144
145 if (x->xmlcommentstart)
146 x->xmlcommentstart(x);
147 while ((c = GETNEXT()) != EOF) {
148 if (c == '-' || c == '>') {
149 if (x->xmlcomment && datalen) {
150 x->data[datalen] = '\0';
151 x->xmlcomment(x, x->data, datalen);
152 datalen = 0;
153 }
154 }
155
156 if (c == '-') {
157 if (++i > 2) {
158 if (x->xmlcomment)
159 for (; i > 2; i--)
160 x->xmlcomment(x, "-", 1);
161 i = 2;
162 }
163 continue;
164 } else if (c == '>' && i == 2) {
165 if (x->xmlcommentend)
166 x->xmlcommentend(x);
167 return;
168 } else if (i) {
169 if (x->xmlcomment) {
170 for (; i > 0; i--)
171 x->xmlcomment(x, "-", 1);
172 }
173 i = 0;
174 }
175
176 if (datalen < sizeof(x->data) - 1) {
177 x->data[datalen++] = c;
178 } else {
179 x->data[datalen] = '\0';
180 if (x->xmlcomment)
181 x->xmlcomment(x, x->data, datalen);
182 x->data[0] = c;
183 datalen = 1;
184 }
185 }
186 }
187
188 static void
189 xml_parsecdata(XMLParser *x)
190 {
191 size_t datalen = 0, i = 0;
192 int c;
193
194 if (x->xmlcdatastart)
195 x->xmlcdatastart(x);
196 while ((c = GETNEXT()) != EOF) {
197 if (c == ']' || c == '>') {
198 if (x->xmlcdata && datalen) {
199 x->data[datalen] = '\0';
200 x->xmlcdata(x, x->data, datalen);
201 datalen = 0;
202 }
203 }
204
205 if (c == ']') {
206 if (++i > 2) {
207 if (x->xmlcdata)
208 for (; i > 2; i--)
209 x->xmlcdata(x, "]", 1);
210 i = 2;
211 }
212 continue;
213 } else if (c == '>' && i == 2) {
214 if (x->xmlcdataend)
215 x->xmlcdataend(x);
216 return;
217 } else if (i) {
218 if (x->xmlcdata)
219 for (; i > 0; i--)
220 x->xmlcdata(x, "]", 1);
221 i = 0;
222 }
223
224 if (datalen < sizeof(x->data) - 1) {
225 x->data[datalen++] = c;
226 } else {
227 x->data[datalen] = '\0';
228 if (x->xmlcdata)
229 x->xmlcdata(x, x->data, datalen);
230 x->data[0] = c;
231 datalen = 1;
232 }
233 }
234 }
235
/*
 * Encode code point r as UTF-8 into s and return the number of bytes
 * written (1 to 4). A code point of 0 writes nothing and returns 0.
 * s is not NUL-terminated and must have room for 4 bytes. The value is not
 * validated here; values above 0x1FFFFF are truncated to their low 21 bits.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 2 bytes: 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
265
/*
 * Translate a named entity to its character.
 *
 * e:      entity name without the leading '&' but including the trailing
 *         ';', for example "amp;". Only the five predefined XML entities
 *         are known, each in all-lowercase and all-uppercase spelling.
 * buf:    receives the character followed by a NUL byte.
 * bufsiz: size of buf; must be at least 2.
 *
 * Returns 1 (the length of the result) on success, or -1 when the buffer is
 * too small or the name is unknown.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const struct named_entity {
		const char *entity;
		int c;
	} known[] = {
		{ "amp;",  '&' },
		{ "lt;",   '<' },
		{ "gt;",   '>' },
		{ "apos;", '\'' },
		{ "quot;", '"' },
		{ "AMP;",  '&' },
		{ "LT;",   '<' },
		{ "GT;",   '>' },
		{ "APOS;", '\'' },
		{ "QUOT;", '"' }
	};
	const struct named_entity *const last = known + sizeof(known) / sizeof(known[0]);
	const struct named_entity *it;

	/* need room for one character plus the terminator */
	if (bufsiz < 2)
		return -1;

	for (it = known; it != last; it++) {
		if (strcmp(e, it->entity) != 0)
			continue;
		buf[0] = it->c;
		buf[1] = '\0';
		return 1;
	}

	return -1;
}
299
/*
 * Translate a numeric character reference to a UTF-8 string.
 *
 * e:      the reference without the leading "&#", for example "65;" or
 *         "x41;". A leading 'x' selects hexadecimal, otherwise the number
 *         is read as decimal. The number must be followed by ';'.
 * buf:    receives the UTF-8 bytes followed by a NUL byte.
 * bufsiz: size of buf; must be at least 5.
 *
 * Returns the number of bytes written, not counting the NUL byte (0 for code
 * point 0), or -1 when the buffer is too small, the reference is malformed,
 * or the code point is negative, above 0x10ffff or a surrogate.
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	char *stop;
	long cp;
	int base = 10;
	int n;

	/* up to 4 bytes of UTF-8 plus the terminator */
	if (bufsiz < 5)
		return -1;

	if (*e == 'x') {
		base = 16;
		e++;
	}

	errno = 0;
	cp = strtol(e, &stop, base);

	/* conversion error, no digits, or not terminated by ';' */
	if (errno || stop == e || *stop != ';')
		return -1;
	/* outside of the Unicode range */
	if (cp < 0 || cp > 0x10ffff)
		return -1;
	/* surrogates are not valid code points */
	if (cp >= 0xd800 && cp <= 0xdfff)
		return -1;

	n = codepointtoutf8(cp, buf);
	buf[n] = '\0';

	return n;
}
326
/*
 * Convert an entity string, named ("&amp;") or numeric ("&#65;", "&#x41;"),
 * to its string value in buf.
 * Returns the length of the result in bytes, or -1 on failure.
 */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* every entity starts with '&' */
	if (e[0] != '&')
		return -1;

	/* "&#" introduces a numeric entity, anything else is a named one */
	return (e[1] == '#')
	    ? numericentitytostr(e + 2, buf, bufsiz)
	    : namedentitytostr(e + 1, buf, bufsiz);
}
341
/*
 * Parse the input delivered by GETNEXT() and report what is found through
 * the callbacks set in x.
 *
 * Everything before the first '<' is skipped. After that the loop alternates
 * between two states: directly after a '<' a tag is parsed ("<!" introduces
 * comments and CDATA sections, which are handed to xml_parsecomment() and
 * xml_parsecdata(); anything else is an open, short or close tag), otherwise
 * character data is collected in x->data until the next '<'. The function
 * returns when the input is exhausted.
 *
 * NOTE(review): several lines of this listing are cut off (marked with an
 * ellipsis), so the full callback argument lists and some conditions are not
 * visible here; verify against the complete source before relying on details.
 */
342 void
343 xml_parse(XMLParser *x)
344 {
345 size_t datalen, tagdatalen;
346 int c, isend;
347
348 while ((c = GETNEXT()) != EOF && c != '<')
349 ; /* skip until < */
350
351 while (c != EOF) {
352 if (c == '<') { /* parse tag */
/* input ends directly after '<': nothing left to parse */
353 if ((c = GETNEXT()) == EOF)
354 return;
355
356 if (c == '!') { /* cdata and comments */
357 for (tagdatalen = 0; (c = GETNEXT()) != …
358 /* NOTE: sizeof(x->data) must be…
359 if (tagdatalen <= sizeof("[CDATA…
360 x->data[tagdatalen++] = …
361 if (c == '>')
362 break;
363 else if (c == '-' && tagdatalen …
364 (x->data[0] == '…
365 xml_parsecomment(x);
366 break;
367 } else if (c == '[') {
368 if (tagdatalen == sizeof…
369 !strncmp(x->data, "[…
370 xml_parsecdata(x…
371 break;
372 }
373 }
374 }
375 } else {
376 /* normal tag (open, short open, close),…
377 x->tag[0] = c;
378 x->taglen = 1;
379 x->isshorttag = isend = 0;
380
381 /* treat processing instruction as short…
382 if (c == '?') {
383 x->isshorttag = 1;
384 } else if (c == '/') {
385 if ((c = GETNEXT()) == EOF)
386 return;
387 x->tag[0] = c;
388 isend = 1;
389 }
390
391 while ((c = GETNEXT()) != EOF) {
392 if (c == '/')
393 x->isshorttag = 1; /* sh…
394 else if (c == '>' || ISSPACE(c))…
395 x->tag[x->taglen] = '\0';
396 if (isend) { /* end tag,…
397 while (c != '>' …
398 c = GETN…
399 if (x->xmltagend)
400 x->xmlta…
401 x->tag[0] = '\0';
402 x->taglen = 0;
403 } else {
404 /* start tag */
405 if (x->xmltagsta…
406 x->xmlta…
407 if (ISSPACE(c))
408 xml_pars…
409 if (x->xmltagsta…
410 x->xmlta…
411 }
412 /* call tagend for short…
413 if (x->isshorttag) {
414 if (x->xmltagend)
415 x->xmlta…
416 x->tag[0] = '\0';
417 x->taglen = 0;
418 }
419 break;
420 } else if (x->taglen < sizeof(x-…
421 x->tag[x->taglen++] = c;…
422 }
423 }
424 } else {
425 /* parse tag data */
426 datalen = 0;
427 if (x->xmldatastart)
428 x->xmldatastart(x);
429 while ((c = GETNEXT()) != EOF) {
430 if (c == '&') {
431 if (datalen) {
432 x->data[datalen] = '\0';
433 if (x->xmldata)
434 x->xmldata(x, x-…
435 }
436 x->data[0] = c;
437 datalen = 1;
438 while ((c = GETNEXT()) != EOF) {
439 if (c == '<')
440 break;
441 if (datalen < sizeof(x->…
442 x->data[datalen+…
443 else {
444 /* entity too lo…
445 x->data[datalen]…
446 if (x->xmldata)
447 x->xmlda…
448 x->data[0] = c;
449 datalen = 1;
450 break;
451 }
452 if (c == ';') {
453 x->data[datalen]…
454 if (x->xmldataen…
455 x->xmlda…
456 datalen = 0;
457 break;
458 }
459 }
460 } else if (c != '<') {
461 if (datalen < sizeof(x->data) - …
462 x->data[datalen++] = c;
463 } else {
464 x->data[datalen] = '\0';
465 if (x->xmldata)
466 x->xmldata(x, x-…
467 x->data[0] = c;
468 datalen = 1;
469 }
470 }
471 if (c == '<') {
472 x->data[datalen] = '\0';
473 if (x->xmldata && datalen)
474 x->xmldata(x, x->data, d…
475 if (x->xmldataend)
476 x->xmldataend(x);
477 break;
478 }
479 }
480 }
481 }
482 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.