Introduction
Introduction Statistics Contact Development Disclaimer Help
xml.c - sub - subscene.com subtitle search
git clone git://git.codemadness.org/sub
Log
Files
Refs
README
LICENSE
---
xml.c (11059B)
---
1 #include <sys/types.h>
2
3 #include <ctype.h>
4 #include <errno.h>
5 #include <limits.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9
10 #include "xml.h"
11
/*
 * Parse the attribute list of the tag currently being read.
 *
 * NOTE(review): several lines of this listing are cut off (they end in an
 * ellipsis), so the notes below describe only what the visible code shows.
 *
 * Characters are pulled from x->getnext() until EOF or until a '>' is seen.
 * The attribute name is collected in x->name (at most sizeof(x->name) - 1
 * bytes; further name bytes are dropped) and value bytes are collected in
 * x->data. Results are reported through the optional callbacks
 * x->xmlattrstart, x->xmlattr and x->xmlattrend, each of which is only called
 * when it is non-NULL.
 *
 * - Whitespace ends a name in progress, '?' is ignored.
 * - '=' NUL-terminates the name and marks the start of a value.
 * - A value may be delimited by ' or "; otherwise it is treated as unquoted
 *   (the code's own comment says it then ends at isspace()).
 * - '&' inside a value starts an entity: value bytes buffered so far are
 *   handed to x->xmlattr first, then the entity text is collected up to ';'.
 *   The callback used for a complete entity is truncated in this listing
 *   (presumably an attribute-entity callback -- TODO confirm).
 * - When x->data is full, the buffered part of the value is handed to
 *   x->xmlattr and buffering restarts, so one value may arrive in pieces.
 * - '/' sets x->isshorttag and discards any name in progress.
 */
12 static void
13 xml_parseattrs(XMLParser *x)
14 {
15 size_t namelen = 0, valuelen;
16 int c, endsep, endname = 0, valuestart = 0;
17
18 while ((c = x->getnext()) != EOF) {
/* whitespace is never stored; it only closes a name that has been started */
19 if (isspace(c)) {
20 if (namelen)
21 endname = 1;
22 continue;
23 } else if (c == '?')
24 ; /* ignore */
/* '=' closes the name; whatever follows is the attribute value */
25 else if (c == '=') {
26 x->name[namelen] = '\0';
27 valuestart = 1;
28 endname = 1;
29 } else if (namelen && ((endname && !valuestart && isalph…
30 /* attribute without value */
31 x->name[namelen] = '\0';
32 if (x->xmlattrstart)
33 x->xmlattrstart(x, x->tag, x->taglen, x-…
34 if (x->xmlattr)
35 x->xmlattr(x, x->tag, x->taglen, x->name…
36 if (x->xmlattrend)
37 x->xmlattrend(x, x->tag, x->taglen, x->n…
38 endname = 0;
39 x->name[0] = c;
40 namelen = 1;
41 } else if (namelen && valuestart) {
42 /* attribute with value */
43 if (x->xmlattrstart)
44 x->xmlattrstart(x, x->tag, x->taglen, x-…
45
46 valuelen = 0;
/* a quote becomes the terminator; otherwise c is already the first value
 * byte, so jump past the read at the top of the value loop */
47 if (c == '\'' || c == '"') {
48 endsep = c;
49 } else {
50 endsep = ' '; /* isspace() */
51 goto startvalue;
52 }
53
54 while ((c = x->getnext()) != EOF) {
55 startvalue:
56 if (c == '&') { /* entities */
57 x->data[valuelen] = '\0';
58 /* call data function with data …
59 if (valuelen && x->xmlattr)
60 x->xmlattr(x, x->tag, x-…
61 x->data[0] = c;
62 valuelen = 1;
63 while ((c = x->getnext()) != EOF…
64 if (c == endsep || (ends…
65 break;
66 if (valuelen < sizeof(x-…
67 x->data[valuelen…
68 else {
69 /* entity too lo…
70 x->data[valuelen…
71 if (x->xmlattr)
72 x->xmlat…
73 x->data[0] = c;
74 valuelen = 1;
75 break;
76 }
77 if (c == ';') {
78 x->data[valuelen…
79 if (x->xmlattren…
80 x->xmlat…
81 valuelen = 0;
82 break;
83 }
84 }
85 } else if (c != endsep && !(endsep == ' …
86 if (valuelen < sizeof(x->data) -…
87 x->data[valuelen++] = c;
88 } else {
89 x->data[valuelen] = '\0';
90 if (x->xmlattr)
91 x->xmlattr(x, x-…
92 x->data[0] = c;
93 valuelen = 1;
94 }
95 }
96 if (c == endsep || (endsep == ' ' && (c …
97 x->data[valuelen] = '\0';
98 if (x->xmlattr)
99 x->xmlattr(x, x->tag, x-…
100 if (x->xmlattrend)
101 x->xmlattrend(x, x->tag,…
102 break;
103 }
104 }
105 namelen = endname = valuestart = 0;
106 } else if (namelen < sizeof(x->name) - 1) {
107 x->name[namelen++] = c;
108 }
109 if (c == '>') {
110 break;
111 } else if (c == '/') {
112 x->isshorttag = 1;
113 x->name[0] = '\0';
114 namelen = 0;
115 }
116 }
117 }
118
119 static void
120 xml_parsecomment(XMLParser *x)
121 {
122 size_t datalen = 0, i = 0;
123 int c;
124
125 if (x->xmlcommentstart)
126 x->xmlcommentstart(x);
127 while ((c = x->getnext()) != EOF) {
128 if (c == '-' || c == '>') {
129 if (x->xmlcomment) {
130 x->data[datalen] = '\0';
131 x->xmlcomment(x, x->data, datalen);
132 datalen = 0;
133 }
134 }
135
136 if (c == '-') {
137 if (++i > 2) {
138 if (x->xmlcomment)
139 for (; i > 2; i--)
140 x->xmlcomment(x, "-", 1);
141 i = 2;
142 }
143 continue;
144 } else if (c == '>' && i == 2) {
145 if (x->xmlcommentend)
146 x->xmlcommentend(x);
147 return;
148 } else if (i) {
149 if (x->xmlcomment) {
150 for (; i > 0; i--)
151 x->xmlcomment(x, "-", 1);
152 }
153 i = 0;
154 }
155
156 if (datalen < sizeof(x->data) - 1) {
157 x->data[datalen++] = c;
158 } else {
159 x->data[datalen] = '\0';
160 if (x->xmlcomment)
161 x->xmlcomment(x, x->data, datalen);
162 x->data[0] = c;
163 datalen = 1;
164 }
165 }
166 }
167
168 static void
169 xml_parsecdata(XMLParser *x)
170 {
171 size_t datalen = 0, i = 0;
172 int c;
173
174 if (x->xmlcdatastart)
175 x->xmlcdatastart(x);
176 while ((c = x->getnext()) != EOF) {
177 if (c == ']' || c == '>') {
178 if (x->xmlcdata) {
179 x->data[datalen] = '\0';
180 x->xmlcdata(x, x->data, datalen);
181 datalen = 0;
182 }
183 }
184
185 if (c == ']') {
186 if (++i > 2) {
187 if (x->xmlcdata)
188 for (; i > 2; i--)
189 x->xmlcdata(x, "]", 1);
190 i = 2;
191 }
192 continue;
193 } else if (c == '>' && i == 2) {
194 if (x->xmlcdataend)
195 x->xmlcdataend(x);
196 return;
197 } else if (i) {
198 if (x->xmlcdata)
199 for (; i > 0; i--)
200 x->xmlcdata(x, "]", 1);
201 i = 0;
202 }
203
204 if (datalen < sizeof(x->data) - 1) {
205 x->data[datalen++] = c;
206 } else {
207 x->data[datalen] = '\0';
208 if (x->xmlcdata)
209 x->xmlcdata(x, x->data, datalen);
210 x->data[0] = c;
211 datalen = 1;
212 }
213 }
214 }
215
/*
 * Encode the Unicode code point `r` as UTF-8 into `s`.
 *
 * `s` must have room for 4 bytes; no NUL terminator is written.
 * Returns the number of bytes written (1-4), or 0 when nothing was written:
 * for the NUL code point, and for values that have no UTF-8 encoding
 * (negative, above U+10FFFF, or a surrogate U+D800..U+DFFF).
 */
static int
codepointtoutf8(long r, char *s)
{
	/* NUL, or not a Unicode scalar value: emit nothing */
	if (r <= 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
		return 0;

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	}
	if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 | (r & 0x00003F));        /* 10bbbbbb */
		return 2;
	}
	if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10bbbbbb */
		s[2] = (char)(0x80 | (r & 0x00003F));         /* 10cccccc */
		return 3;
	}
	/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
	s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
	s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
	s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10cccccc */
	s[3] = (char)(0x80 | (r & 0x00003F));         /* 10dddddd */
	return 4;
}
245
/*
 * Translate one of the five predefined XML named entities into its character.
 *
 * `e` is the complete entity text including '&' and ';' (e.g. "&amp;"); the
 * all-lowercase and all-uppercase spellings are recognized, nothing else.
 * On a match the character and a NUL terminator are stored in `buf`.
 *
 * Returns 1 on a match, 0 when `e` is not a known entity, and -1 when
 * `bufsiz` is smaller than the 2 bytes needed.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* names[n] is replaced by chars[n] */
	static const char *const names[] = {
		"&amp;", "&lt;", "&gt;", "&apos;", "&quot;",
		"&AMP;", "&LT;", "&GT;", "&APOS;", "&QUOT;"
	};
	static const char chars[] = {
		'&', '<', '>', '\'', '"',
		'&', '<', '>', '\'', '"'
	};
	size_t n;

	/* need room for the character and the terminator */
	if (bufsiz < 2)
		return -1;

	/* every known name starts with '&' */
	if (e[0] != '&')
		return 0;

	for (n = 0; n < sizeof(names) / sizeof(names[0]); n++) {
		if (strcmp(e, names[n]) != 0)
			continue;
		buf[0] = chars[n];
		buf[1] = '\0';
		return 1;
	}
	return 0;
}
283
/*
 * Translate a numeric character reference ("&#NNN;" decimal or "&#xHHH;"
 * hexadecimal) into its UTF-8 encoding.
 *
 * `e` is the complete entity text including '&' and ';'. On success the
 * encoded bytes followed by a NUL terminator are stored in `buf`.
 *
 * Returns the number of bytes stored (excluding the terminator), 0 when `e`
 * is not a well-formed numeric entity or does not name an encodable code
 * point, and -1 when `bufsiz` is smaller than the 5 bytes that may be needed
 * (4 bytes of UTF-8 plus the terminator).
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	long l;
	int len;
	char *end;

	/* buffer is too small */
	if (bufsiz < 5)
		return -1;

	/* not a numeric entity */
	if (e[0] != '&' || e[1] != '#')
		return 0;

	e += 2; /* skip "&#" */
	errno = 0;
	/* hex (16) or decimal (10); parse as signed so that a negative number
	 * stays negative and can be rejected instead of wrapping around */
	if (*e == 'x')
		l = strtol(++e, &end, 16);
	else
		l = strtol(e, &end, 10);

	/* reject: conversion error, no digits at all, missing ';', or a value
	 * that is not a Unicode scalar value (negative, too high, surrogate) */
	if (errno || end == e || *end != ';' ||
	    l < 0 || l > 0x10FFFF || (l >= 0xD800 && l <= 0xDFFF))
		return 0;

	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	return len;
}
315
/*
 * Convert an entity string (named such as "&amp;" or numeric such as
 * "&#38;") to the text it stands for, stored NUL-terminated in `buf`.
 *
 * Returns the byte length of the stored string, 0 when `e` does not start
 * with '&' or is not recognized by the named/numeric converter, and -1 when
 * `bufsiz` is smaller than 5.
 */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	int isnumeric;

	/* buffer is too small for the longest possible result */
	if (bufsiz < 5)
		return -1;

	/* every entity starts with '&' */
	if (e[0] != '&')
		return 0;

	/* "&#..." is numeric, anything else is looked up by name */
	isnumeric = (e[1] == '#');

	return isnumeric ? numericentitytostr(e, buf, bufsiz)
	                 : namedentitytostr(e, buf, bufsiz);
}
333
/*
 * Parser entry point: read the whole input through x->getnext() and report
 * what is found through the callbacks stored in `x`.
 *
 * NOTE(review): several lines of this listing are cut off (they end in an
 * ellipsis), so the notes below describe only what the visible code shows.
 *
 * - Returns immediately when x->getnext is NULL. Everything before the first
 *   '<' is skipped. Parsing stops at EOF.
 * - "<!": the following characters are collected in x->data to recognize a
 *   comment (handled by xml_parsecomment) or a section whose marker starts
 *   with '[' (handled by xml_parsecdata; the compared marker is truncated
 *   here, presumably "[CDATA[" -- TODO confirm).
 * - Any other '<': the tag name is collected in x->tag (length in x->taglen).
 *   A leading '?' sets `ispi` and x->isshorttag, a '/' inside the name sets
 *   x->isshorttag. The name ends at '>' or whitespace; a name starting with
 *   '/' is treated as a closing tag, anything else as a start tag. The tag
 *   callbacks are truncated in this listing except x->xmltagend; when the
 *   name ended on whitespace a parse helper is called (presumably
 *   xml_parseattrs -- TODO confirm).
 * - Anything else is character data up to the next '<': x->xmldatastart is
 *   called, text is buffered in x->data and handed to x->xmldata when the
 *   buffer is full, before an entity, and at the '<'; then x->xmldataend is
 *   called. The character that selected this branch is itself not stored:
 *   the loop begins with the next character read. An '&' starts an entity
 *   that is collected up to ';' (the callback used for it is truncated).
 * All callbacks visible here are only called when non-NULL.
 */
334 void
335 xml_parse(XMLParser *x)
336 {
337 int c, ispi;
338 size_t datalen, tagdatalen, taglen;
339
/* without an input callback there is nothing to read */
340 if (!x->getnext)
341 return;
342 while ((c = x->getnext()) != EOF && c != '<')
343 ; /* skip until < */
344
345 while (c != EOF) {
346 if (c == '<') { /* parse tag */
/* input ended right after '<' */
347 if ((c = x->getnext()) == EOF)
348 return;
349
350 if (c == '!') { /* cdata and comments */
/* collect what follows "<!" in x->data to tell comment and '[' section apart */
351 for (tagdatalen = 0; (c = x->getnext()) …
352 /* NOTE: sizeof(x->data) must be…
353 if (tagdatalen <= sizeof("[CDATA…
354 x->data[tagdatalen++] = …
355 if (c == '>')
356 break;
357 else if (c == '-' && tagdatalen …
358 (x->data[0] == '…
359 xml_parsecomment(x);
360 break;
361 } else if (c == '[') {
362 if (tagdatalen == sizeof…
363 !strncmp(x->data, "[…
364 xml_parsecdata(x…
365 break;
366 }
367 }
368 }
369 } else {
370 x->tag[0] = '\0';
371 x->taglen = 0;
372
373 /* normal tag (open, short open, close),…
374 if (isspace(c))
375 while ((c = x->getnext()) != EOF…
376 ;
377 if (c == EOF)
378 return;
379 x->tag[0] = c;
380 ispi = (c == '?') ? 1 : 0;
381 x->isshorttag = ispi;
382 taglen = 1;
383 while ((c = x->getnext()) != EOF) {
384 if (c == '/')
385 x->isshorttag = 1; /* sh…
386 else if (c == '>' || isspace(c))…
387 x->tag[taglen] = '\0';
388 if (x->tag[0] == '/') { …
389 x->taglen = --ta…
390 if (taglen && x-…
391 x->xmlta…
392 } else {
393 x->taglen = tagl…
394 /* start tag */
395 if (x->xmltagsta…
396 x->xmlta…
397 if (isspace(c))
398 xml_pars…
399 if (x->xmltagsta…
400 x->xmlta…
401 }
402 /* call tagend for short…
403 if ((x->isshorttag || is…
404 x->xmltagend(x, …
405 break;
406 } else if (taglen < sizeof(x->ta…
407 x->tag[taglen++] = c; /*…
408 }
409 }
410 } else {
411 /* parse tag data */
412 datalen = 0;
413 if (x->xmldatastart)
414 x->xmldatastart(x);
415 while ((c = x->getnext()) != EOF) {
416 if (c == '&') {
417 if (datalen) {
418 x->data[datalen] = '\0';
419 if (x->xmldata)
420 x->xmldata(x, x-…
421 }
422 x->data[0] = c;
423 datalen = 1;
424 while ((c = x->getnext()) != EOF…
425 if (c == '<')
426 break;
427 if (datalen < sizeof(x->…
428 x->data[datalen+…
429 else {
430 /* entity too lo…
431 x->data[datalen]…
432 if (x->xmldata)
433 x->xmlda…
434 x->data[0] = c;
435 datalen = 1;
436 break;
437 }
438 if (c == ';') {
439 x->data[datalen]…
440 if (x->xmldataen…
441 x->xmlda…
442 datalen = 0;
443 break;
444 }
445 }
446 } else if (c != '<') {
447 if (datalen < sizeof(x->data) - …
448 x->data[datalen++] = c;
449 } else {
450 x->data[datalen] = '\0';
451 if (x->xmldata)
452 x->xmldata(x, x-…
453 x->data[0] = c;
454 datalen = 1;
455 }
456 }
457 if (c == '<') {
458 x->data[datalen] = '\0';
459 if (x->xmldata && datalen)
460 x->xmldata(x, x->data, d…
461 if (x->xmldataend)
462 x->xmldataend(x);
463 break;
464 }
465 }
466 }
467 }
468 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.