sfeed_web.c - sfeed - RSS and Atom parser | |
git clone git://git.codemadness.org/sfeed | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
sfeed_web.c (3105B) | |
--- | |
1 #include <stdio.h> | |
2 #include <strings.h> | |
3 | |
4 #include "util.h" | |
5 #include "xml.h" | |
6 | |
/* Expand a string literal (or char array) into two comma-separated
 * arguments: the string itself and its length without the terminating
 * NUL byte. Do not use with plain pointers: sizeof would measure the
 * pointer, not the string. */
#define STRP(s) (s), (sizeof(s) - 1)
9 | |
10 static XMLParser parser; | |
11 static int isbasetag, islinktag, ishrefattr, istypeattr; | |
12 static char linkhref[4096], linktype[256], basehref[4096]; | |
13 | |
/* Write the NUL-terminated string s to stdout, leaving out every byte
 * that ISCNTRL() classifies as a control character. */
static void
printvalue(const char *s)
{
	const char *cur;

	for (cur = s; *cur != '\0'; cur++) {
		if (ISCNTRL((unsigned char)*cur))
			continue;
		putchar(*cur);
	}
}
21 | |
22 static void | |
23 xmltagstart(XMLParser *p, const char *t, size_t tl) | |
24 { | |
25 isbasetag = islinktag = 0; | |
26 | |
27 if (!strcasecmp(t, "base")) { | |
28 isbasetag = 1; | |
29 } else if (!strcasecmp(t, "link")) { | |
30 islinktag = 1; | |
31 linkhref[0] = '\0'; | |
32 linktype[0] = '\0'; | |
33 } | |
34 } | |
35 | |
36 static void | |
37 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) | |
38 { | |
39 struct uri baseuri, linkuri, u; | |
40 char buf[4096]; | |
41 int r = -1; | |
42 | |
43 if (!islinktag) | |
44 return; | |
45 | |
46 if (strncasecmp(linktype, STRP("application/atom")) && | |
47 strncasecmp(linktype, STRP("application/xml")) && | |
48 strncasecmp(linktype, STRP("application/rss"))) | |
49 return; | |
50 | |
51 /* parse base URI each time: it can change. */ | |
52 if (basehref[0] && | |
53 uri_parse(linkhref, &linkuri) != -1 && !linkuri.proto[0] && | |
54 uri_parse(basehref, &baseuri) != -1 && | |
55 uri_makeabs(&u, &linkuri, &baseuri) != -1 && u.proto[0]) | |
56 r = uri_format(buf, sizeof(buf), &u); | |
57 | |
58 if (r >= 0 && (size_t)r < sizeof(buf)) | |
59 printvalue(buf); | |
60 else | |
61 printvalue(linkhref); | |
62 | |
63 putchar('\t'); | |
64 printvalue(linktype); | |
65 putchar('\n'); | |
66 } | |
67 | |
68 static void | |
69 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size… | |
70 { | |
71 ishrefattr = istypeattr = 0; | |
72 | |
73 if (!isbasetag && !islinktag) | |
74 return; | |
75 | |
76 if (!strcasecmp(a, "href")) { | |
77 ishrefattr = 1; | |
78 if (isbasetag) | |
79 basehref[0] = '\0'; | |
80 else if (islinktag) | |
81 linkhref[0] = '\0'; | |
82 } else if (!strcasecmp(a, "type") && islinktag) { | |
83 istypeattr = 1; | |
84 linktype[0] = '\0'; | |
85 } | |
86 } | |
87 | |
88 static void | |
89 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
90 const char *v, size_t vl) | |
91 { | |
92 if (isbasetag && ishrefattr) { | |
93 strlcat(basehref, v, sizeof(basehref)); | |
94 } else if (islinktag) { | |
95 if (ishrefattr) | |
96 strlcat(linkhref, v, sizeof(linkhref)); | |
97 else if (istypeattr) | |
98 strlcat(linktype, v, sizeof(linktype)); | |
99 } | |
100 } | |
101 | |
102 static void | |
103 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, siz… | |
104 const char *v, size_t vl) | |
105 { | |
106 char buf[8]; | |
107 int len; | |
108 | |
109 if (!ishrefattr && !istypeattr) | |
110 return; | |
111 | |
112 /* try to translate entity, else just pass as data to | |
113 * xmlattr handler. */ | |
114 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) | |
115 xmlattr(p, t, tl, a, al, buf, (size_t)len); | |
116 else | |
117 xmlattr(p, t, tl, a, al, v, vl); | |
118 } | |
119 | |
120 int | |
121 main(int argc, char *argv[]) | |
122 { | |
123 if (pledge("stdio", NULL) == -1) | |
124 err(1, "pledge"); | |
125 | |
126 if (argc > 1) | |
127 strlcpy(basehref, argv[1], sizeof(basehref)); | |
128 | |
129 parser.xmlattr = xmlattr; | |
130 parser.xmlattrentity = xmlattrentity; | |
131 parser.xmlattrstart = xmlattrstart; | |
132 parser.xmltagstart = xmltagstart; | |
133 parser.xmltagstartparsed = xmltagstartparsed; | |
134 | |
135 /* NOTE: GETNEXT is defined in xml.h for inline optimization */ | |
136 xml_parse(&parser); | |
137 | |
138 checkfileerror(stdin, "<stdin>", 'r'); | |
139 checkfileerror(stdout, "<stdout>", 'w'); | |
140 | |
141 return 0; | |
142 } |