grabtitle.c - grabtitle - stupid HTML title grabber | |
git clone git://git.codemadness.org/grabtitle | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
grabtitle.c (2167B) | |
--- | |
1 #include <ctype.h> | |
2 #include <errno.h> | |
3 #include <stdio.h> | |
4 #include <stdlib.h> | |
5 #include <string.h> | |
6 #include <strings.h> | |
7 | |
8 #include "xml.h" | |
9 | |
10 #ifdef __OpenBSD__ | |
11 #include <unistd.h> | |
12 #else | |
13 #define pledge(a,b) 0 | |
14 #endif | |
15 | |
16 static XMLParser parser; | |
17 static const char *state, *endtag; | |
18 static int (*getnext)(void); | |
19 | |
20 /* return a space for all data until some case-insensitive string occurs… | |
21 is used to parse incorrect HTML/XML that contains unescaped HTML in s… | |
22 or style tags. If you see some </script> tag in a CDATA or comment | |
23 section then e-mail W3C and tell them the web is too complex. */ | |
24 static inline int | |
25 getnext_ignore(void) | |
26 { | |
27 int c; | |
28 | |
29 if ((c = getnext()) == EOF) | |
30 return EOF; | |
31 | |
32 if (tolower(c) == tolower((unsigned char)*state)) { | |
33 state++; | |
34 if (*state == '\0') { | |
35 parser.getnext = getnext; /* restore */ | |
36 return c; | |
37 } | |
38 } else { | |
39 state = endtag; | |
40 } | |
41 | |
42 return ' '; | |
43 } | |
44 | |
45 static void | |
46 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) | |
47 { | |
48 putchar('\n'); | |
49 exit(0); | |
50 } | |
51 | |
52 /* data and CDATA */ | |
53 static void | |
54 xmldata(XMLParser *p, const char *d, size_t dl) | |
55 { | |
56 size_t i; | |
57 | |
58 for (i = 0; *d && i < dl; i++, d++) { | |
59 if (iscntrl((unsigned char)*d)) | |
60 putchar(' '); | |
61 else | |
62 putchar(*d); | |
63 } | |
64 } | |
65 | |
66 static void | |
67 xmldataentity(XMLParser *p, const char *d, size_t dl) | |
68 { | |
69 char buf[16]; | |
70 ssize_t len; | |
71 | |
72 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0) | |
73 xmldata(p, buf, (size_t)len); | |
74 else | |
75 xmldata(p, d, dl); | |
76 } | |
77 | |
78 static void | |
79 xmltagstart(XMLParser *p, const char *t, size_t tl) | |
80 { | |
81 if (tl == 6 && !strcasecmp(t, "script")) { | |
82 state = endtag = "</script>"; | |
83 getnext = p->getnext; /* for restore */ | |
84 p->getnext = getnext_ignore; | |
85 } else if (tl == 5 && !strcasecmp(t, "style")) { | |
86 state = endtag = "</style>"; | |
87 getnext = p->getnext; /* for restore */ | |
88 p->getnext = getnext_ignore; | |
89 } else if (tl == 5 && !strcasecmp(t, "title")) { | |
90 p->xmltagend = xmltagend; | |
91 p->xmlcdata = p->xmldata = xmldata; | |
92 p->xmldataentity = xmldataentity; | |
93 } | |
94 } | |
95 | |
96 int | |
97 main(void) | |
98 { | |
99 if (pledge("stdio", NULL) == -1) { | |
100 fprintf(stderr, "pledge: %s\n", strerror(errno)); | |
101 return 2; | |
102 } | |
103 | |
104 parser.xmltagstart = xmltagstart; | |
105 parser.getnext = getchar; | |
106 xml_parse(&parser); | |
107 | |
108 return 1; | |
109 } |