Introduction
Introduction Statistics Contact Development Disclaimer Help
grabtitle.c - grabtitle - stupid HTML title grabber
git clone git://git.codemadness.org/grabtitle
Log
Files
Refs
README
LICENSE
---
grabtitle.c (2167B)
---
1 #include <ctype.h>
2 #include <errno.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <strings.h>
7
8 #include "xml.h"
9
10 #ifdef __OpenBSD__
11 #include <unistd.h>
12 #else
13 #define pledge(a,b) 0
14 #endif
15
16 static XMLParser parser; /* parser instance whose callbacks main() sets up and passes to xml_parse() */
17 static const char *state, *endtag; /* endtag: closing tag text being searched for ("</script>" or "</style>"); state: next character of endtag still to be matched */
18 static int (*getnext)(void); /* reader saved by xmltagstart() before installing getnext_ignore(); put back once endtag is matched */
19
20 /* return a space for all data until some case-insensitive string occurs…
21 is used to parse incorrect HTML/XML that contains unescaped HTML in s…
22 or style tags. If you see some </script> tag in a CDATA or comment
23 section then e-mail W3C and tell them the web is too complex. */
24 static inline int
25 getnext_ignore(void)
26 {
27 int c;
28
29 if ((c = getnext()) == EOF)
30 return EOF;
31
32 if (tolower(c) == tolower((unsigned char)*state)) {
33 state++;
34 if (*state == '\0') {
35 parser.getnext = getnext; /* restore */
36 return c;
37 }
38 } else {
39 state = endtag;
40 }
41
42 return ' ';
43 }
44
45 static void
46 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
47 {
48 putchar('\n');
49 exit(0);
50 }
51
52 /* data and CDATA */
53 static void
54 xmldata(XMLParser *p, const char *d, size_t dl)
55 {
56 size_t i;
57
58 for (i = 0; *d && i < dl; i++, d++) {
59 if (iscntrl((unsigned char)*d))
60 putchar(' ');
61 else
62 putchar(*d);
63 }
64 }
65
66 static void
67 xmldataentity(XMLParser *p, const char *d, size_t dl)
68 {
69 char buf[16];
70 ssize_t len;
71
72 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
73 xmldata(p, buf, (size_t)len);
74 else
75 xmldata(p, d, dl);
76 }
77
78 static void
79 xmltagstart(XMLParser *p, const char *t, size_t tl)
80 {
81 if (tl == 6 && !strcasecmp(t, "script")) {
82 state = endtag = "</script>";
83 getnext = p->getnext; /* for restore */
84 p->getnext = getnext_ignore;
85 } else if (tl == 5 && !strcasecmp(t, "style")) {
86 state = endtag = "</style>";
87 getnext = p->getnext; /* for restore */
88 p->getnext = getnext_ignore;
89 } else if (tl == 5 && !strcasecmp(t, "title")) {
90 p->xmltagend = xmltagend;
91 p->xmlcdata = p->xmldata = xmldata;
92 p->xmldataentity = xmldataentity;
93 }
94 }
95
96 int
97 main(void)
98 {
99 if (pledge("stdio", NULL) == -1) {
100 fprintf(stderr, "pledge: %s\n", strerror(errno));
101 return 2;
102 }
103
104 parser.xmltagstart = xmltagstart;
105 parser.getnext = getchar;
106 xml_parse(&parser);
107
108 return 1;
109 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.