duckduckgo.c - frontends - front-ends for some sites (experiment) | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
duckduckgo.c (4755B) | |
--- | |
1 #include <sys/types.h> | |
2 | |
3 #include <ctype.h> | |
4 #include <err.h> | |
5 #include <locale.h> | |
6 #include <stdio.h> | |
7 #include <stdlib.h> | |
8 #include <string.h> | |
9 #include <unistd.h> | |
10 #include <wchar.h> | |
11 | |
12 #include "duckduckgo.h" | |
13 #include "https.h" | |
14 #include "util.h" | |
15 #include "xml.h" | |
16 | |
/* Parser instance; its callbacks are assigned in duckduckgo_search(). */
17 static XMLParser x; | |
18 | |
/* Result list that the parser callbacks append to; set by duckduckgo_search(). */
19 static struct duckduckgo_results *results; | |
/* Scratch entry for the result being parsed; copied into results by xmlattr(). */
20 static struct duckduckgo_result result; | |
/* Parser state flags, set in xmlattr() and cleared in xmlattr()/xmltagend(). */
21 static int istitle, isdescription, isurl, isresult; | |
22 | |
/*
 * Clean up a string in place: strip trailing and leading whitespace, then
 * replace every remaining control character with a space.
 *
 * s   - NUL-terminated string, modified in place.
 * len - length hint from the caller. It is kept so existing callers still
 *       compile, but it is not trusted: all sizes are derived from the
 *       terminator, so a stale or too-small value cannot make the size of
 *       the move wrap around.
 */
void
sanitize(char *s, size_t len)
{
	size_t start, end, i;

	(void)len;

	/* trim trailing whitespace */
	for (end = strlen(s); end > 0; end--) {
		if (!isspace((unsigned char)s[end - 1]))
			break;
	}
	s[end] = '\0';

	/* trim leading whitespace; the loop stops at the terminator at the
	 * latest, so start never exceeds end */
	for (start = 0; isspace((unsigned char)s[start]); start++)
		;
	if (start > 0)
		memmove(s, s + start, end - start + 1); /* + 1: terminator */

	/* control characters left inside the text become spaces */
	for (i = 0; s[i]; i++) {
		if (iscntrl((unsigned char)s[i]))
			s[i] = ' ';
	}
}
47 | |
/*
 * Attribute callback for the XML parser: tracks which part of a search
 * result is being parsed and stores a finished result in the result list.
 *
 * t, a and v are the tag name, attribute name and attribute value; the
 * length arguments (tl, al, vl) and x are not used here.
 *
 * NOTE(review): several string literals below are cut off in this listing,
 * so the exact class/style values being matched cannot be confirmed here.
 */
48 void | |
49 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al, | |
50 const char *v, size_t vl) | |
51 { | |
/* a matching div class marks the start of a result */
52 if (!strcmp(t, "div") && !strcmp(a, "class") && strstr(v, "resul… | |
53 isresult = 1; | |
54 | |
/* everything below only applies inside a result */
55 if (!isresult) | |
56 return; | |
57 | |
58 /* a clear-fix div marks the end of a result */ | |
59 if (!strcmp(t, "div") && !strcmp(a, "style") && strstr(v, "clear… | |
60 isresult = 0; | |
61 | |
/* a result without a title or URL is not stored (and not cleared) */
62 if (!result.title[0] || !result.url[0]) | |
63 return; | |
64 | |
65 /* add result */ | |
/* NOTE(review): "<=" allows index MAX_ITEMS; that is only in bounds if
 * items[] holds MAX_ITEMS + 1 entries - confirm in duckduckgo.h */
66 if (results->nitems <= MAX_ITEMS) { | |
67 memcpy(&(results->items[results->nitems]), | |
68 &result, sizeof(result)); | |
69 results->nitems++; | |
70 } | |
/* reset the scratch entry for the next result, stored or not */
71 memset(&result, 0, sizeof(result)); | |
72 return; | |
73 } | |
74 | |
/* the flags set below are cleared again in xmltagend() */
75 if (!strcmp(t, "h2") && !strcmp(a, "class") && strstr(v, "result… | |
76 istitle = 1; | |
77 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result_… | |
78 isdescription = 1; | |
79 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result_… | |
80 isurl = 1; | |
/* strlcpy: each call replaces result.url rather than appending to it */
81 if (isurl && !strcmp(t, "a") && !strcmp(a, "href")) | |
82 strlcpy(result.url, v, sizeof(result.url)); | |
83 } | |
84 | |
85 void | |
86 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, siz… | |
87 const char *v, size_t vl) | |
88 { | |
89 char buf[16]; | |
90 int len; | |
91 | |
92 if (!isresult || !istitle || !isdescription || !isurl) | |
93 return; | |
94 | |
95 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) | |
96 xmlattr(x, t, tl, a, al, buf, (size_t)len); | |
97 else | |
98 xmlattr(x, t, tl, a, al, v, vl); | |
99 } | |
100 | |
101 void | |
102 xmldata(XMLParser *x, const char *d, size_t dl) | |
103 { | |
104 if (istitle) | |
105 strlcat(result.title, d, sizeof(result.title)); | |
106 if (isdescription) | |
107 strlcat(result.description, d, sizeof(result.description… | |
108 } | |
109 | |
110 void | |
111 xmlcdata(XMLParser *x, const char *d, size_t dl) | |
112 { | |
113 xmldata(x, d, dl); | |
114 } | |
115 | |
116 void | |
117 xmldataentity(XMLParser *x, const char *d, size_t dl) | |
118 { | |
119 char buf[16]; | |
120 int len; | |
121 | |
122 if (!isresult || !istitle || !isdescription || !isurl) | |
123 return; | |
124 | |
125 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0) | |
126 xmldata(x, buf, (size_t)len); | |
127 else | |
128 xmldata(x, d, dl); | |
129 } | |
130 | |
/*
 * End-tag callback: closes the per-field state flags and, on a closing
 * div, post-processes the fields of the current result.
 *
 * t is the tag name; x, tl and isshort are not used here. Nothing is done
 * outside of a result.
 *
 * NOTE(review): two lines below are cut off in this listing; in particular
 * the remaining arguments and the comparison of the decodeparam() call
 * cannot be confirmed from here.
 */
131 void | |
132 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort) | |
133 { | |
134 char *p; | |
135 | |
136 if (!isresult) | |
137 return; | |
138 | |
139 if (isdescription) { | |
140 /* highlight: a closing b tag appends a '*' marker to the description */ | |
141 if (!strcmp(t, "b")) | |
142 strlcat(result.description, "*", sizeof(result.d… | |
143 } | |
144 | |
/* leave the field whose element is being closed */
145 if (istitle && !strcmp(t, "h2")) | |
146 istitle = 0; | |
147 if (isdescription && !strcmp(t, "a")) | |
148 isdescription = 0; | |
149 if (isurl && !strcmp(t, "a")) | |
150 isurl = 0; | |
/* runs for any closing div seen while inside a result */
151 if (!strcmp(t, "div")) { | |
152 /* decode url and remove "tracking"/usage part via DDG */ | |
153 if ((p = strstr(result.url, "uddg="))) { | |
/* skip past the parameter name to the start of its value */
154 p += sizeof("uddg=") - 1; | |
155 if (decodeparam(result.urldecoded, sizeof(result… | |
156 result.urldecoded[0] = '\0'; | |
157 } | |
158 | |
/* trim whitespace and replace control characters in each field */
159 sanitize(result.title, strlen(result.title)); | |
160 sanitize(result.urldecoded, strlen(result.urldecoded)); | |
161 sanitize(result.description, strlen(result.description)); | |
162 | |
163 istitle = isdescription = isurl = 0; | |
164 } | |
165 } | |
166 | |
167 void | |
168 xmltagstart(XMLParser *x, const char *t, size_t tl) | |
169 { | |
170 /* highlight */ | |
171 if (isdescription && !strcmp(t, "b")) | |
172 strlcat(result.description, "*", sizeof(result.descripti… | |
173 | |
174 } | |
175 | |
/*
 * Build the request path for the query s and fetch it from
 * html.duckduckgo.com via request().
 *
 * s is inserted into the path as-is. Returns NULL when the path cannot be
 * formatted or does not fit the buffer; otherwise returns whatever
 * request() returns.
 */
char *
duckduckgo_search_data(const char *s)
{
	char reqpath[4096];
	int n = snprintf(reqpath, sizeof(reqpath), "/html/?q=%s", s);

	/* only send the request when the whole path was written */
	if (n >= 0 && (size_t)n < sizeof(reqpath))
		return request("html.duckduckgo.com", reqpath, "");

	return NULL;
}
188 | |
189 struct duckduckgo_results * | |
190 duckduckgo_search(const char *s) | |
191 { | |
192 struct duckduckgo_results *r; | |
193 char *data; | |
194 | |
195 results = NULL; /* global */ | |
196 | |
197 if (!(r = calloc(1, sizeof(*r)))) | |
198 return NULL; | |
199 | |
200 /* TODO: encodeuri s */ | |
201 if (!(data = duckduckgo_search_data(s))) { | |
202 free(r); | |
203 results = NULL; | |
204 return NULL; | |
205 } | |
206 | |
207 // TODO: xmlparser, parse data into struct duckduckgo_results. | |
208 | |
209 x.xmlattr = xmlattr; | |
210 x.xmlattrentity = xmlattrentity; | |
211 x.xmlcdata = xmlcdata; | |
212 x.xmldata = xmldata; | |
213 x.xmldataentity = xmldataentity; | |
214 x.xmltagend = xmltagend; | |
215 x.xmltagstart = xmltagstart; | |
216 | |
217 results = r; /* global: store */ | |
218 setxmldata(data, strlen(data)); | |
219 xml_parse(&x); | |
220 | |
221 free(data); | |
222 | |
223 return r; | |
224 } |