Introduction
Introduction Statistics Contact Development Disclaimer Help
duckduckgo.c - frontends - front-ends for some sites (experiment)
Log
Files
Refs
README
LICENSE
---
duckduckgo.c (4755B)
---
1 #include <sys/types.h>
2
3 #include <ctype.h>
4 #include <err.h>
5 #include <locale.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 #include <wchar.h>
11
12 #include "duckduckgo.h"
13 #include "https.h"
14 #include "util.h"
15 #include "xml.h"
16
/* XML parser instance; its callbacks are assigned in duckduckgo_search(). */
17 static XMLParser x;
18
/* Result list that xmlattr() appends to; set by duckduckgo_search(). */
19 static struct duckduckgo_results *results;
/* Scratch entry for the result currently being assembled by the callbacks. */
20 static struct duckduckgo_result result;
/* Parser state flags, set in xmlattr() and cleared in xmlattr()/xmltagend(). */
21 static int istitle, isdescription, isurl, isresult;
22
/*
 * Clean up the NUL-terminated string s in place:
 *  1. strip trailing whitespace,
 *  2. strip leading whitespace,
 *  3. replace every remaining control character with a single space.
 *
 * s must be a writable, NUL-terminated buffer.
 * len is kept for interface compatibility only: the amount of data to move
 * is derived from the string itself, so a stale or wrong len can no longer
 * make the move run past the end of the buffer.
 */
void
sanitize(char *s, size_t len)
{
	size_t start, end, i;

	(void)len;

	/* trim trailing whitespace */
	end = strlen(s);
	while (end > 0 && isspace((unsigned char)s[end - 1]))
		end--;
	s[end] = '\0';

	/* trim leading whitespace */
	start = 0;
	while (start < end && isspace((unsigned char)s[start]))
		start++;

	/* shift the kept part (and its terminator) to the front */
	if (start > 0)
		memmove(s, s + start, end - start + 1);

	/* control characters would garble the output: turn them into spaces */
	for (i = 0; s[i]; i++) {
		if (iscntrl((unsigned char)s[i]))
			s[i] = ' ';
	}
}
47
/*
 * Attribute callback (installed as x.xmlattr in duckduckgo_search()).
 * t is compared against tag names, a against attribute names and v is
 * searched as the attribute value; tl, al and vl are not used here.
 *
 * - A "div" whose "class" value contains a marker string sets isresult.
 *   Nothing else is processed while isresult is not set.
 * - A "div" whose "style" value contains a marker string clears isresult.
 *   If the scratch result then has both a title and a url it is copied to
 *   results->items[results->nitems], nitems is incremented and the scratch
 *   result is zeroed.
 * - "h2"/"a" elements with matching "class" values set istitle,
 *   isdescription and isurl.
 * - While isurl is set, the "href" value of an "a" is copied to result.url.
 *
 * The marker strings and the ends of several conditions are cut off in this
 * listing, so their exact values cannot be stated here.
 *
 * NOTE(review): the append is guarded by `nitems <= MAX_ITEMS`; that is only
 * in bounds if items has at least MAX_ITEMS + 1 elements - confirm against
 * duckduckgo.h.
 */
48 void
49 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
50 const char *v, size_t vl)
51 {
52 if (!strcmp(t, "div") && !strcmp(a, "class") && strstr(v, "resul…
53 isresult = 1;
54
55 if (!isresult)
56 return;
57
58 /* clear fix is use in the end of a result */
59 if (!strcmp(t, "div") && !strcmp(a, "style") && strstr(v, "clear…
60 isresult = 0;
61
62 if (!result.title[0] || !result.url[0])
63 return;
64
65 /* add result */
66 if (results->nitems <= MAX_ITEMS) {
67 memcpy(&(results->items[results->nitems]),
68 &result, sizeof(result));
69 results->nitems++;
70 }
71 memset(&result, 0, sizeof(result));
72 return;
73 }
74
75 if (!strcmp(t, "h2") && !strcmp(a, "class") && strstr(v, "result…
76 istitle = 1;
77 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result_…
78 isdescription = 1;
79 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result_…
80 isurl = 1;
81 if (isurl && !strcmp(t, "a") && !strcmp(a, "href"))
82 strlcpy(result.url, v, sizeof(result.url));
83 }
84
/*
 * Attribute-entity callback (installed as x.xmlattrentity in
 * duckduckgo_search()). Passes v through xml_entitytostr(); when that
 * returns a positive length the converted text in buf is forwarded to
 * xmlattr(), otherwise the original v/vl pair is forwarded unchanged.
 *
 * NOTE(review): the guard returns unless isresult, istitle, isdescription
 * and isurl are ALL set at the same time. xmltagend() clears isdescription
 * and isurl on every closing "a", so this looks stricter than intended
 * (probably "inside a result and any of the three") - confirm.
 */
85 void
86 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, siz…
87 const char *v, size_t vl)
88 {
89 char buf[16];
90 int len;
91
92 if (!isresult || !istitle || !isdescription || !isurl)
93 return;
94
95 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0)
96 xmlattr(x, t, tl, a, al, buf, (size_t)len);
97 else
98 xmlattr(x, t, tl, a, al, v, vl);
99 }
100
/*
 * Character-data callback (installed as x.xmldata in duckduckgo_search()).
 * Appends d to result.title while istitle is set and to result.description
 * while isdescription is set, using strlcat() bounded by the destination
 * size. x and dl are not used; d is treated as a NUL-terminated string.
 */
101 void
102 xmldata(XMLParser *x, const char *d, size_t dl)
103 {
104 if (istitle)
105 strlcat(result.title, d, sizeof(result.title));
106 if (isdescription)
107 strlcat(result.description, d, sizeof(result.description…
108 }
109
110 void
111 xmlcdata(XMLParser *x, const char *d, size_t dl)
112 {
113 xmldata(x, d, dl);
114 }
115
116 void
117 xmldataentity(XMLParser *x, const char *d, size_t dl)
118 {
119 char buf[16];
120 int len;
121
122 if (!isresult || !istitle || !isdescription || !isurl)
123 return;
124
125 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
126 xmldata(x, buf, (size_t)len);
127 else
128 xmldata(x, d, dl);
129 }
130
/*
 * Tag-end callback (installed as x.xmltagend in duckduckgo_search()).
 * t is the name of the tag being closed; x, tl and isshort are not used.
 * Does nothing while isresult is not set.
 *
 * - A closing "b" inside a description appends "*" to result.description
 *   (xmltagstart() appends the matching opening "*").
 * - A closing "h2" clears istitle; a closing "a" clears isdescription and
 *   isurl.
 * - A closing "div" finishes the fields of the scratch result: if
 *   result.url contains "uddg=", the text after it is passed to
 *   decodeparam() to fill result.urldecoded (which is emptied when the
 *   truncated condition on that line holds - presumably on failure), then
 *   title, urldecoded and description are run through sanitize() and the
 *   three field flags are cleared.
 */
131 void
132 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
133 {
134 char *p;
135
136 if (!isresult)
137 return;
138
139 if (isdescription) {
140 /* highlight */
141 if (!strcmp(t, "b"))
142 strlcat(result.description, "*", sizeof(result.d…
143 }
144
145 if (istitle && !strcmp(t, "h2"))
146 istitle = 0;
147 if (isdescription && !strcmp(t, "a"))
148 isdescription = 0;
149 if (isurl && !strcmp(t, "a"))
150 isurl = 0;
151 if (!strcmp(t, "div")) {
152 /* decode url and remove "tracking"/usage part via DDG */
153 if ((p = strstr(result.url, "uddg="))) {
154 p += sizeof("uddg=") - 1;
155 if (decodeparam(result.urldecoded, sizeof(result…
156 result.urldecoded[0] = '\0';
157 }
158
159 sanitize(result.title, strlen(result.title));
160 sanitize(result.urldecoded, strlen(result.urldecoded));
161 sanitize(result.description, strlen(result.description));
162
163 istitle = isdescription = isurl = 0;
164 }
165 }
166
/*
 * Tag-start callback (installed as x.xmltagstart in duckduckgo_search()).
 * While a description is being collected, an opening "b" tag appends "*" to
 * result.description as a plain-text highlight marker. x and tl are not
 * used.
 */
167 void
168 xmltagstart(XMLParser *x, const char *t, size_t tl)
169 {
170 /* highlight */
171 if (isdescription && !strcmp(t, "b"))
172 strlcat(result.description, "*", sizeof(result.descripti…
173
174 }
175
/*
 * Build the request path "/html/?q=<s>" and fetch it from
 * html.duckduckgo.com through request().
 *
 * s is inserted into the path as-is (no URI encoding is done here).
 * Returns whatever request() returns, or NULL when the path could not be
 * formatted or does not fit in the local buffer. duckduckgo_search()
 * releases the returned buffer with free().
 */
char *
duckduckgo_search_data(const char *s)
{
	char path[4096];
	int n = snprintf(path, sizeof(path), "/html/?q=%s", s);

	/* only issue the request when the path was formatted completely */
	if (n >= 0 && (size_t)n < sizeof(path))
		return request("html.duckduckgo.com", path, "");

	return NULL;
}
188
189 struct duckduckgo_results *
190 duckduckgo_search(const char *s)
191 {
192 struct duckduckgo_results *r;
193 char *data;
194
195 results = NULL; /* global */
196
197 if (!(r = calloc(1, sizeof(*r))))
198 return NULL;
199
200 /* TODO: encodeuri s */
201 if (!(data = duckduckgo_search_data(s))) {
202 free(r);
203 results = NULL;
204 return NULL;
205 }
206
207 // TODO: xmlparser, parse data into struct duckduckgo_results.
208
209 x.xmlattr = xmlattr;
210 x.xmlattrentity = xmlattrentity;
211 x.xmlcdata = xmlcdata;
212 x.xmldata = xmldata;
213 x.xmldataentity = xmldataentity;
214 x.xmltagend = xmltagend;
215 x.xmltagstart = xmltagstart;
216
217 results = r; /* global: store */
218 setxmldata(data, strlen(data));
219 xml_parse(&x);
220
221 free(data);
222
223 return r;
224 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.