adblock.c - surf-adblock - Surf adblock web extension | |
git clone git://git.codemadness.org/surf-adblock | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
adblock.c (22106B) | |
--- | |
1 #include <sys/stat.h> | |
2 #include <sys/types.h> | |
3 | |
4 #include <ctype.h> | |
5 #include <errno.h> | |
6 #include <fcntl.h> | |
7 #include <limits.h> | |
8 #include <stdarg.h> | |
9 #include <stdio.h> | |
10 #include <stdlib.h> | |
11 #include <string.h> | |
12 #include <time.h> /* DEBUG: used for clock_gettime, remove later */ | |
13 #include <wchar.h> | |
14 #include <wctype.h> | |
15 | |
16 #include "adblock.h" | |
17 | |
/* Growable, NUL-terminated byte buffer (used to accumulate CSS text). */
typedef struct string {
	char   *data;   /* heap buffer, NULL while nothing was appended */
	size_t  datasz; /* number of bytes allocated for `data` */
	size_t  len;    /* bytes in use, not counting the terminating NUL */
} String;
24 | |
/* One node of a rule's singly linked domain list. */
struct filterdomain {
	char *domain;              /* heap-allocated domain name */
	int inverse;               /* non-zero when the name was prefixed with '~' */
	struct filterdomain *next; /* following node, NULL at the end of the list */
};
30 | |
/* A single parsed filter rule; rules are chained through `next`. */
struct filterrule {
	/* request type mask built from the FilterType enum values;
	 * needs at least 32 bits */
	unsigned long block;
	int matchbegin; /* set for patterns that started with "||" */
	int matchend;   /* set for patterns that started with a single "|" */
	/* exception rule: "@@" prefix (ABP syntax) or "#@#" (CSS syntax) */
	int isexception;
	const char *css; /* non-NULL marks a CSS / element hiding rule */
	const char *uri; /* URI pattern of a request rule */
	struct filterdomain *domains; /* domains the rule is restricted to */
	struct filterrule *next;
};
43 | |
/* Bit flags that make up the `block` mask of a filter rule. */
enum {
	FilterTypeScript       = 0x0001,
	FilterTypeImage        = 0x0002,
	FilterTypeCSS          = 0x0004,
	FilterTypeObject       = 0x0008,
	FilterTypeXHR          = 0x0010,
	FilterTypeObjectSub    = 0x0020,
	FilterTypeSubDoc       = 0x0040,
	FilterTypePing         = 0x0080,
	FilterTypeDocument     = 0x0100,
	FilterTypeElemHide     = 0x0200,
	FilterTypeOther        = 0x0400,
	FilterTypeGenericHide  = 0x0800,
	FilterTypeGenericBlock = 0x1000,
	FilterTypeMatchCase    = 0x2000,
};
60 | |
/* Description of one rule option (the part after '$' in a rule). */
struct filtertype {
	/* FilterType bit for this option, needs at least 32 bits */
	unsigned long type;
	char *name;     /* option name as written in the rule */
	size_t namelen; /* length of `name` without the terminating NUL */
	/* NOTE(review): the three flags below are not read by the code in
	 * this file; presumably they state whether "~option", the plain
	 * option and exception-only use are valid - confirm. */
	int allowinverse;
	int allownormal;
	int onlyexception;
	/* optional parser for the text after "option=", may be NULL */
	int (*fn)(struct filterrule *, char *);
};
71 | |
72 static int parsedomainsoption(struct filterrule *, char *); | |
73 | |
74 #define STRP(s) s,sizeof(s)-1 | |
75 | |
/*
 * Table of the rule options that parserule() recognises.
 * parserule() looks options up with bsearch() using filtertype_cmp()
 * (strcmp() on the name), so the entries must stay sorted by name in
 * strcmp() order when one is added.
 */
76 static struct filtertype filtertypes[] = { | |
77 /* NOTE: options with 'type' = 0 are silently ignored and treate… | |
78 * requests for now */ | |
79 { 0, STRP("collapse"), 1, 1, 0, NU… | |
80 { FilterTypeDocument, STRP("document"), 1, 0, 1, NU… | |
81 { 0, STRP("domain"), 0, 1, 0, | |
82 /* domain=... */ &parsedomainsopti… | |
83 { 0, STRP("donottrack"), 1, 1, 0, NU… | |
84 { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NU… | |
85 { 0, STRP("font"), 1, 1, 0, NU… | |
86 { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NU… | |
87 { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NU… | |
88 { FilterTypeImage, STRP("image"), 1, 1, 0, NU… | |
89 { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NU… | |
90 { 0, STRP("media"), 1, 1, 0, NU… | |
91 { FilterTypeObject, STRP("object"), 1, 1, 0, NU… | |
92 { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NU… | |
93 { FilterTypeOther, STRP("other"), 1, 1, 0, NU… | |
94 { FilterTypePing, STRP("ping"), 1, 1, 0, NU… | |
95 { 0, STRP("popup"), 1, 1, 0, NU… | |
96 { FilterTypeScript, STRP("script"), 1, 1, 0, NU… | |
97 { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NU… | |
98 { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NU… | |
99 { 0, STRP("third-party"), 1, 1, 0, NU… | |
100 { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NU… | |
101 /* NOTE: site-key not supported */ | |
102 }; | |
103 | |
104 static String globalcss; | |
105 static struct filterrule *rules; | |
106 | |
/* printf-style warning on stderr, prefixed with the program name. */
static void
weprintf(const char *fmt, ...)
{
	va_list args;

	fputs("surf-adblock: ", stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
118 | |
/* calloc() wrapper: prints a warning on failure and still returns NULL. */
static void *
wecalloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		weprintf("calloc: %s\n", strerror(errno));

	return mem;
}
129 | |
/* strndup() wrapper: prints a warning on failure and still returns NULL. */
static char *
westrndup(const char *s, size_t n)
{
	char *copy = strndup(s, n);

	if (copy == NULL)
		weprintf("strndup: %s\n", strerror(errno));
	return copy;
}
139 | |
/* strdup() wrapper: prints a warning on failure and still returns NULL. */
static char *
westrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		weprintf("strdup: %s\n", strerror(errno));

	return copy;
}
150 | |
151 static size_t | |
152 string_buffer_realloc(String *s, size_t newsz) | |
153 { | |
154 char *tmp; | |
155 size_t allocsz; | |
156 | |
157 for (allocsz = 64; allocsz <= newsz; allocsz *= 2) | |
158 ; | |
159 if (!(tmp = realloc(s->data, allocsz))) { | |
160 weprintf("realloc: %s\n", strerror(errno)); | |
161 } else { | |
162 s->data = tmp; | |
163 s->datasz = allocsz; | |
164 } | |
165 | |
166 return s->datasz; | |
167 } | |
168 | |
169 static size_t | |
170 string_append(String *s, const char *data, size_t len) | |
171 { | |
172 size_t newlen; | |
173 | |
174 if (!len) | |
175 return len; | |
176 | |
177 newlen = s->len + len; | |
178 /* check if allocation is necesary, don't shrink buffer, | |
179 * should be more than datasz ofcourse. */ | |
180 if (newlen >= s->datasz) { | |
181 if (string_buffer_realloc(s, newlen + 1) <= newlen) | |
182 return 0; | |
183 } | |
184 memcpy(s->data + s->len, data, len); | |
185 s->len = newlen; | |
186 s->data[s->len] = '\0'; | |
187 | |
188 return len; | |
189 } | |
190 | |
/* Special token codes used by the pattern matcher next to plain characters. */
#define END          (0)
#define UNMATCHABLE  (-2)
#define CARET        (-3)
#define STAR         (-4)
195 | |
/*
 * Decode the next character of `str` (at most `n` bytes are available).
 * `*step` receives the number of bytes to advance. Returns the character,
 * 0 when `n` is 0 and -1 (with a step of 1) for an undecodable sequence.
 */
static int
str_next(const char *str, size_t n, size_t *step)
{
	wchar_t wide;
	int nbytes;

	if (n == 0) {
		*step = 0;
		return 0;
	}

	/* single-byte (ASCII) character: nothing to decode */
	if ((unsigned char)str[0] < 0x80) {
		*step = 1;
		return str[0];
	}

	nbytes = mbtowc(&wide, str, n);
	if (nbytes < 0) {
		*step = 1;
		return -1;
	}
	*step = nbytes;

	return wide;
}
217 | |
218 static int | |
219 pat_next(const char *pat, size_t m, size_t *step) | |
220 { | |
221 int esc = 0; | |
222 | |
223 if (!m || !*pat) { | |
224 *step = 0; | |
225 return END; | |
226 } | |
227 *step = 1; | |
228 if (pat[0]=='\\' && pat[1]) { | |
229 *step = 2; | |
230 pat++; | |
231 esc = 1; | |
232 goto escaped; | |
233 } | |
234 if (pat[0]=='^') | |
235 return CARET; | |
236 if (pat[0] == '*') | |
237 return STAR; | |
238 escaped: | |
239 if (pat[0] >= 128U) { | |
240 wchar_t wc; | |
241 int k = mbtowc(&wc, pat, m); | |
242 if (k<0) { | |
243 *step = 0; | |
244 return UNMATCHABLE; | |
245 } | |
246 *step = k + esc; | |
247 return wc; | |
248 } | |
249 return pat[0]; | |
250 } | |
251 | |
/* Swap the case of `k`: upper case if that differs, lower case otherwise. */
static int
casefold(int k)
{
	const int upper = towupper(k);

	if (upper != k)
		return upper;

	return towlower(k);
}
265 | |
/*
 * Glob-style match of `str` against the filter pattern `pat`.
 * In the pattern '*' matches any run of characters, '^' matches one
 * '/' or '?' and a backslash makes the following character literal
 * (see pat_next()).
 * With a non-zero `fcase` an input character also matches when its
 * case-swapped form (casefold()) equals the pattern character.
 * Returns 0 when the string matches and 1 otherwise, so callers test
 * for !match(...).
 */
266 /* match() based on musl-libc fnmatch: | |
267 https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */ | |
268 static int | |
269 match(const char *pat, const char *str, int fcase) | |
270 { | |
271 size_t m = -1, n = -1; | |
272 const char *p, *ptail, *endpat; | |
273 const char *s, *stail, *endstr; | |
274 size_t pinc, sinc, tailcnt=0; | |
275 int c, k, kfold; | |
276 | |
277 for (;;) { | |
278 switch ((c = pat_next(pat, m, &pinc))) { | |
279 case UNMATCHABLE: | |
280 return 1; | |
281 case STAR: | |
282 pat++; | |
283 m--; | |
284 break; | |
285 case CARET: | |
286 k = str_next(str, n, &sinc); | |
287 if (k <= 0) | |
288 return (c==END) ? 0 : 1; | |
289 str += sinc; | |
290 n -= sinc; | |
291 if (k != '?' && k != '/') | |
292 return 1; | |
293 pat++; | |
294 m--; | |
295 break; | |
296 default: | |
297 k = str_next(str, n, &sinc); | |
298 if (k <= 0) | |
299 return (c==END) ? 0 : 1; | |
300 str += sinc; | |
301 n -= sinc; | |
302 kfold = fcase ? casefold(k) : k; | |
303 if (k != c && kfold != c) | |
304 return 1; | |
305 pat+=pinc; | |
306 m-=pinc; | |
307 continue; | |
308 } | |
309 break; | |
310 } | |
311 | |
312 /* Compute real pat length if it was initially unknown/-1 */ | |
313 m = strnlen(pat, m); | |
314 endpat = pat + m; | |
315 | |
316 /* Find the last * in pat and count chars needed after it */ | |
317 for (p=ptail=pat; p<endpat; p+=pinc) { | |
318 switch (pat_next(p, endpat-p, &pinc)) { | |
319 case UNMATCHABLE: | |
320 return 1; | |
321 case STAR: | |
322 tailcnt=0; | |
323 ptail = p+1; | |
324 break; | |
325 default: | |
326 tailcnt++; | |
327 break; | |
328 } | |
329 } | |
330 | |
331 /* Past this point we need not check for UNMATCHABLE in pat, | |
332 * because all of pat has already been parsed once. */ | |
333 | |
334 /* Compute real str length if it was initially unknown/-1 */ | |
335 n = strnlen(str, n); | |
336 endstr = str + n; | |
337 if (n < tailcnt) return 1; | |
338 | |
339 /* Find the final tailcnt chars of str, accounting for UTF-8. | |
340 * On illegal sequences we may get it wrong, but in that case | |
341 * we necessarily have a matching failure anyway. */ | |
342 for (s=endstr; s>str && tailcnt; tailcnt--) { | |
343 if (s[-1] < 128U || MB_CUR_MAX==1) s--; | |
344 else while ((unsigned char)*--s-0x80U<0x40 && s>str); | |
345 } | |
346 if (tailcnt) return 1; | |
347 stail = s; | |
348 | |
349 /* Check that the pat and str tails match */ | |
350 p = ptail; | |
351 for (;;) { | |
352 c = pat_next(p, endpat-p, &pinc); | |
353 p += pinc; | |
354 if ((k = str_next(s, endstr-s, &sinc)) <= 0) { | |
355 if (c != END) return 1; | |
356 break; | |
357 } | |
358 s += sinc; | |
359 if (c == CARET) { | |
360 if (k != '/' && k != '?') | |
361 return 1; | |
362 } else { | |
363 kfold = fcase ? casefold(k) : k; | |
364 if (k != c && kfold != c) | |
365 return 1; | |
366 } | |
367 } | |
368 | |
369 /* We're all done with the tails now, so throw them out */ | |
370 endstr = stail; | |
371 endpat = ptail; | |
372 | |
373 /* Match pattern components until there are none left */ | |
374 while (pat<endpat) { | |
375 p = pat; | |
376 s = str; | |
377 for (;;) { | |
378 c = pat_next(p, endpat-p, &pinc); | |
379 p += pinc; | |
380 /* Encountering * completes/commits a component … | |
381 if (c == STAR) { | |
382 pat = p; | |
383 str = s; | |
384 break; | |
385 } | |
386 k = str_next(s, endstr-s, &sinc); | |
387 if (!k) | |
388 return 1; | |
389 s += sinc; | |
390 if (c == CARET) { | |
391 if (k != '/' && k != '?') | |
392 break; | |
393 } else { | |
394 kfold = fcase ? casefold(k) : k; | |
395 if (k != c && kfold != c) | |
396 break; | |
397 } | |
398 | |
399 } | |
400 if (c == STAR) continue; | |
401 /* If we failed, advance str, by 1 char if it's a valid | |
402 * char, or past all invalid bytes otherwise. */ | |
403 k = str_next(str, endstr-str, &sinc); | |
404 if (k > 0) str += sinc; | |
405 else for (str++; str_next(str, endstr-str, &sinc)<0; str… | |
406 } | |
407 | |
408 return 0; | |
409 } | |
410 | |
411 /* | |
412 domain=... if domain is prefixed with ~, ignore. | |
413 multiple domains can be separated with | | |
414 */ | |
415 static int | |
416 parsedomains(const char *s, int sep, struct filterdomain **head) | |
417 { | |
418 struct filterdomain *d, *last = *head = NULL; | |
419 char *p; | |
420 int inverse; | |
421 | |
422 do { | |
423 inverse = 0; | |
424 if (*s == '~') { | |
425 inverse = !inverse; | |
426 s++; | |
427 } | |
428 if (!*s || *s == sep) | |
429 break; | |
430 | |
431 if (!(d = wecalloc(1, sizeof(struct filterdomain)))) | |
432 return -1; | |
433 if ((p = strchr(s, sep))) { /* TODO: should not contain … | |
434 d->domain = westrndup(s, p - s); | |
435 s = p + 1; | |
436 } else { | |
437 d->domain = westrdup(s); | |
438 } | |
439 if (!d->domain) | |
440 return -1; | |
441 d->inverse = inverse; | |
442 | |
443 if (!*head) | |
444 *head = last = d; | |
445 else | |
446 last = last->next = d; | |
447 } while (p); | |
448 | |
449 return (*head != NULL); | |
450 } | |
451 | |
452 static int | |
453 parsedomainselement(struct filterrule *f, char *s) | |
454 { | |
455 struct filterdomain *d, *last; | |
456 | |
457 for (last = f->domains; last && last->next; last = last->next) | |
458 ; | |
459 | |
460 if (parsedomains(s, ',', &d) < 0) | |
461 return -1; | |
462 if (last) | |
463 last->next = d; | |
464 else | |
465 f->domains = d; | |
466 | |
467 return (d != NULL); | |
468 } | |
469 | |
470 static int | |
471 parsedomainsoption(struct filterrule *f, char *s) | |
472 { | |
473 struct filterdomain *d, *last; | |
474 | |
475 for (last = f->domains; last && last->next; last = last->next) | |
476 ; | |
477 | |
478 if (parsedomains(s, '|', &d) < 0) | |
479 return -1; | |
480 if (last) | |
481 last->next = d; | |
482 else | |
483 f->domains = d; | |
484 | |
485 return (d != NULL); | |
486 } | |
487 | |
488 static int | |
489 filtertype_cmp(const void *a, const void *b) | |
490 { | |
491 return strcmp(((struct filtertype *)a)->name, | |
492 ((struct filtertype *)b)->name); | |
493 } | |
494 | |
/*
 * Return 1 if `domain` equals `s` or is a subdomain of it (ends in
 * "." followed by `s`), 0 otherwise.
 */
static int
matchdomain(const char *s, const char *domain)
{
	const size_t rulelen = strlen(s);
	const size_t hostlen = strlen(domain);
	const char *suffix;

	/* the rule is more specific than the host, or a different domain */
	if (rulelen > hostlen)
		return 0;
	if (rulelen == hostlen)
		return strcmp(s, domain) == 0;

	/* longer host: only a match when a label boundary precedes the suffix */
	suffix = domain + (hostlen - rulelen);
	if (suffix[-1] != '.')
		return 0;

	return strcmp(suffix, s) == 0;
}
513 | |
/*
 * Decide whether rule `f` applies to a request.
 * The rule's domain list is evaluated against `fromdomain` first (a
 * rule without a list applies everywhere); CSS rules return that
 * result directly. For URI rules a glob pattern is built from f->uri
 * in a local buffer and handed to match(): matchbegin rules match the
 * part up to the first '^' or '/' against `reqdomain` (bare, or as a
 * subdomain suffix) and the remainder against `reqrel`, all other
 * rules are matched against `requri`.
 * Returns 1 if the rule applies and 0 if not; a pattern that does not
 * fit into the buffer is reported and treated as "does not apply".
 * NOTE(review): fromuri, fromrel and type are not referenced in the
 * visible part of the body.
 */
514 static int | |
515 matchrule(struct filterrule *f, const char *fromuri, const char *fromdom… | |
516 const char *fromrel, | |
517 const char *requri, const char *reqdomain, const char *reqrel, | |
518 const char *type) | |
519 { | |
520 /* NOTE: order matters, see FilterType enum values */ | |
521 struct filterdomain *d; | |
522 char pat[1024]; | |
523 const char *uri; | |
524 int len, r; | |
525 | |
526 r = f->domains ? 0 : 1; | |
527 for (d = f->domains; d; d = d->next) { | |
528 if (matchdomain(d->domain, fromdomain)) { | |
529 if (r && d->inverse) | |
530 r = 0; | |
531 else if (!r && !d->inverse) | |
532 r = 1; | |
533 } else if (r && !d->inverse) { | |
534 r = 0; | |
535 } | |
536 } | |
537 if (f->css) { | |
538 /* DEBUG */ | |
539 #if 0 | |
540 if (f->isexception) | |
541 printf("DEBUG, exception rule, CSS: %s, match? %… | |
542 f->css, r); | |
543 #endif | |
544 return r; | |
545 } | |
546 | |
547 #if 1 | |
548 /* skip allow rule, TODO: inverse? */ | |
549 if (!r) | |
550 return 0; | |
551 #endif | |
552 | |
553 /* match begin including domain */ | |
554 if (f->matchbegin) { | |
555 /* TODO: match domain part of pattern */ | |
556 /* TODO: preprocess pattern if it is matchbegin? */ | |
557 | |
558 len = strcspn(f->uri, "^/"); | |
559 | |
560 /* match domain without dot */ | |
561 r = snprintf(pat, sizeof(pat), "%.*s", | |
562 len, f->uri); | |
563 if (r == -1 || (size_t)r >= sizeof(pat)) { | |
564 fprintf(stderr, "warning: pattern too large, ign… | |
565 return 0; | |
566 } | |
567 | |
568 /* TODO: block type mask */ | |
569 if (match(pat, reqdomain, (f->block & FilterTypeMatchCas… | |
570 /* match domain with dot */ | |
571 r = snprintf(pat, sizeof(pat), "*.%.*s", | |
572 len, f->uri); | |
573 if (r == -1 || (size_t)r >= sizeof(pat)) { | |
574 fprintf(stderr, "warning: pattern too la… | |
575 return 0; | |
576 } | |
577 | |
578 /* TODO: block type mask */ | |
579 if (match(pat, reqdomain, (f->block & FilterType… | |
580 return 0; | |
581 } | |
582 | |
583 /* match on path */ | |
584 r = snprintf(pat, sizeof(pat), "*%s%s", | |
585 f->uri + len, | |
586 f->matchend ? "" : "*"); | |
587 uri = reqrel; | |
588 } else { | |
589 r = snprintf(pat, sizeof(pat), "*%s%s", | |
590 f->uri, | |
591 f->matchend ? "" : "*"); | |
592 uri = requri; | |
593 | |
594 } | |
595 if (r == -1 || (size_t)r >= sizeof(pat)) { | |
596 fprintf(stderr, "warning: pattern too large, ignoring\n"… | |
597 return 0; | |
598 } | |
599 | |
600 /* TODO: block type mask */ | |
601 if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) | |
602 return 1; | |
603 | |
604 return 0; | |
605 } | |
606 | |
607 static int | |
608 parserule(struct filterrule *f, char *s) | |
609 { | |
610 struct filtertype key, *ft; | |
611 int inverse = 0; | |
612 char *p, *values; | |
613 | |
614 if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']')) | |
615 return 0; /* skip comment or empty line */ | |
616 for (; *s && isspace(*s); s++) | |
617 ; | |
618 if (!*s) | |
619 return 0; /* line had only whitespace: skip */ | |
620 | |
621 memset(f, 0, sizeof(struct filterrule)); | |
622 | |
623 if ((p = strstr(s, "#@#"))) { | |
624 *p = '\0'; | |
625 if (parsedomainselement(f, s) < 0) | |
626 return -1; | |
627 *p = '#'; | |
628 if (!(f->css = westrdup(p + 3))) | |
629 return -1; | |
630 f->isexception = 1; | |
631 goto end; /* end of CSS rule */ | |
632 } | |
633 | |
634 /* element hiding rule, NOTE: no wildcards are supported, | |
635 "Simplified element hiding syntax" (legacy) is not supported.… | |
636 if ((p = strstr(s, "##"))) { | |
637 *p = '\0'; | |
638 if (parsedomainselement(f, s) < 0) | |
639 return -1; | |
640 *p = '#'; | |
641 if (!(f->css = westrdup(p + 2))) | |
642 return -1; | |
643 goto end; /* end of rule */ | |
644 } | |
645 | |
646 if (!strncmp(s, "@@", 2)) { | |
647 f->isexception = 1; | |
648 s += 2; | |
649 } | |
650 if (*s == '|') { | |
651 s++; | |
652 if (*s == '|') { | |
653 f->matchbegin = 1; | |
654 s++; | |
655 } else { | |
656 f->matchend = 1; | |
657 } | |
658 } | |
659 | |
660 /* no options, use rest of line as uri. */ | |
661 if (!(p = strrchr(s, '$'))) { | |
662 if (!(f->uri = westrdup(s))) | |
663 return -1; | |
664 goto end; | |
665 } | |
666 | |
667 /* has options */ | |
668 if (!(f->uri = westrndup(s, p - s))) | |
669 return -1; | |
670 | |
671 s = ++p; | |
672 | |
673 /* blockmask, has options? default: allow all options, case-sens… | |
674 * has no options? default: block all options, case-sensitive */ | |
675 f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL; | |
676 do { | |
677 if ((p = strchr(s, ','))) | |
678 *p = '\0'; | |
679 /* match option */ | |
680 inverse = 0; | |
681 if (*s == '~') { | |
682 inverse = 1; | |
683 s++; | |
684 } | |
685 if ((values = strchr(s, '='))) | |
686 *(values) = '\0'; | |
687 key.name = s; | |
688 | |
689 ft = bsearch(&key, &filtertypes, | |
690 sizeof(filtertypes) / sizeof(*filtertypes), | |
691 sizeof(*filtertypes), filtertype_cmp); | |
692 | |
693 /* restore NUL-terminator for domain= option */ | |
694 if (values) | |
695 *(values++) = '='; | |
696 | |
697 if (ft) { | |
698 if (inverse) | |
699 f->block &= ~(ft->type); | |
700 else | |
701 f->block |= ft->type; | |
702 if (ft->fn && values) | |
703 ft->fn(f, values); | |
704 } else { | |
705 /* DEBUG */ | |
706 #if 0 | |
707 fprintf(stderr, "ignored: unknown option: '%s' " | |
708 "in rule: %s\n", key.name, f->uri); | |
709 #endif | |
710 } | |
711 | |
712 /* restore ',' */ | |
713 if (p) { | |
714 *p = ','; | |
715 s = p + 1; | |
716 } | |
717 } while (p); | |
718 end: | |
719 | |
720 return 1; | |
721 } | |
722 | |
#if 0
/* DEBUG: print the main fields of a parsed rule to stdout. */
static void
debugrule(struct filterrule *r)
{
	const char *uri = r->uri ? r->uri : "";
	const char *css = r->css ? r->css : "";

	printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
	       "%lu\n===\n", uri, css, r->isexception, r->block);
}
#endif
732 | |
733 static int | |
734 loadrules(FILE *fp) | |
735 { | |
736 struct filterrule f, *r, *rn = NULL; | |
737 char *line = NULL; | |
738 size_t linesiz = 0; | |
739 ssize_t n; | |
740 int ret; | |
741 | |
742 /* load rules */ | |
743 while ((n = getline(&line, &linesiz, fp)) > 0) { | |
744 if (line[n - 1] == '\n') | |
745 line[--n] = '\0'; | |
746 if (n > 0 && line[n - 1] == '\r') | |
747 line[--n] = '\0'; | |
748 | |
749 if ((ret = parserule(&f, line) > 0)) { | |
750 if (!(r = wecalloc(1, sizeof(struct filterrule))… | |
751 return -1; | |
752 if (!rules) | |
753 rules = rn = r; | |
754 else | |
755 rn = rn->next = r; | |
756 memcpy(rn, &f, sizeof(struct filterrule)); | |
757 } else if (ret < 0) { | |
758 return -1; | |
759 } | |
760 } | |
761 if (ferror(fp)) { | |
762 weprintf("getline: %s\n", strerror(errno)); | |
763 return -1; | |
764 } | |
765 return (rules != NULL); | |
766 } | |
767 | |
768 char * | |
769 getglobalcss(void) | |
770 { | |
771 return globalcss.data; | |
772 } | |
773 | |
/*
 * Build the element hiding stylesheet for the page at `fromuri`.
 * The host part is cut out of the URI and the selector of every CSS
 * rule whose domain list matches it (see matchrule()) is appended to a
 * fresh string, each followed by a declaration block; exception rules
 * get "{display:initial;}".
 * Returns the heap-allocated string, or NULL if no rule matched or an
 * append failed. NOTE(review): presumably the caller frees the result;
 * confirm against the callers.
 *
 * NOTE(review): `len` is not checked against sizeof(fromdomain) before
 * the memcpy(), so a host part of 256 bytes or more writes past the
 * stack buffer; a bounds check is needed.
 * NOTE(review): the printf() and clock_gettime() calls are marked
 * DEBUG but still run and print to stdout on every call.
 */
774 char * | |
775 getdocumentcss(const char *fromuri) | |
776 { | |
777 const char *s; | |
778 char fromdomain[256]; | |
779 String sitecss; | |
780 struct filterrule *r; | |
781 size_t len; | |
782 | |
783 /* skip protocol */ | |
784 if ((s = strstr(fromuri, "://"))) | |
785 fromuri = s + sizeof("://") - 1; | |
786 len = strcspn(fromuri, "/"); /* TODO: ":/" */ | |
787 memcpy(fromdomain, fromuri, len); | |
788 fromdomain[len] = '\0'; | |
789 | |
790 printf("fromuri: %s\n", fromuri); | |
791 printf("fromdomain: %s\n", fromdomain); | |
792 | |
793 /* DEBUG: timing */ | |
794 struct timespec tp_start, tp_end, tp_diff; | |
795 if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) { | |
796 fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
797 } | |
798 | |
799 /* site-specific CSS */ | |
800 memset(&sitecss, 0, sizeof(sitecss)); | |
801 for (r = rules; r; r = r->next) { | |
802 if (!r->css || !r->domains || | |
803 !matchrule(r, "", fromdomain, "", "", "", "", "")) | |
804 continue; | |
805 | |
806 len = strlen(r->css); | |
807 if (string_append(&sitecss, r->css, len) < len) | |
808 goto err; | |
809 | |
810 s = r->isexception ? "{display:initial;}" : "{display:no… | |
811 len = strlen(s); | |
812 if (string_append(&sitecss, s, len) < len) | |
813 goto err; | |
814 } | |
815 /* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>… | |
816 | |
817 /* DEBUG: timing */ | |
818 if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
819 fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
820 } | |
821 | |
822 tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
823 tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
824 if (tp_diff.tv_nsec < 0) { | |
825 tp_diff.tv_sec--; | |
826 tp_diff.tv_nsec += 1000000000L; | |
827 } | |
828 | |
829 printf("timing: %lld sec, %.3f ms\n", | |
830 (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000… | |
831 | |
832 if (globalcss.data) | |
833 printf("global CSS length in bytes: %zu\n", strlen(globa… | |
834 if (sitecss.data) | |
835 printf("site CSS length in bytes: %zu\n", strlen(sitecss… | |
836 | |
837 return sitecss.data; | |
838 | |
839 err: | |
840 free(sitecss.data); | |
841 /*memset(&sitecss, 0, sizeof(sitecss));*/ | |
842 | |
843 return NULL; | |
844 } | |
845 | |
/*
 * Check a request for `requri` made by the page at `fromuri` against
 * all non-CSS rules.
 * Returns 1 when the request is allowed and 0 when a rule matched; a
 * match is reported on stderr.
 * NOTE(review): the first matching rule ends the loop and the visible
 * code never looks at r->isexception; confirm whether exception rules
 * are meant to be honoured here.
 * NOTE(review): neither `len` is checked against the 256-byte
 * fromdomain / reqdomain buffers before memcpy(); an over-long host
 * part overflows the stack buffer.
 * NOTE(review): the timing code is marked DEBUG but still runs and
 * prints to stdout for every request.
 */
846 int | |
847 allowrequest(const char *fromuri, const char *requri) | |
848 { | |
849 struct filterrule *r; | |
850 char fromdomain[256], reqdomain[256]; | |
851 const char *s, *reqrel, *fromrel; | |
852 size_t len; | |
853 int status = 1; | |
854 | |
855 /* skip protocol part */ | |
856 if ((s = strstr(fromuri, "://"))) | |
857 fromuri = s + sizeof("://") - 1; | |
858 if ((s = strstr(requri, "://"))) | |
859 requri = s + sizeof("://") - 1; | |
860 | |
861 len = strcspn(fromuri, ":/"); /* TODO: ":/", but support IPV6...… | |
862 memcpy(fromdomain, fromuri, len); | |
863 fromdomain[len] = '\0'; | |
864 | |
865 len = strcspn(requri, ":/"); /* TODO: ":/", but support IPV6... … | |
866 memcpy(reqdomain, requri, len); | |
867 reqdomain[len] = '\0'; | |
868 | |
869 fromrel = &fromuri[strcspn(fromuri, "/")]; | |
870 reqrel = &requri[strcspn(requri, "/")]; | |
871 | |
872 #if 0 | |
873 printf("req %s = %s\n", requri, reqrel); | |
874 printf("from %s = %s\n", fromuri, fromrel); | |
875 #endif | |
876 | |
877 /* DEBUG: timing */ | |
878 struct timespec tp_start, tp_end, tp_diff; | |
879 if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) | |
880 fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
881 | |
882 /* match rules */ | |
883 for (r = rules; r; r = r->next) { | |
884 if (!r->css && matchrule(r, fromuri, fromdomain, | |
885 fromrel, requri, reqdomain, req… | |
886 #if 0 | |
887 printf("reqrel: %s\n", reqrel); | |
888 printf("reqdomain: %s\n", reqdomain); | |
889 printf("requri: %s\n", requri); | |
890 printf("from uri: %s\n", fromuri); | |
891 printf("from domain: %s\n", fromdomain); | |
892 #endif | |
893 | |
894 fprintf(stderr, "blocked: %s, %s\n", fromdomain,… | |
895 fprintf(stderr, "rule: %s\n", r->uri); | |
896 fprintf(stderr, "===\n"); | |
897 | |
898 /* DEBUG: for showing the timing */ | |
899 status = 0; | |
900 goto end; | |
901 /*return 1;*/ | |
902 } | |
903 } | |
904 | |
905 end: | |
906 /* DEBUG: timing */ | |
907 if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
908 fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
909 } | |
910 | |
911 tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
912 tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
913 if (tp_diff.tv_nsec < 0) { | |
914 tp_diff.tv_sec--; | |
915 tp_diff.tv_nsec += 1000000000L; | |
916 } | |
917 | |
918 printf("%s [%s] timing: %lld sec, %.3f ms\n", | |
919 requri, fromuri, (long long)tp_diff.tv_sec, | |
920 (float)tp_diff.tv_nsec / 1000000.0f); | |
921 | |
922 return status; | |
923 } | |
924 | |
925 void | |
926 cleanup(void) | |
927 { | |
928 struct filterrule *r; | |
929 struct filterdomain *d; | |
930 | |
931 free(globalcss.data); | |
932 memset(&globalcss, 0, sizeof(globalcss)); | |
933 | |
934 for (r = rules; r; r = rules) { | |
935 for (d = r->domains; d; d = r->domains) { | |
936 free(d->domain); | |
937 r->domains = d->next; | |
938 free(d); | |
939 } | |
940 free(r->css); | |
941 free(r->uri); | |
942 rules = r->next; | |
943 free(r); | |
944 } | |
945 rules = NULL; | |
946 } | |
947 | |
/*
 * Load the filter rules and pre-build the global stylesheet.
 * The rules file is taken from $SURF_ADBLOCK_FILE if set, otherwise
 * from a path below $HOME ending in ".surf/adblockrules". After
 * loading, the selectors of all CSS rules without a domain list are
 * concatenated into `globalcss`, each followed by a declaration block.
 * Failures are reported with weprintf() and end the function early; a
 * failed append calls cleanup() first.
 * NOTE(review): the "rules file path too long" message has no
 * trailing newline, unlike the other messages.
 */
948 void | |
949 init(void) | |
950 { | |
951 struct filterrule *r; | |
952 FILE *fp; | |
953 const char *s; | |
954 char filepath[PATH_MAX], *e; | |
955 size_t len; | |
956 int n; | |
957 | |
958 if ((e = getenv("SURF_ADBLOCK_FILE"))) { | |
959 n = snprintf(filepath, sizeof(filepath), "%s", e); | |
960 } else { | |
961 if (!(e = getenv("HOME"))) | |
962 e = ""; | |
963 n = snprintf(filepath, sizeof(filepath), | |
964 "%s%s.surf/adblockrules", e, e[0] ? "/" : "… | |
965 } | |
966 if (n < 0 || (size_t)n >= sizeof(filepath)) { | |
967 weprintf("fatal: rules file path too long"); | |
968 return; | |
969 } | |
970 | |
971 if (!(fp = fopen(filepath, "r"))) { | |
972 weprintf("fatal: cannot open rules file %s: %s\n", | |
973 filepath, strerror(errno)); | |
974 return; | |
975 } | |
976 | |
977 n = loadrules(fp); | |
978 fclose(fp); | |
979 if (n < 1) { | |
980 if (n < 0) { | |
981 weprintf("fatal: cannot read rules from file %s:… | |
982 filepath, strerror(errno)); | |
983 } else { | |
984 weprintf("fatal: cannot read any rule from file … | |
985 filepath); | |
986 } | |
987 return; | |
988 } | |
989 | |
990 /* general CSS rules: all sites */ | |
991 for (r = rules; r; r = r->next) { | |
992 if (!r->css || r->domains) | |
993 continue; | |
994 | |
995 len = strlen(r->css); | |
996 if (string_append(&globalcss, r->css, len) < len) { | |
997 weprintf("cannot append CSS rule to global CSS s… | |
998 cleanup(); | |
999 return; | |
1000 } | |
1001 | |
1002 s = r->isexception ? "{display:initial;}" : "{display:no… | |
1003 len = strlen(s); | |
1004 if (string_append(&globalcss, s, len) < len) { | |
1005 weprintf("cannot append CSS rule to global CSS s… | |
1006 cleanup(); | |
1007 return; | |
1008 } | |
1009 } | |
1010 } | |