separate adblock and surf-specific code - surf-adblock - Surf adblock web exten… | |
git clone git://git.codemadness.org/surf-adblock | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 08e747efa80a44603f80db0fdacb3f63ad210b8e | |
parent c4841f33f64cd77b30def9c0b11d1ac9ece7f821 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sun, 4 Jun 2017 14:15:27 +0200 | |
separate adblock and surf-specific code | |
this will be useful later on for writing test-cases also or re-using | |
the code in a daemon. | |
Diffstat: | |
M Makefile | 2 +- | |
M TODO | 2 ++ | |
A adblock.c | 956 +++++++++++++++++++++++++++++… | |
M surf-adblock.c | 944 +----------------------------… | |
4 files changed, 973 insertions(+), 931 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -1,6 +1,6 @@ | |
include config.mk | |
-SRC = surf-adblock.c | |
+SRC = surf-adblock.c adblock.c | |
OBJ = ${SRC:.c=.lo} | |
all: surf-adblock.la | |
diff --git a/TODO b/TODO | |
@@ -1,3 +1,5 @@ | |
+- optimize towupper for fnmatch? check < 128, see musl optimization. | |
+ | |
- fix blocking of : ||ads.somesite.com^ | |
- fix tweakers.net popup / rule. | |
diff --git a/adblock.c b/adblock.c | |
@@ -0,0 +1,956 @@ | |
+#include <sys/stat.h> | |
+#include <sys/types.h> | |
+ | |
+#include <ctype.h> | |
+#include <errno.h> | |
+#include <fcntl.h> | |
+#include <limits.h> | |
+#include <stdarg.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <wchar.h> | |
+#include <wctype.h> | |
+ | |
+#include "adblock.h" | |
+ | |
+/* String data / memory pool: a growable, NUL-terminated byte buffer. | |
+ * It is grown by string_buffer_realloc() and filled by string_append(). */ | |
+typedef struct string { | |
+ char *data; /* data */ | |
+ size_t datasz; /* allocated size */ | |
+ size_t len; /* current string length */ | |
+} String; | |
+ | |
+/* One entry of a rule's domain list, built by parsedomains(). */ | |
+struct filterdomain { | |
+ char *domain; /* heap-allocated domain name, freed in cleanup() */ | |
+ int inverse; /* non-zero if the entry was prefixed with '~' */ | |
+ struct filterdomain *next; /* next entry, NULL at the end of the list */ | |
+}; | |
+ | |
+/* A single parsed filter rule: either a URI pattern rule (`uri` set) or | |
+ * an element hiding rule (`css` set). Filled in by parserule(). */ | |
+struct filterrule { | |
+ /* type: match mask, must be atleast 32-bit, see FilterType enum */ | |
+ unsigned long block; | |
+ /* set by parserule() when the pattern starts with "||" */ | |
+ int matchbegin; | |
+ /* set by parserule() when the pattern starts with a single '|' */ | |
+ int matchend; | |
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */ | |
+ int isexception; | |
+ char *css; /* if non-NULL is CSS rule / hide element rule */ | |
+ char *uri; /* pattern text without the "$options" part */ | |
+ struct filterdomain *domains; /* optional domain restrictions */ | |
+ struct filterrule *next; /* next rule in the global `rules` list */ | |
+}; | |
+ | |
+/* Bit flags combined in filterrule.block and named in filtertype.type. */ | |
+enum { | |
+ FilterTypeScript = 1 << 0, | |
+ FilterTypeImage = 1 << 1, | |
+ FilterTypeCSS = 1 << 2, | |
+ FilterTypeObject = 1 << 3, | |
+ FilterTypeXHR = 1 << 4, | |
+ FilterTypeObjectSub = 1 << 5, | |
+ FilterTypeSubDoc = 1 << 6, | |
+ FilterTypePing = 1 << 7, | |
+ FilterTypeDocument = 1 << 8, | |
+ FilterTypeElemHide = 1 << 9, | |
+ FilterTypeOther = 1 << 10, | |
+ FilterTypeGenericHide = 1 << 11, | |
+ FilterTypeGenericBlock = 1 << 12, | |
+ /* when set, matchrule() compares the URI case-sensitively */ | |
+ FilterTypeMatchCase = 1 << 13, | |
+}; | |
+ | |
+/* Describes one rule option name (the part after '$' in a rule). */ | |
+struct filtertype { | |
+ /* `type` must be atleast 32-bit, see FilterType enum */ | |
+ unsigned long type; | |
+ char *name; /* option name, the bsearch() key in parserule() */ | |
+ size_t namelen; /* length of `name` without the NUL, see STRP() */ | |
+ /* NOTE(review): the three flags below are not read by any code in | |
+ * this file; their meaning is inferred from the names only. */ | |
+ int allowinverse; | |
+ int allownormal; | |
+ int onlyexception; | |
+ /* optional handler called with the text after "name=" */ | |
+ int (*fn)(struct filterrule *, char *); | |
+}; | |
+ | |
+static int parsedomainsoption(struct filterrule *, char *); | |
+ | |
+/* expands a string literal to "literal, length-without-NUL" */ | |
+#define STRP(s) s,sizeof(s)-1 | |
+ | |
+/* Table of known rule options. Entries must stay sorted by name in | |
+ * strcmp() order: parserule() looks options up with bsearch() and | |
+ * filtertype_cmp(). */ | |
+static struct filtertype filtertypes[] = { | |
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as | |
+ * requests for now */ | |
+ { 0, STRP("collapse"), 1, 1, 0, NULL }, | |
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL }, | |
+ { 0, STRP("domain"), 0, 1, 0, | |
+ /* domain=... */ &parsedomainsoption }, | |
+ { 0, STRP("donottrack"), 1, 1, 0, NULL }, | |
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL }, | |
+ { 0, STRP("font"), 1, 1, 0, NULL }, | |
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL }, | |
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL }, | |
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL }, | |
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL }, | |
+ { 0, STRP("media"), 1, 1, 0, NULL }, | |
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL }, | |
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL }, | |
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL }, | |
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL }, | |
+ { 0, STRP("popup"), 1, 1, 0, NULL }, | |
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL }, | |
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL }, | |
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL }, | |
+ { 0, STRP("third-party"), 1, 1, 0, NULL }, | |
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL }, | |
+ /* NOTE: site-key not supported */ | |
+}; | |
+ | |
+/* CSS for element hiding rules without a domain list, built in init() */ | |
+static String globalcss; | |
+/* head of the list of all loaded rules, appended to by loadrules() */ | |
+static struct filterrule *rules; | |
+ | |
+/* Print a printf-style warning to stderr, prefixed with the program name. | |
+ * No newline is appended; callers put it in `fmt`. */ | |
+static void | |
+weprintf(const char *fmt, ...) | |
+{ | |
+	va_list args; | |
+ | |
+	fputs("surf-adblock: ", stderr); | |
+	va_start(args, fmt); | |
+	vfprintf(stderr, fmt, args); | |
+	va_end(args); | |
+} | |
+ | |
+/* calloc() wrapper: warns on stderr when the allocation fails. | |
+ * Returns the zeroed memory, or NULL on failure. */ | |
+static void * | |
+wecalloc(size_t nmemb, size_t size) | |
+{ | |
+	void *mem = calloc(nmemb, size); | |
+ | |
+	if (mem == NULL) | |
+		weprintf("calloc: %s\n", strerror(errno)); | |
+	return mem; | |
+} | |
+ | |
+/* strndup() wrapper: warns on stderr when the allocation fails. | |
+ * Returns the new string (caller frees), or NULL on failure. */ | |
+static char * | |
+westrndup(const char *s, size_t n) | |
+{ | |
+	char *copy = strndup(s, n); | |
+ | |
+	if (copy == NULL) | |
+		weprintf("strndup: %s\n", strerror(errno)); | |
+	return copy; | |
+} | |
+ | |
+/* strdup() wrapper: warns on stderr when the allocation fails. | |
+ * Returns the new string (caller frees), or NULL on failure. */ | |
+static char * | |
+westrdup(const char *s) | |
+{ | |
+	char *copy = strdup(s); | |
+ | |
+	if (copy == NULL) | |
+		weprintf("strdup: %s\n", strerror(errno)); | |
+	return copy; | |
+} | |
+ | |
+/* Free the global CSS buffer and every loaded rule. | |
+ * Afterwards the module state is empty again, so calling cleanup() | |
+ * twice or calling getglobalcss() after it is safe. */ | |
+void | |
+cleanup(void) | |
+{ | |
+	struct filterrule *r; | |
+	struct filterdomain *d; | |
+ | |
+	free(globalcss.data); | |
+	/* reset data/datasz/len: init() calls cleanup() on error and | |
+	 * getglobalcss() must not hand out the freed buffer afterwards */ | |
+	memset(&globalcss, 0, sizeof(globalcss)); | |
+ | |
+	while ((r = rules)) { | |
+		while ((d = r->domains)) { | |
+			r->domains = d->next; | |
+			free(d->domain); | |
+			free(d); | |
+		} | |
+		free(r->css); | |
+		free(r->uri); | |
+		rules = r->next; | |
+		free(r); | |
+	} | |
+} | |
+ | |
+/* Grow the buffer of `s` to the smallest power of two (at least 64) | |
+ * that is greater than `newsz`. | |
+ * Returns the allocated size afterwards; on failure the buffer is left | |
+ * untouched and the old size is returned, so callers detect the error | |
+ * by comparing the result with the size they asked for. */ | |
+static size_t | |
+string_buffer_realloc(String *s, size_t newsz) | |
+{ | |
+	char *tmp; | |
+	size_t allocsz; | |
+ | |
+	for (allocsz = 64; allocsz <= newsz; allocsz *= 2) { | |
+		/* doubling again would wrap around to 0 and never end */ | |
+		if (allocsz > (size_t)-1 / 2) { | |
+			weprintf("realloc: %s\n", strerror(ENOMEM)); | |
+			return s->datasz; | |
+		} | |
+	} | |
+	if (!(tmp = realloc(s->data, allocsz))) { | |
+		weprintf("realloc: %s\n", strerror(errno)); | |
+	} else { | |
+		s->data = tmp; | |
+		s->datasz = allocsz; | |
+	} | |
+ | |
+	return s->datasz; | |
+} | |
+ | |
+/* Append `len` bytes of `data` to `s` and keep it NUL-terminated. | |
+ * Returns `len` on success and 0 if the buffer could not be grown | |
+ * (0 is also returned for an empty append). */ | |
+static size_t | |
+string_append(String *s, const char *data, size_t len) | |
+{ | |
+	size_t total; | |
+ | |
+	if (len == 0) | |
+		return 0; | |
+ | |
+	total = s->len + len; | |
+	/* the terminator needs a byte too: grow once the new length | |
+	 * reaches the allocated size, never shrink */ | |
+	if (total >= s->datasz && | |
+	    string_buffer_realloc(s, total + 1) <= total) | |
+		return 0; | |
+ | |
+	memcpy(&s->data[s->len], data, len); | |
+	s->data[total] = '\0'; | |
+	s->len = total; | |
+ | |
+	return len; | |
+} | |
+ | |
+/* Special values returned by pat_next() next to plain character codes. */ | |
+#define END 0 | |
+#define UNMATCHABLE -2 | |
+/* NOTE(review): BRACKET is not referenced by any code in this file */ | |
+#define BRACKET -3 | |
+#define CARET -4 | |
+#define STAR -5 | |
+ | |
+/* Decode the next character of `str`, reading at most `n` bytes. | |
+ * Stores the number of bytes consumed in `*step`. | |
+ * Returns the character, 0 when `n` is 0, or -1 (with a step of 1) | |
+ * when mbtowc() rejects the byte sequence. */ | |
+static int | |
+str_next(const char *str, size_t n, size_t *step) | |
+{ | |
+	wchar_t wc; | |
+	int nbytes; | |
+ | |
+	if (n == 0) { | |
+		*step = 0; | |
+		return 0; | |
+	} | |
+	/* single byte below 128: no multibyte decoding needed */ | |
+	if (str[0] < 128U) { | |
+		*step = 1; | |
+		return str[0]; | |
+	} | |
+ | |
+	nbytes = mbtowc(&wc, str, n); | |
+	if (nbytes < 0) { | |
+		*step = 1; | |
+		return -1; | |
+	} | |
+	*step = nbytes; | |
+ | |
+	return wc; | |
+} | |
+ | |
+/* Decode the next token of pattern `pat`, reading at most `m` bytes. | |
+ * Stores the number of pattern bytes consumed in `*step`. | |
+ * Returns END at the end of the pattern, CARET for '^', STAR for '*', | |
+ * UNMATCHABLE (with a step of 0) when mbtowc() rejects the bytes, and | |
+ * the character code otherwise. A backslash followed by another | |
+ * character makes that character literal. */ | |
+static int | |
+pat_next(const char *pat, size_t m, size_t *step) | |
+{ | |
+	wchar_t wc; | |
+	int escaped, nbytes; | |
+ | |
+	if (m == 0 || pat[0] == '\0') { | |
+		*step = 0; | |
+		return END; | |
+	} | |
+ | |
+	escaped = (pat[0] == '\\' && pat[1] != '\0'); | |
+	if (escaped) { | |
+		/* skip the backslash, the next character is literal */ | |
+		pat++; | |
+	} else if (pat[0] == '^') { | |
+		*step = 1; | |
+		return CARET; | |
+	} else if (pat[0] == '*') { | |
+		*step = 1; | |
+		return STAR; | |
+	} | |
+	*step = 1 + escaped; | |
+ | |
+	if (pat[0] < 128U) | |
+		return pat[0]; | |
+ | |
+	nbytes = mbtowc(&wc, pat, m); | |
+	if (nbytes < 0) { | |
+		*step = 0; | |
+		return UNMATCHABLE; | |
+	} | |
+	*step = nbytes + escaped; | |
+ | |
+	return wc; | |
+} | |
+ | |
+/* Return the other-case form of `k`: the upper-case form if it differs | |
+ * from `k`, else the lower-case form (which is `k` for caseless input). */ | |
+static int | |
+casefold(int k) | |
+{ | |
+	int upper = towupper(k); | |
+ | |
+	if (upper != k) | |
+		return upper; | |
+	return towlower(k); | |
+} | |
+ | |
+/* match() based on musl-libc fnmatch: | |
+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c | |
+ | |
+ Match the NUL-terminated string `str` against the pattern `pat`. | |
+ Returns 0 when the whole string matches and 1 otherwise. | |
+ When `fcase` is non-zero a string character also matches if its | |
+ casefold() form equals the pattern character. | |
+ Pattern tokens come from pat_next(): '*' matches any run of characters, | |
+ '^' yields CARET and a backslash makes the next character literal. */ | |
+static int | |
+match(const char *pat, const char *str, int fcase) | |
+{ | |
+ size_t m = -1, n = -1; | |
+ const char *p, *ptail, *endpat; | |
+ const char *s, *stail, *endstr; | |
+ size_t pinc, sinc, tailcnt=0; | |
+ int c, k, kfold; | |
+ | |
+ /* Step 1: match the literal prefix of pat up to the first '*' */ | |
+ for (;;) { | |
+ switch ((c = pat_next(pat, m, &pinc))) { | |
+ case UNMATCHABLE: | |
+ return 1; | |
+ case STAR: | |
+ pat++; | |
+ m--; | |
+ break; | |
+ default: | |
+ k = str_next(str, n, &sinc); | |
+ /* TODO: write a test-case */ | |
+ /* NOTE(review): this is the only place where CARET is | |
+ * special-cased, and it reports "no match" when '^' meets | |
+ * '?', '/' or the end of the string - confirm intent */ | |
+ if (c == CARET && (k == '?' || k == '/' || k <= 0)) | |
+ return 1; | |
+ if (k <= 0) | |
+ return (c==END) ? 0 : 1; | |
+ str += sinc; | |
+ n -= sinc; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ return 1; | |
+ pat+=pinc; | |
+ m-=pinc; | |
+ continue; | |
+ } | |
+ break; | |
+ } | |
+ | |
+ /* Compute real pat length if it was initially unknown/-1 */ | |
+ m = strnlen(pat, m); | |
+ endpat = pat + m; | |
+ | |
+ /* Find the last * in pat and count chars needed after it */ | |
+ for (p=ptail=pat; p<endpat; p+=pinc) { | |
+ switch (pat_next(p, endpat-p, &pinc)) { | |
+ case UNMATCHABLE: | |
+ return 1; | |
+ case STAR: | |
+ tailcnt=0; | |
+ ptail = p+1; | |
+ break; | |
+ default: | |
+ tailcnt++; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Past this point we need not check for UNMATCHABLE in pat, | |
+ * because all of pat has already been parsed once. */ | |
+ | |
+ /* Compute real str length if it was initially unknown/-1 */ | |
+ n = strnlen(str, n); | |
+ endstr = str + n; | |
+ if (n < tailcnt) return 1; | |
+ | |
+ /* Find the final tailcnt chars of str, accounting for UTF-8. | |
+ * On illegal sequences we may get it wrong, but in that case | |
+ * we necessarily have a matching failure anyway. */ | |
+ for (s=endstr; s>str && tailcnt; tailcnt--) { | |
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--; | |
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str); | |
+ } | |
+ if (tailcnt) return 1; | |
+ stail = s; | |
+ | |
+ /* Check that the pat and str tails match */ | |
+ p = ptail; | |
+ for (;;) { | |
+ c = pat_next(p, endpat-p, &pinc); | |
+ p += pinc; | |
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) { | |
+ if (c != END) return 1; | |
+ break; | |
+ } | |
+ s += sinc; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ return 1; | |
+ } | |
+ | |
+ /* We're all done with the tails now, so throw them out */ | |
+ endstr = stail; | |
+ endpat = ptail; | |
+ | |
+ /* Match pattern components until there are none left */ | |
+ while (pat<endpat) { | |
+ p = pat; | |
+ s = str; | |
+ for (;;) { | |
+ c = pat_next(p, endpat-p, &pinc); | |
+ p += pinc; | |
+ /* Encountering * completes/commits a component */ | |
+ if (c == STAR) { | |
+ pat = p; | |
+ str = s; | |
+ break; | |
+ } | |
+ k = str_next(s, endstr-s, &sinc); | |
+ if (!k) | |
+ return 1; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ break; | |
+ s += sinc; | |
+ } | |
+ if (c == STAR) continue; | |
+ /* If we failed, advance str, by 1 char if it's a valid | |
+ * char, or past all invalid bytes otherwise. */ | |
+ k = str_next(str, endstr-str, &sinc); | |
+ if (k > 0) str += sinc; | |
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++); | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+/* | |
+Parse a list of domains separated by `sep` into a newly allocated list. | |
+A domain prefixed with ~ is stored with `inverse` set. | |
+(domain=... option: separator is |, element hiding rules: separator is ,) | |
+Parsing stops at the first empty entry. | |
+ | |
+On success `*head` is the list (NULL if no domain was found) and the | |
+return value is 1 if at least one domain was parsed, else 0. | |
+On allocation failure everything parsed so far is freed, `*head` is set | |
+to NULL and -1 is returned. | |
+*/ | |
+static int | |
+parsedomains(const char *s, int sep, struct filterdomain **head) | |
+{ | |
+	struct filterdomain *d, *last = NULL; | |
+	char *p = NULL; | |
+	int inverse; | |
+ | |
+	*head = NULL; | |
+	do { | |
+		inverse = 0; | |
+		if (*s == '~') { | |
+			inverse = 1; | |
+			s++; | |
+		} | |
+		if (!*s || *s == sep) | |
+			break; | |
+ | |
+		if (!(d = wecalloc(1, sizeof(*d)))) | |
+			goto err; | |
+		if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */ | |
+			d->domain = westrndup(s, p - s); | |
+			s = p + 1; | |
+		} else { | |
+			d->domain = westrdup(s); | |
+		} | |
+		if (!d->domain) { | |
+			/* not linked into the list yet: free it here */ | |
+			free(d); | |
+			goto err; | |
+		} | |
+		d->inverse = inverse; | |
+ | |
+		if (!*head) | |
+			*head = last = d; | |
+		else | |
+			last = last->next = d; | |
+	} while (p); | |
+ | |
+	return (*head != NULL); | |
+ | |
+err: | |
+	/* callers drop the list on error, so release it to avoid a leak */ | |
+	while ((d = *head)) { | |
+		*head = d->next; | |
+		free(d->domain); | |
+		free(d); | |
+	} | |
+	return -1; | |
+} | |
+ | |
+/* Parse the ','-separated domain list of an element hiding rule and | |
+ * append it to the domains of `f`. | |
+ * Returns 1 if domains were added, 0 if there were none, -1 on error. */ | |
+static int | |
+parsedomainselement(struct filterrule *f, char *s) | |
+{ | |
+	struct filterdomain *parsed, **tailp; | |
+ | |
+	/* find the link that terminates the current list */ | |
+	for (tailp = &f->domains; *tailp; tailp = &(*tailp)->next) | |
+		; | |
+ | |
+	if (parsedomains(s, ',', &parsed) < 0) | |
+		return -1; | |
+	*tailp = parsed; | |
+ | |
+	return parsed != NULL; | |
+} | |
+ | |
+/* Handler for the "domain=" rule option: parse the '|'-separated | |
+ * domain list `s` and append it to the domains of `f`. | |
+ * Returns 1 if domains were added, 0 if there were none, -1 on error. */ | |
+static int | |
+parsedomainsoption(struct filterrule *f, char *s) | |
+{ | |
+	struct filterdomain *parsed, **tailp; | |
+ | |
+	/* find the link that terminates the current list */ | |
+	for (tailp = &f->domains; *tailp; tailp = &(*tailp)->next) | |
+		; | |
+ | |
+	if (parsedomains(s, '|', &parsed) < 0) | |
+		return -1; | |
+	*tailp = parsed; | |
+ | |
+	return parsed != NULL; | |
+} | |
+ | |
+/* bsearch() comparison callback: order two filtertype entries by name. */ | |
+static int | |
+filtertype_cmp(const void *a, const void *b) | |
+{ | |
+	const struct filtertype *lhs = a; | |
+	const struct filtertype *rhs = b; | |
+ | |
+	return strcmp(lhs->name, rhs->name); | |
+} | |
+ | |
+/* Check if `domain` equals `s` or is a subdomain of `s`. | |
+ * The comparison is case-sensitive. Returns 1 on a match, else 0. */ | |
+static int | |
+matchdomain(const char *s, const char *domain) | |
+{ | |
+	size_t slen = strlen(s); | |
+	size_t dlen = strlen(domain); | |
+	const char *suffix; | |
+ | |
+	/* `s` is more specific (longer) or a different domain */ | |
+	if (slen > dlen) | |
+		return 0; | |
+	if (slen == dlen) | |
+		return strcmp(s, domain) == 0; | |
+ | |
+	/* subdomain: `domain` must end in "." followed by `s` */ | |
+	suffix = domain + (dlen - slen); | |
+	if (suffix[-1] != '.') | |
+		return 0; | |
+ | |
+	return strcmp(suffix, s) == 0; | |
+} | |
+ | |
+/* Test rule `f` against a request. | |
+ * `uri` is the requested URI and `domain` the domain of the page. | |
+ * `type` is only referenced in disabled (#if 0) code. | |
+ * For CSS rules (f->css set) the result of the domain check is | |
+ * returned. For URI rules 1 is returned when the domain check passes | |
+ * and the pattern matches `uri`, else 0. */ | |
+static int | |
+matchrule(struct filterrule *f, const char *uri, const char *type, | |
+ const char *domain) | |
+{ | |
+ /* NOTE: order matters, see FilterType enum values */ | |
+ struct filterdomain *d; | |
+ char pat[1024]; | |
+ int r, m; | |
+ | |
+ /* domain check: a rule without domains applies everywhere */ | |
+ r = f->domains ? 0 : 1; | |
+ for (d = f->domains; d; d = d->next) { | |
+ if (matchdomain(d->domain, domain)) { | |
+ if (r && d->inverse) | |
+ r = 0; | |
+ else if (!r && !d->inverse) | |
+ r = 1; | |
+ } else if (r && !d->inverse) { | |
+ r = 0; | |
+ } | |
+ } | |
+ if (f->css) { | |
+ /* DEBUG */ | |
+#if 0 | |
+ if (f->isexception) | |
+ printf("DEBUG, exception rule, CSS: %s, match? %d\n", | |
+ f->css, r); | |
+#endif | |
+ return r; | |
+ } | |
+ | |
+#if 1 | |
+ /* skip allow rule, TODO: inverse? */ | |
+ if (!r) | |
+ return 0; | |
+#endif | |
+ | |
+#if 0 | |
+ /* DEBUG: test, match if it is a simple pattern */ | |
+ char *p; | |
+ p = strchr(f->uri, '*'); | |
+ if (!p) | |
+ p = strchr(f->uri, '^'); | |
+ if (!p) { | |
+ /* TODO: write a test-case */ | |
+ if (f->block & FilterTypeMatchCase) { | |
+ if (f->matchbegin) | |
+ m = strncmp(uri, f->uri, strlen(f->uri)) == 0; | |
+ else if (f->matchend) | |
+ m = strlen(f->uri) <= strlen(uri) && | |
+ strcmp(&uri[strlen(uri) - strlen(f->ur… | |
+ else | |
+ m = strstr(uri, f->uri) ? 1 : 0; | |
+ } else { | |
+ if (f->matchbegin) | |
+ m = strncasecmp(uri, f->uri, strlen(f->uri)) =… | |
+ else if (f->matchend) | |
+ m = strlen(f->uri) <= strlen(uri) && | |
+ strcasecmp(&uri[strlen(uri) - strlen(f… | |
+ else | |
+ m = strcasestr(uri, f->uri) ? 1 : 0; | |
+ } | |
+ /*m = r ? !m : m;*/ | |
+ return m; | |
+ } | |
+#endif | |
+ | |
+ /* build the pattern: a '*' is added on each side that is not | |
+ * anchored by matchbegin / matchend; `r` is reused for the length */ | |
+ r = snprintf(pat, sizeof(pat), "%s%s%s", | |
+ f->matchbegin ? "" : "*", | |
+ f->uri, | |
+ f->matchend ? "" : "*"); | |
+ if (r == -1 || (size_t)r >= sizeof(pat)) { | |
+ fprintf(stderr, "warning: pattern too large, ignoring\n"); | |
+ return 0; | |
+ } | |
+ | |
+ m = 0; | |
+ /* match() returns 0 on a match; case is folded unless match-case is set */ | |
+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) { | |
+#if 0 | |
+ for (; *type; type++) { | |
+ for (i = 0; blockstr[i]; i++) { | |
+ if (blockstr[i] == *type && | |
+ f->block & (1 << i)) | |
+ printf("block type '%c'\n", blockstr[i… | |
+ return 1; | |
+ } | |
+ } | |
+ } | |
+ | |
+ return 0; | |
+#endif | |
+ m = 1; | |
+ } | |
+ /*m = r ? !m : m;*/ | |
+ return m; | |
+} | |
+ | |
+/* Parse one line `s` of a filter list into `f`. | |
+ * `s` is modified temporarily while parsing and restored afterwards. | |
+ * Returns 1 if `f` now holds a rule (the caller owns its strings and | |
+ * domain list), 0 if the line is a comment, a "[...]" header or blank | |
+ * (`f` is untouched), and -1 on allocation failure (everything | |
+ * allocated for `f` has been freed again). */ | |
+static int | |
+parserule(struct filterrule *f, char *s) | |
+{ | |
+	struct filtertype key, *ft; | |
+	struct filterdomain *d; | |
+	int inverse = 0; | |
+	char *p, *values; | |
+ | |
+	if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']')) | |
+		return 0; /* skip comment or empty line */ | |
+	/* cast: isspace() is undefined for negative char values */ | |
+	for (; *s && isspace((unsigned char)*s); s++) | |
+		; | |
+	if (!*s) | |
+		return 0; /* line had only whitespace: skip */ | |
+ | |
+	memset(f, 0, sizeof(struct filterrule)); | |
+ | |
+	/* element hiding exception rule: domains#@#selector */ | |
+	if ((p = strstr(s, "#@#"))) { | |
+		*p = '\0'; | |
+		if (parsedomainselement(f, s) < 0) | |
+			goto err; | |
+		*p = '#'; | |
+		if (!(f->css = westrdup(p + 3))) | |
+			goto err; | |
+		f->isexception = 1; | |
+		goto end; /* end of CSS rule */ | |
+	} | |
+ | |
+	/* element hiding rule, NOTE: no wildcards are supported, | |
+	   "Simplified element hiding syntax" is not supported. */ | |
+	if ((p = strstr(s, "##"))) { | |
+		*p = '\0'; | |
+		if (parsedomainselement(f, s) < 0) | |
+			goto err; | |
+		*p = '#'; | |
+		if (!(f->css = westrdup(p + 2))) | |
+			goto err; | |
+		goto end; /* end of rule */ | |
+	} | |
+ | |
+	if (!strncmp(s, "@@", 2)) { | |
+		f->isexception = 1; | |
+		s += 2; | |
+	} | |
+	if (*s == '|') { | |
+		s++; | |
+		if (*s == '|') { | |
+			f->matchbegin = 1; | |
+			s++; | |
+		} else { | |
+			f->matchend = 1; | |
+		} | |
+	} | |
+ | |
+	/* no options, use rest of line as uri. */ | |
+	if (!(p = strrchr(s, '$'))) { | |
+		if (!(f->uri = westrdup(s))) | |
+			goto err; | |
+		goto end; | |
+	} | |
+ | |
+	/* has options */ | |
+	if (!(f->uri = westrndup(s, p - s))) | |
+		goto err; | |
+	s = ++p; | |
+ | |
+	/* initial mask: only the match-case bit when option text follows | |
+	 * the '$', all bits when nothing follows it */ | |
+	f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL; | |
+	do { | |
+		if ((p = strchr(s, ','))) | |
+			*p = '\0'; | |
+		/* match option */ | |
+		inverse = 0; | |
+		if (*s == '~') { | |
+			inverse = 1; | |
+			s++; | |
+		} | |
+		if ((values = strchr(s, '='))) | |
+			*(values) = '\0'; | |
+		/* only `name` is read by filtertype_cmp() */ | |
+		key.name = s; | |
+ | |
+		ft = bsearch(&key, filtertypes, | |
+		             sizeof(filtertypes) / sizeof(*filtertypes), | |
+		             sizeof(*filtertypes), filtertype_cmp); | |
+ | |
+		/* restore the '=' and step to the option value */ | |
+		if (values) | |
+			*(values++) = '='; | |
+ | |
+		if (ft) { | |
+			if (inverse) | |
+				f->block &= ~(ft->type); | |
+			else | |
+				f->block |= ft->type; | |
+			/* the handler fails only when out of memory */ | |
+			if (ft->fn && values && ft->fn(f, values) < 0) | |
+				goto err; | |
+		} else { | |
+			/* DEBUG */ | |
+			fprintf(stderr, "ignored: unknown option: '%s' " | |
+			        "in rule: %s\n", key.name, f->uri); | |
+		} | |
+ | |
+		/* restore ',' */ | |
+		if (p) { | |
+			*p = ','; | |
+			s = p + 1; | |
+		} | |
+	} while (p); | |
+end: | |
+ | |
+	return 1; | |
+ | |
+err: | |
+	/* release the partially built rule so the caller leaks nothing */ | |
+	while ((d = f->domains)) { | |
+		f->domains = d->next; | |
+		free(d->domain); | |
+		free(d); | |
+	} | |
+	free(f->css); | |
+	free(f->uri); | |
+	memset(f, 0, sizeof(struct filterrule)); | |
+ | |
+	return -1; | |
+} | |
+ | |
+#if 0 | |
+/* DEBUG helper (compiled out): print the fields of rule `r` to stdout */ | |
+static void | |
+debugrule(struct filterrule *r) | |
+{ | |
+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: " | |
+ "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "", | |
+ r->isexception, r->block); | |
+} | |
+#endif | |
+ | |
+/* Read filter rules line by line from `fp` and append them to the | |
+ * global `rules` list. | |
+ * Returns 1 if the list holds at least one rule afterwards, 0 if it is | |
+ * empty and -1 on a read or allocation error (rules loaded before the | |
+ * error stay in the list). */ | |
+static int | |
+loadrules(FILE *fp) | |
+{ | |
+	struct filterrule f, *r, *rn; | |
+	struct filterdomain *d; | |
+	char *line = NULL; | |
+	size_t linesiz = 0; | |
+	ssize_t n; | |
+	int ret; | |
+ | |
+	/* continue after the last rule that is already loaded */ | |
+	for (rn = rules; rn && rn->next; rn = rn->next) | |
+		; | |
+ | |
+	while ((n = getline(&line, &linesiz, fp)) > 0) { | |
+		/* strip "\n" or "\r\n" */ | |
+		if (line[n - 1] == '\n') | |
+			line[--n] = '\0'; | |
+		if (n > 0 && line[n - 1] == '\r') | |
+			line[--n] = '\0'; | |
+ | |
+		/* parenthesized so `ret` holds the real result, not the | |
+		 * outcome of a comparison */ | |
+		if ((ret = parserule(&f, line)) < 0) | |
+			goto err; | |
+		if (!ret) | |
+			continue; /* comment or blank line */ | |
+ | |
+		if (!(r = wecalloc(1, sizeof(*r)))) { | |
+			/* the parsed rule was not stored: free its parts */ | |
+			while ((d = f.domains)) { | |
+				f.domains = d->next; | |
+				free(d->domain); | |
+				free(d); | |
+			} | |
+			free(f.css); | |
+			free(f.uri); | |
+			goto err; | |
+		} | |
+		memcpy(r, &f, sizeof(*r)); | |
+		r->next = NULL; | |
+		if (rn) | |
+			rn->next = r; | |
+		else | |
+			rules = r; | |
+		rn = r; | |
+	} | |
+	if (ferror(fp)) | |
+		goto err; | |
+ | |
+	free(line); | |
+	return (rules != NULL); | |
+ | |
+err: | |
+	free(line); | |
+	return -1; | |
+} | |
+ | |
+/* Return the CSS built by init() for rules that apply to all sites. | |
+ * The buffer stays owned by this module; the result may be NULL. */ | |
+char * | |
+getglobalcss(void) | |
+{ | |
+	char *css = globalcss.data; | |
+ | |
+	return css; | |
+} | |
+ | |
+/* Build the site-specific element hiding CSS for the page at `uri`. | |
+ * Only CSS rules that carry a domain list are considered; exception | |
+ * rules get "{display:initial;}", the others "{display:none;}". | |
+ * Returns a newly allocated string (caller frees), or NULL if `uri` is | |
+ * NULL, has no "://", has an over-long host part, no rule applies, or | |
+ * memory runs out. Debug and timing output is printed to stdout. */ | |
+char * | |
+getdocumentcss(const char *uri) | |
+{ | |
+	const char *s; | |
+	char domain[256]; | |
+	String sitecss; | |
+	struct filterrule *r; | |
+	struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff; | |
+	size_t len; | |
+ | |
+	if (!uri) | |
+		return NULL; | |
+ | |
+	if (!(s = strstr(uri, "://"))) | |
+		return NULL; | |
+	s += sizeof("://") - 1; | |
+	len = strcspn(s, "/"); | |
+	/* the host part comes from the page URI: never overflow `domain` */ | |
+	if (len >= sizeof(domain)) { | |
+		weprintf("domain too long, ignoring: %s\n", uri); | |
+		return NULL; | |
+	} | |
+	memcpy(domain, s, len); | |
+	domain[len] = '\0'; | |
+ | |
+	printf("uri: %s\n", uri); | |
+	printf("domain: %s\n", domain); | |
+ | |
+	/* DEBUG: timing */ | |
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) { | |
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
+	} | |
+ | |
+	/* site-specific CSS */ | |
+	memset(&sitecss, 0, sizeof(sitecss)); | |
+	for (r = rules; r; r = r->next) { | |
+		if (!r->css || !r->domains || !matchrule(r, "", "", domain)) | |
+			continue; | |
+ | |
+		len = strlen(r->css); | |
+		if (string_append(&sitecss, r->css, len) < len) | |
+			goto err; | |
+ | |
+		s = r->isexception ? "{display:initial;}" : "{display:none;}"; | |
+		len = strlen(s); | |
+		if (string_append(&sitecss, s, len) < len) | |
+			goto err; | |
+	} | |
+/*	printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/ | |
+ | |
+	/* DEBUG: timing */ | |
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
+	} | |
+ | |
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
+	if (tp_diff.tv_nsec < 0) { | |
+		tp_diff.tv_sec--; | |
+		tp_diff.tv_nsec += 1000000000L; | |
+	} | |
+ | |
+	/* time_t is not size_t: print it through an explicit cast */ | |
+	printf("timing: %lld sec, %.3f ms\n", | |
+	       (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f); | |
+ | |
+	if (globalcss.data) | |
+		printf("global CSS length in bytes: %zu\n", strlen(globalcss.data)); | |
+	if (sitecss.data) | |
+		printf("site CSS length in bytes: %zu\n", strlen(sitecss.data)); | |
+ | |
+	return sitecss.data; | |
+ | |
+err: | |
+	free(sitecss.data); | |
+	return NULL; | |
+} | |
+ | |
+/* Decide whether the request for `requri`, made by the page at `uri`, | |
+ * is allowed. | |
+ * Returns 1 if the request is allowed and 0 if a rule blocks it. | |
+ * Requests are allowed without checking when either URI is NULL, both | |
+ * are equal, or the page URI has no "://" or an over-long host part. | |
+ * Debug and timing output is printed to stdout / stderr. */ | |
+int | |
+checkrequest(const char *uri, const char *requri) | |
+{ | |
+	char domain[256]; | |
+	struct filterrule *r; | |
+	struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff; | |
+	const char *s; | |
+	size_t len; | |
+	int status = 1; | |
+ | |
+	if (!uri || !requri || !strcmp(requri, uri)) | |
+		return 1; | |
+ | |
+	/* no scheme separator: there is no domain to match against */ | |
+	if (!(s = strstr(uri, "://"))) | |
+		return 1; | |
+	s += sizeof("://") - 1; | |
+	len = strcspn(s, "/"); | |
+	/* the host part comes from the page URI: never overflow `domain` */ | |
+	if (len >= sizeof(domain)) { | |
+		weprintf("domain too long, ignoring: %s\n", uri); | |
+		return 1; | |
+	} | |
+	memcpy(domain, s, len); | |
+	domain[len] = '\0'; | |
+ | |
+	/* DEBUG: timing */ | |
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) { | |
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
+	} | |
+ | |
+	/* match rules */ | |
+	for (r = rules; r; r = r->next) { | |
+		if (!r->css && matchrule(r, requri, "csio^", domain)) { | |
+			printf("requri: %s\n", requri); | |
+			printf("uri: %s\n", uri); | |
+			printf("domain: %s\n", domain); | |
+ | |
+			fprintf(stderr, "blocked: %s, %s\n", domain, requri); | |
+ | |
+			/* DEBUG: for showing the timing */ | |
+			status = 0; | |
+			goto end; | |
+			/*return 1;*/ | |
+		} | |
+	} | |
+ | |
+end: | |
+	/* DEBUG: timing */ | |
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
+	} | |
+ | |
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
+	if (tp_diff.tv_nsec < 0) { | |
+		tp_diff.tv_sec--; | |
+		tp_diff.tv_nsec += 1000000000L; | |
+	} | |
+ | |
+	/* time_t is not size_t: print it through an explicit cast */ | |
+	printf("%s [%s] timing: %lld sec, %.3f ms\n", | |
+	       requri, uri, (long long)tp_diff.tv_sec, | |
+	       (float)tp_diff.tv_nsec / 1000000.0f); | |
+ | |
+	return status; | |
+} | |
+ | |
+/* Load the filter rules and build the global element hiding CSS. | |
+ * The rules file is taken from $SURF_ADBLOCK_FILE or, if unset, from | |
+ * $HOME/.surf/adblockrules. Errors are reported on stderr; if the | |
+ * global CSS cannot be built all loaded data is released again. */ | |
+void | |
+init(void) | |
+{ | |
+	struct filterrule *r; | |
+	FILE *fp; | |
+	char filepath[PATH_MAX], *e; | |
+	size_t len; | |
+	int n, saved_errno; | |
+ | |
+	if ((e = getenv("SURF_ADBLOCK_FILE"))) { | |
+		n = snprintf(filepath, sizeof(filepath), "%s", e); | |
+	} else { | |
+		if (!(e = getenv("HOME"))) | |
+			e = ""; | |
+		n = snprintf(filepath, sizeof(filepath), | |
+		             "%s%s.surf/adblockrules", e, e[0] ? "/" : ""); | |
+	} | |
+	if (n < 0 || (size_t)n >= sizeof(filepath)) { | |
+		weprintf("fatal: rules file path too long\n"); | |
+		return; | |
+	} | |
+ | |
+	if (!(fp = fopen(filepath, "r"))) { | |
+		weprintf("fatal: cannot open rules file %s: %s\n", | |
+		         filepath, strerror(errno)); | |
+		return; | |
+	} | |
+ | |
+	n = loadrules(fp); | |
+	/* fclose() may change errno: keep the value set while loading */ | |
+	saved_errno = errno; | |
+	fclose(fp); | |
+	if (n < 1) { | |
+		if (n < 0) { | |
+			weprintf("fatal: cannot read rules from file %s: %s\n", | |
+			         filepath, strerror(saved_errno)); | |
+		} else { | |
+			weprintf("fatal: cannot read any rule from file %s\n", | |
+			         filepath); | |
+		} | |
+		return; | |
+	} | |
+ | |
+	/* general CSS rules: all sites */ | |
+	for (r = rules; r; r = r->next) { | |
+		if (!r->css || r->domains) | |
+			continue; | |
+ | |
+		len = strlen(r->css); | |
+		if (string_append(&globalcss, r->css, len) < len) { | |
+			weprintf("cannot load global css selectors " | |
+			         "in memory\n"); | |
+			cleanup(); | |
+			return; | |
+		} | |
+		len = sizeof("{display:none;}") - 1; | |
+		if (string_append(&globalcss, "{display:none;}", len) < len) { | |
+			weprintf("cannot append css rule " | |
+			         "to global css selectors\n"); | |
+			cleanup(); | |
+			return; | |
+		} | |
+	} | |
+} | |
diff --git a/surf-adblock.c b/surf-adblock.c | |
@@ -8,769 +8,30 @@ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
-#include <wchar.h> | |
-#include <wctype.h> | |
#include <webkit2/webkit-web-extension.h> | |
#include <webkitdom/webkitdom.h> | |
-/* String data / memory pool */ | |
-typedef struct string { | |
- char *data; /* data */ | |
- size_t datasz; /* allocated size */ | |
- size_t len; /* current string length */ | |
-} String; | |
+#include "adblock.h" | |
typedef struct Page { | |
guint64 id; | |
WebKitWebPage *webpage; | |
- WebKitDOMDOMWindow *view; | |
+ /*WebKitDOMDOMWindow *view;*/ | |
struct Page *next; | |
} Page; | |
-struct filterdomain { | |
- char *domain; | |
- int inverse; | |
- struct filterdomain *next; | |
-}; | |
- | |
-struct filterrule { | |
- /* type: match mask, must be atleast 32-bit, see FilterType enum */ | |
- unsigned long block; | |
- int matchbegin; | |
- int matchend; | |
- /* is exception rule: prefix @@ for ABP or #@# for CSS */ | |
- int isexception; | |
- char *css; /* if non-NULL is CSS rule / hide element rule */ | |
- char *uri; | |
- struct filterdomain *domains; | |
- struct filterrule *next; | |
-}; | |
- | |
-enum { | |
- FilterTypeScript = 1 << 0, | |
- FilterTypeImage = 1 << 1, | |
- FilterTypeCSS = 1 << 2, | |
- FilterTypeObject = 1 << 3, | |
- FilterTypeXHR = 1 << 4, | |
- FilterTypeObjectSub = 1 << 5, | |
- FilterTypeSubDoc = 1 << 6, | |
- FilterTypePing = 1 << 7, | |
- FilterTypeDocument = 1 << 8, | |
- FilterTypeElemHide = 1 << 9, | |
- FilterTypeOther = 1 << 10, | |
- FilterTypeGenericHide = 1 << 11, | |
- FilterTypeGenericBlock = 1 << 12, | |
- FilterTypeMatchCase = 1 << 13, | |
-}; | |
- | |
-struct filtertype { | |
- /* `type` must be atleast 32-bit, see FilterType enum */ | |
- unsigned long type; | |
- char *name; | |
- size_t namelen; | |
- int allowinverse; | |
- int allownormal; | |
- int onlyexception; | |
- int (*fn)(struct filterrule *, char *); | |
-}; | |
- | |
-static int parsedomainsoption(struct filterrule *, char *); | |
- | |
-#define STRP(s) s,sizeof(s)-1 | |
- | |
-static struct filtertype filtertypes[] = { | |
- /* NOTE: options with 'type' = 0 are silently ignored and treated as | |
- * requests for now */ | |
- { 0, STRP("collapse"), 1, 1, 0, NULL }, | |
- { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL }, | |
- { 0, STRP("domain"), 0, 1, 0, | |
- /* domain=... */ &parsedomainsoption }, | |
- { 0, STRP("donottrack"), 1, 1, 0, NULL }, | |
- { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL }, | |
- { 0, STRP("font"), 1, 1, 0, NULL }, | |
- { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL }, | |
- { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL }, | |
- { FilterTypeImage, STRP("image"), 1, 1, 0, NULL }, | |
- { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL }, | |
- { 0, STRP("media"), 1, 1, 0, NULL }, | |
- { FilterTypeObject, STRP("object"), 1, 1, 0, NULL }, | |
- { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL }, | |
- { FilterTypeOther, STRP("other"), 1, 1, 0, NULL }, | |
- { FilterTypePing, STRP("ping"), 1, 1, 0, NULL }, | |
- { 0, STRP("popup"), 1, 1, 0, NULL }, | |
- { FilterTypeScript, STRP("script"), 1, 1, 0, NULL }, | |
- { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL }, | |
- { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL }, | |
- { 0, STRP("third-party"), 1, 1, 0, NULL }, | |
- { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL }, | |
- /* NOTE: site-key not supported */ | |
-}; | |
- | |
-static String globalcss; | |
static Page *pages; | |
-static struct filterrule *rules; | |
- | |
-static void | |
-cleanup(void) | |
-{ | |
- struct filterrule *r; | |
- struct filterdomain *d; | |
- | |
- free(globalcss.data); | |
- | |
- for (r = rules; r; r = rules) { | |
- for (d = r->domains; d; d = r->domains) { | |
- free(d->domain); | |
- r->domains = d->next; | |
- free(d); | |
- } | |
- free(r->css); | |
- free(r->uri); | |
- rules = r->next; | |
- free(r); | |
- } | |
-} | |
- | |
-static void | |
-weprintf(const char *fmt, ...) | |
-{ | |
- va_list ap; | |
- | |
- fprintf(stderr, "surf-adblock: "); | |
- | |
- va_start(ap, fmt); | |
- vfprintf(stderr, fmt, ap); | |
- va_end(ap); | |
-} | |
- | |
-static size_t | |
-string_buffer_realloc(String *s, size_t newsz) | |
-{ | |
- char *tmp; | |
- size_t allocsz; | |
- | |
- for (allocsz = 64; allocsz <= newsz; allocsz *= 2) | |
- ; | |
- if (!(tmp = realloc(s->data, allocsz))) { | |
- weprintf("realloc: %s\n", strerror(errno)); | |
- } else { | |
- s->data = tmp; | |
- s->datasz = allocsz; | |
- } | |
- | |
- return s->datasz; | |
-} | |
- | |
-static size_t | |
-string_append(String *s, const char *data, size_t len) | |
-{ | |
- size_t newlen; | |
- | |
- if (!len) | |
- return len; | |
- | |
- newlen = s->len + len; | |
- /* check if allocation is necesary, don't shrink buffer, | |
- * should be more than datasz ofcourse. */ | |
- if (newlen >= s->datasz) { | |
- if (string_buffer_realloc(s, newlen + 1) <= newlen) | |
- return 0; | |
- } | |
- memcpy(s->data + s->len, data, len); | |
- s->len = newlen; | |
- s->data[s->len] = '\0'; | |
- return len; | |
-} | |
- | |
-static void * | |
-wecalloc(size_t nmemb, size_t size) | |
-{ | |
- void *p; | |
- | |
- if (!(p = calloc(nmemb, size))) | |
- weprintf("calloc: %s\n", strerror(errno)); | |
- | |
- return p; | |
-} | |
- | |
-static char * | |
-westrndup(const char *s, size_t n) | |
-{ | |
- char *p; | |
- | |
- if (!(p = strndup(s, n))) | |
- weprintf("strndup: %s\n", strerror(errno)); | |
- return p; | |
-} | |
- | |
-static char * | |
-westrdup(const char *s) | |
-{ | |
- char *p; | |
- | |
- if (!(p = strdup(s))) | |
- weprintf("strdup: %s\n", strerror(errno)); | |
- | |
- return p; | |
-} | |
- | |
-#define END 0 | |
-#define UNMATCHABLE -2 | |
-#define BRACKET -3 | |
-#define CARET -4 | |
-#define STAR -5 | |
- | |
-static int | |
-str_next(const char *str, size_t n, size_t *step) | |
-{ | |
- if (!n) { | |
- *step = 0; | |
- return 0; | |
- } | |
- if (str[0] >= 128U) { | |
- wchar_t wc; | |
- int k = mbtowc(&wc, str, n); | |
- if (k<0) { | |
- *step = 1; | |
- return -1; | |
- } | |
- *step = k; | |
- return wc; | |
- } | |
- *step = 1; | |
- | |
- return str[0]; | |
-} | |
- | |
-static int | |
-pat_next(const char *pat, size_t m, size_t *step) | |
-{ | |
- int esc = 0; | |
- | |
- if (!m || !*pat) { | |
- *step = 0; | |
- return END; | |
- } | |
- *step = 1; | |
- if (pat[0]=='\\' && pat[1]) { | |
- *step = 2; | |
- pat++; | |
- esc = 1; | |
- goto escaped; | |
- } | |
- if (pat[0]=='^') | |
- return CARET; | |
- if (pat[0] == '*') | |
- return STAR; | |
-escaped: | |
- if (pat[0] >= 128U) { | |
- wchar_t wc; | |
- int k = mbtowc(&wc, pat, m); | |
- if (k<0) { | |
- *step = 0; | |
- return UNMATCHABLE; | |
- } | |
- *step = k + esc; | |
- return wc; | |
- } | |
- return pat[0]; | |
-} | |
- | |
-static int | |
-casefold(int k) | |
-{ | |
- int c = towupper(k); | |
- return c == k ? towlower(k) : c; | |
-} | |
- | |
-/* match() based on musl-libc fnmatch: | |
- https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */ | |
-static int | |
-match(const char *pat, const char *str, int fcase) | |
-{ | |
- size_t m = -1, n = -1; | |
- const char *p, *ptail, *endpat; | |
- const char *s, *stail, *endstr; | |
- size_t pinc, sinc, tailcnt=0; | |
- int c, k, kfold; | |
- | |
- for (;;) { | |
- switch ((c = pat_next(pat, m, &pinc))) { | |
- case UNMATCHABLE: | |
- return 1; | |
- case STAR: | |
- pat++; | |
- m--; | |
- break; | |
- default: | |
- k = str_next(str, n, &sinc); | |
- /* TODO: write a test-case */ | |
- if (c == CARET && (k == '?' || k == '/' || k <= 0)) | |
- return 1; | |
- if (k <= 0) | |
- return (c==END) ? 0 : 1; | |
- str += sinc; | |
- n -= sinc; | |
- kfold = fcase ? casefold(k) : k; | |
- if (k != c && kfold != c) | |
- return 1; | |
- pat+=pinc; | |
- m-=pinc; | |
- continue; | |
- } | |
- break; | |
- } | |
- | |
- /* Compute real pat length if it was initially unknown/-1 */ | |
- m = strnlen(pat, m); | |
- endpat = pat + m; | |
- | |
- /* Find the last * in pat and count chars needed after it */ | |
- for (p=ptail=pat; p<endpat; p+=pinc) { | |
- switch (pat_next(p, endpat-p, &pinc)) { | |
- case UNMATCHABLE: | |
- return 1; | |
- case STAR: | |
- tailcnt=0; | |
- ptail = p+1; | |
- break; | |
- default: | |
- tailcnt++; | |
- break; | |
- } | |
- } | |
- | |
- /* Past this point we need not check for UNMATCHABLE in pat, | |
- * because all of pat has already been parsed once. */ | |
- | |
- /* Compute real str length if it was initially unknown/-1 */ | |
- n = strnlen(str, n); | |
- endstr = str + n; | |
- if (n < tailcnt) return 1; | |
- | |
- /* Find the final tailcnt chars of str, accounting for UTF-8. | |
- * On illegal sequences we may get it wrong, but in that case | |
- * we necessarily have a matching failure anyway. */ | |
- for (s=endstr; s>str && tailcnt; tailcnt--) { | |
- if (s[-1] < 128U || MB_CUR_MAX==1) s--; | |
- else while ((unsigned char)*--s-0x80U<0x40 && s>str); | |
- } | |
- if (tailcnt) return 1; | |
- stail = s; | |
- | |
- /* Check that the pat and str tails match */ | |
- p = ptail; | |
- for (;;) { | |
- c = pat_next(p, endpat-p, &pinc); | |
- p += pinc; | |
- if ((k = str_next(s, endstr-s, &sinc)) <= 0) { | |
- if (c != END) return 1; | |
- break; | |
- } | |
- s += sinc; | |
- kfold = fcase ? casefold(k) : k; | |
- if (k != c && kfold != c) | |
- return 1; | |
- } | |
- | |
- /* We're all done with the tails now, so throw them out */ | |
- endstr = stail; | |
- endpat = ptail; | |
- | |
- /* Match pattern components until there are none left */ | |
- while (pat<endpat) { | |
- p = pat; | |
- s = str; | |
- for (;;) { | |
- c = pat_next(p, endpat-p, &pinc); | |
- p += pinc; | |
- /* Encountering * completes/commits a component */ | |
- if (c == STAR) { | |
- pat = p; | |
- str = s; | |
- break; | |
- } | |
- k = str_next(s, endstr-s, &sinc); | |
- if (!k) | |
- return 1; | |
- kfold = fcase ? casefold(k) : k; | |
- if (k != c && kfold != c) | |
- break; | |
- s += sinc; | |
- } | |
- if (c == STAR) continue; | |
- /* If we failed, advance str, by 1 char if it's a valid | |
- * char, or past all invalid bytes otherwise. */ | |
- k = str_next(str, endstr-str, &sinc); | |
- if (k > 0) str += sinc; | |
- else for (str++; str_next(str, endstr-str, &sinc)<0; str++); | |
- } | |
- | |
- return 0; | |
-} | |
- | |
-/* | |
-domain=... if domain is prefixed with ~, ignore. | |
-multiple domains can be separated with | | |
-*/ | |
-static int | |
-parsedomains(const char *s, int sep, struct filterdomain **head) | |
-{ | |
- struct filterdomain *d, *last = *head = NULL; | |
- char *p; | |
- int inverse; | |
- | |
- do { | |
- inverse = 0; | |
- if (*s == '~') { | |
- inverse = !inverse; | |
- s++; | |
- } | |
- if (!*s || *s == sep) | |
- break; | |
- | |
- if (!(d = wecalloc(1, sizeof(struct filterdomain)))) | |
- return -1; | |
- if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */ | |
- d->domain = westrndup(s, p - s); | |
- s = p + 1; | |
- } else { | |
- d->domain = westrdup(s); | |
- } | |
- if (!d->domain) | |
- return -1; | |
- d->inverse = inverse; | |
- | |
- if (!*head) | |
- *head = last = d; | |
- else | |
- last = last->next = d; | |
- } while (p); | |
- | |
- return (*head != NULL); | |
-} | |
- | |
-static int | |
-parsedomainselement(struct filterrule *f, char *s) | |
-{ | |
- struct filterdomain *d, *last; | |
- | |
- for (last = f->domains; last && last->next; last = last->next) | |
- ; | |
- | |
- if (parsedomains(s, ',', &d) < 0) | |
- return -1; | |
- if (last) | |
- last->next = d; | |
- else | |
- f->domains = d; | |
- | |
- return (d != NULL); | |
-} | |
- | |
-static int | |
-parsedomainsoption(struct filterrule *f, char *s) | |
-{ | |
- struct filterdomain *d, *last; | |
- | |
- for (last = f->domains; last && last->next; last = last->next) | |
- ; | |
- | |
- if (parsedomains(s, '|', &d) < 0) | |
- return -1; | |
- if (last) | |
- last->next = d; | |
- else | |
- f->domains = d; | |
- | |
- return (d != NULL); | |
-} | |
- | |
-static int | |
-filtertype_cmp(const void *a, const void *b) | |
-{ | |
- return strcmp(((struct filtertype *)a)->name, | |
- ((struct filtertype *)b)->name); | |
-} | |
- | |
-/* check if domain is the same domain or a subdomain of `s` */ | |
-static int | |
-matchdomain(const char *s, const char *domain) | |
-{ | |
- size_t l1, l2; | |
- | |
- l1 = strlen(s); | |
- l2 = strlen(domain); | |
- | |
- /* subdomain-specific (longer) or other domain */ | |
- if (l1 > l2) | |
- return 0; | |
- /* subdomain */ | |
- if (l2 > l1 && domain[l2 - l1 - 1] == '.') | |
- return !strcmp(&domain[l2 - l1], s); | |
- | |
- return !strcmp(s, domain); | |
-} | |
- | |
-static int | |
-matchrule(struct filterrule *f, const char *uri, const char *type, | |
- const char *domain) | |
-{ | |
- /* NOTE: order matters, see FilterType enum values */ | |
- struct filterdomain *d; | |
- char pat[1024]; | |
- int r, m; | |
- | |
- r = f->domains ? 0 : 1; | |
- for (d = f->domains; d; d = d->next) { | |
- if (matchdomain(d->domain, domain)) { | |
- if (r && d->inverse) | |
- r = 0; | |
- else if (!r && !d->inverse) | |
- r = 1; | |
- } else if (r && !d->inverse) { | |
- r = 0; | |
- } | |
- } | |
- if (f->css) { | |
- /* DEBUG */ | |
-#if 0 | |
- if (f->isexception) | |
- printf("DEBUG, exception rule, CSS: %s, match? %d\n", | |
- f->css, r); | |
-#endif | |
- return r; | |
- } | |
- | |
-#if 1 | |
- /* skip allow rule, TODO: inverse? */ | |
- if (!r) | |
- return 0; | |
-#endif | |
- | |
-#if 0 | |
- /* DEBUG: test, match if it is a simple pattern */ | |
- char *p; | |
- p = strchr(f->uri, '*'); | |
- if (!p) | |
- p = strchr(f->uri, '^'); | |
- if (!p) { | |
- /* TODO: write a test-case */ | |
- if (f->block & FilterTypeMatchCase) { | |
- if (f->matchbegin) | |
- m = strncmp(uri, f->uri, strlen(f->uri)) == 0; | |
- else if (f->matchend) | |
- m = strlen(f->uri) <= strlen(uri) && | |
- strcmp(&uri[strlen(uri) - strlen(f->ur… | |
- else | |
- m = strstr(uri, f->uri) ? 1 : 0; | |
- } else { | |
- if (f->matchbegin) | |
- m = strncasecmp(uri, f->uri, strlen(f->uri)) =… | |
- else if (f->matchend) | |
- m = strlen(f->uri) <= strlen(uri) && | |
- strcasecmp(&uri[strlen(uri) - strlen(f… | |
- else | |
- m = strcasestr(uri, f->uri) ? 1 : 0; | |
- } | |
- /*m = r ? !m : m;*/ | |
- return m; | |
- } | |
-#endif | |
- | |
- r = snprintf(pat, sizeof(pat), "%s%s%s", | |
- f->matchbegin ? "" : "*", | |
- f->uri, | |
- f->matchend ? "" : "*"); | |
- if (r == -1 || (size_t)r >= sizeof(pat)) { | |
- fprintf(stderr, "warning: pattern too large, ignoring\n"); | |
- return 0; | |
- } | |
- | |
- m = 0; | |
- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) { | |
-#if 0 | |
- for (; *type; type++) { | |
- for (i = 0; blockstr[i]; i++) { | |
- if (blockstr[i] == *type && | |
- f->block & (1 << i)) | |
- printf("block type '%c'\n", blockstr[i… | |
- return 1; | |
- } | |
- } | |
- } | |
- | |
- return 0; | |
-#endif | |
- m = 1; | |
- } | |
- /*m = r ? !m : m;*/ | |
- return m; | |
-} | |
- | |
-static int | |
-parserule(struct filterrule *f, char *s) | |
-{ | |
- struct filtertype key, *ft; | |
- int inverse = 0; | |
- char *p, *values; | |
- | |
- if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']')) | |
- return 0; /* skip comment or empty line */ | |
- for (; *s && isspace(*s); s++) | |
- ; | |
- if (!*s) | |
- return 0; /* line had only whitespace: skip */ | |
- | |
- memset(f, 0, sizeof(struct filterrule)); | |
- | |
- if ((p = strstr(s, "#@#"))) { | |
- *p = '\0'; | |
- if (parsedomainselement(f, s) < 0) | |
- return -1; | |
- *p = '#'; | |
- if (!(f->css = westrdup(p + 3))) | |
- return -1; | |
- f->isexception = 1; | |
- goto end; /* end of CSS rule */ | |
- } | |
- | |
- /* element hiding rule, NOTE: no wildcards are supported, | |
- "Simplified element hiding syntax" is not supported. */ | |
- if ((p = strstr(s, "##"))) { | |
- *p = '\0'; | |
- if (parsedomainselement(f, s) < 0) | |
- return -1; | |
- *p = '#'; | |
- if (!(f->css = westrdup(p + 2))) | |
- return -1; | |
- goto end; /* end of rule */ | |
- } | |
- | |
- if (!strncmp(s, "@@", 2)) { | |
- f->isexception = 1; | |
- s += 2; | |
- } | |
- if (*s == '|') { | |
- s++; | |
- if (*s == '|') { | |
- f->matchbegin = 1; | |
- s++; | |
- } else { | |
- f->matchend = 1; | |
- } | |
- } | |
- | |
- /* no options, use rest of line as uri. */ | |
- if (!(p = strrchr(s, '$'))) { | |
- if (!(f->uri = westrdup(s))) | |
- return -1; | |
- goto end; | |
- } | |
- | |
- /* has options */ | |
- if (!(f->uri = westrndup(s, p - s))) | |
- return -1; | |
- s = ++p; | |
- | |
- /* blockmask, has options? default: allow all options, case-sensitive | |
- * has no options? default: block all options, case-sensitive */ | |
- f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL; | |
- do { | |
- if ((p = strchr(s, ','))) | |
- *p = '\0'; | |
- /* match option */ | |
- inverse = 0; | |
- if (*s == '~') { | |
- inverse = 1; | |
- s++; | |
- } | |
- if ((values = strchr(s, '='))) | |
- *(values) = '\0'; | |
- key.name = s; | |
- | |
- ft = bsearch(&key, &filtertypes, | |
- sizeof(filtertypes) / sizeof(*filtertypes), | |
- sizeof(*filtertypes), filtertype_cmp); | |
- | |
- /* restore NUL-terminator for domain= option */ | |
- if (values) | |
- *(values++) = '='; | |
- | |
- if (ft) { | |
- if (inverse) | |
- f->block &= ~(ft->type); | |
- else | |
- f->block |= ft->type; | |
- if (ft->fn && values) | |
- ft->fn(f, values); | |
- } else { | |
- /* DEBUG */ | |
- fprintf(stderr, "ignored: unknown option: '%s' " | |
- "in rule: %s\n", key.name, f->uri); | |
- } | |
- | |
- /* restore ',' */ | |
- if (p) { | |
- *p = ','; | |
- s = p + 1; | |
- } | |
- } while (p); | |
-end: | |
- | |
- return 1; | |
-} | |
- | |
-#if 0 | |
-static void | |
-debugrule(struct filterrule *r) | |
-{ | |
- printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: " | |
- "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "", | |
- r->isexception, r->block); | |
-} | |
-#endif | |
- | |
-static int | |
-loadrules(FILE *fp) | |
-{ | |
- struct filterrule f, *r, *rn = NULL; | |
- char *line = NULL; | |
- size_t linesiz = 0; | |
- ssize_t n; | |
- int ret; | |
- | |
- /* TODO: handle ferror() */ | |
- /* load rules */ | |
- while ((n = getline(&line, &linesiz, fp)) > 0) { | |
- if (line[n - 1] == '\n') | |
- line[--n] = '\0'; | |
- if (n > 0 && line[n - 1] == '\r') | |
- line[--n] = '\0'; | |
- | |
- if ((ret = parserule(&f, line) > 0)) { | |
- if (!(r = wecalloc(1, sizeof(struct filterrule)))) | |
- return -1; | |
- if (!rules) | |
- rules = rn = r; | |
- else | |
- rn = rn->next = r; | |
- memcpy(rn, &f, sizeof(struct filterrule)); | |
- } else if (ret < 0) { | |
- return -1; | |
- } | |
- } | |
- return (rules != NULL); | |
-} | |
static Page * | |
newpage(WebKitWebPage *page) | |
{ | |
Page *p; | |
- if (!(p = wecalloc(1, sizeof(Page)))) | |
+ if (!(p = calloc(1, sizeof(Page)))) { | |
+ fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno)); | |
return NULL; | |
+ } | |
p->next = pages; | |
pages = p; | |
@@ -783,146 +44,32 @@ newpage(WebKitWebPage *page) | |
static void | |
documentloaded(WebKitWebPage *wp, Page *p) | |
{ | |
- char domain[256]; | |
WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp); | |
WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc); | |
WebKitDOMElement *el; | |
- String sitecss; | |
- struct filterrule *r; | |
- const char *s, *uri = webkit_web_page_get_uri(p->webpage); | |
- size_t len; | |
- | |
- if (!uri) | |
- return; | |
- | |
- s = strstr(uri, "://") + sizeof("://") - 1; | |
- len = strcspn(s, "/"); | |
- memcpy(domain, s, len); | |
- domain[len] = '\0'; | |
- | |
- printf("uri: %s\n", uri); | |
- printf("domain: %s\n", domain); | |
- | |
- /* DEBUG: timing */ | |
- struct timespec tp_start, tp_end, tp_diff; | |
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) { | |
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
- } | |
- | |
- /* site-specific CSS */ | |
- memset(&sitecss, 0, sizeof(sitecss)); | |
- for (r = rules; r; r = r->next) { | |
- if (!r->css || !r->domains || !matchrule(r, "", "", domain)) | |
- continue; | |
- | |
- len = strlen(r->css); | |
- if (string_append(&sitecss, r->css, len) < len) | |
- return; | |
- | |
- s = r->isexception ? "{display:initial;}" : "{display:none;}"; | |
- len = strlen(s); | |
- if (string_append(&sitecss, s, len) < len) | |
- return; | |
- } | |
-/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/ | |
- | |
- /* DEBUG: timing */ | |
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
- } | |
- | |
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
- if (tp_diff.tv_nsec < 0) { | |
- tp_diff.tv_sec--; | |
- tp_diff.tv_nsec += 1000000000L; | |
- } | |
- | |
- printf("timing: %zu sec, %.3f ms\n", | |
- tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f); | |
+ const char *uri = webkit_web_page_get_uri(p->webpage); | |
+ char *css, *globalcss; | |
- if (globalcss.data) | |
- printf("global CSS length in bytes: %zu\n", strlen(globalcss.d… | |
- if (sitecss.data) | |
- printf("site CSS length in bytes: %zu\n", strlen(sitecss.data)… | |
+ /*p->view = webkit_dom_document_get_default_view(doc);*/ | |
- p->view = webkit_dom_document_get_default_view(doc); | |
- | |
- if (globalcss.data) { | |
+ if ((globalcss = getglobalcss())) { | |
el = webkit_dom_document_create_element(doc, "style", NULL); | |
webkit_dom_element_set_attribute(el, "type", "text/css", NULL); | |
- webkit_dom_element_set_inner_html(el, globalcss.data, NULL); | |
+ webkit_dom_element_set_inner_html(el, globalcss, NULL); | |
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), | |
WEBKIT_DOM_NODE(el), NULL); | |
} | |
- if (sitecss.data) { | |
+ if ((css = getdocumentcss(uri))) { | |
el = webkit_dom_document_create_element(doc, "style", NULL); | |
webkit_dom_element_set_attribute(el, "type", "text/css", NULL); | |
- webkit_dom_element_set_inner_html(el, sitecss.data, NULL); | |
+ webkit_dom_element_set_inner_html(el, css, NULL); | |
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), | |
WEBKIT_DOM_NODE(el), NULL); | |
} | |
- free(sitecss.data); | |
-} | |
- | |
-int | |
-checkrequest(const char *uri, const char *requri) | |
-{ | |
- char domain[256]; | |
- struct filterrule *r; | |
- const char *s; | |
- size_t len; | |
- int status = 1; | |
- | |
- if (!uri || !strcmp(requri, uri)) | |
- return 1; | |
- | |
- s = strstr(uri, "://") + sizeof("://") - 1; | |
- len = strcspn(s, "/"); | |
- memcpy(domain, s, len); | |
- domain[len] = '\0'; | |
- | |
- /* DEBUG: timing */ | |
- struct timespec tp_start, tp_end, tp_diff; | |
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) { | |
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
- } | |
- | |
- /* match rules */ | |
- for (r = rules; r; r = r->next) { | |
- if (!r->css && matchrule(r, requri, "csio^", domain)) { | |
- printf("requri: %s\n", requri); | |
- printf("uri: %s\n", uri); | |
- printf("domain: %s\n", domain); | |
- | |
- fprintf(stderr, "blocked: %s, %s\n", domain, requri); | |
- | |
- /* DEBUG: for showing the timing */ | |
- status = 0; | |
- goto end; | |
- /*return 1;*/ | |
- } | |
- } | |
- | |
-end: | |
- /* DEBUG: timing */ | |
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) { | |
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno)); | |
- } | |
- | |
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec; | |
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec; | |
- if (tp_diff.tv_nsec < 0) { | |
- tp_diff.tv_sec--; | |
- tp_diff.tv_nsec += 1000000000L; | |
- } | |
- | |
- printf("%s [%s] timing: %zu sec, %.3f ms\n", | |
- requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.… | |
- | |
- return status; | |
+ free(css); | |
+ /* NOTE: globalcss free'd at cleanup() */ | |
} | |
static gboolean | |
@@ -941,69 +88,6 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req, | |
return status; | |
} | |
-void | |
-init(void) | |
-{ | |
- struct filterrule *r; | |
- FILE *fp; | |
- char filepath[PATH_MAX], *e; | |
- size_t len; | |
- int n; | |
- | |
- if ((e = getenv("SURF_ADBLOCK_FILE"))) { | |
- n = snprintf(filepath, sizeof(filepath), "%s", e); | |
- } else { | |
- if (!(e = getenv("HOME"))) | |
- e = ""; | |
- n = snprintf(filepath, sizeof(filepath), | |
- "%s%s.surf/adblockrules", e, e[0] ? "/" : ""); | |
- } | |
- if (n < 0 || (size_t)n >= sizeof(filepath)) { | |
- weprintf("fatal: rules file path too long"); | |
- return; | |
- } | |
- | |
- if (!(fp = fopen(filepath, "r"))) { | |
- weprintf("fatal: cannot open rules file %s: %s\n", | |
- filepath, strerror(errno)); | |
- return; | |
- } | |
- | |
- n = loadrules(fp); | |
- fclose(fp); | |
- if (n < 1) { | |
- if (n < 0) { | |
- weprintf("fatal: cannot read rules from file %s: %s\n", | |
- filepath, strerror(errno)); | |
- } else { | |
- weprintf("fatal: cannot read any rule from file %s\n", | |
- filepath); | |
- } | |
- return; | |
- } | |
- | |
- /* general CSS rules: all sites */ | |
- for (r = rules; r; r = r->next) { | |
- if (!r->css || r->domains) | |
- continue; | |
- | |
- len = strlen(r->css); | |
- if (string_append(&globalcss, r->css, strlen(r->css)) < len) { | |
- weprintf("cannot load global css selectors " | |
- "in memory\n"); | |
- cleanup(); | |
- return; | |
- } | |
- len = sizeof("{display:none;}") - 1; | |
- if (string_append(&globalcss, "{display:none;}", len) < len) { | |
- weprintf("cannot append css rule " | |
- "to global css selectors\n"); | |
- cleanup(); | |
- return; | |
- } | |
- } | |
-} | |
- | |
static void | |
webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused) | |
{ |