initial insertion (twss) - surf-adblock - Surf adblock web extension | |
git clone git://git.codemadness.org/surf-adblock | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 888e9c8a2237bf9c374e04737f103e91cdc30267 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Thu, 14 Jul 2016 22:18:31 +0200 | |
initial insertion (twss) | |
Diffstat: | |
A LICENSE | 16 ++++++++++++++++ | |
A Makefile | 35 +++++++++++++++++++++++++++++… | |
A README | 1 + | |
A TODO | 11 +++++++++++ | |
A config.mk | 32 +++++++++++++++++++++++++++++… | |
A dl.sh | 20 ++++++++++++++++++++ | |
A surf-adblock.c | 781 +++++++++++++++++++++++++++++… | |
7 files changed, 896 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/LICENSE b/LICENSE | |
@@ -0,0 +1,16 @@ | |
+ISC License | |
+ | |
+Copyright (c) 2016 Hiltjo Posthuma <[email protected]> | |
+Copyright (c) 2016 Quentin Rameau <[email protected]> | |
+ | |
+Permission to use, copy, modify, and/or distribute this software for any | |
+purpose with or without fee is hereby granted, provided that the above | |
+copyright notice and this permission notice appear in all copies. | |
+ | |
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
diff --git a/Makefile b/Makefile | |
@@ -0,0 +1,35 @@ | |
# Build and install rules for the surf-adblock WebKit web extension.
# All tunables (paths, flags, tools) live in config.mk.
include config.mk

SRC = surf-adblock.c
OBJ = ${SRC:.c=.lo}

all: surf-adblock.la

.SUFFIXES: .la .lo .o .c

.c.o:
	${CC} -c ${CFLAGS} $<

.c.lo:
	${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $<

${OBJ}: config.mk

# -rpath tells libtool the directory the module will finally live in.
# DESTDIR is only a staging prefix used by "install" and must not be
# recorded there.
surf-adblock.la: ${OBJ}
	${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \
	    ${OBJ} -rpath ${LIBPREFIX}

clean:
	rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o}

install: all surf-adblock.la
	mkdir -p ${DESTDIR}${LIBPREFIX}
	${LIBTOOL} --mode install install -c surf-adblock.la \
	    ${DESTDIR}${LIBPREFIX}/surf-adblock.la

uninstall:
	${LIBTOOL} --mode uninstall rm -f \
	    ${DESTDIR}${LIBPREFIX}/surf-adblock.la
	rm -df ${DESTDIR}${LIBPREFIX} || true

.PHONY: all clean install uninstall
diff --git a/README b/README | |
@@ -0,0 +1 @@ | |
+adblock parser (WIP) | |
diff --git a/TODO b/TODO | |
@@ -0,0 +1,11 @@ | |
+Docs: | |
+- https://adblockplus.org/en/filter-cheatsheet | |
+- https://adblockplus.org/filters | |
+ | |
+- distinguish between site-specific and global block rules. | |
+- optimize matching. | |
+- optimize memory allocation. | |
+- optimize: pregenerate one global stylesheet that applies to all sites? | |
+? support exception rules #@# | |
+ | |
+ | |
diff --git a/config.mk b/config.mk | |
@@ -0,0 +1,32 @@ | |
+VERSION = 0.1 | |
+ | |
+# Customize below to fit your system | |
+ | |
+# paths | |
+PREFIX = /usr/local | |
+MANPREFIX = ${PREFIX}/share/man | |
+LIBPREFIX = ${PREFIX}/lib/surf | |
+ | |
+X11INC = /usr/X11R6/include | |
+X11LIB = /usr/X11R6/lib | |
+ | |
+GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0` | |
+GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0` | |
+WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0` | |
+WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0` | |
+ | |
+# includes and libs | |
+INCS = -I. -I/usr/include -I${X11INC} ${GTKINC} | |
+LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0 | |
+ | |
+# flags | |
+CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SO… | |
+CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS} | |
+LDFLAGS = -s ${LIBS} | |
+LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API | |
+LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS} | |
+LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined | |
+ | |
+# compiler and linker | |
+CC = cc | |
+LIBTOOL = libtool --quiet | |
diff --git a/dl.sh b/dl.sh | |
@@ -0,0 +1,20 @@ | |
#!/bin/sh
# Download the adblock lists below and merge them into the file "list",
# dropping duplicate lines while keeping the original order.

set -e

# Collect everything in a temporary file first so that a failed download
# never leaves a truncated or half-written "list" behind.
tmp="list.tmp.$$"
trap 'rm -f "$tmp"' EXIT

while read -r l; do
	# -f: fail on HTTP errors instead of saving the error page as rules.
	curl -fL "$l"
	echo "" # not all lists end with a newline.
done > "$tmp" <<!
https://easylist.github.io/easylist/easylist.txt
https://easylist.github.io/easylist/easyprivacy.txt
https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
https://easylist-downloads.adblockplus.org/easylistdutch.txt
https://easylist.github.io/easylistgermany/easylistgermany.txt
https://easylist-downloads.adblockplus.org/liste_fr.txt
https://easylist.github.io/easylist/fanboy-annoyance.txt
https://easylist.github.io/easylist/fanboy-social.txt
!

# remove duplicate lines but keep the order.
awk '{if(!x[$0]++){print $0;}}' "$tmp" > list
diff --git a/surf-adblock.c b/surf-adblock.c | |
@@ -0,0 +1,781 @@ | |
+#include <sys/stat.h> | |
+#include <sys/types.h> | |
+ | |
+#include <ctype.h> | |
+#include <errno.h> | |
+#include <fcntl.h> | |
+#include <limits.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <wchar.h> | |
+#include <wctype.h> | |
+ | |
+#include <webkit2/webkit-web-extension.h> | |
+#include <webkitdom/webkitdom.h> | |
+#include <webkitdom/WebKitDOMDOMWindowUnstable.h> | |
+ | |
+typedef struct Page { /* Per-page bookkeeping record; records are chained through `next`. */ | |
+ guint64 id; /* value of webkit_web_page_get_id() for `webpage` */ | |
+ WebKitWebPage *webpage; /* the WebKit page this record belongs to */ | |
+ WebKitDOMDOMWindow *view; /* default view of the document, set in documentloaded() */ | |
+ struct Page *next; /* next record in the global `pages` list, or NULL */ | |
+} Page; | |
+ | |
+struct filterdomain { /* One entry of a rule's domain list (singly linked). */ | |
+ char *domain; /* heap copy of the domain name, made by parsedomains() */ | |
+ int inverse; /* non-zero when the entry was written with a leading '~' */ | |
+ struct filterdomain *next; /* next entry, or NULL */ | |
+}; | |
+ | |
+struct filterrule { /* One parsed filter-list line; rules are chained through `next`. */ | |
+ char *line; /* DEBUG */ | |
+ /* type: match mask, must be at least 32-bit, see FilterType enum */ | |
+ unsigned long block; | |
+ int matchbegin; /* non-zero: matchrule() does not prepend '*' to the URI pattern */ | |
+ int matchend; /* non-zero: matchrule() does not append '*' to the URI pattern */ | |
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */ | |
+ int isexception; | |
+ char *css; /* if non-NULL is CSS rule / hide element rule */ | |
+ char *uri; /* URI pattern text; left NULL for CSS rules */ | |
+ struct filterdomain *domains; /* domain restrictions; NULL means the rule applies everywhere */ | |
+ struct filterrule *next; /* next rule, or NULL */ | |
+}; | |
+ | |
+enum { /* Bit flags for filterrule.block / filtertype.type. matchrule() maps the i-th character of "sicoxOSpde^" to bit i, so the order of the first eleven values matters. */ | |
+ FilterTypeScript = 1 << 0, | |
+ FilterTypeImage = 1 << 1, | |
+ FilterTypeCSS = 1 << 2, | |
+ FilterTypeObject = 1 << 3, | |
+ FilterTypeXHR = 1 << 4, | |
+ FilterTypeObjectSub = 1 << 5, | |
+ FilterTypeSubDoc = 1 << 6, | |
+ FilterTypePing = 1 << 7, | |
+ FilterTypeDocument = 1 << 8, | |
+ FilterTypeElemHide = 1 << 9, | |
+ FilterTypeOther = 1 << 10, | |
+ FilterTypeGenericHide = 1 << 11, | |
+ FilterTypeGenericBlock = 1 << 12, | |
+ FilterTypeMatchCase = 1 << 13, /* when set, matchrule() matches the URI case-sensitively */ | |
+}; | |
+ | |
+struct filtertype { /* Description of one `$option` name understood by parserule(). */ | |
+ /* `type` must be at least 32-bit, see FilterType enum */ | |
+ unsigned long type; | |
+ char *name; /* option name; used as the lookup key by filtertype_cmp() */ | |
+ size_t namelen; /* length of `name` without the terminating NUL (see STRP) */ | |
+ int allowinverse; /* NOTE(review): not read by the code in this file; presumably "may be negated with ~" - confirm */ | |
+ int allownormal; /* NOTE(review): not read by the code in this file; presumably "may be used un-negated" - confirm */ | |
+ int onlyexception; /* NOTE(review): not read by the code in this file; presumably "only valid in @@ rules" - confirm */ | |
+ void (*fn)(struct filterrule *, char *); /* optional handler, called with the rule and the text after '=' */ | |
+}; | |
+ | |
+static void parsedomainsoption(struct filterrule *, char *); | |
+ | |
+#define STRP(s) s,sizeof(s)-1 | |
+ | |
+struct filtertype filtertypes[] = { /* Option table; parserule() searches it with bsearch() and filtertype_cmp(), so entries must stay sorted by name in strcmp() order. */ | |
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as | |
+ * requests for now */ | |
+ { 0, STRP("collapse"), 1, 1, 0, NULL }, | |
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL }, | |
+ { 0, STRP("domain"), 0, 1, 0, &parsedo… | |
+ { 0, STRP("donottrack"), 1, 1, 0, NULL }, | |
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL }, | |
+ { 0, STRP("font"), 1, 1, 0, NULL }, | |
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL }, | |
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL }, | |
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL }, | |
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL }, | |
+ { 0, STRP("media"), 1, 1, 0, NULL }, | |
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL }, | |
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL }, | |
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL }, | |
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL }, | |
+ { 0, STRP("popup"), 1, 1, 0, NULL }, | |
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL }, | |
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL }, | |
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL }, | |
+ { 0, STRP("third-party"), 1, 1, 0, NULL }, | |
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL }, | |
+ /* NOTE: site-key not supported */ | |
+}; | |
+ | |
+static Page *pages; | |
+static char globalcss[5000000]; /* TEST: dynamic allocate later */ | |
+static struct filterrule *rules; | |
+ | |
/*
 * calloc() wrapper that never returns NULL: when the allocation fails the
 * error is reported on stderr and the process exits with status 1.
 */
void *
ecalloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem != NULL)
		return mem;

	fprintf(stderr, "calloc: %s\n", strerror(errno));
	exit(1);
}
+ | |
/*
 * Return a freshly allocated copy of the NUL-terminated string `s`; the
 * caller owns the result and releases it with free().  Never returns NULL:
 * on allocation failure the error is reported and the process exits.
 *
 * Implemented with malloc()/memcpy() rather than strdup(): strdup() is not
 * part of ISO C99 and is left undeclared when the file is built with
 * -std=c99 and no feature-test macro, which makes the compiler assume it
 * returns int.
 */
char *
estrdup(const char *s)
{
	size_t len = strlen(s) + 1; /* include the terminating NUL */
	char *copy = malloc(len);

	if (!copy) {
		fprintf(stderr, "strdup: %s\n", strerror(errno));
		exit(1);
	}

	return memcpy(copy, s, len);
}
+ | |
+#define END 0 | |
+#define UNMATCHABLE -2 | |
+#define BRACKET -3 | |
+#define CARET -4 | |
+#define STAR -5 | |
+ | |
/*
 * Decode the next character of `str`, looking at no more than `n` bytes.
 *
 * Returns the character value and stores the number of bytes consumed in
 * `*step`.  With n == 0 it returns 0 and consumes nothing.  A byte sequence
 * that mbtowc() rejects yields -1 with a step of one byte.
 */
static int
str_next(const char *str, size_t n, size_t *step)
{
	wchar_t wide;
	int len;

	if (n == 0) {
		*step = 0;
		return 0;
	}

	/* plain 7-bit characters need no multibyte decoding */
	if ((unsigned char)str[0] < 0x80) {
		*step = 1;
		return str[0];
	}

	len = mbtowc(&wide, str, n);
	if (len < 0) {
		*step = 1;
		return -1;
	}

	*step = len;
	return wide;
}
+ | |
+static int | |
+pat_next(const char *pat, size_t m, size_t *step) | |
+{ | |
+ int esc = 0; | |
+ | |
+ if (!m || !*pat) { | |
+ *step = 0; | |
+ return END; | |
+ } | |
+ *step = 1; | |
+ if (pat[0]=='\\' && pat[1]) { | |
+ *step = 2; | |
+ pat++; | |
+ esc = 1; | |
+ goto escaped; | |
+ } | |
+ if (pat[0]=='^') | |
+ return CARET; | |
+ if (pat[0] == '*') | |
+ return STAR; | |
+escaped: | |
+ if (pat[0] >= 128U) { | |
+ wchar_t wc; | |
+ int k = mbtowc(&wc, pat, m); | |
+ if (k<0) { | |
+ *step = 0; | |
+ return UNMATCHABLE; | |
+ } | |
+ *step = k + esc; | |
+ return wc; | |
+ } | |
+ return pat[0]; | |
+} | |
+ | |
/*
 * Return the opposite-case form of `k`: its upper-case mapping when that
 * differs from `k`, otherwise its lower-case mapping.  Characters without
 * a case mapping are returned unchanged.
 */
static int
casefold(int k)
{
	int upper = towupper(k);

	if (upper != k)
		return upper;

	return towlower(k);
}
+ | |
+/* match() based on musl-libc fnmatch: | |
+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c | |
+ Returns 0 when `str` matches the pattern `pat` and 1 otherwise. | |
+ Pattern tokens are the ones produced by pat_next(). | |
+ A non-zero `fcase` also accepts the casefold() form of each character | |
+ of `str`. */ | |
+int | |
+match(const char *pat, const char *str, int fcase) | |
+{ | |
+ size_t m = -1, n = -1; /* lengths start out unknown and are computed further down */ | |
+ const char *p, *ptail, *endpat; | |
+ const char *s, *stail, *endstr; | |
+ size_t pinc, sinc, tailcnt=0; | |
+ int c, k, kfold; | |
+ | |
+ for (;;) { /* consume the part of pat before its first '*' */ | |
+ switch ((c = pat_next(pat, m, &pinc))) { | |
+ case UNMATCHABLE: | |
+ return 1; | |
+ case STAR: | |
+ pat++; | |
+ m--; | |
+ break; | |
+ default: | |
+ k = str_next(str, n, &sinc); | |
+ if (k <= 0) | |
+ return (c==END) ? 0 : 1; | |
+ if (c == CARET && (iswdigit(k) || iswalpha(k) || strch… | |
+ return 1; | |
+ str += sinc; | |
+ n -= sinc; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ return 1; | |
+ pat+=pinc; | |
+ m-=pinc; | |
+ continue; | |
+ } | |
+ break; | |
+ } | |
+ | |
+ /* Compute real pat length if it was initially unknown/-1 */ | |
+ m = strnlen(pat, m); | |
+ endpat = pat + m; | |
+ | |
+ /* Find the last * in pat and count chars needed after it */ | |
+ for (p=ptail=pat; p<endpat; p+=pinc) { | |
+ switch (pat_next(p, endpat-p, &pinc)) { | |
+ case UNMATCHABLE: | |
+ return 1; | |
+ case STAR: | |
+ tailcnt=0; | |
+ ptail = p+1; | |
+ break; | |
+ default: | |
+ tailcnt++; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Past this point we need not check for UNMATCHABLE in pat, | |
+ * because all of pat has already been parsed once. */ | |
+ | |
+ /* Compute real str length if it was initially unknown/-1 */ | |
+ n = strnlen(str, n); | |
+ endstr = str + n; | |
+ if (n < tailcnt) return 1; | |
+ | |
+ /* Find the final tailcnt chars of str, accounting for UTF-8. | |
+ * On illegal sequences we may get it wrong, but in that case | |
+ * we necessarily have a matching failure anyway. */ | |
+ for (s=endstr; s>str && tailcnt; tailcnt--) { | |
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--; | |
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str); | |
+ } | |
+ if (tailcnt) return 1; | |
+ stail = s; | |
+ | |
+ /* Check that the pat and str tails match */ | |
+ p = ptail; | |
+ for (;;) { | |
+ c = pat_next(p, endpat-p, &pinc); | |
+ p += pinc; | |
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) { | |
+ if (c != END) return 1; | |
+ break; | |
+ } | |
+ s += sinc; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ return 1; | |
+ } | |
+ | |
+ /* We're all done with the tails now, so throw them out */ | |
+ endstr = stail; | |
+ endpat = ptail; | |
+ | |
+ /* Match pattern components until there are none left */ | |
+ while (pat<endpat) { | |
+ p = pat; | |
+ s = str; | |
+ for (;;) { | |
+ c = pat_next(p, endpat-p, &pinc); | |
+ p += pinc; | |
+ /* Encountering * completes/commits a component */ | |
+ if (c == STAR) { | |
+ pat = p; | |
+ str = s; | |
+ break; | |
+ } | |
+ k = str_next(s, endstr-s, &sinc); | |
+ if (!k) | |
+ return 1; | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ break; | |
+ s += sinc; | |
+ } | |
+ if (c == STAR) continue; | |
+ /* If we failed, advance str, by 1 char if it's a valid | |
+ * char, or past all invalid bytes otherwise. */ | |
+ k = str_next(str, endstr-str, &sinc); | |
+ if (k > 0) str += sinc; | |
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++); | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+ | |
+/* | |
+domain=... if domain is prefixed with ~, ignore. | |
+multiple domains can be separated with | | |
+*/ | |
+struct filterdomain * | |
+parsedomains(char *s, int sep) | |
+{ | |
+ struct filterdomain *head = NULL, *d, *last = NULL; | |
+ char *p; | |
+ int inverse; | |
+ | |
+ do { | |
+ inverse = 0; | |
+ if (*s == '~') { | |
+ inverse = !inverse; | |
+ s++; | |
+ } | |
+ if (!*s || *s == sep) | |
+ break; | |
+ | |
+ if ((p = strchr(s, sep))) /* TODO: should not contain , */ | |
+ *p = '\0'; | |
+ | |
+ d = ecalloc(1, sizeof(struct filterdomain)); | |
+ d->inverse = inverse; | |
+ d->domain = estrdup(s); | |
+ | |
+ if (!head) | |
+ head = last = d; | |
+ else | |
+ last = last->next = d; | |
+ | |
+ if (p) { | |
+ *p = sep; | |
+ s = p + 1; | |
+ } | |
+ } while (p); | |
+ | |
+ return head; | |
+} | |
+ | |
+void | |
+parsedomainselement(struct filterrule *f, char *s) | |
+{ | |
+ struct filterdomain *d, *last; | |
+ | |
+ for (last = f->domains; last && last->next; last = last->next) | |
+ ; | |
+ | |
+ d = parsedomains(s, ','); | |
+ if (last) | |
+ last->next = d; | |
+ else | |
+ f->domains = d; | |
+} | |
+ | |
+void | |
+parsedomainsoption(struct filterrule *f, char *s) | |
+{ | |
+ struct filterdomain *d, *last; | |
+ | |
+ for (last = f->domains; last && last->next; last = last->next) | |
+ ; | |
+ | |
+ d = parsedomains(s, '|'); | |
+ if (last) | |
+ last->next = d; | |
+ else | |
+ f->domains = d; | |
+} | |
+ | |
+int | |
+filtertype_cmp(const void *a, const void *b) | |
+{ | |
+ return strcmp(((struct filtertype *)a)->name, ((struct filtertype *)b)… | |
+} | |
+ | |
/*
 * Return 1 when `domain` is exactly `s` or a subdomain of `s` (that is, it
 * ends in "." followed by `s`), and 0 otherwise.
 */
int
matchdomain(const char *s, const char *domain)
{
	size_t slen = strlen(s);
	size_t dlen = strlen(domain);

	/* `s` is more specific than `domain`, or a different domain */
	if (slen > dlen)
		return 0;

	if (slen == dlen)
		return strcmp(s, domain) == 0;

	/* `domain` is longer: the suffix must start right after a label dot */
	if (domain[dlen - slen - 1] != '.')
		return 0;

	return strcmp(domain + (dlen - slen), s) == 0;
}
+ | |
+int | |
+matchrule(struct filterrule *f, const char *uri, const char *type, const char … | |
+{ | |
+ /* NOTE: order matters, see FilterType enum values */ | |
+ const char *blockstr = "sicoxOSpde^"; | |
+ struct filterdomain *d; | |
+ char pat[1024]; | |
+ int r, i; | |
+ | |
+ /* ignore exception rules for now, these are usually paid | |
+ * for by sites to allow advertisements. */ | |
+ if (f->isexception) | |
+ return 0; | |
+ | |
+ if (f->css) { | |
+ r = f->domains ? 0 : 1; | |
+ for (d = f->domains; d; d = d->next) { | |
+ if (matchdomain(d->domain, domain)) { | |
+ if (r && d->inverse) | |
+ r = 0; | |
+ else if (!r && !d->inverse) | |
+ r = 1; | |
+ } else if (r && !d->inverse) { | |
+ r = 0; | |
+ } | |
+ } | |
+ return r; | |
+ } | |
+ | |
+ r = snprintf(pat, sizeof(pat), "%s%s%s", | |
+ f->matchbegin ? "" : "*", | |
+ f->uri, | |
+ f->matchend ? "" : "*"); | |
+ if (r == -1 || (size_t)r >= sizeof(pat)) { | |
+ fprintf(stderr, "warning: pattern too large, ignoring\n"); | |
+ return 0; | |
+ } | |
+ | |
+ r = f->domains ? 0 : 1; | |
+ for (d = f->domains; d; d = d->next) { | |
+ if (matchdomain(d->domain, domain)) { | |
+ if (r && d->inverse) | |
+ r = 0; | |
+ else if (!r && !d->inverse) | |
+ r = 1; | |
+ } else if (r && !d->inverse) { | |
+ r = 0; | |
+ } | |
+ } | |
+ | |
+ if (r && !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) { | |
+ for (; *type; type++) { | |
+ for (i = 0; blockstr[i]; i++) { | |
+ if (blockstr[i] == *type && | |
+ f->block & (1 << i)) { | |
+ printf("block type '%c'\n", blockstr[i… | |
+ } | |
+ } | |
+ } | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+int | |
+parserule(struct filterrule *f, char *s) | |
+{ | |
+ struct filtertype key, *ft; | |
+ int inverse = 0; | |
+ char *p, *values; | |
+ | |
+ if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']')) | |
+ return 0; /* skip comment or empty line */ | |
+ for (; *s && isspace(*s); s++) | |
+ ; | |
+ if (!*s) | |
+ return 0; /* line had only whitespace: skip */ | |
+ | |
+ memset(f, 0, sizeof(struct filterrule)); | |
+ | |
+ if ((p = strstr(s, "#@#"))) { | |
+ *p = '\0'; | |
+ parsedomainselement(f, s); | |
+ *p = '#'; | |
+ f->css = estrdup(p + 3); | |
+ f->isexception = 1; | |
+ goto end; /* end of CSS rule */ | |
+ } | |
+ | |
+ /* element hiding rule, NOTE: no wildcards are supported, | |
+ "Simplified element hiding syntax" is not supported. */ | |
+ if ((p = strstr(s, "##"))) { | |
+ *p = '\0'; | |
+ parsedomainselement(f, s); | |
+ *p = '#'; | |
+ f->css = estrdup(p + 2); | |
+ goto end; /* end of rule */ | |
+ } | |
+ | |
+ if (!strncmp(s, "@@", 2)) { | |
+ f->isexception = 1; | |
+ s += 2; | |
+ } | |
+ if (*s == '|') { | |
+ s++; | |
+ if (*s == '|') { | |
+ f->matchbegin = 1; | |
+ s++; | |
+ } else { | |
+ f->matchend = 1; | |
+ } | |
+ } | |
+ | |
+ /* no options, use rest of line as uri. */ | |
+ if (!(p = strrchr(s, '$'))) { | |
+ f->uri = estrdup(s); | |
+ goto end; | |
+ } | |
+ | |
+ /* has options */ | |
+ *p = '\0'; | |
+ f->uri = estrdup(s); | |
+ *p = '$'; | |
+ s = ++p; | |
+ | |
+ /* blockmask, has options? default: allow all options, case-sensitive | |
+ * has no options? default: block all options, case-sensitive */ | |
+ f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL; | |
+ do { | |
+ if ((p = strchr(s, ','))) | |
+ *p = '\0'; | |
+ /* match option */ | |
+ inverse = 0; | |
+ if (*s == '~') { | |
+ inverse = 1; | |
+ s++; | |
+ } | |
+ if ((values = strchr(s, '='))) | |
+ *(values) = '\0'; | |
+ key.name = s; | |
+ | |
+ ft = bsearch(&key, &filtertypes, | |
+ sizeof(filtertypes) / sizeof(*filtertypes), sizeof(*filter… | |
+ filtertype_cmp); | |
+ | |
+ /* restore NUL-terminator for domain= option */ | |
+ if (values) | |
+ *(values++) = '='; | |
+ | |
+ if (ft) { | |
+ if (inverse) | |
+ f->block &= ~(ft->type); | |
+ else | |
+ f->block |= ft->type; | |
+ if (ft->fn && values) | |
+ ft->fn(f, values); | |
+ } else { | |
+ /* DEBUG */ | |
+ fprintf(stderr, "unknown option: '%s' in rule: %s\n", | |
+ key.name, f->uri); | |
+ } | |
+ | |
+ /* restore ',' */ | |
+ if (p) { | |
+ *p = ','; | |
+ s = p + 1; | |
+ } | |
+ } while (p); | |
+end: | |
+ | |
+ return 1; | |
+} | |
+ | |
+void | |
+debugrule(struct filterrule *r) /* Debug helper: prints the rule's uri, css, isexception and block mask to stdout; NULL strings are printed as "". */ | |
+{ | |
+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===… | |
+ r->uri ? r->uri : "", r->css ? r->css : "", r->isexception, r-… | |
+} | |
+ | |
+struct filterrule * | |
+loadrules(FILE *fp) | |
+{ | |
+ char *line = NULL; | |
+ size_t linesiz = 0; | |
+ ssize_t n; | |
+ struct filterrule f, *r, *rn = NULL, *rules = NULL; | |
+ | |
+ /* TODO: handle ferror() */ | |
+ /* load rules */ | |
+ while ((n = getline(&line, &linesiz, fp)) > 0) { | |
+ if (line[n - 1] == '\n') | |
+ line[--n] = '\0'; | |
+ if (n > 0 && line[n - 1] == '\r') | |
+ line[--n] = '\0'; | |
+ | |
+ if (parserule(&f, line)) { | |
+ r = ecalloc(1, sizeof(struct filterrule)); | |
+ if (!rules) | |
+ rules = rn = r; | |
+ else | |
+ rn = rn->next = r; | |
+ memcpy(rn, &f, sizeof(struct filterrule)); | |
+ r->line = estrdup(line); /* DEBUG */ | |
+ } | |
+ } | |
+ return rules; | |
+} | |
+ | |
+Page * | |
+newpage(WebKitWebPage *page) | |
+{ | |
+ Page *p; | |
+ | |
+ p = ecalloc(1, sizeof(Page)); | |
+ p->next = pages; | |
+ pages = p; | |
+ | |
+ p->id = webkit_web_page_get_id(page); | |
+ p->webpage = page; | |
+ | |
+ return p; | |
+} | |
+ | |
+static void | |
+documentloaded(WebKitWebPage *wp, Page *p) | |
+{ | |
+ WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp); | |
+ WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc); | |
+ WebKitDOMElement *el; | |
+ char sitecss[1000000] = ""; /* TODO: dynamic allocate */ | |
+ struct filterrule *r; | |
+ char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage)); | |
+ char *domain, *s; | |
+ | |
+ /* TODO: improve, hacky */ | |
+ if ((domain = strstr(uri, "://"))) { | |
+ domain += sizeof("://") - 1; | |
+ } else { | |
+ domain = uri; | |
+ } | |
+ if ((s = strchr(domain, '/'))) | |
+ *s = '\0'; | |
+ | |
+ printf("uri: %s\n", uri); | |
+ printf("domain: %s\n", domain); | |
+ | |
+ /* site-specific CSS */ | |
+ for (r = rules; r; r = r->next) { | |
+ if (!r->css || !r->domains || !matchrule(r, "", "", domain)) | |
+ continue; | |
+ strlcat(sitecss, r->css, sizeof(sitecss)); | |
+ strlcat(sitecss, "{display:none;}", sizeof(sitecss)); | |
+ } | |
+ printf("sitecss: %s\n", sitecss); | |
+ | |
+ p->view = webkit_dom_document_get_default_view(doc); | |
+ | |
+ el = webkit_dom_document_create_element(doc, "style", NULL); | |
+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL); | |
+ webkit_dom_element_set_inner_html(el, globalcss, NULL); | |
+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el… | |
+ | |
+ el = webkit_dom_document_create_element(doc, "style", NULL); | |
+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL); | |
+ webkit_dom_element_set_inner_html(el, sitecss, NULL); | |
+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el… | |
+ | |
+ free(uri); | |
+} | |
+ | |
+static gboolean | |
+sendrequest(WebKitWebPage *wp, WebKitURIRequest *req, | |
+ WebKitURIResponse *res, Page *p) | |
+{ | |
+ struct filterrule *r; | |
+ | |
+ char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage)); | |
+ char *requri = estrdup((char *)webkit_uri_request_get_uri(req)); | |
+ char *domain, *s; | |
+ | |
+ /* TODO: improve, hacky */ | |
+ if ((domain = strstr(uri, "://"))) { | |
+ domain += sizeof("://") - 1; | |
+ } else { | |
+ domain = uri; | |
+ } | |
+ | |
+ if ((s = strchr(domain, '/'))) | |
+ *s = '\0'; | |
+ | |
+ /* match rules */ | |
+ for (r = rules; r; r = r->next) { | |
+ if (!r->css && matchrule(r, requri, "csio^", domain)) { | |
+ printf("requri: %s\n", requri); | |
+ printf("uri: %s\n", uri); | |
+ printf("domain: %s\n", domain); | |
+ | |
+ fprintf(stderr, "blocked: %s, %s\n", domain, requri); | |
+ free(uri); | |
+ free(requri); | |
+ return TRUE; | |
+ } | |
+ } | |
+ free(uri); | |
+ free(requri); | |
+ | |
+ return FALSE; | |
+} | |
+ | |
+static void | |
+objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp, WebKitFrame *f, Page *p) | |
+{ | |
+} | |
+ | |
+static void | |
+webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused) | |
+{ | |
+ Page *np = newpage(p); | |
+ WebKitScriptWorld *w = webkit_script_world_get_default(); | |
+ | |
+ g_signal_connect(p, "send-request", | |
+ G_CALLBACK(sendrequest), np); | |
+ g_signal_connect(w, "window-object-cleared", | |
+ G_CALLBACK(objectcleared), np); | |
+ g_signal_connect(p, "document-loaded", | |
+ G_CALLBACK(documentloaded), np); | |
+} | |
+ | |
+G_MODULE_EXPORT void | |
+webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant… | |
+{ /* Exported initializer: loads the rule file into `rules`, builds `globalcss` and connects the "page-created" handler. */ | |
+ struct filterrule *r; | |
+ FILE *fp; | |
+ const char *filename = "/home/hiltjo/adblock_rules"; /* NOTE(review): hard-coded, user-specific path */ | |
+ | |
+ if (!(fp = fopen(filename, "r"))) { | |
+ fprintf(stderr, "cannot read rules from file: %s\n", filename); | |
+ return; /* no handler is connected in this case */ | |
+ } | |
+ if (!(rules = loadrules(fp))) { | |
+ fprintf(stderr, "cannot load adblock rules\n"); | |
+ return; /* NOTE(review): returns before fclose(fp), so the stream is leaked on this path */ | |
+ } | |
+ fclose(fp); | |
+ | |
+ /* general rules: all sites */ | |
+ for (r = rules; r; r = r->next) { | |
+ if (!r->css || r->domains) /* keep only CSS rules without a domain restriction */ | |
+ continue; | |
+ strlcat(globalcss, r->css, sizeof(globalcss)); | |
+ strlcat(globalcss, "{display:none;}", sizeof(globalcss)); | |
+ } | |
+ | |
+ g_signal_connect(e, "page-created", | |
+ G_CALLBACK(webpagecreated), NULL); | |
+} | |