GopherProxy

	initial insertion (twss) - surf-adblock - Surf adblock web extension
	git clone git://git.codemadness.org/surf-adblock
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 888e9c8a2237bf9c374e04737f103e91cdc30267
	Author: Hiltjo Posthuma <[email protected]>
	Date: Thu, 14 Jul 2016 22:18:31 +0200

	initial insertion (twss)

	Diffstat:
	A LICENSE | 16 ++++++++++++++++
	A Makefile | 35 +++++++++++++++++++++++++++++…
	A README | 1 +
	A TODO | 11 +++++++++++
	A config.mk | 32 +++++++++++++++++++++++++++++…
	A dl.sh | 20 ++++++++++++++++++++
	A surf-adblock.c | 781 +++++++++++++++++++++++++++++…

	7 files changed, 896 insertions(+), 0 deletions(-)
	---
	diff --git a/LICENSE b/LICENSE
	@@ -0,0 +1,16 @@
	+ISC License
	+
	+Copyright (c) 2016 Hiltjo Posthuma <[email protected]>
	+Copyright (c) 2016 Quentin Rameau <[email protected]>
	+
	+Permission to use, copy, modify, and/or distribute this software for any
	+purpose with or without fee is hereby granted, provided that the above
	+copyright notice and this permission notice appear in all copies.
	+
	+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
	+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
	+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
	+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
	+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
	+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
	+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
	diff --git a/Makefile b/Makefile
	@@ -0,0 +1,35 @@
	+include config.mk
	+
	+SRC = surf-adblock.c
	+OBJ = ${SRC:.c=.lo}
	+
	+all: surf-adblock.la
	+
	+.SUFFIXES: .la .lo .o .c
	+
	+.c.o:
	+ ${CC} -c ${CFLAGS} $<
	+
	+.c.lo:
	+ ${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $<
	+
	+${OBJ}: config.mk
	+
	+surf-adblock.la: ${OBJ}
	+ ${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \
	+ ${OBJ} -rpath ${DESTDIR}${LIBPREFIX}
	+
	+clean:
	+ rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o}
	+
	+install: all surf-adblock.la
	+ mkdir -p ${DESTDIR}${LIBPREFIX}
	+ ${LIBTOOL} --mode install install -c surf-adblock.la \
	+ ${DESTDIR}${LIBPREFIX}/surf-adblock.la
	+
	+uninstall:
	+ ${LIBTOOL} --mode uninstall rm -f \
	+ ${DESTDIR}${LIBPREFIX}/surf-adblock.la
	+ rm -df ${DESTDIR}${LIBPREFIX} || true
	+
	+.PHONY: all clean install uninstall
	diff --git a/README b/README
	@@ -0,0 +1 @@
	+adblock parser (WIP)
	diff --git a/TODO b/TODO
	@@ -0,0 +1,11 @@
	+Docs:
	+- https://adblockplus.org/en/filter-cheatsheet
	+- https://adblockplus.org/filters
	+
	+- separate between site-specific and global block rules.
	+- optimize matching.
	+- optimize memory allocation.
	+- optimize: pregenerate one global stylesheet that applies to all sites?
	+? support exception rules #@#
	+
	+
	diff --git a/config.mk b/config.mk
	@@ -0,0 +1,32 @@
	+VERSION = 0.1
	+
	+# Customize below to fit your system
	+
	+# paths
	+PREFIX = /usr/local
	+MANPREFIX = ${PREFIX}/share/man
	+LIBPREFIX = ${PREFIX}/lib/surf
	+
	+X11INC = /usr/X11R6/include
	+X11LIB = /usr/X11R6/lib
	+
	+GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0`
	+GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0`
	+WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
	+WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
	+
	+# includes and libs
	+INCS = -I. -I/usr/include -I${X11INC} ${GTKINC}
	+LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0
	+
	+# flags
	+CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SO…
	+CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS}
	+LDFLAGS = -s ${LIBS}
	+LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API
	+LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS}
	+LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined
	+
	+# compiler and linker
	+CC = cc
	+LIBTOOL = libtool --quiet
	diff --git a/dl.sh b/dl.sh
	@@ -0,0 +1,20 @@
	+#!/bin/sh
	+# download adblock lists
	+
	+set -e
	+
	+(while read -r l; do
	+ curl -L "$l"
	+ echo "" # not all lists have line at EOF.
	+done <<!
	+https://easylist.github.io/easylist/easylist.txt
	+https://easylist.github.io/easylist/easyprivacy.txt
	+https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
	+https://easylist-downloads.adblockplus.org/easylistdutch.txt
	+https://easylist.github.io/easylistgermany/easylistgermany.txt
	+https://easylist-downloads.adblockplus.org/liste_fr.txt
	+https://easylist.github.io/easylist/fanboy-annoyance.txt
	+https://easylist.github.io/easylist/fanboy-social.txt
	+!
	+) | awk '{if(!x[$0]++){print $0;}}' > list
	+# remove duplicate lines but keep the order.
	diff --git a/surf-adblock.c b/surf-adblock.c
	@@ -0,0 +1,781 @@
	+#include <sys/stat.h>
	+#include <sys/types.h>
	+
	+#include <ctype.h>
	+#include <errno.h>
	+#include <fcntl.h>
	+#include <limits.h>
	+#include <stdio.h>
	+#include <stdlib.h>
	+#include <string.h>
	+#include <wchar.h>
	+#include <wctype.h>
	+
	+#include <webkit2/webkit-web-extension.h>
	+#include <webkitdom/webkitdom.h>
	+#include <webkitdom/WebKitDOMDOMWindowUnstable.h>
	+
	+typedef struct Page {
	+ guint64 id;
	+ WebKitWebPage *webpage;
	+ WebKitDOMDOMWindow *view;
	+ struct Page *next;
	+} Page;
	+
	+struct filterdomain {
	+ char *domain;
	+ int inverse;
	+ struct filterdomain *next;
	+};
	+
	+struct filterrule {
	+ char *line; /* DEBUG */
	+ /* type: match mask, must be atleast 32-bit, see FilterType enum */
	+ unsigned long block;
	+ int matchbegin;
	+ int matchend;
	+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
	+ int isexception;
	+ char *css; /* if non-NULL is CSS rule / hide element rule */
	+ char *uri;
	+ struct filterdomain *domains;
	+ struct filterrule *next;
	+};
	+
	+enum {
	+ FilterTypeScript = 1 << 0,
	+ FilterTypeImage = 1 << 1,
	+ FilterTypeCSS = 1 << 2,
	+ FilterTypeObject = 1 << 3,
	+ FilterTypeXHR = 1 << 4,
	+ FilterTypeObjectSub = 1 << 5,
	+ FilterTypeSubDoc = 1 << 6,
	+ FilterTypePing = 1 << 7,
	+ FilterTypeDocument = 1 << 8,
	+ FilterTypeElemHide = 1 << 9,
	+ FilterTypeOther = 1 << 10,
	+ FilterTypeGenericHide = 1 << 11,
	+ FilterTypeGenericBlock = 1 << 12,
	+ FilterTypeMatchCase = 1 << 13,
	+};
	+
	+struct filtertype {
	+ /* `type` must be atleast 32-bit, see FilterType enum */
	+ unsigned long type;
	+ char *name;
	+ size_t namelen;
	+ int allowinverse;
	+ int allownormal;
	+ int onlyexception;
	+ void (*fn)(struct filterrule *, char *);
	+};
	+
	+static void parsedomainsoption(struct filterrule *, char *);
	+
	+#define STRP(s) s,sizeof(s)-1
	+
	+struct filtertype filtertypes[] = {
	+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
	+ * requests for now */
	+ { 0, STRP("collapse"), 1, 1, 0, NULL },
	+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
	+ { 0, STRP("domain"), 0, 1, 0, &parsedo…
	+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
	+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
	+ { 0, STRP("font"), 1, 1, 0, NULL },
	+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
	+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
	+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
	+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
	+ { 0, STRP("media"), 1, 1, 0, NULL },
	+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
	+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
	+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
	+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
	+ { 0, STRP("popup"), 1, 1, 0, NULL },
	+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
	+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
	+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
	+ { 0, STRP("third-party"), 1, 1, 0, NULL },
	+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
	+ /* NOTE: site-key not supported */
	+};
	+
	+static Page *pages;
	+static char globalcss[5000000]; /* TEST: dynamic allocate later */
	+static struct filterrule *rules;
	+
	+void *
	+ecalloc(size_t nmemb, size_t size)
	+{
	+ void *p;
	+
	+ if (!(p = calloc(nmemb, size))) {
	+ fprintf(stderr, "calloc: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+
	+ return p;
	+}
	+
	+char *
	+estrdup(const char *s)
	+{
	+ char *p;
	+
	+ if (!(p = strdup(s))) {
	+ fprintf(stderr, "strdup: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+
	+ return p;
	+}
	+
	+#define END 0
	+#define UNMATCHABLE -2
	+#define BRACKET -3
	+#define CARET -4
	+#define STAR -5
	+
	+static int
	+str_next(const char *str, size_t n, size_t *step)
	+{
	+ if (!n) {
	+ *step = 0;
	+ return 0;
	+ }
	+ if (str[0] >= 128U) {
	+ wchar_t wc;
	+ int k = mbtowc(&wc, str, n);
	+ if (k<0) {
	+ *step = 1;
	+ return -1;
	+ }
	+ *step = k;
	+ return wc;
	+ }
	+ *step = 1;
	+
	+ return str[0];
	+}
	+
	+static int
	+pat_next(const char *pat, size_t m, size_t *step)
	+{
	+ int esc = 0;
	+
	+ if (!m || !*pat) {
	+ *step = 0;
	+ return END;
	+ }
	+ *step = 1;
	+ if (pat[0]=='\\' && pat[1]) {
	+ *step = 2;
	+ pat++;
	+ esc = 1;
	+ goto escaped;
	+ }
	+ if (pat[0]=='^')
	+ return CARET;
	+ if (pat[0] == '*')
	+ return STAR;
	+escaped:
	+ if (pat[0] >= 128U) {
	+ wchar_t wc;
	+ int k = mbtowc(&wc, pat, m);
	+ if (k<0) {
	+ *step = 0;
	+ return UNMATCHABLE;
	+ }
	+ *step = k + esc;
	+ return wc;
	+ }
	+ return pat[0];
	+}
	+
	+static int
	+casefold(int k)
	+{
	+ int c = towupper(k);
	+ return c == k ? towlower(k) : c;
	+}
	+
	+/* match() based on musl-libc fnmatch:
	+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
	+int
	+match(const char *pat, const char *str, int fcase)
	+{
	+ size_t m = -1, n = -1;
	+ const char *p, *ptail, *endpat;
	+ const char *s, *stail, *endstr;
	+ size_t pinc, sinc, tailcnt=0;
	+ int c, k, kfold;
	+
	+ for (;;) {
	+ switch ((c = pat_next(pat, m, &pinc))) {
	+ case UNMATCHABLE:
	+ return 1;
	+ case STAR:
	+ pat++;
	+ m--;
	+ break;
	+ default:
	+ k = str_next(str, n, &sinc);
	+ if (k <= 0)
	+ return (c==END) ? 0 : 1;
	+ if (c == CARET && (iswdigit(k) || iswalpha(k) || strch…
	+ return 1;
	+ str += sinc;
	+ n -= sinc;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ return 1;
	+ pat+=pinc;
	+ m-=pinc;
	+ continue;
	+ }
	+ break;
	+ }
	+
	+ /* Compute real pat length if it was initially unknown/-1 */
	+ m = strnlen(pat, m);
	+ endpat = pat + m;
	+
	+ /* Find the last * in pat and count chars needed after it */
	+ for (p=ptail=pat; p<endpat; p+=pinc) {
	+ switch (pat_next(p, endpat-p, &pinc)) {
	+ case UNMATCHABLE:
	+ return 1;
	+ case STAR:
	+ tailcnt=0;
	+ ptail = p+1;
	+ break;
	+ default:
	+ tailcnt++;
	+ break;
	+ }
	+ }
	+
	+ /* Past this point we need not check for UNMATCHABLE in pat,
	+ * because all of pat has already been parsed once. */
	+
	+ /* Compute real str length if it was initially unknown/-1 */
	+ n = strnlen(str, n);
	+ endstr = str + n;
	+ if (n < tailcnt) return 1;
	+
	+ /* Find the final tailcnt chars of str, accounting for UTF-8.
	+ * On illegal sequences we may get it wrong, but in that case
	+ * we necessarily have a matching failure anyway. */
	+ for (s=endstr; s>str && tailcnt; tailcnt--) {
	+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
	+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
	+ }
	+ if (tailcnt) return 1;
	+ stail = s;
	+
	+ /* Check that the pat and str tails match */
	+ p = ptail;
	+ for (;;) {
	+ c = pat_next(p, endpat-p, &pinc);
	+ p += pinc;
	+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
	+ if (c != END) return 1;
	+ break;
	+ }
	+ s += sinc;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ return 1;
	+ }
	+
	+ /* We're all done with the tails now, so throw them out */
	+ endstr = stail;
	+ endpat = ptail;
	+
	+ /* Match pattern components until there are none left */
	+ while (pat<endpat) {
	+ p = pat;
	+ s = str;
	+ for (;;) {
	+ c = pat_next(p, endpat-p, &pinc);
	+ p += pinc;
	+ /* Encountering * completes/commits a component */
	+ if (c == STAR) {
	+ pat = p;
	+ str = s;
	+ break;
	+ }
	+ k = str_next(s, endstr-s, &sinc);
	+ if (!k)
	+ return 1;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ break;
	+ s += sinc;
	+ }
	+ if (c == STAR) continue;
	+ /* If we failed, advance str, by 1 char if it's a valid
	+ * char, or past all invalid bytes otherwise. */
	+ k = str_next(str, endstr-str, &sinc);
	+ if (k > 0) str += sinc;
	+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
	+ }
	+
	+ return 0;
	+}
	+
	+
	+/*
	+domain=... if domain is prefixed with ~, ignore.
	+multiple domains can be separated with |
	+*/
	+struct filterdomain *
	+parsedomains(char *s, int sep)
	+{
	+ struct filterdomain *head = NULL, *d, *last = NULL;
	+ char *p;
	+ int inverse;
	+
	+ do {
	+ inverse = 0;
	+ if (*s == '~') {
	+ inverse = !inverse;
	+ s++;
	+ }
	+ if (!*s || *s == sep)
	+ break;
	+
	+ if ((p = strchr(s, sep))) /* TODO: should not contain , */
	+ *p = '\0';
	+
	+ d = ecalloc(1, sizeof(struct filterdomain));
	+ d->inverse = inverse;
	+ d->domain = estrdup(s);
	+
	+ if (!head)
	+ head = last = d;
	+ else
	+ last = last->next = d;
	+
	+ if (p) {
	+ *p = sep;
	+ s = p + 1;
	+ }
	+ } while (p);
	+
	+ return head;
	+}
	+
	+void
	+parsedomainselement(struct filterrule *f, char *s)
	+{
	+ struct filterdomain *d, *last;
	+
	+ for (last = f->domains; last && last->next; last = last->next)
	+ ;
	+
	+ d = parsedomains(s, ',');
	+ if (last)
	+ last->next = d;
	+ else
	+ f->domains = d;
	+}
	+
	+void
	+parsedomainsoption(struct filterrule *f, char *s)
	+{
	+ struct filterdomain *d, *last;
	+
	+ for (last = f->domains; last && last->next; last = last->next)
	+ ;
	+
	+ d = parsedomains(s, '|');
	+ if (last)
	+ last->next = d;
	+ else
	+ f->domains = d;
	+}
	+
	+int
	+filtertype_cmp(const void *a, const void *b)
	+{
	+ return strcmp(((struct filtertype *)a)->name, ((struct filtertype *)b)…
	+}
	+
	+/* check if domain is the same domain or a subdomain of `s` */
	+int
	+matchdomain(const char *s, const char *domain)
	+{
	+ size_t l1, l2;
	+
	+ l1 = strlen(s);
	+ l2 = strlen(domain);
	+
	+ /* subdomain-specific (longer) or other domain */
	+ if (l1 > l2)
	+ return 0;
	+ /* subdomain */
	+ if (l2 > l1 && domain[l2 - l1 - 1] == '.')
	+ return !strcmp(&domain[l2 - l1], s);
	+
	+ return !strcmp(s, domain);
	+}
	+
	+int
	+matchrule(struct filterrule *f, const char *uri, const char *type, const char …
	+{
	+ /* NOTE: order matters, see FilterType enum values */
	+ const char *blockstr = "sicoxOSpde^";
	+ struct filterdomain *d;
	+ char pat[1024];
	+ int r, i;
	+
	+ /* ignore exception rules for now, these are usually paid
	+ * for by sites to allow advertisements. */
	+ if (f->isexception)
	+ return 0;
	+
	+ if (f->css) {
	+ r = f->domains ? 0 : 1;
	+ for (d = f->domains; d; d = d->next) {
	+ if (matchdomain(d->domain, domain)) {
	+ if (r && d->inverse)
	+ r = 0;
	+ else if (!r && !d->inverse)
	+ r = 1;
	+ } else if (r && !d->inverse) {
	+ r = 0;
	+ }
	+ }
	+ return r;
	+ }
	+
	+ r = snprintf(pat, sizeof(pat), "%s%s%s",
	+ f->matchbegin ? "" : "*",
	+ f->uri,
	+ f->matchend ? "" : "*");
	+ if (r == -1 || (size_t)r >= sizeof(pat)) {
	+ fprintf(stderr, "warning: pattern too large, ignoring\n");
	+ return 0;
	+ }
	+
	+ r = f->domains ? 0 : 1;
	+ for (d = f->domains; d; d = d->next) {
	+ if (matchdomain(d->domain, domain)) {
	+ if (r && d->inverse)
	+ r = 0;
	+ else if (!r && !d->inverse)
	+ r = 1;
	+ } else if (r && !d->inverse) {
	+ r = 0;
	+ }
	+ }
	+
	+ if (r && !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
	+ for (; *type; type++) {
	+ for (i = 0; blockstr[i]; i++) {
	+ if (blockstr[i] == *type &&
	+ f->block & (1 << i)) {
	+ printf("block type '%c'\n", blockstr[i…
	+ }
	+ }
	+ }
	+ return 1;
	+ }
	+ return 0;
	+}
	+
	+int
	+parserule(struct filterrule *f, char *s)
	+{
	+ struct filtertype key, *ft;
	+ int inverse = 0;
	+ char *p, *values;
	+
	+ if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
	+ return 0; /* skip comment or empty line */
	+ for (; *s && isspace(*s); s++)
	+ ;
	+ if (!*s)
	+ return 0; /* line had only whitespace: skip */
	+
	+ memset(f, 0, sizeof(struct filterrule));
	+
	+ if ((p = strstr(s, "#@#"))) {
	+ *p = '\0';
	+ parsedomainselement(f, s);
	+ *p = '#';
	+ f->css = estrdup(p + 3);
	+ f->isexception = 1;
	+ goto end; /* end of CSS rule */
	+ }
	+
	+ /* element hiding rule, NOTE: no wildcards are supported,
	+ "Simplified element hiding syntax" is not supported. */
	+ if ((p = strstr(s, "##"))) {
	+ *p = '\0';
	+ parsedomainselement(f, s);
	+ *p = '#';
	+ f->css = estrdup(p + 2);
	+ goto end; /* end of rule */
	+ }
	+
	+ if (!strncmp(s, "@@", 2)) {
	+ f->isexception = 1;
	+ s += 2;
	+ }
	+ if (*s == '|') {
	+ s++;
	+ if (*s == '|') {
	+ f->matchbegin = 1;
	+ s++;
	+ } else {
	+ f->matchend = 1;
	+ }
	+ }
	+
	+ /* no options, use rest of line as uri. */
	+ if (!(p = strrchr(s, '$'))) {
	+ f->uri = estrdup(s);
	+ goto end;
	+ }
	+
	+ /* has options */
	+ *p = '\0';
	+ f->uri = estrdup(s);
	+ *p = '$';
	+ s = ++p;
	+
	+ /* blockmask, has options? default: allow all options, case-sensitive
	+ * has no options? default: block all options, case-sensitive */
	+ f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
	+ do {
	+ if ((p = strchr(s, ',')))
	+ *p = '\0';
	+ /* match option */
	+ inverse = 0;
	+ if (*s == '~') {
	+ inverse = 1;
	+ s++;
	+ }
	+ if ((values = strchr(s, '=')))
	+ *(values) = '\0';
	+ key.name = s;
	+
	+ ft = bsearch(&key, &filtertypes,
	+ sizeof(filtertypes) / sizeof(*filtertypes), sizeof(*filter…
	+ filtertype_cmp);
	+
	+ /* restore NUL-terminator for domain= option */
	+ if (values)
	+ *(values++) = '=';
	+
	+ if (ft) {
	+ if (inverse)
	+ f->block &= ~(ft->type);
	+ else
	+ f->block |= ft->type;
	+ if (ft->fn && values)
	+ ft->fn(f, values);
	+ } else {
	+ /* DEBUG */
	+ fprintf(stderr, "unknown option: '%s' in rule: %s\n",
	+ key.name, f->uri);
	+ }
	+
	+ /* restore ',' */
	+ if (p) {
	+ *p = ',';
	+ s = p + 1;
	+ }
	+ } while (p);
	+end:
	+
	+ return 1;
	+}
	+
	+void
	+debugrule(struct filterrule *r)
	+{
	+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===…
	+ r->uri ? r->uri : "", r->css ? r->css : "", r->isexception, r-…
	+}
	+
	+struct filterrule *
	+loadrules(FILE *fp)
	+{
	+ char *line = NULL;
	+ size_t linesiz = 0;
	+ ssize_t n;
	+ struct filterrule f, *r, *rn = NULL, *rules = NULL;
	+
	+ /* TODO: handle ferror() */
	+ /* load rules */
	+ while ((n = getline(&line, &linesiz, fp)) > 0) {
	+ if (line[n - 1] == '\n')
	+ line[--n] = '\0';
	+ if (n > 0 && line[n - 1] == '\r')
	+ line[--n] = '\0';
	+
	+ if (parserule(&f, line)) {
	+ r = ecalloc(1, sizeof(struct filterrule));
	+ if (!rules)
	+ rules = rn = r;
	+ else
	+ rn = rn->next = r;
	+ memcpy(rn, &f, sizeof(struct filterrule));
	+ r->line = estrdup(line); /* DEBUG */
	+ }
	+ }
	+ return rules;
	+}
	+
	+Page *
	+newpage(WebKitWebPage *page)
	+{
	+ Page *p;
	+
	+ p = ecalloc(1, sizeof(Page));
	+ p->next = pages;
	+ pages = p;
	+
	+ p->id = webkit_web_page_get_id(page);
	+ p->webpage = page;
	+
	+ return p;
	+}
	+
	+static void
	+documentloaded(WebKitWebPage *wp, Page *p)
	+{
	+ WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
	+ WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
	+ WebKitDOMElement *el;
	+ char sitecss[1000000] = ""; /* TODO: dynamic allocate */
	+ struct filterrule *r;
	+ char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage));
	+ char *domain, *s;
	+
	+ /* TODO: improve, hacky */
	+ if ((domain = strstr(uri, "://"))) {
	+ domain += sizeof("://") - 1;
	+ } else {
	+ domain = uri;
	+ }
	+ if ((s = strchr(domain, '/')))
	+ *s = '\0';
	+
	+ printf("uri: %s\n", uri);
	+ printf("domain: %s\n", domain);
	+
	+ /* site-specific CSS */
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css || !r->domains || !matchrule(r, "", "", domain))
	+ continue;
	+ strlcat(sitecss, r->css, sizeof(sitecss));
	+ strlcat(sitecss, "{display:none;}", sizeof(sitecss));
	+ }
	+ printf("sitecss: %s\n", sitecss);
	+
	+ p->view = webkit_dom_document_get_default_view(doc);
	+
	+ el = webkit_dom_document_create_element(doc, "style", NULL);
	+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
	+ webkit_dom_element_set_inner_html(el, globalcss, NULL);
	+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el…
	+
	+ el = webkit_dom_document_create_element(doc, "style", NULL);
	+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
	+ webkit_dom_element_set_inner_html(el, sitecss, NULL);
	+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el…
	+
	+ free(uri);
	+}
	+
	+static gboolean
	+sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
	+ WebKitURIResponse *res, Page *p)
	+{
	+ struct filterrule *r;
	+
	+ char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage));
	+ char *requri = estrdup((char *)webkit_uri_request_get_uri(req));
	+ char *domain, *s;
	+
	+ /* TODO: improve, hacky */
	+ if ((domain = strstr(uri, "://"))) {
	+ domain += sizeof("://") - 1;
	+ } else {
	+ domain = uri;
	+ }
	+
	+ if ((s = strchr(domain, '/')))
	+ *s = '\0';
	+
	+ /* match rules */
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css && matchrule(r, requri, "csio^", domain)) {
	+ printf("requri: %s\n", requri);
	+ printf("uri: %s\n", uri);
	+ printf("domain: %s\n", domain);
	+
	+ fprintf(stderr, "blocked: %s, %s\n", domain, requri);
	+ free(uri);
	+ free(requri);
	+ return TRUE;
	+ }
	+ }
	+ free(uri);
	+ free(requri);
	+
	+ return FALSE;
	+}
	+
	+static void
	+objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp, WebKitFrame *f, Page *p)
	+{
	+}
	+
	+static void
	+webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
	+{
	+ Page *np = newpage(p);
	+ WebKitScriptWorld *w = webkit_script_world_get_default();
	+
	+ g_signal_connect(p, "send-request",
	+ G_CALLBACK(sendrequest), np);
	+ g_signal_connect(w, "window-object-cleared",
	+ G_CALLBACK(objectcleared), np);
	+ g_signal_connect(p, "document-loaded",
	+ G_CALLBACK(documentloaded), np);
	+}
	+
	+G_MODULE_EXPORT void
	+webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant…
	+{
	+ struct filterrule *r;
	+ FILE *fp;
	+ const char *filename = "/home/hiltjo/adblock_rules";
	+
	+ if (!(fp = fopen(filename, "r"))) {
	+ fprintf(stderr, "cannot read rules from file: %s\n", filename);
	+ return;
	+ }
	+ if (!(rules = loadrules(fp))) {
	+ fprintf(stderr, "cannot load adblock rules\n");
	+ return;
	+ }
	+ fclose(fp);
	+
	+ /* general rules: all sites */
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css || r->domains)
	+ continue;
	+ strlcat(globalcss, r->css, sizeof(globalcss));
	+ strlcat(globalcss, "{display:none;}", sizeof(globalcss));
	+ }
	+
	+ g_signal_connect(e, "page-created",
	+ G_CALLBACK(webpagecreated), NULL);
	+}