GopherProxy

	separate adblock and surf-specific code - surf-adblock - Surf adblock web exten…
	git clone git://git.codemadness.org/surf-adblock
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 08e747efa80a44603f80db0fdacb3f63ad210b8e
	parent c4841f33f64cd77b30def9c0b11d1ac9ece7f821
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sun, 4 Jun 2017 14:15:27 +0200

	separate adblock and surf-specific code

	this will be useful later on for writing test-cases also or re-using
	the code in a daemon.

	Diffstat:
	M Makefile | 2 +-
	M TODO | 2 ++
	A adblock.c | 956 +++++++++++++++++++++++++++++…
	M surf-adblock.c | 944 +----------------------------…

	4 files changed, 973 insertions(+), 931 deletions(-)
	---
	diff --git a/Makefile b/Makefile
	@@ -1,6 +1,6 @@
	include config.mk

	-SRC = surf-adblock.c
	+SRC = surf-adblock.c adblock.c
	OBJ = ${SRC:.c=.lo}

	all: surf-adblock.la
	diff --git a/TODO b/TODO
	@@ -1,3 +1,5 @@
	+- optimize towupper for fnmatch? check < 128, see musl optimization.
	+
	- fix blocking of : ||ads.somesite.com^

	- fix tweakers.net popup / rule.
	diff --git a/adblock.c b/adblock.c
	@@ -0,0 +1,956 @@
	+#include <sys/stat.h>
	+#include <sys/types.h>
	+
	+#include <ctype.h>
	+#include <errno.h>
	+#include <fcntl.h>
	+#include <limits.h>
	+#include <stdarg.h>
	+#include <stdio.h>
	+#include <stdlib.h>
	+#include <string.h>
	+#include <wchar.h>
	+#include <wctype.h>
	+
	+#include "adblock.h"
	+
	+/* String data / memory pool */
	+typedef struct string {
	+ char *data; /* data */
	+ size_t datasz; /* allocated size */
	+ size_t len; /* current string length */
	+} String;
	+
	+struct filterdomain {
	+ char *domain;
	+ int inverse;
	+ struct filterdomain *next;
	+};
	+
	+struct filterrule {
	+ /* type: match mask, must be atleast 32-bit, see FilterType enum */
	+ unsigned long block;
	+ int matchbegin;
	+ int matchend;
	+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
	+ int isexception;
	+ char *css; /* if non-NULL is CSS rule / hide element rule */
	+ char *uri;
	+ struct filterdomain *domains;
	+ struct filterrule *next;
	+};
	+
	+enum {
	+ FilterTypeScript = 1 << 0,
	+ FilterTypeImage = 1 << 1,
	+ FilterTypeCSS = 1 << 2,
	+ FilterTypeObject = 1 << 3,
	+ FilterTypeXHR = 1 << 4,
	+ FilterTypeObjectSub = 1 << 5,
	+ FilterTypeSubDoc = 1 << 6,
	+ FilterTypePing = 1 << 7,
	+ FilterTypeDocument = 1 << 8,
	+ FilterTypeElemHide = 1 << 9,
	+ FilterTypeOther = 1 << 10,
	+ FilterTypeGenericHide = 1 << 11,
	+ FilterTypeGenericBlock = 1 << 12,
	+ FilterTypeMatchCase = 1 << 13,
	+};
	+
	+struct filtertype {
	+ /* `type` must be atleast 32-bit, see FilterType enum */
	+ unsigned long type;
	+ char *name;
	+ size_t namelen;
	+ int allowinverse;
	+ int allownormal;
	+ int onlyexception;
	+ int (*fn)(struct filterrule *, char *);
	+};
	+
	+static int parsedomainsoption(struct filterrule *, char *);
	+
	+#define STRP(s) s,sizeof(s)-1
	+
	+static struct filtertype filtertypes[] = {
	+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
	+ * requests for now */
	+ { 0, STRP("collapse"), 1, 1, 0, NULL },
	+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
	+ { 0, STRP("domain"), 0, 1, 0,
	+ /* domain=... */ &parsedomainsoption },
	+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
	+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
	+ { 0, STRP("font"), 1, 1, 0, NULL },
	+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
	+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
	+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
	+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
	+ { 0, STRP("media"), 1, 1, 0, NULL },
	+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
	+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
	+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
	+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
	+ { 0, STRP("popup"), 1, 1, 0, NULL },
	+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
	+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
	+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
	+ { 0, STRP("third-party"), 1, 1, 0, NULL },
	+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
	+ /* NOTE: site-key not supported */
	+};
	+
	+static String globalcss;
	+static struct filterrule *rules;
	+
	+static void
	+weprintf(const char *fmt, ...)
	+{
	+ va_list ap;
	+
	+ fprintf(stderr, "surf-adblock: ");
	+
	+ va_start(ap, fmt);
	+ vfprintf(stderr, fmt, ap);
	+ va_end(ap);
	+}
	+
	+static void *
	+wecalloc(size_t nmemb, size_t size)
	+{
	+ void *p;
	+
	+ if (!(p = calloc(nmemb, size)))
	+ weprintf("calloc: %s\n", strerror(errno));
	+
	+ return p;
	+}
	+
	+static char *
	+westrndup(const char *s, size_t n)
	+{
	+ char *p;
	+
	+ if (!(p = strndup(s, n)))
	+ weprintf("strndup: %s\n", strerror(errno));
	+ return p;
	+}
	+
	+static char *
	+westrdup(const char *s)
	+{
	+ char *p;
	+
	+ if (!(p = strdup(s)))
	+ weprintf("strdup: %s\n", strerror(errno));
	+
	+ return p;
	+}
	+
	+void
	+cleanup(void)
	+{
	+ struct filterrule *r;
	+ struct filterdomain *d;
	+
	+ free(globalcss.data);
	+
	+ for (r = rules; r; r = rules) {
	+ for (d = r->domains; d; d = r->domains) {
	+ free(d->domain);
	+ r->domains = d->next;
	+ free(d);
	+ }
	+ free(r->css);
	+ free(r->uri);
	+ rules = r->next;
	+ free(r);
	+ }
	+}
	+
	+static size_t
	+string_buffer_realloc(String *s, size_t newsz)
	+{
	+ char *tmp;
	+ size_t allocsz;
	+
	+ for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
	+ ;
	+ if (!(tmp = realloc(s->data, allocsz))) {
	+ weprintf("realloc: %s\n", strerror(errno));
	+ } else {
	+ s->data = tmp;
	+ s->datasz = allocsz;
	+ }
	+
	+ return s->datasz;
	+}
	+
	+static size_t
	+string_append(String *s, const char *data, size_t len)
	+{
	+ size_t newlen;
	+
	+ if (!len)
	+ return len;
	+
	+ newlen = s->len + len;
	+ /* check if allocation is necesary, don't shrink buffer,
	+ * should be more than datasz ofcourse. */
	+ if (newlen >= s->datasz) {
	+ if (string_buffer_realloc(s, newlen + 1) <= newlen)
	+ return 0;
	+ }
	+ memcpy(s->data + s->len, data, len);
	+ s->len = newlen;
	+ s->data[s->len] = '\0';
	+ return len;
	+}
	+
	+#define END 0
	+#define UNMATCHABLE -2
	+#define BRACKET -3
	+#define CARET -4
	+#define STAR -5
	+
	+static int
	+str_next(const char *str, size_t n, size_t *step)
	+{
	+ if (!n) {
	+ *step = 0;
	+ return 0;
	+ }
	+ if (str[0] >= 128U) {
	+ wchar_t wc;
	+ int k = mbtowc(&wc, str, n);
	+ if (k<0) {
	+ *step = 1;
	+ return -1;
	+ }
	+ *step = k;
	+ return wc;
	+ }
	+ *step = 1;
	+
	+ return str[0];
	+}
	+
	+static int
	+pat_next(const char *pat, size_t m, size_t *step)
	+{
	+ int esc = 0;
	+
	+ if (!m || !*pat) {
	+ *step = 0;
	+ return END;
	+ }
	+ *step = 1;
	+ if (pat[0]=='\\' && pat[1]) {
	+ *step = 2;
	+ pat++;
	+ esc = 1;
	+ goto escaped;
	+ }
	+ if (pat[0]=='^')
	+ return CARET;
	+ if (pat[0] == '*')
	+ return STAR;
	+escaped:
	+ if (pat[0] >= 128U) {
	+ wchar_t wc;
	+ int k = mbtowc(&wc, pat, m);
	+ if (k<0) {
	+ *step = 0;
	+ return UNMATCHABLE;
	+ }
	+ *step = k + esc;
	+ return wc;
	+ }
	+ return pat[0];
	+}
	+
	+static int
	+casefold(int k)
	+{
	+ int c = towupper(k);
	+ return c == k ? towlower(k) : c;
	+}
	+
	+/* match() based on musl-libc fnmatch:
	+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
	+static int
	+match(const char *pat, const char *str, int fcase)
	+{
	+ size_t m = -1, n = -1;
	+ const char *p, *ptail, *endpat;
	+ const char *s, *stail, *endstr;
	+ size_t pinc, sinc, tailcnt=0;
	+ int c, k, kfold;
	+
	+ for (;;) {
	+ switch ((c = pat_next(pat, m, &pinc))) {
	+ case UNMATCHABLE:
	+ return 1;
	+ case STAR:
	+ pat++;
	+ m--;
	+ break;
	+ default:
	+ k = str_next(str, n, &sinc);
	+ /* TODO: write a test-case */
	+ if (c == CARET && (k == '?' || k == '/' || k <= 0))
	+ return 1;
	+ if (k <= 0)
	+ return (c==END) ? 0 : 1;
	+ str += sinc;
	+ n -= sinc;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ return 1;
	+ pat+=pinc;
	+ m-=pinc;
	+ continue;
	+ }
	+ break;
	+ }
	+
	+ /* Compute real pat length if it was initially unknown/-1 */
	+ m = strnlen(pat, m);
	+ endpat = pat + m;
	+
	+ /* Find the last * in pat and count chars needed after it */
	+ for (p=ptail=pat; p<endpat; p+=pinc) {
	+ switch (pat_next(p, endpat-p, &pinc)) {
	+ case UNMATCHABLE:
	+ return 1;
	+ case STAR:
	+ tailcnt=0;
	+ ptail = p+1;
	+ break;
	+ default:
	+ tailcnt++;
	+ break;
	+ }
	+ }
	+
	+ /* Past this point we need not check for UNMATCHABLE in pat,
	+ * because all of pat has already been parsed once. */
	+
	+ /* Compute real str length if it was initially unknown/-1 */
	+ n = strnlen(str, n);
	+ endstr = str + n;
	+ if (n < tailcnt) return 1;
	+
	+ /* Find the final tailcnt chars of str, accounting for UTF-8.
	+ * On illegal sequences we may get it wrong, but in that case
	+ * we necessarily have a matching failure anyway. */
	+ for (s=endstr; s>str && tailcnt; tailcnt--) {
	+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
	+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
	+ }
	+ if (tailcnt) return 1;
	+ stail = s;
	+
	+ /* Check that the pat and str tails match */
	+ p = ptail;
	+ for (;;) {
	+ c = pat_next(p, endpat-p, &pinc);
	+ p += pinc;
	+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
	+ if (c != END) return 1;
	+ break;
	+ }
	+ s += sinc;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ return 1;
	+ }
	+
	+ /* We're all done with the tails now, so throw them out */
	+ endstr = stail;
	+ endpat = ptail;
	+
	+ /* Match pattern components until there are none left */
	+ while (pat<endpat) {
	+ p = pat;
	+ s = str;
	+ for (;;) {
	+ c = pat_next(p, endpat-p, &pinc);
	+ p += pinc;
	+ /* Encountering * completes/commits a component */
	+ if (c == STAR) {
	+ pat = p;
	+ str = s;
	+ break;
	+ }
	+ k = str_next(s, endstr-s, &sinc);
	+ if (!k)
	+ return 1;
	+ kfold = fcase ? casefold(k) : k;
	+ if (k != c && kfold != c)
	+ break;
	+ s += sinc;
	+ }
	+ if (c == STAR) continue;
	+ /* If we failed, advance str, by 1 char if it's a valid
	+ * char, or past all invalid bytes otherwise. */
	+ k = str_next(str, endstr-str, &sinc);
	+ if (k > 0) str += sinc;
	+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
	+ }
	+
	+ return 0;
	+}
	+
	+/*
	+domain=... if domain is prefixed with ~, ignore.
	+multiple domains can be separated with |
	+*/
	+static int
	+parsedomains(const char *s, int sep, struct filterdomain **head)
	+{
	+ struct filterdomain *d, *last = *head = NULL;
	+ char *p;
	+ int inverse;
	+
	+ do {
	+ inverse = 0;
	+ if (*s == '~') {
	+ inverse = !inverse;
	+ s++;
	+ }
	+ if (!*s || *s == sep)
	+ break;
	+
	+ if (!(d = wecalloc(1, sizeof(struct filterdomain))))
	+ return -1;
	+ if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
	+ d->domain = westrndup(s, p - s);
	+ s = p + 1;
	+ } else {
	+ d->domain = westrdup(s);
	+ }
	+ if (!d->domain)
	+ return -1;
	+ d->inverse = inverse;
	+
	+ if (!*head)
	+ *head = last = d;
	+ else
	+ last = last->next = d;
	+ } while (p);
	+
	+ return (*head != NULL);
	+}
	+
	+static int
	+parsedomainselement(struct filterrule *f, char *s)
	+{
	+ struct filterdomain *d, *last;
	+
	+ for (last = f->domains; last && last->next; last = last->next)
	+ ;
	+
	+ if (parsedomains(s, ',', &d) < 0)
	+ return -1;
	+ if (last)
	+ last->next = d;
	+ else
	+ f->domains = d;
	+
	+ return (d != NULL);
	+}
	+
	+static int
	+parsedomainsoption(struct filterrule *f, char *s)
	+{
	+ struct filterdomain *d, *last;
	+
	+ for (last = f->domains; last && last->next; last = last->next)
	+ ;
	+
	+ if (parsedomains(s, '|', &d) < 0)
	+ return -1;
	+ if (last)
	+ last->next = d;
	+ else
	+ f->domains = d;
	+
	+ return (d != NULL);
	+}
	+
	+static int
	+filtertype_cmp(const void *a, const void *b)
	+{
	+ return strcmp(((struct filtertype *)a)->name,
	+ ((struct filtertype *)b)->name);
	+}
	+
	+/* check if domain is the same domain or a subdomain of `s` */
	+static int
	+matchdomain(const char *s, const char *domain)
	+{
	+ size_t l1, l2;
	+
	+ l1 = strlen(s);
	+ l2 = strlen(domain);
	+
	+ /* subdomain-specific (longer) or other domain */
	+ if (l1 > l2)
	+ return 0;
	+ /* subdomain */
	+ if (l2 > l1 && domain[l2 - l1 - 1] == '.')
	+ return !strcmp(&domain[l2 - l1], s);
	+
	+ return !strcmp(s, domain);
	+}
	+
	+static int
	+matchrule(struct filterrule *f, const char *uri, const char *type,
	+ const char *domain)
	+{
	+ /* NOTE: order matters, see FilterType enum values */
	+ struct filterdomain *d;
	+ char pat[1024];
	+ int r, m;
	+
	+ r = f->domains ? 0 : 1;
	+ for (d = f->domains; d; d = d->next) {
	+ if (matchdomain(d->domain, domain)) {
	+ if (r && d->inverse)
	+ r = 0;
	+ else if (!r && !d->inverse)
	+ r = 1;
	+ } else if (r && !d->inverse) {
	+ r = 0;
	+ }
	+ }
	+ if (f->css) {
	+ /* DEBUG */
	+#if 0
	+ if (f->isexception)
	+ printf("DEBUG, exception rule, CSS: %s, match? %d\n",
	+ f->css, r);
	+#endif
	+ return r;
	+ }
	+
	+#if 1
	+ /* skip allow rule, TODO: inverse? */
	+ if (!r)
	+ return 0;
	+#endif
	+
	+#if 0
	+ /* DEBUG: test, match if it is a simple pattern */
	+ char *p;
	+ p = strchr(f->uri, '*');
	+ if (!p)
	+ p = strchr(f->uri, '^');
	+ if (!p) {
	+ /* TODO: write a test-case */
	+ if (f->block & FilterTypeMatchCase) {
	+ if (f->matchbegin)
	+ m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
	+ else if (f->matchend)
	+ m = strlen(f->uri) <= strlen(uri) &&
	+ strcmp(&uri[strlen(uri) - strlen(f->ur…
	+ else
	+ m = strstr(uri, f->uri) ? 1 : 0;
	+ } else {
	+ if (f->matchbegin)
	+ m = strncasecmp(uri, f->uri, strlen(f->uri)) =…
	+ else if (f->matchend)
	+ m = strlen(f->uri) <= strlen(uri) &&
	+ strcasecmp(&uri[strlen(uri) - strlen(f…
	+ else
	+ m = strcasestr(uri, f->uri) ? 1 : 0;
	+ }
	+ /*m = r ? !m : m;*/
	+ return m;
	+ }
	+#endif
	+
	+ r = snprintf(pat, sizeof(pat), "%s%s%s",
	+ f->matchbegin ? "" : "*",
	+ f->uri,
	+ f->matchend ? "" : "*");
	+ if (r == -1 || (size_t)r >= sizeof(pat)) {
	+ fprintf(stderr, "warning: pattern too large, ignoring\n");
	+ return 0;
	+ }
	+
	+ m = 0;
	+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
	+#if 0
	+ for (; *type; type++) {
	+ for (i = 0; blockstr[i]; i++) {
	+ if (blockstr[i] == *type &&
	+ f->block & (1 << i))
	+ printf("block type '%c'\n", blockstr[i…
	+ return 1;
	+ }
	+ }
	+ }
	+
	+ return 0;
	+#endif
	+ m = 1;
	+ }
	+ /*m = r ? !m : m;*/
	+ return m;
	+}
	+
	+static int
	+parserule(struct filterrule *f, char *s)
	+{
	+ struct filtertype key, *ft;
	+ int inverse = 0;
	+ char *p, *values;
	+
	+ if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
	+ return 0; /* skip comment or empty line */
	+ for (; *s && isspace(*s); s++)
	+ ;
	+ if (!*s)
	+ return 0; /* line had only whitespace: skip */
	+
	+ memset(f, 0, sizeof(struct filterrule));
	+
	+ if ((p = strstr(s, "#@#"))) {
	+ *p = '\0';
	+ if (parsedomainselement(f, s) < 0)
	+ return -1;
	+ *p = '#';
	+ if (!(f->css = westrdup(p + 3)))
	+ return -1;
	+ f->isexception = 1;
	+ goto end; /* end of CSS rule */
	+ }
	+
	+ /* element hiding rule, NOTE: no wildcards are supported,
	+ "Simplified element hiding syntax" is not supported. */
	+ if ((p = strstr(s, "##"))) {
	+ *p = '\0';
	+ if (parsedomainselement(f, s) < 0)
	+ return -1;
	+ *p = '#';
	+ if (!(f->css = westrdup(p + 2)))
	+ return -1;
	+ goto end; /* end of rule */
	+ }
	+
	+ if (!strncmp(s, "@@", 2)) {
	+ f->isexception = 1;
	+ s += 2;
	+ }
	+ if (*s == '|') {
	+ s++;
	+ if (*s == '|') {
	+ f->matchbegin = 1;
	+ s++;
	+ } else {
	+ f->matchend = 1;
	+ }
	+ }
	+
	+ /* no options, use rest of line as uri. */
	+ if (!(p = strrchr(s, '$'))) {
	+ if (!(f->uri = westrdup(s)))
	+ return -1;
	+ goto end;
	+ }
	+
	+ /* has options */
	+ if (!(f->uri = westrndup(s, p - s)))
	+ return -1;
	+ s = ++p;
	+
	+ /* blockmask, has options? default: allow all options, case-sensitive
	+ * has no options? default: block all options, case-sensitive */
	+ f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
	+ do {
	+ if ((p = strchr(s, ',')))
	+ *p = '\0';
	+ /* match option */
	+ inverse = 0;
	+ if (*s == '~') {
	+ inverse = 1;
	+ s++;
	+ }
	+ if ((values = strchr(s, '=')))
	+ *(values) = '\0';
	+ key.name = s;
	+
	+ ft = bsearch(&key, &filtertypes,
	+ sizeof(filtertypes) / sizeof(*filtertypes),
	+ sizeof(*filtertypes), filtertype_cmp);
	+
	+ /* restore NUL-terminator for domain= option */
	+ if (values)
	+ *(values++) = '=';
	+
	+ if (ft) {
	+ if (inverse)
	+ f->block &= ~(ft->type);
	+ else
	+ f->block |= ft->type;
	+ if (ft->fn && values)
	+ ft->fn(f, values);
	+ } else {
	+ /* DEBUG */
	+ fprintf(stderr, "ignored: unknown option: '%s' "
	+ "in rule: %s\n", key.name, f->uri);
	+ }
	+
	+ /* restore ',' */
	+ if (p) {
	+ *p = ',';
	+ s = p + 1;
	+ }
	+ } while (p);
	+end:
	+
	+ return 1;
	+}
	+
	+#if 0
	+static void
	+debugrule(struct filterrule *r)
	+{
	+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
	+ "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
	+ r->isexception, r->block);
	+}
	+#endif
	+
	+static int
	+loadrules(FILE *fp)
	+{
	+ struct filterrule f, *r, *rn = NULL;
	+ char *line = NULL;
	+ size_t linesiz = 0;
	+ ssize_t n;
	+ int ret;
	+
	+ /* TODO: handle ferror() */
	+ /* load rules */
	+ while ((n = getline(&line, &linesiz, fp)) > 0) {
	+ if (line[n - 1] == '\n')
	+ line[--n] = '\0';
	+ if (n > 0 && line[n - 1] == '\r')
	+ line[--n] = '\0';
	+
	+ if ((ret = parserule(&f, line) > 0)) {
	+ if (!(r = wecalloc(1, sizeof(struct filterrule))))
	+ return -1;
	+ if (!rules)
	+ rules = rn = r;
	+ else
	+ rn = rn->next = r;
	+ memcpy(rn, &f, sizeof(struct filterrule));
	+ } else if (ret < 0) {
	+ return -1;
	+ }
	+ }
	+ return (rules != NULL);
	+}
	+
	+char *
	+getglobalcss(void)
	+{
	+ return globalcss.data;
	+}
	+
	+char *
	+getdocumentcss(const char *uri)
	+{
	+ const char *s;
	+ char domain[256];
	+ String sitecss;
	+ struct filterrule *r;
	+ size_t len;
	+
	+ if (!uri)
	+ return NULL;
	+
	+ if (!(s = strstr(uri, "://")))
	+ return NULL;
	+ s += sizeof("://") - 1;
	+ len = strcspn(s, "/");
	+ memcpy(domain, s, len);
	+ domain[len] = '\0';
	+
	+ printf("uri: %s\n", uri);
	+ printf("domain: %s\n", domain);
	+
	+ /* DEBUG: timing */
	+ struct timespec tp_start, tp_end, tp_diff;
	+ if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
	+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	+ }
	+
	+ /* site-specific CSS */
	+ memset(&sitecss, 0, sizeof(sitecss));
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css || !r->domains || !matchrule(r, "", "", domain))
	+ continue;
	+
	+ len = strlen(r->css);
	+ if (string_append(&sitecss, r->css, len) < len)
	+ goto err;
	+
	+ s = r->isexception ? "{display:initial;}" : "{display:none;}";
	+ len = strlen(s);
	+ if (string_append(&sitecss, s, len) < len)
	+ goto err;
	+ }
	+/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
	+
	+ /* DEBUG: timing */
	+ if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
	+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	+ }
	+
	+ tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
	+ tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
	+ if (tp_diff.tv_nsec < 0) {
	+ tp_diff.tv_sec--;
	+ tp_diff.tv_nsec += 1000000000L;
	+ }
	+
	+ printf("timing: %zu sec, %.3f ms\n",
	+ tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
	+
	+ if (globalcss.data)
	+ printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
	+ if (sitecss.data)
	+ printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
	+
	+ return sitecss.data;
	+
	+err:
	+ free(sitecss.data);
	+ return NULL;
	+}
	+
	+int
	+checkrequest(const char *uri, const char *requri)
	+{
	+ char domain[256];
	+ struct filterrule *r;
	+ const char *s;
	+ size_t len;
	+ int status = 1;
	+
	+ if (!uri || !strcmp(requri, uri))
	+ return 1;
	+
	+ s = strstr(uri, "://") + sizeof("://") - 1;
	+ len = strcspn(s, "/");
	+ memcpy(domain, s, len);
	+ domain[len] = '\0';
	+
	+ /* DEBUG: timing */
	+ struct timespec tp_start, tp_end, tp_diff;
	+ if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
	+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	+ }
	+
	+ /* match rules */
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css && matchrule(r, requri, "csio^", domain)) {
	+ printf("requri: %s\n", requri);
	+ printf("uri: %s\n", uri);
	+ printf("domain: %s\n", domain);
	+
	+ fprintf(stderr, "blocked: %s, %s\n", domain, requri);
	+
	+ /* DEBUG: for showing the timing */
	+ status = 0;
	+ goto end;
	+ /*return 1;*/
	+ }
	+ }
	+
	+end:
	+ /* DEBUG: timing */
	+ if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
	+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	+ }
	+
	+ tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
	+ tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
	+ if (tp_diff.tv_nsec < 0) {
	+ tp_diff.tv_sec--;
	+ tp_diff.tv_nsec += 1000000000L;
	+ }
	+
	+ printf("%s [%s] timing: %zu sec, %.3f ms\n",
	+ requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
	+
	+ return status;
	+}
	+
	+void
	+init(void)
	+{
	+ struct filterrule *r;
	+ FILE *fp;
	+ char filepath[PATH_MAX], *e;
	+ size_t len;
	+ int n;
	+
	+ if ((e = getenv("SURF_ADBLOCK_FILE"))) {
	+ n = snprintf(filepath, sizeof(filepath), "%s", e);
	+ } else {
	+ if (!(e = getenv("HOME")))
	+ e = "";
	+ n = snprintf(filepath, sizeof(filepath),
	+ "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
	+ }
	+ if (n < 0 || (size_t)n >= sizeof(filepath)) {
	+ weprintf("fatal: rules file path too long");
	+ return;
	+ }
	+
	+ if (!(fp = fopen(filepath, "r"))) {
	+ weprintf("fatal: cannot open rules file %s: %s\n",
	+ filepath, strerror(errno));
	+ return;
	+ }
	+
	+ n = loadrules(fp);
	+ fclose(fp);
	+ if (n < 1) {
	+ if (n < 0) {
	+ weprintf("fatal: cannot read rules from file %s: %s\n",
	+ filepath, strerror(errno));
	+ } else {
	+ weprintf("fatal: cannot read any rule from file %s\n",
	+ filepath);
	+ }
	+ return;
	+ }
	+
	+ /* general CSS rules: all sites */
	+ for (r = rules; r; r = r->next) {
	+ if (!r->css || r->domains)
	+ continue;
	+
	+ len = strlen(r->css);
	+ if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
	+ weprintf("cannot load global css selectors "
	+ "in memory\n");
	+ cleanup();
	+ return;
	+ }
	+ len = sizeof("{display:none;}") - 1;
	+ if (string_append(&globalcss, "{display:none;}", len) < len) {
	+ weprintf("cannot append css rule "
	+ "to global css selectors\n");
	+ cleanup();
	+ return;
	+ }
	+ }
	+}
	diff --git a/surf-adblock.c b/surf-adblock.c
	@@ -8,769 +8,30 @@
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	-#include <wchar.h>
	-#include <wctype.h>

	#include <webkit2/webkit-web-extension.h>
	#include <webkitdom/webkitdom.h>

	-/* String data / memory pool */
	-typedef struct string {
	- char *data; /* data */
	- size_t datasz; /* allocated size */
	- size_t len; /* current string length */
	-} String;
	+#include "adblock.h"

	typedef struct Page {
	guint64 id;
	WebKitWebPage *webpage;
	- WebKitDOMDOMWindow *view;
	+ /*WebKitDOMDOMWindow *view;*/
	struct Page *next;
	} Page;

	-struct filterdomain {
	- char *domain;
	- int inverse;
	- struct filterdomain *next;
	-};
	-
	-struct filterrule {
	- /* type: match mask, must be atleast 32-bit, see FilterType enum */
	- unsigned long block;
	- int matchbegin;
	- int matchend;
	- /* is exception rule: prefix @@ for ABP or #@# for CSS */
	- int isexception;
	- char *css; /* if non-NULL is CSS rule / hide element rule */
	- char *uri;
	- struct filterdomain *domains;
	- struct filterrule *next;
	-};
	-
	-enum {
	- FilterTypeScript = 1 << 0,
	- FilterTypeImage = 1 << 1,
	- FilterTypeCSS = 1 << 2,
	- FilterTypeObject = 1 << 3,
	- FilterTypeXHR = 1 << 4,
	- FilterTypeObjectSub = 1 << 5,
	- FilterTypeSubDoc = 1 << 6,
	- FilterTypePing = 1 << 7,
	- FilterTypeDocument = 1 << 8,
	- FilterTypeElemHide = 1 << 9,
	- FilterTypeOther = 1 << 10,
	- FilterTypeGenericHide = 1 << 11,
	- FilterTypeGenericBlock = 1 << 12,
	- FilterTypeMatchCase = 1 << 13,
	-};
	-
	-struct filtertype {
	- /* `type` must be atleast 32-bit, see FilterType enum */
	- unsigned long type;
	- char *name;
	- size_t namelen;
	- int allowinverse;
	- int allownormal;
	- int onlyexception;
	- int (*fn)(struct filterrule *, char *);
	-};
	-
	-static int parsedomainsoption(struct filterrule *, char *);
	-
	-#define STRP(s) s,sizeof(s)-1
	-
	-static struct filtertype filtertypes[] = {
	- /* NOTE: options with 'type' = 0 are silently ignored and treated as
	- * requests for now */
	- { 0, STRP("collapse"), 1, 1, 0, NULL },
	- { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
	- { 0, STRP("domain"), 0, 1, 0,
	- /* domain=... */ &parsedomainsoption },
	- { 0, STRP("donottrack"), 1, 1, 0, NULL },
	- { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
	- { 0, STRP("font"), 1, 1, 0, NULL },
	- { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
	- { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
	- { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
	- { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
	- { 0, STRP("media"), 1, 1, 0, NULL },
	- { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
	- { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
	- { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
	- { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
	- { 0, STRP("popup"), 1, 1, 0, NULL },
	- { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
	- { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
	- { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
	- { 0, STRP("third-party"), 1, 1, 0, NULL },
	- { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
	- /* NOTE: site-key not supported */
	-};
	-
	-static String globalcss;
	static Page *pages;
	-static struct filterrule *rules;
	-
	-static void
	-cleanup(void)
	-{
	- struct filterrule *r;
	- struct filterdomain *d;
	-
	- free(globalcss.data);
	-
	- for (r = rules; r; r = rules) {
	- for (d = r->domains; d; d = r->domains) {
	- free(d->domain);
	- r->domains = d->next;
	- free(d);
	- }
	- free(r->css);
	- free(r->uri);
	- rules = r->next;
	- free(r);
	- }
	-}
	-
	-static void
	-weprintf(const char *fmt, ...)
	-{
	- va_list ap;
	-
	- fprintf(stderr, "surf-adblock: ");
	-
	- va_start(ap, fmt);
	- vfprintf(stderr, fmt, ap);
	- va_end(ap);
	-}
	-
	-static size_t
	-string_buffer_realloc(String *s, size_t newsz)
	-{
	- char *tmp;
	- size_t allocsz;
	-
	- for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
	- ;
	- if (!(tmp = realloc(s->data, allocsz))) {
	- weprintf("realloc: %s\n", strerror(errno));
	- } else {
	- s->data = tmp;
	- s->datasz = allocsz;
	- }
	-
	- return s->datasz;
	-}
	-
	-static size_t
	-string_append(String *s, const char *data, size_t len)
	-{
	- size_t newlen;
	-
	- if (!len)
	- return len;
	-
	- newlen = s->len + len;
	- /* check if allocation is necesary, don't shrink buffer,
	- * should be more than datasz ofcourse. */
	- if (newlen >= s->datasz) {
	- if (string_buffer_realloc(s, newlen + 1) <= newlen)
	- return 0;
	- }
	- memcpy(s->data + s->len, data, len);
	- s->len = newlen;
	- s->data[s->len] = '\0';
	- return len;
	-}
	-
	-static void *
	-wecalloc(size_t nmemb, size_t size)
	-{
	- void *p;
	-
	- if (!(p = calloc(nmemb, size)))
	- weprintf("calloc: %s\n", strerror(errno));
	-
	- return p;
	-}
	-
	-static char *
	-westrndup(const char *s, size_t n)
	-{
	- char *p;
	-
	- if (!(p = strndup(s, n)))
	- weprintf("strndup: %s\n", strerror(errno));
	- return p;
	-}
	-
	-static char *
	-westrdup(const char *s)
	-{
	- char *p;
	-
	- if (!(p = strdup(s)))
	- weprintf("strdup: %s\n", strerror(errno));
	-
	- return p;
	-}
	-
	-#define END 0
	-#define UNMATCHABLE -2
	-#define BRACKET -3
	-#define CARET -4
	-#define STAR -5
	-
	-static int
	-str_next(const char *str, size_t n, size_t *step)
	-{
	- if (!n) {
	- *step = 0;
	- return 0;
	- }
	- if (str[0] >= 128U) {
	- wchar_t wc;
	- int k = mbtowc(&wc, str, n);
	- if (k<0) {
	- *step = 1;
	- return -1;
	- }
	- *step = k;
	- return wc;
	- }
	- *step = 1;
	-
	- return str[0];
	-}
	-
	-static int
	-pat_next(const char *pat, size_t m, size_t *step)
	-{
	- int esc = 0;
	-
	- if (!m || !*pat) {
	- *step = 0;
	- return END;
	- }
	- *step = 1;
	- if (pat[0]=='\\' && pat[1]) {
	- *step = 2;
	- pat++;
	- esc = 1;
	- goto escaped;
	- }
	- if (pat[0]=='^')
	- return CARET;
	- if (pat[0] == '*')
	- return STAR;
	-escaped:
	- if (pat[0] >= 128U) {
	- wchar_t wc;
	- int k = mbtowc(&wc, pat, m);
	- if (k<0) {
	- *step = 0;
	- return UNMATCHABLE;
	- }
	- *step = k + esc;
	- return wc;
	- }
	- return pat[0];
	-}
	-
	-static int
	-casefold(int k)
	-{
	- int c = towupper(k);
	- return c == k ? towlower(k) : c;
	-}
	-
	-/* match() based on musl-libc fnmatch:
	- https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
	-static int
	-match(const char *pat, const char *str, int fcase)
	-{
	- size_t m = -1, n = -1;
	- const char *p, *ptail, *endpat;
	- const char *s, *stail, *endstr;
	- size_t pinc, sinc, tailcnt=0;
	- int c, k, kfold;
	-
	- for (;;) {
	- switch ((c = pat_next(pat, m, &pinc))) {
	- case UNMATCHABLE:
	- return 1;
	- case STAR:
	- pat++;
	- m--;
	- break;
	- default:
	- k = str_next(str, n, &sinc);
	- /* TODO: write a test-case */
	- if (c == CARET && (k == '?' || k == '/' || k <= 0))
	- return 1;
	- if (k <= 0)
	- return (c==END) ? 0 : 1;
	- str += sinc;
	- n -= sinc;
	- kfold = fcase ? casefold(k) : k;
	- if (k != c && kfold != c)
	- return 1;
	- pat+=pinc;
	- m-=pinc;
	- continue;
	- }
	- break;
	- }
	-
	- /* Compute real pat length if it was initially unknown/-1 */
	- m = strnlen(pat, m);
	- endpat = pat + m;
	-
	- /* Find the last * in pat and count chars needed after it */
	- for (p=ptail=pat; p<endpat; p+=pinc) {
	- switch (pat_next(p, endpat-p, &pinc)) {
	- case UNMATCHABLE:
	- return 1;
	- case STAR:
	- tailcnt=0;
	- ptail = p+1;
	- break;
	- default:
	- tailcnt++;
	- break;
	- }
	- }
	-
	- /* Past this point we need not check for UNMATCHABLE in pat,
	- * because all of pat has already been parsed once. */
	-
	- /* Compute real str length if it was initially unknown/-1 */
	- n = strnlen(str, n);
	- endstr = str + n;
	- if (n < tailcnt) return 1;
	-
	- /* Find the final tailcnt chars of str, accounting for UTF-8.
	- * On illegal sequences we may get it wrong, but in that case
	- * we necessarily have a matching failure anyway. */
	- for (s=endstr; s>str && tailcnt; tailcnt--) {
	- if (s[-1] < 128U || MB_CUR_MAX==1) s--;
	- else while ((unsigned char)*--s-0x80U<0x40 && s>str);
	- }
	- if (tailcnt) return 1;
	- stail = s;
	-
	- /* Check that the pat and str tails match */
	- p = ptail;
	- for (;;) {
	- c = pat_next(p, endpat-p, &pinc);
	- p += pinc;
	- if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
	- if (c != END) return 1;
	- break;
	- }
	- s += sinc;
	- kfold = fcase ? casefold(k) : k;
	- if (k != c && kfold != c)
	- return 1;
	- }
	-
	- /* We're all done with the tails now, so throw them out */
	- endstr = stail;
	- endpat = ptail;
	-
	- /* Match pattern components until there are none left */
	- while (pat<endpat) {
	- p = pat;
	- s = str;
	- for (;;) {
	- c = pat_next(p, endpat-p, &pinc);
	- p += pinc;
	- /* Encountering * completes/commits a component */
	- if (c == STAR) {
	- pat = p;
	- str = s;
	- break;
	- }
	- k = str_next(s, endstr-s, &sinc);
	- if (!k)
	- return 1;
	- kfold = fcase ? casefold(k) : k;
	- if (k != c && kfold != c)
	- break;
	- s += sinc;
	- }
	- if (c == STAR) continue;
	- /* If we failed, advance str, by 1 char if it's a valid
	- * char, or past all invalid bytes otherwise. */
	- k = str_next(str, endstr-str, &sinc);
	- if (k > 0) str += sinc;
	- else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
	- }
	-
	- return 0;
	-}
	-
	-/*
	-domain=... if domain is prefixed with ~, ignore.
	-multiple domains can be separated with |
	-*/
	-static int
	-parsedomains(const char *s, int sep, struct filterdomain **head)
	-{
	- struct filterdomain *d, *last = *head = NULL;
	- char *p;
	- int inverse;
	-
	- do {
	- inverse = 0;
	- if (*s == '~') {
	- inverse = !inverse;
	- s++;
	- }
	- if (!*s || *s == sep)
	- break;
	-
	- if (!(d = wecalloc(1, sizeof(struct filterdomain))))
	- return -1;
	- if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
	- d->domain = westrndup(s, p - s);
	- s = p + 1;
	- } else {
	- d->domain = westrdup(s);
	- }
	- if (!d->domain)
	- return -1;
	- d->inverse = inverse;
	-
	- if (!*head)
	- *head = last = d;
	- else
	- last = last->next = d;
	- } while (p);
	-
	- return (*head != NULL);
	-}
	-
	-static int
	-parsedomainselement(struct filterrule *f, char *s)
	-{
	- struct filterdomain *d, *last;
	-
	- for (last = f->domains; last && last->next; last = last->next)
	- ;
	-
	- if (parsedomains(s, ',', &d) < 0)
	- return -1;
	- if (last)
	- last->next = d;
	- else
	- f->domains = d;
	-
	- return (d != NULL);
	-}
	-
	-static int
	-parsedomainsoption(struct filterrule *f, char *s)
	-{
	- struct filterdomain *d, *last;
	-
	- for (last = f->domains; last && last->next; last = last->next)
	- ;
	-
	- if (parsedomains(s, '\|', &d) < 0)
	- return -1;
	- if (last)
	- last->next = d;
	- else
	- f->domains = d;
	-
	- return (d != NULL);
	-}
	-
	-static int
	-filtertype_cmp(const void *a, const void *b)
	-{
	- return strcmp(((struct filtertype *)a)->name,
	- ((struct filtertype *)b)->name);
	-}
	-
	-/* check if domain is the same domain or a subdomain of `s` */
	-static int
	-matchdomain(const char *s, const char *domain)
	-{
	- size_t l1, l2;
	-
	- l1 = strlen(s);
	- l2 = strlen(domain);
	-
	- /* subdomain-specific (longer) or other domain */
	- if (l1 > l2)
	- return 0;
	- /* subdomain */
	- if (l2 > l1 && domain[l2 - l1 - 1] == '.')
	- return !strcmp(&domain[l2 - l1], s);
	-
	- return !strcmp(s, domain);
	-}
	-
	-static int
	-matchrule(struct filterrule *f, const char *uri, const char *type,
	- const char *domain)
	-{
	- /* NOTE: order matters, see FilterType enum values */
	- struct filterdomain *d;
	- char pat[1024];
	- int r, m;
	-
	- r = f->domains ? 0 : 1;
	- for (d = f->domains; d; d = d->next) {
	- if (matchdomain(d->domain, domain)) {
	- if (r && d->inverse)
	- r = 0;
	- else if (!r && !d->inverse)
	- r = 1;
	- } else if (r && !d->inverse) {
	- r = 0;
	- }
	- }
	- if (f->css) {
	- /* DEBUG */
	-#if 0
	- if (f->isexception)
	- printf("DEBUG, exception rule, CSS: %s, match? %d\n",
	- f->css, r);
	-#endif
	- return r;
	- }
	-
	-#if 1
	- /* skip allow rule, TODO: inverse? */
	- if (!r)
	- return 0;
	-#endif
	-
	-#if 0
	- /* DEBUG: test, match if it is a simple pattern */
	- char *p;
	- p = strchr(f->uri, '*');
	- if (!p)
	- p = strchr(f->uri, '^');
	- if (!p) {
	- /* TODO: write a test-case */
	- if (f->block & FilterTypeMatchCase) {
	- if (f->matchbegin)
	- m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
	- else if (f->matchend)
	- m = strlen(f->uri) <= strlen(uri) &&
	- strcmp(&uri[strlen(uri) - strlen(f->ur…
	- else
	- m = strstr(uri, f->uri) ? 1 : 0;
	- } else {
	- if (f->matchbegin)
	- m = strncasecmp(uri, f->uri, strlen(f->uri)) =…
	- else if (f->matchend)
	- m = strlen(f->uri) <= strlen(uri) &&
	- strcasecmp(&uri[strlen(uri) - strlen(f…
	- else
	- m = strcasestr(uri, f->uri) ? 1 : 0;
	- }
	- /*m = r ? !m : m;*/
	- return m;
	- }
	-#endif
	-
	- r = snprintf(pat, sizeof(pat), "%s%s%s",
	- f->matchbegin ? "" : "*",
	- f->uri,
	- f->matchend ? "" : "*");
	- if (r == -1 \|\| (size_t)r >= sizeof(pat)) {
	- fprintf(stderr, "warning: pattern too large, ignoring\n");
	- return 0;
	- }
	-
	- m = 0;
	- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
	-#if 0
	- for (; *type; type++) {
	- for (i = 0; blockstr[i]; i++) {
	- if (blockstr[i] == *type &&
	- f->block & (1 << i))
	- printf("block type '%c'\n", blockstr[i…
	- return 1;
	- }
	- }
	- }
	-
	- return 0;
	-#endif
	- m = 1;
	- }
	- /*m = r ? !m : m;*/
	- return m;
	-}
	-
	-static int
	-parserule(struct filterrule *f, char *s)
	-{
	- struct filtertype key, *ft;
	- int inverse = 0;
	- char *p, *values;
	-
	- if (*s == '!' \|\| (*s == '[' && s[strlen(s) - 1] == ']'))
	- return 0; /* skip comment or empty line */
	- for (; *s && isspace(*s); s++)
	- ;
	- if (!*s)
	- return 0; /* line had only whitespace: skip */
	-
	- memset(f, 0, sizeof(struct filterrule));
	-
	- if ((p = strstr(s, "#@#"))) {
	- *p = '\0';
	- if (parsedomainselement(f, s) < 0)
	- return -1;
	- *p = '#';
	- if (!(f->css = westrdup(p + 3)))
	- return -1;
	- f->isexception = 1;
	- goto end; /* end of CSS rule */
	- }
	-
	- /* element hiding rule, NOTE: no wildcards are supported,
	- "Simplified element hiding syntax" is not supported. */
	- if ((p = strstr(s, "##"))) {
	- *p = '\0';
	- if (parsedomainselement(f, s) < 0)
	- return -1;
	- *p = '#';
	- if (!(f->css = westrdup(p + 2)))
	- return -1;
	- goto end; /* end of rule */
	- }
	-
	- if (!strncmp(s, "@@", 2)) {
	- f->isexception = 1;
	- s += 2;
	- }
	- if (*s == '\|') {
	- s++;
	- if (*s == '\|') {
	- f->matchbegin = 1;
	- s++;
	- } else {
	- f->matchend = 1;
	- }
	- }
	-
	- /* no options, use rest of line as uri. */
	- if (!(p = strrchr(s, '$'))) {
	- if (!(f->uri = westrdup(s)))
	- return -1;
	- goto end;
	- }
	-
	- /* has options */
	- if (!(f->uri = westrndup(s, p - s)))
	- return -1;
	- s = ++p;
	-
	- /* blockmask, has options? default: allow all options, case-sensitive
	- * has no options? default: block all options, case-sensitive */
	- f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
	- do {
	- if ((p = strchr(s, ',')))
	- *p = '\0';
	- /* match option */
	- inverse = 0;
	- if (*s == '~') {
	- inverse = 1;
	- s++;
	- }
	- if ((values = strchr(s, '=')))
	- *(values) = '\0';
	- key.name = s;
	-
	- ft = bsearch(&key, &filtertypes,
	- sizeof(filtertypes) / sizeof(*filtertypes),
	- sizeof(*filtertypes), filtertype_cmp);
	-
	- /* restore NUL-terminator for domain= option */
	- if (values)
	- *(values++) = '=';
	-
	- if (ft) {
	- if (inverse)
	- f->block &= ~(ft->type);
	- else
	- f->block \|= ft->type;
	- if (ft->fn && values)
	- ft->fn(f, values);
	- } else {
	- /* DEBUG */
	- fprintf(stderr, "ignored: unknown option: '%s' "
	- "in rule: %s\n", key.name, f->uri);
	- }
	-
	- /* restore ',' */
	- if (p) {
	- *p = ',';
	- s = p + 1;
	- }
	- } while (p);
	-end:
	-
	- return 1;
	-}
	-
	-#if 0
	-static void
	-debugrule(struct filterrule *r)
	-{
	- printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
	- "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
	- r->isexception, r->block);
	-}
	-#endif
	-
	-static int
	-loadrules(FILE *fp)
	-{
	- struct filterrule f, *r, *rn = NULL;
	- char *line = NULL;
	- size_t linesiz = 0;
	- ssize_t n;
	- int ret;
	-
	- /* TODO: handle ferror() */
	- /* load rules */
	- while ((n = getline(&line, &linesiz, fp)) > 0) {
	- if (line[n - 1] == '\n')
	- line[--n] = '\0';
	- if (n > 0 && line[n - 1] == '\r')
	- line[--n] = '\0';
	-
	- if ((ret = parserule(&f, line) > 0)) {
	- if (!(r = wecalloc(1, sizeof(struct filterrule))))
	- return -1;
	- if (!rules)
	- rules = rn = r;
	- else
	- rn = rn->next = r;
	- memcpy(rn, &f, sizeof(struct filterrule));
	- } else if (ret < 0) {
	- return -1;
	- }
	- }
	- return (rules != NULL);
	-}

	static Page *
	newpage(WebKitWebPage *page)
	{
	Page *p;

	- if (!(p = wecalloc(1, sizeof(Page))))
	+ if (!(p = calloc(1, sizeof(Page)))) {
	+ fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno));
	return NULL;
	+ }
	p->next = pages;
	pages = p;

	@@ -783,146 +44,32 @@ newpage(WebKitWebPage *page)
	static void
	documentloaded(WebKitWebPage *wp, Page *p)
	{
	- char domain[256];
	WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
	WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
	WebKitDOMElement *el;
	- String sitecss;
	- struct filterrule *r;
	- const char *s, *uri = webkit_web_page_get_uri(p->webpage);
	- size_t len;
	-
	- if (!uri)
	- return;
	-
	- s = strstr(uri, "://") + sizeof("://") - 1;
	- len = strcspn(s, "/");
	- memcpy(domain, s, len);
	- domain[len] = '\0';
	-
	- printf("uri: %s\n", uri);
	- printf("domain: %s\n", domain);
	-
	- /* DEBUG: timing */
	- struct timespec tp_start, tp_end, tp_diff;
	- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
	- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	- }
	-
	- /* site-specific CSS */
	- memset(&sitecss, 0, sizeof(sitecss));
	- for (r = rules; r; r = r->next) {
	- if (!r->css \|\| !r->domains \|\| !matchrule(r, "", "", domain))
	- continue;
	-
	- len = strlen(r->css);
	- if (string_append(&sitecss, r->css, len) < len)
	- return;
	-
	- s = r->isexception ? "{display:initial;}" : "{display:none;}";
	- len = strlen(s);
	- if (string_append(&sitecss, s, len) < len)
	- return;
	- }
	-/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
	-
	- /* DEBUG: timing */
	- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
	- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	- }
	-
	- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
	- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
	- if (tp_diff.tv_nsec < 0) {
	- tp_diff.tv_sec--;
	- tp_diff.tv_nsec += 1000000000L;
	- }
	-
	- printf("timing: %zu sec, %.3f ms\n",
	- tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
	+ const char *uri = webkit_web_page_get_uri(p->webpage);
	+ char *css, *globalcss;

	- if (globalcss.data)
	- printf("global CSS length in bytes: %zu\n", strlen(globalcss.d…
	- if (sitecss.data)
	- printf("site CSS length in bytes: %zu\n", strlen(sitecss.data)…
	+ /*p->view = webkit_dom_document_get_default_view(doc);*/

	- p->view = webkit_dom_document_get_default_view(doc);
	-
	- if (globalcss.data) {
	+ if ((globalcss = getglobalcss())) {
	el = webkit_dom_document_create_element(doc, "style", NULL);
	webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
	- webkit_dom_element_set_inner_html(el, globalcss.data, NULL);
	+ webkit_dom_element_set_inner_html(el, globalcss, NULL);
	webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
	WEBKIT_DOM_NODE(el), NULL);
	}

	- if (sitecss.data) {
	+ if ((css = getdocumentcss(uri))) {
	el = webkit_dom_document_create_element(doc, "style", NULL);
	webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
	- webkit_dom_element_set_inner_html(el, sitecss.data, NULL);
	+ webkit_dom_element_set_inner_html(el, css, NULL);
	webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
	WEBKIT_DOM_NODE(el), NULL);
	}

	- free(sitecss.data);
	-}
	-
	-int
	-checkrequest(const char *uri, const char *requri)
	-{
	- char domain[256];
	- struct filterrule *r;
	- const char *s;
	- size_t len;
	- int status = 1;
	-
	- if (!uri \|\| !strcmp(requri, uri))
	- return 1;
	-
	- s = strstr(uri, "://") + sizeof("://") - 1;
	- len = strcspn(s, "/");
	- memcpy(domain, s, len);
	- domain[len] = '\0';
	-
	- /* DEBUG: timing */
	- struct timespec tp_start, tp_end, tp_diff;
	- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
	- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	- }
	-
	- /* match rules */
	- for (r = rules; r; r = r->next) {
	- if (!r->css && matchrule(r, requri, "csio^", domain)) {
	- printf("requri: %s\n", requri);
	- printf("uri: %s\n", uri);
	- printf("domain: %s\n", domain);
	-
	- fprintf(stderr, "blocked: %s, %s\n", domain, requri);
	-
	- /* DEBUG: for showing the timing */
	- status = 0;
	- goto end;
	- /*return 1;*/
	- }
	- }
	-
	-end:
	- /* DEBUG: timing */
	- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
	- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
	- }
	-
	- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
	- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
	- if (tp_diff.tv_nsec < 0) {
	- tp_diff.tv_sec--;
	- tp_diff.tv_nsec += 1000000000L;
	- }
	-
	- printf("%s [%s] timing: %zu sec, %.3f ms\n",
	- requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.…
	-
	- return status;
	+ free(css);
	+ /* NOTE: globalcss free'd at cleanup() */
	}

	static gboolean
	@@ -941,69 +88,6 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
	return status;
	}

	-void
	-init(void)
	-{
	- struct filterrule *r;
	- FILE *fp;
	- char filepath[PATH_MAX], *e;
	- size_t len;
	- int n;
	-
	- if ((e = getenv("SURF_ADBLOCK_FILE"))) {
	- n = snprintf(filepath, sizeof(filepath), "%s", e);
	- } else {
	- if (!(e = getenv("HOME")))
	- e = "";
	- n = snprintf(filepath, sizeof(filepath),
	- "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
	- }
	- if (n < 0 \|\| (size_t)n >= sizeof(filepath)) {
	- weprintf("fatal: rules file path too long");
	- return;
	- }
	-
	- if (!(fp = fopen(filepath, "r"))) {
	- weprintf("fatal: cannot open rules file %s: %s\n",
	- filepath, strerror(errno));
	- return;
	- }
	-
	- n = loadrules(fp);
	- fclose(fp);
	- if (n < 1) {
	- if (n < 0) {
	- weprintf("fatal: cannot read rules from file %s: %s\n",
	- filepath, strerror(errno));
	- } else {
	- weprintf("fatal: cannot read any rule from file %s\n",
	- filepath);
	- }
	- return;
	- }
	-
	- /* general CSS rules: all sites */
	- for (r = rules; r; r = r->next) {
	- if (!r->css \|\| r->domains)
	- continue;
	-
	- len = strlen(r->css);
	- if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
	- weprintf("cannot load global css selectors "
	- "in memory\n");
	- cleanup();
	- return;
	- }
	- len = sizeof("{display:none;}") - 1;
	- if (string_append(&globalcss, "{display:none;}", len) < len) {
	- weprintf("cannot append css rule "
	- "to global css selectors\n");
	- cleanup();
	- return;
	- }
	- }
	-}
	-
	static void
	webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
	{