Introduction
Introduction Statistics Contact Development Disclaimer Help
initial insertion (twss) - surf-adblock - Surf adblock web extension
git clone git://git.codemadness.org/surf-adblock
Log
Files
Refs
README
LICENSE
---
commit 888e9c8a2237bf9c374e04737f103e91cdc30267
Author: Hiltjo Posthuma <[email protected]>
Date: Thu, 14 Jul 2016 22:18:31 +0200
initial insertion (twss)
Diffstat:
A LICENSE | 16 ++++++++++++++++
A Makefile | 35 +++++++++++++++++++++++++++++…
A README | 1 +
A TODO | 11 +++++++++++
A config.mk | 32 +++++++++++++++++++++++++++++…
A dl.sh | 20 ++++++++++++++++++++
A surf-adblock.c | 781 +++++++++++++++++++++++++++++…
7 files changed, 896 insertions(+), 0 deletions(-)
---
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,16 @@
+ISC License
+
+Copyright (c) 2016 Hiltjo Posthuma <[email protected]>
+Copyright (c) 2016 Quentin Rameau <[email protected]>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,35 @@
+include config.mk
+
+SRC = surf-adblock.c
+OBJ = ${SRC:.c=.lo}
+
+all: surf-adblock.la
+
+.SUFFIXES: .la .lo .o .c
+
+.c.o:
+	${CC} -c ${CFLAGS} $<
+
+.c.lo:
+	${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $<
+
+${OBJ}: config.mk
+
+# -rpath must name the final install directory. DESTDIR is only a staging
+# prefix for "make install" and must not end up inside the library.
+surf-adblock.la: ${OBJ}
+	${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \
+		${OBJ} -rpath ${LIBPREFIX}
+
+clean:
+	rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o}
+
+# "all" already builds surf-adblock.la
+install: all
+	mkdir -p ${DESTDIR}${LIBPREFIX}
+	${LIBTOOL} --mode install install -c surf-adblock.la \
+		${DESTDIR}${LIBPREFIX}/surf-adblock.la
+
+uninstall:
+	${LIBTOOL} --mode uninstall rm -f \
+		${DESTDIR}${LIBPREFIX}/surf-adblock.la
+	rm -df ${DESTDIR}${LIBPREFIX} || true
+
+.PHONY: all clean install uninstall
diff --git a/README b/README
@@ -0,0 +1 @@
+adblock parser (WIP)
diff --git a/TODO b/TODO
@@ -0,0 +1,11 @@
+Docs:
+- https://adblockplus.org/en/filter-cheatsheet
+- https://adblockplus.org/filters
+
+- separate between site-specific and global block rules.
+- optimize matching.
+- optimize memory allocation.
+- optimize: pregenerate one global stylesheet that applies to all sites?
+? support exception rules #@#
+
+
diff --git a/config.mk b/config.mk
@@ -0,0 +1,32 @@
+VERSION = 0.1
+
+# Customize below to fit your system
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+LIBPREFIX = ${PREFIX}/lib/surf
+
+X11INC = /usr/X11R6/include
+X11LIB = /usr/X11R6/lib
+
+GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0`
+GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0`
+WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
+WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
+
+# includes and libs
+INCS = -I. -I/usr/include -I${X11INC} ${GTKINC}
+LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0
+
+# flags
+CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SO…
+CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS}
+LDFLAGS = -s ${LIBS}
+LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API
+LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS}
+LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined
+
+# compiler and linker
+CC = cc
+LIBTOOL = libtool --quiet
diff --git a/dl.sh b/dl.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# download adblock lists and merge them into the file "list".
+
+set -e
+
+tmp="list.tmp.$$"
+trap 'rm -f "$tmp"' EXIT
+
+# Fetch into a temporary file rather than piping straight into awk: a
+# pipeline only reports the status of its last command, which would hide
+# a failed download from "set -e".
+while read -r l; do
+	# -f: fail on HTTP errors instead of saving the error page.
+	curl -fL "$l"
+	echo "" # not all lists have a newline at EOF.
+done > "$tmp" <<!
+https://easylist.github.io/easylist/easylist.txt
+https://easylist.github.io/easylist/easyprivacy.txt
+https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
+https://easylist-downloads.adblockplus.org/easylistdutch.txt
+https://easylist.github.io/easylistgermany/easylistgermany.txt
+https://easylist-downloads.adblockplus.org/liste_fr.txt
+https://easylist.github.io/easylist/fanboy-annoyance.txt
+https://easylist.github.io/easylist/fanboy-social.txt
+!
+
+# remove duplicate lines but keep the order.
+awk '{if(!x[$0]++){print $0;}}' "$tmp" > list
diff --git a/surf-adblock.c b/surf-adblock.c
@@ -0,0 +1,781 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <webkit2/webkit-web-extension.h>
+#include <webkitdom/webkitdom.h>
+#include <webkitdom/WebKitDOMDOMWindowUnstable.h>
+
+/* Per-page state; entries are chained through the global `pages` list. */
+typedef struct Page {
+ guint64 id; /* value of webkit_web_page_get_id() for `webpage` */
+ WebKitWebPage *webpage;
+ WebKitDOMDOMWindow *view; /* default view, set once a document has loaded */
+ struct Page *next;
+} Page;
+
+/* One entry of a rule's domain list (singly linked). */
+struct filterdomain {
+ char *domain; /* heap-allocated domain name */
+ int inverse; /* non-zero if the entry was prefixed with '~' */
+ struct filterdomain *next;
+};
+
+/* A single parsed filter line: a URI pattern rule or, when `css` is set,
+ * an element hiding rule. Rules are chained through `next`. */
+struct filterrule {
+ char *line; /* DEBUG */
+ /* type: match mask, must be at least 32-bit, see FilterType enum */
+ unsigned long block;
+ int matchbegin; /* non-zero: matchrule() adds no leading '*' to the pattern */
+ int matchend; /* non-zero: matchrule() adds no trailing '*' to the pattern */
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
+ int isexception;
+ char *css; /* if non-NULL is CSS rule / hide element rule */
+ char *uri; /* URI pattern, without the options after '$' */
+ struct filterdomain *domains; /* NULL if the rule is not limited to domains */
+ struct filterrule *next;
+};
+
+/* Bit flags stored in filterrule.block and filtertype.type.
+ * The bit positions are significant: matchrule() indexes its string of
+ * type characters by bit number, so keep both in the same order. */
+enum {
+ FilterTypeScript = 1 << 0,
+ FilterTypeImage = 1 << 1,
+ FilterTypeCSS = 1 << 2,
+ FilterTypeObject = 1 << 3,
+ FilterTypeXHR = 1 << 4,
+ FilterTypeObjectSub = 1 << 5,
+ FilterTypeSubDoc = 1 << 6,
+ FilterTypePing = 1 << 7,
+ FilterTypeDocument = 1 << 8,
+ FilterTypeElemHide = 1 << 9,
+ FilterTypeOther = 1 << 10,
+ FilterTypeGenericHide = 1 << 11,
+ FilterTypeGenericBlock = 1 << 12,
+ FilterTypeMatchCase = 1 << 13,
+};
+
+/* Description of one rule option (the part of a rule after '$');
+ * parserule() looks entries up by `name`. */
+struct filtertype {
+ /* `type` must be at least 32-bit, see FilterType enum */
+ unsigned long type; /* flag set or cleared in filterrule.block, 0 for none */
+ char *name;
+ size_t namelen; /* length of `name`, filled in by STRP() */
+ /* NOTE(review): the three flags below do not appear to be read by the
+  * parsing code yet - confirm before relying on them. */
+ int allowinverse;
+ int allownormal;
+ int onlyexception;
+ void (*fn)(struct filterrule *, char *); /* optional parser for the text after '=' */
+};
+
+static void parsedomainsoption(struct filterrule *, char *);
+
+/* expands to two initializers: the string literal and its length */
+#define STRP(s) s,sizeof(s)-1
+
+struct filtertype filtertypes[] = {
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
+ * requests for now */
+ { 0, STRP("collapse"), 1, 1, 0, NULL },
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
+ { 0, STRP("domain"), 0, 1, 0, &parsedo…
+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
+ { 0, STRP("font"), 1, 1, 0, NULL },
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
+ { 0, STRP("media"), 1, 1, 0, NULL },
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
+ { 0, STRP("popup"), 1, 1, 0, NULL },
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
+ { 0, STRP("third-party"), 1, 1, 0, NULL },
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
+ /* NOTE: site-key not supported */
+};
+
+static Page *pages; /* head of the page list, see newpage() */
+/* stylesheet text built from the CSS rules that have no domain list */
+static char globalcss[5000000]; /* TEST: dynamic allocate later */
+static struct filterrule *rules; /* all loaded rules, in file order */
+
+/* calloc() wrapper that never returns NULL: on allocation failure the
+ * error is reported on stderr and the process exits with status 1. */
+void *
+ecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem == NULL) {
+		fprintf(stderr, "calloc: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	return mem;
+}
+
+/* strdup() wrapper that never returns NULL: on failure the error is
+ * reported on stderr and the process exits with status 1.
+ * The caller owns the returned copy and releases it with free(). */
+char *
+estrdup(const char *s)
+{
+	char *copy = strdup(s);
+
+	if (copy == NULL) {
+		fprintf(stderr, "strdup: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	return copy;
+}
+
+/* Token codes used by pat_next() and match() for pattern elements that
+ * are not literal characters. END is 0, all others are negative.
+ * BRACKET is not produced by pat_next(). */
+#define END 0
+#define UNMATCHABLE -2
+#define BRACKET -3
+#define CARET -4
+#define STAR -5
+
+/* Decode the next character of `str`, looking at no more than `n` bytes.
+ * The number of bytes consumed is stored in `*step`.
+ * Returns the character, 0 when `n` is 0 (with *step = 0), or -1 on an
+ * invalid multibyte sequence (with *step = 1). */
+static int
+str_next(const char *str, size_t n, size_t *step)
+{
+	wchar_t wide;
+	int len;
+
+	if (n == 0) {
+		*step = 0;
+		return 0;
+	}
+
+	/* single-byte character (including NUL): returned as-is */
+	if (str[0] < 128U) {
+		*step = 1;
+		return str[0];
+	}
+
+	/* lead byte of a multibyte sequence */
+	len = mbtowc(&wide, str, n);
+	if (len < 0) {
+		*step = 1;
+		return -1;
+	}
+	*step = len;
+
+	return wide;
+}
+
+/* Decode the next element of the pattern `pat`, looking at no more than
+ * `m` bytes. The number of bytes consumed is stored in `*step`.
+ * Returns END at the end of the pattern, CARET for '^', STAR for '*',
+ * UNMATCHABLE (with *step = 0) on an invalid multibyte sequence, or the
+ * literal character otherwise. A backslash makes the following character
+ * literal. */
+static int
+pat_next(const char *pat, size_t m, size_t *step)
+{
+	int escaped = 0;
+	wchar_t wide;
+	int len;
+
+	if (m == 0 || *pat == '\0') {
+		*step = 0;
+		return END;
+	}
+
+	if (pat[0] == '\\' && pat[1] != '\0') {
+		/* skip the backslash, the next character has no special meaning */
+		escaped = 1;
+		pat++;
+		*step = 2;
+	} else {
+		*step = 1;
+		if (pat[0] == '^')
+			return CARET;
+		if (pat[0] == '*')
+			return STAR;
+	}
+
+	/* single-byte character: returned as-is */
+	if (pat[0] < 128U)
+		return pat[0];
+
+	len = mbtowc(&wide, pat, m);
+	if (len < 0) {
+		*step = 0;
+		return UNMATCHABLE;
+	}
+	*step = len + escaped;
+
+	return wide;
+}
+
+/* Return `k` with its case swapped: the upper case form if that differs
+ * from `k`, the lower case form otherwise. */
+static int
+casefold(int k)
+{
+	int upper = towupper(k);
+
+	if (upper != k)
+		return upper;
+
+	return towlower(k);
+}
+
+/* match() based on musl-libc fnmatch:
+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
+int
+match(const char *pat, const char *str, int fcase)
+{
+ size_t m = -1, n = -1;
+ const char *p, *ptail, *endpat;
+ const char *s, *stail, *endstr;
+ size_t pinc, sinc, tailcnt=0;
+ int c, k, kfold;
+
+ for (;;) {
+ switch ((c = pat_next(pat, m, &pinc))) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
+ if (k <= 0)
+ return (c==END) ? 0 : 1;
+ if (c == CARET && (iswdigit(k) || iswalpha(k) || strch…
+ return 1;
+ str += sinc;
+ n -= sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ pat+=pinc;
+ m-=pinc;
+ continue;
+ }
+ break;
+ }
+
+ /* Compute real pat length if it was initially unknown/-1 */
+ m = strnlen(pat, m);
+ endpat = pat + m;
+
+ /* Find the last * in pat and count chars needed after it */
+ for (p=ptail=pat; p<endpat; p+=pinc) {
+ switch (pat_next(p, endpat-p, &pinc)) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ tailcnt=0;
+ ptail = p+1;
+ break;
+ default:
+ tailcnt++;
+ break;
+ }
+ }
+
+ /* Past this point we need not check for UNMATCHABLE in pat,
+ * because all of pat has already been parsed once. */
+
+ /* Compute real str length if it was initially unknown/-1 */
+ n = strnlen(str, n);
+ endstr = str + n;
+ if (n < tailcnt) return 1;
+
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return 1;
+ stail = s;
+
+ /* Check that the pat and str tails match */
+ p = ptail;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+ if (c != END) return 1;
+ break;
+ }
+ s += sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ }
+
+ /* We're all done with the tails now, so throw them out */
+ endstr = stail;
+ endpat = ptail;
+
+ /* Match pattern components until there are none left */
+ while (pat<endpat) {
+ p = pat;
+ s = str;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ /* Encountering * completes/commits a component */
+ if (c == STAR) {
+ pat = p;
+ str = s;
+ break;
+ }
+ k = str_next(s, endstr-s, &sinc);
+ if (!k)
+ return 1;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ break;
+ s += sinc;
+ }
+ if (c == STAR) continue;
+ /* If we failed, advance str, by 1 char if it's a valid
+ * char, or past all invalid bytes otherwise. */
+ k = str_next(str, endstr-str, &sinc);
+ if (k > 0) str += sinc;
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Split `s` into a linked list of domains. Entries are separated by `sep`
+ * (',' for element hiding rules, '|' for the domain= option). An entry
+ * prefixed with '~' is marked as inverse. Empty entries are skipped
+ * instead of ending the list, so "a.com,,b.com" yields both domains.
+ *
+ * `s` is only read. Returns the head of the new list, or NULL when `s`
+ * contains no domain. The list nodes and their domain strings are heap
+ * allocated.
+ */
+struct filterdomain *
+parsedomains(char *s, int sep)
+{
+	struct filterdomain *head = NULL, *last = NULL, *d;
+	char *end;
+	size_t len;
+	int inverse;
+
+	for (;;) {
+		/* the entry runs up to the next separator or the end of `s` */
+		if (!(end = strchr(s, sep)))
+			end = s + strlen(s);
+
+		inverse = 0;
+		if (s < end && *s == '~') {
+			inverse = 1;
+			s++;
+		}
+
+		len = (size_t)(end - s);
+		if (len > 0) {
+			d = ecalloc(1, sizeof(*d));
+			d->inverse = inverse;
+			/* ecalloc() zero-fills, so the copy is NUL-terminated */
+			d->domain = ecalloc(len + 1, 1);
+			memcpy(d->domain, s, len);
+
+			if (last)
+				last->next = d;
+			else
+				head = d;
+			last = d;
+		}
+
+		if (!*end)
+			break;
+		s = end + 1;
+	}
+
+	return head;
+}
+
+/* Parse the comma-separated domain list of an element hiding rule and
+ * append the resulting entries to the domain list of `f`. */
+void
+parsedomainselement(struct filterrule *f, char *s)
+{
+	struct filterdomain **tail = &f->domains;
+
+	/* walk to the link that terminates the current list */
+	while (*tail)
+		tail = &(*tail)->next;
+
+	*tail = parsedomains(s, ',');
+}
+
+/* Parse the '|'-separated value of a domain= option and append the
+ * resulting entries to the domain list of `f`. */
+void
+parsedomainsoption(struct filterrule *f, char *s)
+{
+	struct filterdomain **tail = &f->domains;
+
+	/* walk to the link that terminates the current list */
+	while (*tail)
+		tail = &(*tail)->next;
+
+	*tail = parsedomains(s, '|');
+}
+
+int
+filtertype_cmp(const void *a, const void *b)
+{
+ return strcmp(((struct filtertype *)a)->name, ((struct filtertype *)b)…
+}
+
+/* Return 1 if `domain` equals `s` or is a subdomain of `s` (that is, it
+ * ends in "." followed by `s`), 0 otherwise. The comparison is
+ * case-sensitive. */
+int
+matchdomain(const char *s, const char *domain)
+{
+	size_t slen = strlen(s);
+	size_t dlen = strlen(domain);
+	const char *tail;
+
+	/* `s` is more specific than `domain`, or a different domain */
+	if (slen > dlen)
+		return 0;
+	if (slen == dlen)
+		return strcmp(s, domain) == 0;
+
+	/* `domain` is longer: its tail must be `s`, preceded by a dot */
+	tail = domain + (dlen - slen);
+	if (tail[-1] != '.')
+		return 0;
+
+	return strcmp(tail, s) == 0;
+}
+
+int
+matchrule(struct filterrule *f, const char *uri, const char *type, const char …
+{
+ /* NOTE: order matters, see FilterType enum values */
+ const char *blockstr = "sicoxOSpde^";
+ struct filterdomain *d;
+ char pat[1024];
+ int r, i;
+
+ /* ignore exception rules for now, these are usually paid
+ * for by sites to allow advertisements. */
+ if (f->isexception)
+ return 0;
+
+ if (f->css) {
+ r = f->domains ? 0 : 1;
+ for (d = f->domains; d; d = d->next) {
+ if (matchdomain(d->domain, domain)) {
+ if (r && d->inverse)
+ r = 0;
+ else if (!r && !d->inverse)
+ r = 1;
+ } else if (r && !d->inverse) {
+ r = 0;
+ }
+ }
+ return r;
+ }
+
+ r = snprintf(pat, sizeof(pat), "%s%s%s",
+ f->matchbegin ? "" : "*",
+ f->uri,
+ f->matchend ? "" : "*");
+ if (r == -1 || (size_t)r >= sizeof(pat)) {
+ fprintf(stderr, "warning: pattern too large, ignoring\n");
+ return 0;
+ }
+
+ r = f->domains ? 0 : 1;
+ for (d = f->domains; d; d = d->next) {
+ if (matchdomain(d->domain, domain)) {
+ if (r && d->inverse)
+ r = 0;
+ else if (!r && !d->inverse)
+ r = 1;
+ } else if (r && !d->inverse) {
+ r = 0;
+ }
+ }
+
+ if (r && !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
+ for (; *type; type++) {
+ for (i = 0; blockstr[i]; i++) {
+ if (blockstr[i] == *type &&
+ f->block & (1 << i)) {
+ printf("block type '%c'\n", blockstr[i…
+ }
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int
+parserule(struct filterrule *f, char *s)
+{
+ struct filtertype key, *ft;
+ int inverse = 0;
+ char *p, *values;
+
+ if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
+ return 0; /* skip comment or empty line */
+ for (; *s && isspace(*s); s++)
+ ;
+ if (!*s)
+ return 0; /* line had only whitespace: skip */
+
+ memset(f, 0, sizeof(struct filterrule));
+
+ if ((p = strstr(s, "#@#"))) {
+ *p = '\0';
+ parsedomainselement(f, s);
+ *p = '#';
+ f->css = estrdup(p + 3);
+ f->isexception = 1;
+ goto end; /* end of CSS rule */
+ }
+
+ /* element hiding rule, NOTE: no wildcards are supported,
+ "Simplified element hiding syntax" is not supported. */
+ if ((p = strstr(s, "##"))) {
+ *p = '\0';
+ parsedomainselement(f, s);
+ *p = '#';
+ f->css = estrdup(p + 2);
+ goto end; /* end of rule */
+ }
+
+ if (!strncmp(s, "@@", 2)) {
+ f->isexception = 1;
+ s += 2;
+ }
+ if (*s == '|') {
+ s++;
+ if (*s == '|') {
+ f->matchbegin = 1;
+ s++;
+ } else {
+ f->matchend = 1;
+ }
+ }
+
+ /* no options, use rest of line as uri. */
+ if (!(p = strrchr(s, '$'))) {
+ f->uri = estrdup(s);
+ goto end;
+ }
+
+ /* has options */
+ *p = '\0';
+ f->uri = estrdup(s);
+ *p = '$';
+ s = ++p;
+
+ /* blockmask, has options? default: allow all options, case-sensitive
+ * has no options? default: block all options, case-sensitive */
+ f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
+ do {
+ if ((p = strchr(s, ',')))
+ *p = '\0';
+ /* match option */
+ inverse = 0;
+ if (*s == '~') {
+ inverse = 1;
+ s++;
+ }
+ if ((values = strchr(s, '=')))
+ *(values) = '\0';
+ key.name = s;
+
+ ft = bsearch(&key, &filtertypes,
+ sizeof(filtertypes) / sizeof(*filtertypes), sizeof(*filter…
+ filtertype_cmp);
+
+ /* restore NUL-terminator for domain= option */
+ if (values)
+ *(values++) = '=';
+
+ if (ft) {
+ if (inverse)
+ f->block &= ~(ft->type);
+ else
+ f->block |= ft->type;
+ if (ft->fn && values)
+ ft->fn(f, values);
+ } else {
+ /* DEBUG */
+ fprintf(stderr, "unknown option: '%s' in rule: %s\n",
+ key.name, f->uri);
+ }
+
+ /* restore ',' */
+ if (p) {
+ *p = ',';
+ s = p + 1;
+ }
+ } while (p);
+end:
+
+ return 1;
+}
+
+void
+debugrule(struct filterrule *r)
+{
+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===…
+ r->uri ? r->uri : "", r->css ? r->css : "", r->isexception, r-…
+}
+
+/*
+ * Read filter rules from `fp`, one per line, and return them as a linked
+ * list in file order. Lines rejected by parserule() (comments, blank
+ * lines) are skipped. Returns NULL if no rule was read.
+ *
+ * A read error is reported on stderr; the rules read up to that point are
+ * still returned.
+ */
+struct filterrule *
+loadrules(FILE *fp)
+{
+	char *line = NULL;
+	size_t linesiz = 0;
+	ssize_t n;
+	struct filterrule f, *r, *last = NULL, *head = NULL;
+
+	while ((n = getline(&line, &linesiz, fp)) > 0) {
+		/* strip the line terminator: "\n" or "\r\n" */
+		if (line[n - 1] == '\n')
+			line[--n] = '\0';
+		if (n > 0 && line[n - 1] == '\r')
+			line[--n] = '\0';
+
+		if (!parserule(&f, line))
+			continue;
+
+		r = ecalloc(1, sizeof(*r));
+		*r = f;
+		r->line = estrdup(line); /* DEBUG */
+
+		if (last)
+			last->next = r;
+		else
+			head = r;
+		last = r;
+	}
+	if (ferror(fp))
+		fprintf(stderr, "warning: error while reading rules: %s\n",
+		        strerror(errno));
+
+	/* getline() allocated the buffer; it is ours to release */
+	free(line);
+
+	return head;
+}
+
+/* Allocate the state for `page`, put it at the front of the global
+ * `pages` list and return it. */
+Page *
+newpage(WebKitWebPage *page)
+{
+	Page *entry = ecalloc(1, sizeof(Page));
+
+	entry->webpage = page;
+	entry->id = webkit_web_page_get_id(page);
+
+	/* push onto the front of the list */
+	entry->next = pages;
+	pages = entry;
+
+	return entry;
+}
+
+static void
+documentloaded(WebKitWebPage *wp, Page *p)
+{
+ WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
+ WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
+ WebKitDOMElement *el;
+ char sitecss[1000000] = ""; /* TODO: dynamic allocate */
+ struct filterrule *r;
+ char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage));
+ char *domain, *s;
+
+ /* TODO: improve, hacky */
+ if ((domain = strstr(uri, "://"))) {
+ domain += sizeof("://") - 1;
+ } else {
+ domain = uri;
+ }
+ if ((s = strchr(domain, '/')))
+ *s = '\0';
+
+ printf("uri: %s\n", uri);
+ printf("domain: %s\n", domain);
+
+ /* site-specific CSS */
+ for (r = rules; r; r = r->next) {
+ if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+ continue;
+ strlcat(sitecss, r->css, sizeof(sitecss));
+ strlcat(sitecss, "{display:none;}", sizeof(sitecss));
+ }
+ printf("sitecss: %s\n", sitecss);
+
+ p->view = webkit_dom_document_get_default_view(doc);
+
+ el = webkit_dom_document_create_element(doc, "style", NULL);
+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
+ webkit_dom_element_set_inner_html(el, globalcss, NULL);
+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el…
+
+ el = webkit_dom_document_create_element(doc, "style", NULL);
+ webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
+ webkit_dom_element_set_inner_html(el, sitecss, NULL);
+ webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el…
+
+ free(uri);
+}
+
+/*
+ * "send-request" handler: check the outgoing request against the loaded
+ * URI rules, using the host of the page's URI as the domain.
+ * Returns TRUE as soon as a rule matches (which, per the send-request
+ * signal documentation, makes WebKit drop the request), FALSE otherwise.
+ * `wp` and `res` are unused.
+ */
+static gboolean
+sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
+            WebKitURIResponse *res, Page *p)
+{
+	const char *pageuri = webkit_web_page_get_uri(p->webpage);
+	const char *requri = webkit_uri_request_get_uri(req);
+	struct filterrule *r;
+	char *uri, *domain, *s;
+	gboolean blocked = FALSE;
+
+	/* NOTE(review): either URI may presumably be NULL (e.g. before the
+	 * first load); let the request through rather than crash in
+	 * strdup(). */
+	if (!pageuri || !requri)
+		return FALSE;
+
+	/* private copy: the host part is cut out in place below */
+	uri = estrdup(pageuri);
+
+	/* TODO: improve, hacky: a port or userinfo stays part of `domain` */
+	if ((domain = strstr(uri, "://")))
+		domain += sizeof("://") - 1;
+	else
+		domain = uri;
+	if ((s = strchr(domain, '/')))
+		*s = '\0';
+
+	/* match rules */
+	for (r = rules; r; r = r->next) {
+		if (r->css || !matchrule(r, requri, "csio^", domain))
+			continue;
+
+		printf("requri: %s\n", requri);
+		printf("uri: %s\n", uri);
+		printf("domain: %s\n", domain);
+
+		fprintf(stderr, "blocked: %s, %s\n", domain, requri);
+		blocked = TRUE;
+		break;
+	}
+	free(uri);
+
+	return blocked;
+}
+
+/* Handler connected to the "window-object-cleared" signal of the default
+ * script world in webpagecreated(); currently a no-op. */
+static void
+objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp, WebKitFrame *f, Page *p)
+{
+}
+
+/* "page-created" handler: register state for the new page and connect
+ * the per-page signal handlers, passing that state as user data. */
+static void
+webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
+{
+	Page *state = newpage(p);
+	WebKitScriptWorld *world = webkit_script_world_get_default();
+
+	g_signal_connect(p, "send-request", G_CALLBACK(sendrequest), state);
+	g_signal_connect(world, "window-object-cleared",
+	                 G_CALLBACK(objectcleared), state);
+	g_signal_connect(p, "document-loaded", G_CALLBACK(documentloaded), state);
+}
+
+G_MODULE_EXPORT void
+webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant…
+{
+ struct filterrule *r;
+ FILE *fp;
+ const char *filename = "/home/hiltjo/adblock_rules";
+
+ if (!(fp = fopen(filename, "r"))) {
+ fprintf(stderr, "cannot read rules from file: %s\n", filename);
+ return;
+ }
+ if (!(rules = loadrules(fp))) {
+ fprintf(stderr, "cannot load adblock rules\n");
+ return;
+ }
+ fclose(fp);
+
+ /* general rules: all sites */
+ for (r = rules; r; r = r->next) {
+ if (!r->css || r->domains)
+ continue;
+ strlcat(globalcss, r->css, sizeof(globalcss));
+ strlcat(globalcss, "{display:none;}", sizeof(globalcss));
+ }
+
+ g_signal_connect(e, "page-created",
+ G_CALLBACK(webpagecreated), NULL);
+}
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.