Introduction
Introduction Statistics Contact Development Disclaimer Help
separate adblock and surf-specific code - surf-adblock - Surf adblock web exten…
git clone git://git.codemadness.org/surf-adblock
Log
Files
Refs
README
LICENSE
---
commit 08e747efa80a44603f80db0fdacb3f63ad210b8e
parent c4841f33f64cd77b30def9c0b11d1ac9ece7f821
Author: Hiltjo Posthuma <[email protected]>
Date: Sun, 4 Jun 2017 14:15:27 +0200
separate adblock and surf-specific code
This will be useful later on for writing test cases, or for re-using
the code in a daemon.
Diffstat:
M Makefile | 2 +-
M TODO | 2 ++
A adblock.c | 956 +++++++++++++++++++++++++++++…
M surf-adblock.c | 944 +----------------------------…
4 files changed, 973 insertions(+), 931 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
include config.mk
-SRC = surf-adblock.c
+SRC = surf-adblock.c adblock.c
OBJ = ${SRC:.c=.lo}
all: surf-adblock.la
diff --git a/TODO b/TODO
@@ -1,3 +1,5 @@
+- optimize towupper for fnmatch? check < 128, see musl optimization.
+
- fix blocking of : ||ads.somesite.com^
- fix tweakers.net popup / rule.
diff --git a/adblock.c b/adblock.c
@@ -0,0 +1,956 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "adblock.h"
+
+/* String data / memory pool: a growable, NUL-terminated byte buffer.
+ * It is grown by string_buffer_realloc() and filled by string_append(). */
+typedef struct string {
+ char *data; /* buffer, (re)allocated with realloc() */
+ size_t datasz; /* allocated size */
+ size_t len; /* current string length, not counting the NUL byte */
+} String;
+
+/* One entry of a rule's domain list (singly linked). */
+struct filterdomain {
+ char *domain; /* heap-allocated domain name, freed in cleanup() */
+ int inverse; /* non-zero if the entry was prefixed with '~' */
+ struct filterdomain *next;
+};
+
+/* One parsed filter rule; rules form a singly linked list via `next`. */
+struct filterrule {
+ /* type: match mask, must be at least 32-bit, see FilterType enum */
+ unsigned long block;
+ /* set by parserule() for a leading "||"; matchrule() then adds no
+  * leading '*' to the pattern */
+ int matchbegin;
+ /* set by parserule() for a single leading '|'; matchrule() then adds
+  * no trailing '*' to the pattern */
+ int matchend;
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
+ int isexception;
+ char *css; /* if non-NULL is CSS rule / hide element rule */
+ char *uri; /* URI pattern of a request rule (left NULL for CSS rules) */
+ struct filterdomain *domains; /* optional domain restrictions */
+ struct filterrule *next;
+};
+
+/* FilterType bit flags: OR-ed together in filterrule.block and used as
+ * filtertype.type. FilterTypeMatchCase is tested by matchrule() to choose
+ * case-sensitive matching. */
+enum {
+ FilterTypeScript = 1 << 0,
+ FilterTypeImage = 1 << 1,
+ FilterTypeCSS = 1 << 2,
+ FilterTypeObject = 1 << 3,
+ FilterTypeXHR = 1 << 4,
+ FilterTypeObjectSub = 1 << 5,
+ FilterTypeSubDoc = 1 << 6,
+ FilterTypePing = 1 << 7,
+ FilterTypeDocument = 1 << 8,
+ FilterTypeElemHide = 1 << 9,
+ FilterTypeOther = 1 << 10,
+ FilterTypeGenericHide = 1 << 11,
+ FilterTypeGenericBlock = 1 << 12,
+ FilterTypeMatchCase = 1 << 13,
+};
+
+/* Description of one rule option (the part after '$'), see filtertypes[]. */
+struct filtertype {
+ /* `type` must be at least 32-bit, see FilterType enum */
+ unsigned long type;
+ char *name; /* option name; the bsearch() key in parserule() */
+ size_t namelen; /* length of `name`, filled in by STRP() */
+ /* NOTE(review): the three flags below are not read by any code in this
+  * file; presumably they describe which forms of the option are valid. */
+ int allowinverse;
+ int allownormal;
+ int onlyexception;
+ /* optional parser for the text after "name=", may be NULL */
+ int (*fn)(struct filterrule *, char *);
+};
+
+static int parsedomainsoption(struct filterrule *, char *);
+
+/* expands to two initializers: the string literal and its length */
+#define STRP(s) s,sizeof(s)-1
+
+/* Table of known rule options.
+ * parserule() looks options up with bsearch() and filtertype_cmp()
+ * (strcmp() on `name`), so the entries MUST stay sorted by name. */
+static struct filtertype filtertypes[] = {
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
+ * requests for now */
+ { 0, STRP("collapse"), 1, 1, 0, NULL },
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
+ { 0, STRP("domain"), 0, 1, 0,
+ /* domain=... */ &parsedomainsoption },
+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
+ { 0, STRP("font"), 1, 1, 0, NULL },
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
+ { 0, STRP("media"), 1, 1, 0, NULL },
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
+ { 0, STRP("popup"), 1, 1, 0, NULL },
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
+ { 0, STRP("third-party"), 1, 1, 0, NULL },
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
+ /* NOTE: site-key not supported */
+};
+
+/* CSS for rules without a domain list; built by init(), returned by
+ * getglobalcss(), freed by cleanup() */
+static String globalcss;
+/* head of the list of loaded rules; appended to by loadrules() */
+static struct filterrule *rules;
+
+/* Print a printf-style warning to stderr, prefixed with "surf-adblock: ".
+ * No newline is added; the caller supplies it in `fmt`. */
+static void
+weprintf(const char *fmt, ...)
+{
+	va_list args;
+
+	fputs("surf-adblock: ", stderr);
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+}
+
+/* calloc() wrapper: on failure a warning is printed and NULL is returned. */
+static void *
+wecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem == NULL)
+		weprintf("calloc: %s\n", strerror(errno));
+
+	return mem;
+}
+
+/* strndup() wrapper: on failure a warning is printed and NULL is returned. */
+static char *
+westrndup(const char *s, size_t n)
+{
+	char *copy = strndup(s, n);
+
+	if (copy == NULL)
+		weprintf("strndup: %s\n", strerror(errno));
+	return copy;
+}
+
+/* strdup() wrapper: on failure a warning is printed and NULL is returned. */
+static char *
+westrdup(const char *s)
+{
+	char *copy = strdup(s);
+
+	if (copy == NULL)
+		weprintf("strdup: %s\n", strerror(errno));
+
+	return copy;
+}
+
+/*
+ * Release the global CSS buffer and every loaded rule, including each
+ * rule's domain list, and leave the module in its initial empty state.
+ */
+void
+cleanup(void)
+{
+	struct filterrule *r;
+	struct filterdomain *d;
+
+	free(globalcss.data);
+	/* reset data/len/datasz too: otherwise getglobalcss() would return a
+	 * dangling pointer and a later string_append() would write into
+	 * freed memory */
+	memset(&globalcss, 0, sizeof(globalcss));
+
+	while ((r = rules)) {
+		rules = r->next;
+		while ((d = r->domains)) {
+			r->domains = d->next;
+			free(d->domain);
+			free(d);
+		}
+		free(r->css);
+		free(r->uri);
+		free(r);
+	}
+}
+
+/*
+ * Grow the buffer of `s` to the smallest size of the form 64 * 2^k that is
+ * strictly greater than `newsz`.
+ * Returns the allocated size afterwards; on failure the buffer is left
+ * untouched and the old size is returned.
+ */
+static size_t
+string_buffer_realloc(String *s, size_t newsz)
+{
+	char *tmp;
+	size_t allocsz;
+
+	for (allocsz = 64; allocsz <= newsz; allocsz *= 2) {
+		/* doubling once more would wrap around to 0 and the loop
+		 * would never terminate */
+		if (allocsz > (size_t)-1 / 2) {
+			weprintf("realloc: %s\n", strerror(ENOMEM));
+			return s->datasz;
+		}
+	}
+	if (!(tmp = realloc(s->data, allocsz))) {
+		weprintf("realloc: %s\n", strerror(errno));
+	} else {
+		s->data = tmp;
+		s->datasz = allocsz;
+	}
+
+	return s->datasz;
+}
+
+/*
+ * Append `len` bytes from `data` to `s`, keeping it NUL-terminated.
+ * Returns `len` on success and 0 if nothing was appended (`len` is 0 or
+ * the buffer could not be grown).
+ */
+static size_t
+string_append(String *s, const char *data, size_t len)
+{
+	size_t total;
+
+	if (len == 0)
+		return 0;
+
+	total = s->len + len;
+	/* grow only when the result plus its NUL byte does not fit;
+	 * the buffer is never shrunk */
+	if (total >= s->datasz &&
+	    string_buffer_realloc(s, total + 1) <= total)
+		return 0;
+
+	memcpy(&s->data[s->len], data, len);
+	s->data[total] = '\0';
+	s->len = total;
+	return len;
+}
+
+/* Token values returned by pat_next(); all special values are <= 0 so they
+ * cannot collide with a character code. */
+#define END 0 /* end of pattern */
+#define UNMATCHABLE -2 /* invalid multibyte sequence in the pattern */
+#define BRACKET -3 /* not returned by pat_next() in this file */
+#define CARET -4 /* '^' in the pattern */
+#define STAR -5 /* '*' in the pattern */
+
+/*
+ * Decode the next character of `str`, looking at no more than `n` bytes.
+ * `*step` receives the number of bytes to advance. Returns the character,
+ * 0 when `n` is 0 (or at a NUL byte), or -1 (with a step of 1) for an
+ * invalid multibyte sequence.
+ */
+static int
+str_next(const char *str, size_t n, size_t *step)
+{
+	wchar_t wide;
+	int nbytes;
+
+	if (n == 0) {
+		*step = 0;
+		return 0;
+	}
+	/* bytes below 0x80 are returned as-is without calling mbtowc() */
+	if ((unsigned char)str[0] < 0x80) {
+		*step = 1;
+		return str[0];
+	}
+
+	nbytes = mbtowc(&wide, str, n);
+	if (nbytes < 0) {
+		*step = 1;
+		return -1;
+	}
+	*step = nbytes;
+	return wide;
+}
+
+/*
+ * Decode the next token of the pattern `pat`, looking at no more than `m`
+ * bytes. `*step` receives the number of bytes to advance.
+ * Returns END at the end of the pattern, CARET for '^', STAR for '*',
+ * UNMATCHABLE for an invalid multibyte sequence, otherwise the character.
+ * A backslash followed by another byte makes that byte literal.
+ */
+static int
+pat_next(const char *pat, size_t m, size_t *step)
+{
+	wchar_t wide;
+	int nbytes, escaped = 0;
+
+	if (m == 0 || *pat == '\0') {
+		*step = 0;
+		return END;
+	}
+
+	*step = 1;
+	if (pat[0] == '\\' && pat[1] != '\0') {
+		/* skip the backslash, take the next character literally */
+		*step = 2;
+		pat++;
+		escaped = 1;
+	} else if (pat[0] == '^') {
+		return CARET;
+	} else if (pat[0] == '*') {
+		return STAR;
+	}
+
+	if ((unsigned char)pat[0] < 0x80)
+		return pat[0];
+
+	nbytes = mbtowc(&wide, pat, m);
+	if (nbytes < 0) {
+		*step = 0;
+		return UNMATCHABLE;
+	}
+	*step = nbytes + escaped;
+	return wide;
+}
+
+/* Return the other-case form of `k`: the uppercase form if that differs
+ * from `k`, otherwise the lowercase form. */
+static int
+casefold(int k)
+{
+	int upper = towupper(k);
+
+	if (upper != k)
+		return upper;
+	return towlower(k);
+}
+
+/* match() based on musl-libc fnmatch:
+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c
+
+ Match the string `str` against the pattern `pat`, in which '*' and '^'
+ are the special tokens (see pat_next()). If `fcase` is non-zero a
+ character also matches its casefold()-ed form.
+ Returns 0 on a match and 1 otherwise.
+
+ NOTE(review): CARET is only special-cased in the first loop below; in the
+ later loops its token value is compared with the string character
+ directly. */
+static int
+match(const char *pat, const char *str, int fcase)
+{
+ size_t m = -1, n = -1;
+ const char *p, *ptail, *endpat;
+ const char *s, *stail, *endstr;
+ size_t pinc, sinc, tailcnt=0;
+ int c, k, kfold;
+
+ /* Match the literal prefix of pat, up to its first '*' */
+ for (;;) {
+ switch ((c = pat_next(pat, m, &pinc))) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
+ /* TODO: write a test-case */
+ if (c == CARET && (k == '?' || k == '/' || k <= 0))
+ return 1;
+ if (k <= 0)
+ return (c==END) ? 0 : 1;
+ str += sinc;
+ n -= sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ pat+=pinc;
+ m-=pinc;
+ continue;
+ }
+ break;
+ }
+
+ /* Compute real pat length if it was initially unknown/-1 */
+ m = strnlen(pat, m);
+ endpat = pat + m;
+
+ /* Find the last * in pat and count chars needed after it */
+ for (p=ptail=pat; p<endpat; p+=pinc) {
+ switch (pat_next(p, endpat-p, &pinc)) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ tailcnt=0;
+ ptail = p+1;
+ break;
+ default:
+ tailcnt++;
+ break;
+ }
+ }
+
+ /* Past this point we need not check for UNMATCHABLE in pat,
+ * because all of pat has already been parsed once. */
+
+ /* Compute real str length if it was initially unknown/-1 */
+ n = strnlen(str, n);
+ endstr = str + n;
+ if (n < tailcnt) return 1;
+
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return 1;
+ stail = s;
+
+ /* Check that the pat and str tails match */
+ p = ptail;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+ if (c != END) return 1;
+ break;
+ }
+ s += sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ }
+
+ /* We're all done with the tails now, so throw them out */
+ endstr = stail;
+ endpat = ptail;
+
+ /* Match pattern components until there are none left */
+ while (pat<endpat) {
+ p = pat;
+ s = str;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ /* Encountering * completes/commits a component */
+ if (c == STAR) {
+ pat = p;
+ str = s;
+ break;
+ }
+ k = str_next(s, endstr-s, &sinc);
+ if (!k)
+ return 1;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ break;
+ s += sinc;
+ }
+ if (c == STAR) continue;
+ /* If we failed, advance str, by 1 char if it's a valid
+ * char, or past all invalid bytes otherwise. */
+ k = str_next(str, endstr-str, &sinc);
+ if (k > 0) str += sinc;
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+ }
+
+ return 0;
+}
+
+/*
+ * Parse a list of domains separated by `sep` into a newly allocated list.
+ * A domain prefixed with '~' is stored with `inverse` set.
+ * Parsing stops at the end of the string or at the first empty entry.
+ *
+ * `*head` receives the list (NULL if no domain was found).
+ * Returns 1 if at least one domain was parsed, 0 if none, or -1 on
+ * allocation failure; in that case everything allocated so far is freed
+ * and `*head` is NULL.
+ */
+static int
+parsedomains(const char *s, int sep, struct filterdomain **head)
+{
+	struct filterdomain *d, *last = NULL;
+	const char *p;
+	int inverse;
+
+	*head = NULL;
+	do {
+		inverse = 0;
+		if (*s == '~') {
+			inverse = 1;
+			s++;
+		}
+		if (!*s || *s == sep)
+			break;
+
+		if (!(d = wecalloc(1, sizeof(*d))))
+			goto err;
+		if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
+			d->domain = westrndup(s, p - s);
+			s = p + 1;
+		} else {
+			d->domain = westrdup(s);
+		}
+		if (!d->domain) {
+			free(d); /* not linked into the list yet */
+			goto err;
+		}
+		d->inverse = inverse;
+
+		if (!*head)
+			*head = d;
+		else
+			last->next = d;
+		last = d;
+	} while (p);
+
+	return (*head != NULL);
+
+err:
+	/* the callers drop the list on failure, so release it here */
+	while ((d = *head)) {
+		*head = d->next;
+		free(d->domain);
+		free(d);
+	}
+	return -1;
+}
+
+/*
+ * Parse the ','-separated domain list `s` of an element hiding rule and
+ * append it to the domains of `f`.
+ * Returns 1 if domains were added, 0 if there were none, -1 on error.
+ */
+static int
+parsedomainselement(struct filterrule *f, char *s)
+{
+	struct filterdomain **tail, *parsed;
+
+	/* find the link at the end of the existing list */
+	for (tail = &f->domains; *tail; tail = &(*tail)->next)
+		;
+
+	if (parsedomains(s, ',', &parsed) < 0)
+		return -1;
+	*tail = parsed;
+
+	return parsed != NULL;
+}
+
+/*
+ * Parse the '|'-separated value `s` of a "domain=" option and append it to
+ * the domains of `f`.
+ * Returns 1 if domains were added, 0 if there were none, -1 on error.
+ */
+static int
+parsedomainsoption(struct filterrule *f, char *s)
+{
+	struct filterdomain **tail, *parsed;
+
+	/* find the link at the end of the existing list */
+	for (tail = &f->domains; *tail; tail = &(*tail)->next)
+		;
+
+	if (parsedomains(s, '|', &parsed) < 0)
+		return -1;
+	*tail = parsed;
+
+	return parsed != NULL;
+}
+
+/* bsearch() comparison function: orders filtertype entries by name. */
+static int
+filtertype_cmp(const void *a, const void *b)
+{
+	const struct filtertype *lhs = a;
+	const struct filtertype *rhs = b;
+
+	return strcmp(lhs->name, rhs->name);
+}
+
+/* Check if `domain` is the same domain as `s` or a subdomain of it.
+ * The comparison is case-sensitive. Returns 1 if so, 0 otherwise. */
+static int
+matchdomain(const char *s, const char *domain)
+{
+	size_t rulelen = strlen(s);
+	size_t hostlen = strlen(domain);
+	const char *suffix;
+
+	/* `s` is more specific (longer) than `domain`: cannot match */
+	if (hostlen < rulelen)
+		return 0;
+	if (hostlen == rulelen)
+		return strcmp(s, domain) == 0;
+
+	/* `domain` is longer: it must end in "." followed by `s` */
+	suffix = domain + (hostlen - rulelen);
+	if (suffix[-1] != '.')
+		return 0;
+	return strcmp(suffix, s) == 0;
+}
+
+/*
+ * Check whether rule `f` applies to the request `uri` made from a page on
+ * `domain`.
+ *
+ * The domain list of the rule is evaluated first. For CSS rules (f->css
+ * set) that result is returned directly. Otherwise the rule's URI pattern
+ * is matched against `uri`: a '*' is added in front unless f->matchbegin is
+ * set and at the end unless f->matchend is set, and matching is
+ * case-insensitive unless FilterTypeMatchCase is set in f->block.
+ *
+ * `type` is currently unused.
+ * Returns 1 if the rule matches, 0 otherwise.
+ */
+static int
+matchrule(struct filterrule *f, const char *uri, const char *type,
+	const char *domain)
+{
+	struct filterdomain *d;
+	char pat[1024];
+	int r, n;
+
+	(void)type; /* matching on the request type is not implemented */
+
+	/* a rule without domains applies everywhere; otherwise each list
+	 * entry can switch the result on or off in turn */
+	r = f->domains ? 0 : 1;
+	for (d = f->domains; d; d = d->next) {
+		if (matchdomain(d->domain, domain)) {
+			if (r && d->inverse)
+				r = 0;
+			else if (!r && !d->inverse)
+				r = 1;
+		} else if (r && !d->inverse) {
+			r = 0;
+		}
+	}
+	if (f->css)
+		return r;
+
+	/* skip allow rule, TODO: inverse? */
+	if (!r)
+		return 0;
+
+	n = snprintf(pat, sizeof(pat), "%s%s%s",
+		f->matchbegin ? "" : "*",
+		f->uri,
+		f->matchend ? "" : "*");
+	/* snprintf() reports an error with any negative value */
+	if (n < 0 || (size_t)n >= sizeof(pat)) {
+		fprintf(stderr, "warning: pattern too large, ignoring\n");
+		return 0;
+	}
+
+	/* match() returns 0 on a match */
+	return !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1);
+}
+
+/*
+ * Parse one line `s` of a filter list into `f`.
+ *
+ * `s` is modified temporarily while parsing and restored afterwards.
+ * Strings stored in `f` are heap-allocated and owned by the caller on
+ * success.
+ *
+ * Returns 1 if a rule was parsed, 0 if the line was skipped (comment,
+ * "[...]" header or blank line) and -1 on error. On error everything
+ * allocated for `f` is freed and `f` is zeroed.
+ */
+static int
+parserule(struct filterrule *f, char *s)
+{
+	struct filtertype key, *ft;
+	struct filterdomain *d;
+	int inverse, rc;
+	char *p, *values;
+
+	if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
+		return 0; /* skip comment or [header] line */
+	/* cast: passing a negative char value to isspace() is undefined */
+	for (; *s && isspace((unsigned char)*s); s++)
+		;
+	if (!*s)
+		return 0; /* line had only whitespace: skip */
+
+	memset(f, 0, sizeof(*f));
+
+	/* element hiding exception rule: domains#@#selector */
+	if ((p = strstr(s, "#@#"))) {
+		*p = '\0';
+		rc = parsedomainselement(f, s);
+		*p = '#';
+		if (rc < 0 || !(f->css = westrdup(p + 3)))
+			goto err;
+		f->isexception = 1;
+		return 1;
+	}
+
+	/* element hiding rule, NOTE: no wildcards are supported,
+	   "Simplified element hiding syntax" is not supported. */
+	if ((p = strstr(s, "##"))) {
+		*p = '\0';
+		rc = parsedomainselement(f, s);
+		*p = '#';
+		if (rc < 0 || !(f->css = westrdup(p + 2)))
+			goto err;
+		return 1;
+	}
+
+	if (!strncmp(s, "@@", 2)) {
+		f->isexception = 1;
+		s += 2;
+	}
+	/* NOTE(review): a single leading '|' sets matchend here, not
+	 * matchbegin; confirm this is intended */
+	if (*s == '|') {
+		s++;
+		if (*s == '|') {
+			f->matchbegin = 1;
+			s++;
+		} else {
+			f->matchend = 1;
+		}
+	}
+
+	/* no options, use rest of line as uri. */
+	if (!(p = strrchr(s, '$'))) {
+		if (!(f->uri = westrdup(s)))
+			goto err;
+		return 1;
+	}
+
+	/* has options */
+	if (!(f->uri = westrndup(s, p - s)))
+		goto err;
+	s = ++p;
+
+	/* start with only the match-case bit when options follow the '$',
+	 * or with all bits set when nothing follows it */
+	f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
+	do {
+		if ((p = strchr(s, ',')))
+			*p = '\0';
+		/* match option */
+		inverse = 0;
+		if (*s == '~') {
+			inverse = 1;
+			s++;
+		}
+		if ((values = strchr(s, '=')))
+			*(values) = '\0';
+		key.name = s; /* the only field filtertype_cmp() reads */
+
+		ft = bsearch(&key, filtertypes,
+			sizeof(filtertypes) / sizeof(*filtertypes),
+			sizeof(*filtertypes), filtertype_cmp);
+
+		/* restore the '=' that was replaced by a NUL-terminator */
+		if (values)
+			*(values++) = '=';
+
+		rc = 0;
+		if (ft) {
+			if (inverse)
+				f->block &= ~(ft->type);
+			else
+				f->block |= ft->type;
+			if (ft->fn && values)
+				rc = ft->fn(f, values);
+		} else {
+			/* DEBUG */
+			fprintf(stderr, "ignored: unknown option: '%s' "
+				"in rule: %s\n", key.name, f->uri);
+		}
+
+		/* restore ',' */
+		if (p) {
+			*p = ',';
+			s = p + 1;
+		}
+		/* do not ignore a failed option parser (out of memory) */
+		if (rc < 0)
+			goto err;
+	} while (p);
+
+	return 1;
+
+err:
+	while ((d = f->domains)) {
+		f->domains = d->next;
+		free(d->domain);
+		free(d);
+	}
+	free(f->css);
+	free(f->uri);
+	memset(f, 0, sizeof(*f));
+	return -1;
+}
+
+/* Disabled debugging helper: prints the fields of a parsed rule. */
+#if 0
+static void
+debugrule(struct filterrule *r)
+{
+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
+ "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
+ r->isexception, r->block);
+}
+#endif
+
+/*
+ * Read filter rules from `fp`, one per line, and append them to the global
+ * `rules` list. A trailing "\n" or "\r\n" is stripped from each line.
+ *
+ * Returns 1 if the list holds at least one rule afterwards, 0 if it is
+ * empty, and -1 on a parse, allocation or read error. Rules appended
+ * before an error stay in the list.
+ */
+static int
+loadrules(FILE *fp)
+{
+	struct filterrule f, *r, *rn;
+	struct filterdomain *d;
+	char *line = NULL;
+	size_t linesiz = 0;
+	ssize_t n;
+	int ret = 0;
+
+	/* continue after any rules that are already loaded */
+	for (rn = rules; rn && rn->next; rn = rn->next)
+		;
+
+	while ((n = getline(&line, &linesiz, fp)) > 0) {
+		if (line[n - 1] == '\n')
+			line[--n] = '\0';
+		if (n > 0 && line[n - 1] == '\r')
+			line[--n] = '\0';
+
+		/* the parentheses matter: `ret = parserule() > 0` would
+		 * store the result of the comparison and hide errors */
+		if ((ret = parserule(&f, line)) < 0)
+			break;
+		if (!ret)
+			continue; /* comment or blank line */
+
+		if (!(r = wecalloc(1, sizeof(*r)))) {
+			/* the parsed rule has no owner yet: release it */
+			while ((d = f.domains)) {
+				f.domains = d->next;
+				free(d->domain);
+				free(d);
+			}
+			free(f.css);
+			free(f.uri);
+			ret = -1;
+			break;
+		}
+		/* parserule() zeroed `f` first, so r->next ends up NULL */
+		memcpy(r, &f, sizeof(f));
+		if (rn)
+			rn->next = r;
+		else
+			rules = r;
+		rn = r;
+	}
+	free(line); /* getline() buffer, also allocated on failure */
+
+	if (ret < 0 || ferror(fp))
+		return -1;
+	return (rules != NULL);
+}
+
+/* Return the CSS built by init() from the rules without a domain list.
+ * The buffer is the internal one (released by cleanup()); it is NULL if
+ * nothing was appended. */
+char *
+getglobalcss(void)
+{
+ return globalcss.data;
+}
+
+/*
+ * Build the site-specific CSS for the document at `uri`: the selectors of
+ * all CSS rules that have a domain list matching the host of `uri`, each
+ * followed by "{display:none;}" or, for exception rules,
+ * "{display:initial;}".
+ *
+ * Returns a newly allocated string, or NULL if `uri` is NULL, has no
+ * "://", has a host that does not fit the domain buffer, no rule matched,
+ * or memory ran out.
+ * Debug and timing information is printed to stdout.
+ */
+char *
+getdocumentcss(const char *uri)
+{
+	const char *s;
+	char domain[256];
+	String sitecss;
+	struct filterrule *r;
+	/* zeroed so the timing is defined even if clock_gettime() fails */
+	struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff;
+	size_t len;
+
+	if (!uri)
+		return NULL;
+
+	if (!(s = strstr(uri, "://")))
+		return NULL;
+	s += sizeof("://") - 1;
+	len = strcspn(s, "/");
+	/* the host comes from the page: never copy more than fits */
+	if (len >= sizeof(domain))
+		return NULL;
+	memcpy(domain, s, len);
+	domain[len] = '\0';
+
+	printf("uri: %s\n", uri);
+	printf("domain: %s\n", domain);
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	/* site-specific CSS */
+	memset(&sitecss, 0, sizeof(sitecss));
+	for (r = rules; r; r = r->next) {
+		if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+			continue;
+
+		len = strlen(r->css);
+		if (string_append(&sitecss, r->css, len) < len)
+			goto err;
+
+		s = r->isexception ? "{display:initial;}" : "{display:none;}";
+		len = strlen(s);
+		if (string_append(&sitecss, s, len) < len)
+			goto err;
+	}
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+	if (tp_diff.tv_nsec < 0) {
+		tp_diff.tv_sec--;
+		tp_diff.tv_nsec += 1000000000L;
+	}
+
+	/* time_t is not size_t: cast and use a matching conversion */
+	printf("timing: %lld sec, %.3f ms\n",
+		(long long)tp_diff.tv_sec,
+		(float)tp_diff.tv_nsec / 1000000.0f);
+
+	if (globalcss.data)
+		printf("global CSS length in bytes: %zu\n",
+			strlen(globalcss.data));
+	if (sitecss.data)
+		printf("site CSS length in bytes: %zu\n",
+			strlen(sitecss.data));
+
+	return sitecss.data;
+
+err:
+	free(sitecss.data);
+	return NULL;
+}
+
+/*
+ * Decide whether the request `requri`, made by the page at `uri`, is
+ * allowed.
+ *
+ * Returns 1 to allow and 0 to block. The request is allowed without
+ * consulting the rules when either argument is NULL, when both are equal,
+ * or when `uri` has no "://" or a host that does not fit the domain
+ * buffer.
+ * Debug and timing information is printed to stdout/stderr.
+ */
+int
+checkrequest(const char *uri, const char *requri)
+{
+	char domain[256];
+	struct filterrule *r;
+	/* zeroed so the timing is defined even if clock_gettime() fails */
+	struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff;
+	const char *s;
+	size_t len;
+	int status = 1;
+
+	if (!uri || !requri || !strcmp(requri, uri))
+		return 1;
+
+	/* same checks as getdocumentcss(): strstr() may return NULL and
+	 * the host may be longer than the buffer */
+	if (!(s = strstr(uri, "://")))
+		return 1;
+	s += sizeof("://") - 1;
+	len = strcspn(s, "/");
+	if (len >= sizeof(domain))
+		return 1;
+	memcpy(domain, s, len);
+	domain[len] = '\0';
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	/* match rules: the first matching request rule blocks */
+	for (r = rules; r; r = r->next) {
+		if (r->css || !matchrule(r, requri, "csio^", domain))
+			continue;
+
+		printf("requri: %s\n", requri);
+		printf("uri: %s\n", uri);
+		printf("domain: %s\n", domain);
+
+		fprintf(stderr, "blocked: %s, %s\n", domain, requri);
+
+		status = 0;
+		break;
+	}
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+	if (tp_diff.tv_nsec < 0) {
+		tp_diff.tv_sec--;
+		tp_diff.tv_nsec += 1000000000L;
+	}
+
+	/* time_t is not size_t: cast and use a matching conversion */
+	printf("%s [%s] timing: %lld sec, %.3f ms\n",
+		requri, uri, (long long)tp_diff.tv_sec,
+		(float)tp_diff.tv_nsec / 1000000.0f);
+
+	return status;
+}
+
+/*
+ * Load the filter rules and build the global CSS.
+ *
+ * The rules file is taken from $SURF_ADBLOCK_FILE or, if that is unset,
+ * from "$HOME/.surf/adblockrules". Failures are reported on stderr and
+ * leave the module without rules; the function returns nothing.
+ */
+void
+init(void)
+{
+	struct filterrule *r;
+	FILE *fp;
+	char filepath[PATH_MAX], *e;
+	size_t len;
+	int n;
+
+	if ((e = getenv("SURF_ADBLOCK_FILE"))) {
+		n = snprintf(filepath, sizeof(filepath), "%s", e);
+	} else {
+		if (!(e = getenv("HOME")))
+			e = "";
+		n = snprintf(filepath, sizeof(filepath),
+			"%s%s.surf/adblockrules", e, e[0] ? "/" : "");
+	}
+	if (n < 0 || (size_t)n >= sizeof(filepath)) {
+		/* newline added: weprintf() does not append one */
+		weprintf("fatal: rules file path too long\n");
+		return;
+	}
+
+	if (!(fp = fopen(filepath, "r"))) {
+		weprintf("fatal: cannot open rules file %s: %s\n",
+			filepath, strerror(errno));
+		return;
+	}
+
+	n = loadrules(fp);
+	fclose(fp);
+	if (n < 1) {
+		if (n < 0) {
+			weprintf("fatal: cannot read rules from file %s: %s\n",
+				filepath, strerror(errno));
+		} else {
+			weprintf("fatal: cannot read any rule from file %s\n",
+				filepath);
+		}
+		return;
+	}
+
+	/* general CSS rules: all sites.
+	 * NOTE(review): exception rules without domains are hidden here as
+	 * well; confirm whether they should be skipped. */
+	for (r = rules; r; r = r->next) {
+		if (!r->css || r->domains)
+			continue;
+
+		len = strlen(r->css);
+		if (string_append(&globalcss, r->css, len) < len) {
+			weprintf("cannot load global css selectors "
+				"in memory\n");
+			cleanup();
+			return;
+		}
+		len = sizeof("{display:none;}") - 1;
+		if (string_append(&globalcss, "{display:none;}", len) < len) {
+			weprintf("cannot append css rule "
+				"to global css selectors\n");
+			cleanup();
+			return;
+		}
+	}
+}
diff --git a/surf-adblock.c b/surf-adblock.c
@@ -8,769 +8,30 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <wchar.h>
-#include <wctype.h>
#include <webkit2/webkit-web-extension.h>
#include <webkitdom/webkitdom.h>
-/* String data / memory pool */
-typedef struct string {
- char *data; /* data */
- size_t datasz; /* allocated size */
- size_t len; /* current string length */
-} String;
+#include "adblock.h"
typedef struct Page {
guint64 id;
WebKitWebPage *webpage;
- WebKitDOMDOMWindow *view;
+ /*WebKitDOMDOMWindow *view;*/
struct Page *next;
} Page;
-struct filterdomain {
- char *domain;
- int inverse;
- struct filterdomain *next;
-};
-
-struct filterrule {
- /* type: match mask, must be atleast 32-bit, see FilterType enum */
- unsigned long block;
- int matchbegin;
- int matchend;
- /* is exception rule: prefix @@ for ABP or #@# for CSS */
- int isexception;
- char *css; /* if non-NULL is CSS rule / hide element rule */
- char *uri;
- struct filterdomain *domains;
- struct filterrule *next;
-};
-
-enum {
- FilterTypeScript = 1 << 0,
- FilterTypeImage = 1 << 1,
- FilterTypeCSS = 1 << 2,
- FilterTypeObject = 1 << 3,
- FilterTypeXHR = 1 << 4,
- FilterTypeObjectSub = 1 << 5,
- FilterTypeSubDoc = 1 << 6,
- FilterTypePing = 1 << 7,
- FilterTypeDocument = 1 << 8,
- FilterTypeElemHide = 1 << 9,
- FilterTypeOther = 1 << 10,
- FilterTypeGenericHide = 1 << 11,
- FilterTypeGenericBlock = 1 << 12,
- FilterTypeMatchCase = 1 << 13,
-};
-
-struct filtertype {
- /* `type` must be atleast 32-bit, see FilterType enum */
- unsigned long type;
- char *name;
- size_t namelen;
- int allowinverse;
- int allownormal;
- int onlyexception;
- int (*fn)(struct filterrule *, char *);
-};
-
-static int parsedomainsoption(struct filterrule *, char *);
-
-#define STRP(s) s,sizeof(s)-1
-
-static struct filtertype filtertypes[] = {
- /* NOTE: options with 'type' = 0 are silently ignored and treated as
- * requests for now */
- { 0, STRP("collapse"), 1, 1, 0, NULL },
- { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
- { 0, STRP("domain"), 0, 1, 0,
- /* domain=... */ &parsedomainsoption },
- { 0, STRP("donottrack"), 1, 1, 0, NULL },
- { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
- { 0, STRP("font"), 1, 1, 0, NULL },
- { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
- { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
- { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
- { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
- { 0, STRP("media"), 1, 1, 0, NULL },
- { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
- { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
- { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
- { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
- { 0, STRP("popup"), 1, 1, 0, NULL },
- { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
- { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
- { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
- { 0, STRP("third-party"), 1, 1, 0, NULL },
- { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
- /* NOTE: site-key not supported */
-};
-
-static String globalcss;
static Page *pages;
-static struct filterrule *rules;
-
-static void
-cleanup(void)
-{
- struct filterrule *r;
- struct filterdomain *d;
-
- free(globalcss.data);
-
- for (r = rules; r; r = rules) {
- for (d = r->domains; d; d = r->domains) {
- free(d->domain);
- r->domains = d->next;
- free(d);
- }
- free(r->css);
- free(r->uri);
- rules = r->next;
- free(r);
- }
-}
-
-static void
-weprintf(const char *fmt, ...)
-{
- va_list ap;
-
- fprintf(stderr, "surf-adblock: ");
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
-}
-
-static size_t
-string_buffer_realloc(String *s, size_t newsz)
-{
- char *tmp;
- size_t allocsz;
-
- for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
- ;
- if (!(tmp = realloc(s->data, allocsz))) {
- weprintf("realloc: %s\n", strerror(errno));
- } else {
- s->data = tmp;
- s->datasz = allocsz;
- }
-
- return s->datasz;
-}
-
-static size_t
-string_append(String *s, const char *data, size_t len)
-{
- size_t newlen;
-
- if (!len)
- return len;
-
- newlen = s->len + len;
- /* check if allocation is necesary, don't shrink buffer,
- * should be more than datasz ofcourse. */
- if (newlen >= s->datasz) {
- if (string_buffer_realloc(s, newlen + 1) <= newlen)
- return 0;
- }
- memcpy(s->data + s->len, data, len);
- s->len = newlen;
- s->data[s->len] = '\0';
- return len;
-}
-
-static void *
-wecalloc(size_t nmemb, size_t size)
-{
- void *p;
-
- if (!(p = calloc(nmemb, size)))
- weprintf("calloc: %s\n", strerror(errno));
-
- return p;
-}
-
-static char *
-westrndup(const char *s, size_t n)
-{
- char *p;
-
- if (!(p = strndup(s, n)))
- weprintf("strndup: %s\n", strerror(errno));
- return p;
-}
-
-static char *
-westrdup(const char *s)
-{
- char *p;
-
- if (!(p = strdup(s)))
- weprintf("strdup: %s\n", strerror(errno));
-
- return p;
-}
-
-#define END 0
-#define UNMATCHABLE -2
-#define BRACKET -3
-#define CARET -4
-#define STAR -5
-
-static int
-str_next(const char *str, size_t n, size_t *step)
-{
- if (!n) {
- *step = 0;
- return 0;
- }
- if (str[0] >= 128U) {
- wchar_t wc;
- int k = mbtowc(&wc, str, n);
- if (k<0) {
- *step = 1;
- return -1;
- }
- *step = k;
- return wc;
- }
- *step = 1;
-
- return str[0];
-}
-
-static int
-pat_next(const char *pat, size_t m, size_t *step)
-{
- int esc = 0;
-
- if (!m || !*pat) {
- *step = 0;
- return END;
- }
- *step = 1;
- if (pat[0]=='\\' && pat[1]) {
- *step = 2;
- pat++;
- esc = 1;
- goto escaped;
- }
- if (pat[0]=='^')
- return CARET;
- if (pat[0] == '*')
- return STAR;
-escaped:
- if (pat[0] >= 128U) {
- wchar_t wc;
- int k = mbtowc(&wc, pat, m);
- if (k<0) {
- *step = 0;
- return UNMATCHABLE;
- }
- *step = k + esc;
- return wc;
- }
- return pat[0];
-}
-
-static int
-casefold(int k)
-{
- int c = towupper(k);
- return c == k ? towlower(k) : c;
-}
-
-/* match() based on musl-libc fnmatch:
- https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
-static int
-match(const char *pat, const char *str, int fcase)
-{
- size_t m = -1, n = -1;
- const char *p, *ptail, *endpat;
- const char *s, *stail, *endstr;
- size_t pinc, sinc, tailcnt=0;
- int c, k, kfold;
-
- for (;;) {
- switch ((c = pat_next(pat, m, &pinc))) {
- case UNMATCHABLE:
- return 1;
- case STAR:
- pat++;
- m--;
- break;
- default:
- k = str_next(str, n, &sinc);
- /* TODO: write a test-case */
- if (c == CARET && (k == '?' || k == '/' || k <= 0))
- return 1;
- if (k <= 0)
- return (c==END) ? 0 : 1;
- str += sinc;
- n -= sinc;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- return 1;
- pat+=pinc;
- m-=pinc;
- continue;
- }
- break;
- }
-
- /* Compute real pat length if it was initially unknown/-1 */
- m = strnlen(pat, m);
- endpat = pat + m;
-
- /* Find the last * in pat and count chars needed after it */
- for (p=ptail=pat; p<endpat; p+=pinc) {
- switch (pat_next(p, endpat-p, &pinc)) {
- case UNMATCHABLE:
- return 1;
- case STAR:
- tailcnt=0;
- ptail = p+1;
- break;
- default:
- tailcnt++;
- break;
- }
- }
-
- /* Past this point we need not check for UNMATCHABLE in pat,
- * because all of pat has already been parsed once. */
-
- /* Compute real str length if it was initially unknown/-1 */
- n = strnlen(str, n);
- endstr = str + n;
- if (n < tailcnt) return 1;
-
- /* Find the final tailcnt chars of str, accounting for UTF-8.
- * On illegal sequences we may get it wrong, but in that case
- * we necessarily have a matching failure anyway. */
- for (s=endstr; s>str && tailcnt; tailcnt--) {
- if (s[-1] < 128U || MB_CUR_MAX==1) s--;
- else while ((unsigned char)*--s-0x80U<0x40 && s>str);
- }
- if (tailcnt) return 1;
- stail = s;
-
- /* Check that the pat and str tails match */
- p = ptail;
- for (;;) {
- c = pat_next(p, endpat-p, &pinc);
- p += pinc;
- if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
- if (c != END) return 1;
- break;
- }
- s += sinc;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- return 1;
- }
-
- /* We're all done with the tails now, so throw them out */
- endstr = stail;
- endpat = ptail;
-
- /* Match pattern components until there are none left */
- while (pat<endpat) {
- p = pat;
- s = str;
- for (;;) {
- c = pat_next(p, endpat-p, &pinc);
- p += pinc;
- /* Encountering * completes/commits a component */
- if (c == STAR) {
- pat = p;
- str = s;
- break;
- }
- k = str_next(s, endstr-s, &sinc);
- if (!k)
- return 1;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- break;
- s += sinc;
- }
- if (c == STAR) continue;
- /* If we failed, advance str, by 1 char if it's a valid
- * char, or past all invalid bytes otherwise. */
- k = str_next(str, endstr-str, &sinc);
- if (k > 0) str += sinc;
- else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
- }
-
- return 0;
-}
-
-/*
-domain=... if domain is prefixed with ~, ignore.
-multiple domains can be separated with |
-*/
-static int
-parsedomains(const char *s, int sep, struct filterdomain **head)
-{
- struct filterdomain *d, *last = *head = NULL;
- char *p;
- int inverse;
-
- do {
- inverse = 0;
- if (*s == '~') {
- inverse = !inverse;
- s++;
- }
- if (!*s || *s == sep)
- break;
-
- if (!(d = wecalloc(1, sizeof(struct filterdomain))))
- return -1;
- if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
- d->domain = westrndup(s, p - s);
- s = p + 1;
- } else {
- d->domain = westrdup(s);
- }
- if (!d->domain)
- return -1;
- d->inverse = inverse;
-
- if (!*head)
- *head = last = d;
- else
- last = last->next = d;
- } while (p);
-
- return (*head != NULL);
-}
-
-static int
-parsedomainselement(struct filterrule *f, char *s)
-{
- struct filterdomain *d, *last;
-
- for (last = f->domains; last && last->next; last = last->next)
- ;
-
- if (parsedomains(s, ',', &d) < 0)
- return -1;
- if (last)
- last->next = d;
- else
- f->domains = d;
-
- return (d != NULL);
-}
-
-static int
-parsedomainsoption(struct filterrule *f, char *s)
-{
- struct filterdomain *d, *last;
-
- for (last = f->domains; last && last->next; last = last->next)
- ;
-
- if (parsedomains(s, '|', &d) < 0)
- return -1;
- if (last)
- last->next = d;
- else
- f->domains = d;
-
- return (d != NULL);
-}
-
-static int
-filtertype_cmp(const void *a, const void *b)
-{
- return strcmp(((struct filtertype *)a)->name,
- ((struct filtertype *)b)->name);
-}
-
-/* check if domain is the same domain or a subdomain of `s` */
-static int
-matchdomain(const char *s, const char *domain)
-{
- size_t l1, l2;
-
- l1 = strlen(s);
- l2 = strlen(domain);
-
- /* subdomain-specific (longer) or other domain */
- if (l1 > l2)
- return 0;
- /* subdomain */
- if (l2 > l1 && domain[l2 - l1 - 1] == '.')
- return !strcmp(&domain[l2 - l1], s);
-
- return !strcmp(s, domain);
-}
-
-static int
-matchrule(struct filterrule *f, const char *uri, const char *type,
- const char *domain)
-{
- /* NOTE: order matters, see FilterType enum values */
- struct filterdomain *d;
- char pat[1024];
- int r, m;
-
- r = f->domains ? 0 : 1;
- for (d = f->domains; d; d = d->next) {
- if (matchdomain(d->domain, domain)) {
- if (r && d->inverse)
- r = 0;
- else if (!r && !d->inverse)
- r = 1;
- } else if (r && !d->inverse) {
- r = 0;
- }
- }
- if (f->css) {
- /* DEBUG */
-#if 0
- if (f->isexception)
- printf("DEBUG, exception rule, CSS: %s, match? %d\n",
- f->css, r);
-#endif
- return r;
- }
-
-#if 1
- /* skip allow rule, TODO: inverse? */
- if (!r)
- return 0;
-#endif
-
-#if 0
- /* DEBUG: test, match if it is a simple pattern */
- char *p;
- p = strchr(f->uri, '*');
- if (!p)
- p = strchr(f->uri, '^');
- if (!p) {
- /* TODO: write a test-case */
- if (f->block & FilterTypeMatchCase) {
- if (f->matchbegin)
- m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
- else if (f->matchend)
- m = strlen(f->uri) <= strlen(uri) &&
- strcmp(&uri[strlen(uri) - strlen(f->ur…
- else
- m = strstr(uri, f->uri) ? 1 : 0;
- } else {
- if (f->matchbegin)
- m = strncasecmp(uri, f->uri, strlen(f->uri)) =…
- else if (f->matchend)
- m = strlen(f->uri) <= strlen(uri) &&
- strcasecmp(&uri[strlen(uri) - strlen(f…
- else
- m = strcasestr(uri, f->uri) ? 1 : 0;
- }
- /*m = r ? !m : m;*/
- return m;
- }
-#endif
-
- r = snprintf(pat, sizeof(pat), "%s%s%s",
- f->matchbegin ? "" : "*",
- f->uri,
- f->matchend ? "" : "*");
- if (r == -1 || (size_t)r >= sizeof(pat)) {
- fprintf(stderr, "warning: pattern too large, ignoring\n");
- return 0;
- }
-
- m = 0;
- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
-#if 0
- for (; *type; type++) {
- for (i = 0; blockstr[i]; i++) {
- if (blockstr[i] == *type &&
- f->block & (1 << i))
- printf("block type '%c'\n", blockstr[i…
- return 1;
- }
- }
- }
-
- return 0;
-#endif
- m = 1;
- }
- /*m = r ? !m : m;*/
- return m;
-}
-
-static int
-parserule(struct filterrule *f, char *s)
-{
- struct filtertype key, *ft;
- int inverse = 0;
- char *p, *values;
-
- if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
- return 0; /* skip comment or empty line */
- for (; *s && isspace(*s); s++)
- ;
- if (!*s)
- return 0; /* line had only whitespace: skip */
-
- memset(f, 0, sizeof(struct filterrule));
-
- if ((p = strstr(s, "#@#"))) {
- *p = '\0';
- if (parsedomainselement(f, s) < 0)
- return -1;
- *p = '#';
- if (!(f->css = westrdup(p + 3)))
- return -1;
- f->isexception = 1;
- goto end; /* end of CSS rule */
- }
-
- /* element hiding rule, NOTE: no wildcards are supported,
- "Simplified element hiding syntax" is not supported. */
- if ((p = strstr(s, "##"))) {
- *p = '\0';
- if (parsedomainselement(f, s) < 0)
- return -1;
- *p = '#';
- if (!(f->css = westrdup(p + 2)))
- return -1;
- goto end; /* end of rule */
- }
-
- if (!strncmp(s, "@@", 2)) {
- f->isexception = 1;
- s += 2;
- }
- if (*s == '|') {
- s++;
- if (*s == '|') {
- f->matchbegin = 1;
- s++;
- } else {
- f->matchend = 1;
- }
- }
-
- /* no options, use rest of line as uri. */
- if (!(p = strrchr(s, '$'))) {
- if (!(f->uri = westrdup(s)))
- return -1;
- goto end;
- }
-
- /* has options */
- if (!(f->uri = westrndup(s, p - s)))
- return -1;
- s = ++p;
-
- /* blockmask, has options? default: allow all options, case-sensitive
- * has no options? default: block all options, case-sensitive */
- f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
- do {
- if ((p = strchr(s, ',')))
- *p = '\0';
- /* match option */
- inverse = 0;
- if (*s == '~') {
- inverse = 1;
- s++;
- }
- if ((values = strchr(s, '=')))
- *(values) = '\0';
- key.name = s;
-
- ft = bsearch(&key, &filtertypes,
- sizeof(filtertypes) / sizeof(*filtertypes),
- sizeof(*filtertypes), filtertype_cmp);
-
- /* restore NUL-terminator for domain= option */
- if (values)
- *(values++) = '=';
-
- if (ft) {
- if (inverse)
- f->block &= ~(ft->type);
- else
- f->block |= ft->type;
- if (ft->fn && values)
- ft->fn(f, values);
- } else {
- /* DEBUG */
- fprintf(stderr, "ignored: unknown option: '%s' "
- "in rule: %s\n", key.name, f->uri);
- }
-
- /* restore ',' */
- if (p) {
- *p = ',';
- s = p + 1;
- }
- } while (p);
-end:
-
- return 1;
-}
-
-#if 0
-static void
-debugrule(struct filterrule *r)
-{
- printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
- "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
- r->isexception, r->block);
-}
-#endif
-
-static int
-loadrules(FILE *fp)
-{
- struct filterrule f, *r, *rn = NULL;
- char *line = NULL;
- size_t linesiz = 0;
- ssize_t n;
- int ret;
-
- /* TODO: handle ferror() */
- /* load rules */
- while ((n = getline(&line, &linesiz, fp)) > 0) {
- if (line[n - 1] == '\n')
- line[--n] = '\0';
- if (n > 0 && line[n - 1] == '\r')
- line[--n] = '\0';
-
- if ((ret = parserule(&f, line) > 0)) {
- if (!(r = wecalloc(1, sizeof(struct filterrule))))
- return -1;
- if (!rules)
- rules = rn = r;
- else
- rn = rn->next = r;
- memcpy(rn, &f, sizeof(struct filterrule));
- } else if (ret < 0) {
- return -1;
- }
- }
- return (rules != NULL);
-}
static Page *
newpage(WebKitWebPage *page)
{
Page *p;
- if (!(p = wecalloc(1, sizeof(Page))))
+ if (!(p = calloc(1, sizeof(Page)))) {
+ fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno));
return NULL;
+ }
p->next = pages;
pages = p;
@@ -783,146 +44,32 @@ newpage(WebKitWebPage *page)
static void
documentloaded(WebKitWebPage *wp, Page *p)
{
- char domain[256];
WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
WebKitDOMElement *el;
- String sitecss;
- struct filterrule *r;
- const char *s, *uri = webkit_web_page_get_uri(p->webpage);
- size_t len;
-
- if (!uri)
- return;
-
- s = strstr(uri, "://") + sizeof("://") - 1;
- len = strcspn(s, "/");
- memcpy(domain, s, len);
- domain[len] = '\0';
-
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
-
- /* DEBUG: timing */
- struct timespec tp_start, tp_end, tp_diff;
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- /* site-specific CSS */
- memset(&sitecss, 0, sizeof(sitecss));
- for (r = rules; r; r = r->next) {
- if (!r->css || !r->domains || !matchrule(r, "", "", domain))
- continue;
-
- len = strlen(r->css);
- if (string_append(&sitecss, r->css, len) < len)
- return;
-
- s = r->isexception ? "{display:initial;}" : "{display:none;}";
- len = strlen(s);
- if (string_append(&sitecss, s, len) < len)
- return;
- }
-/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
-
- /* DEBUG: timing */
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
- if (tp_diff.tv_nsec < 0) {
- tp_diff.tv_sec--;
- tp_diff.tv_nsec += 1000000000L;
- }
-
- printf("timing: %zu sec, %.3f ms\n",
- tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
+ const char *uri = webkit_web_page_get_uri(p->webpage);
+ char *css, *globalcss;
- if (globalcss.data)
- printf("global CSS length in bytes: %zu\n", strlen(globalcss.d…
- if (sitecss.data)
- printf("site CSS length in bytes: %zu\n", strlen(sitecss.data)…
+ /*p->view = webkit_dom_document_get_default_view(doc);*/
- p->view = webkit_dom_document_get_default_view(doc);
-
- if (globalcss.data) {
+ if ((globalcss = getglobalcss())) {
el = webkit_dom_document_create_element(doc, "style", NULL);
webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
- webkit_dom_element_set_inner_html(el, globalcss.data, NULL);
+ webkit_dom_element_set_inner_html(el, globalcss, NULL);
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
WEBKIT_DOM_NODE(el), NULL);
}
- if (sitecss.data) {
+ if ((css = getdocumentcss(uri))) {
el = webkit_dom_document_create_element(doc, "style", NULL);
webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
- webkit_dom_element_set_inner_html(el, sitecss.data, NULL);
+ webkit_dom_element_set_inner_html(el, css, NULL);
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
WEBKIT_DOM_NODE(el), NULL);
}
- free(sitecss.data);
-}
-
-int
-checkrequest(const char *uri, const char *requri)
-{
- char domain[256];
- struct filterrule *r;
- const char *s;
- size_t len;
- int status = 1;
-
- if (!uri || !strcmp(requri, uri))
- return 1;
-
- s = strstr(uri, "://") + sizeof("://") - 1;
- len = strcspn(s, "/");
- memcpy(domain, s, len);
- domain[len] = '\0';
-
- /* DEBUG: timing */
- struct timespec tp_start, tp_end, tp_diff;
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- /* match rules */
- for (r = rules; r; r = r->next) {
- if (!r->css && matchrule(r, requri, "csio^", domain)) {
- printf("requri: %s\n", requri);
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
-
- fprintf(stderr, "blocked: %s, %s\n", domain, requri);
-
- /* DEBUG: for showing the timing */
- status = 0;
- goto end;
- /*return 1;*/
- }
- }
-
-end:
- /* DEBUG: timing */
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
- if (tp_diff.tv_nsec < 0) {
- tp_diff.tv_sec--;
- tp_diff.tv_nsec += 1000000000L;
- }
-
- printf("%s [%s] timing: %zu sec, %.3f ms\n",
- requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.…
-
- return status;
+ free(css);
+ /* NOTE: globalcss free'd at cleanup() */
}
static gboolean
@@ -941,69 +88,6 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
return status;
}
-void
-init(void)
-{
- struct filterrule *r;
- FILE *fp;
- char filepath[PATH_MAX], *e;
- size_t len;
- int n;
-
- if ((e = getenv("SURF_ADBLOCK_FILE"))) {
- n = snprintf(filepath, sizeof(filepath), "%s", e);
- } else {
- if (!(e = getenv("HOME")))
- e = "";
- n = snprintf(filepath, sizeof(filepath),
- "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
- }
- if (n < 0 || (size_t)n >= sizeof(filepath)) {
- weprintf("fatal: rules file path too long");
- return;
- }
-
- if (!(fp = fopen(filepath, "r"))) {
- weprintf("fatal: cannot open rules file %s: %s\n",
- filepath, strerror(errno));
- return;
- }
-
- n = loadrules(fp);
- fclose(fp);
- if (n < 1) {
- if (n < 0) {
- weprintf("fatal: cannot read rules from file %s: %s\n",
- filepath, strerror(errno));
- } else {
- weprintf("fatal: cannot read any rule from file %s\n",
- filepath);
- }
- return;
- }
-
- /* general CSS rules: all sites */
- for (r = rules; r; r = r->next) {
- if (!r->css || r->domains)
- continue;
-
- len = strlen(r->css);
- if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
- weprintf("cannot load global css selectors "
- "in memory\n");
- cleanup();
- return;
- }
- len = sizeof("{display:none;}") - 1;
- if (string_append(&globalcss, "{display:none;}", len) < len) {
- weprintf("cannot append css rule "
- "to global css selectors\n");
- cleanup();
- return;
- }
- }
-}
-
static void
webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
{
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.