Introduction
Introduction Statistics Contact Development Disclaimer Help
improvements - surf-adblock - Surf adblock web extension
git clone git://git.codemadness.org/surf-adblock
Log
Files
Refs
README
LICENSE
---
commit d781090ae7718310fb13c83c1a8406be46a613b8
parent b6cc76e9fcac3112086f2d2348ef53b16b59da9d
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 3 Jun 2017 21:54:05 +0200
improvements
- WIP: faster matching for simple rules, just a test atm, but ~50ms max to ~20ms on my
machine.
- add support for exception rules.
- debug.sh: add a debug script for testing; it compiles surf-adblock.c as a standalone program with main().
Diffstat:
M TODO | 14 +++++++++++++-
A debug.sh | 18 ++++++++++++++++++
M surf-adblock.c | 279 +++++++++++++++++++++++++----…
3 files changed, 255 insertions(+), 56 deletions(-)
---
diff --git a/TODO b/TODO
@@ -1,5 +1,15 @@
- fix tweakers.net popup / rule.
-- benchmark rule matching (timing).
+ this is in an exception rule...
+
+ make sure exception rules are always below in the list? modify awk scr…
+
+- performance:
+ - benchmark rule matching (timing).
+ - bloom filters? some kind of cache?
+ - optimize simple filter case.
+
+- support separator "^" = [/\?]?
+ - test it better.
===
@@ -23,6 +33,8 @@ Docs:
and matchbegin or matchend set.
- make less CPU intensive.
- maybe even include it statically?
+ - optimize CSS rule matching (only per site?).
+
- optimize memory allocation.
- optimize: pregenerate one global stylesheet that applies to all sites?
- separate adblocker into daemon? not sure.
diff --git a/debug.sh b/debug.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# ugly debug script: compile as standalone program for testing.
+
+cc -std=c99 -pedantic -Wall -Os -I. -I/usr/include -I/usr/X11R6/include \
+ `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0 webkit2gtk-web-extension…
+ -DVERSION=\"0.1\" -DWEBEXTDIR=\"/usr/local/lib/surf\" -D_DEFAULT_SOUR…
+ -DWEBEXTDIR=\"/usr/local/lib/surf\" \
+ `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0 webkit2gtk-web-extension…
+ -DDEBUG \
+ -c surf-adblock.c
+cc -s -L/usr/lib -lc -L/usr/X11R6/lib -lX11 \
+ `pkg-config --libs gtk+-3.0 webkit2gtk-4.0 webkit2gtk-web-extension-4…
+ surf-adblock.o
+
+chmod +x surf-adblock
+# NOTE: need to copy because of W^X.
+doas cp surf-adblock /usr/local/bin
+/usr/local/bin/surf-adblock
diff --git a/surf-adblock.c b/surf-adblock.c
@@ -305,11 +305,11 @@ match(const char *pat, const char *str, int fcase)
break;
default:
k = str_next(str, n, &sinc);
+ /* TODO: write a test-case */
+ if (c == CARET && (k == '?' || k == '/' || k <= 0))
+ return 1;
if (k <= 0)
return (c==END) ? 0 : 1;
- if (c == CARET && (iswdigit(k) || iswalpha(k) ||
- strchr("_-.%", k)))
- return 1;
str += sinc;
n -= sinc;
kfold = fcase ? casefold(k) : k;
@@ -410,7 +410,6 @@ match(const char *pat, const char *str, int fcase)
return 0;
}
-
/*
domain=... if domain is prefixed with ~, ignore.
multiple domains can be separated with |
@@ -521,28 +520,63 @@ matchrule(struct filterrule *f, const char *uri, const ch…
/* NOTE: order matters, see FilterType enum values */
struct filterdomain *d;
char pat[1024];
- int r;
-
- /* ignore exception rules for now, these are usually paid
- * for by sites to allow advertisements. */
- if (f->isexception)
- return 0;
+ int r, m;
- if (f->css) {
- r = f->domains ? 0 : 1;
- for (d = f->domains; d; d = d->next) {
- if (matchdomain(d->domain, domain)) {
- if (r && d->inverse)
- r = 0;
- else if (!r && !d->inverse)
- r = 1;
- } else if (r && !d->inverse) {
+ r = f->domains ? 0 : 1;
+ for (d = f->domains; d; d = d->next) {
+ if (matchdomain(d->domain, domain)) {
+ if (r && d->inverse)
r = 0;
- }
+ else if (!r && !d->inverse)
+ r = 1;
+ } else if (r && !d->inverse) {
+ r = 0;
}
+ }
+ if (f->css) {
+ /* DEBUG */
+ if (f->isexception)
+ printf("DEBUG, exception rule, CSS: %s, match? %d\n",
+ f->css, r);
return r;
}
+#if 1
+ /* skip allow rule, TODO: inverse? */
+ if (!r)
+ return 0;
+#endif
+
+#if 1
+ /* DEBUG: test, match if it is a simple pattern */
+ char *p;
+ p = strchr(f->uri, '*');
+ if (!p)
+ p = strchr(f->uri, '^');
+ if (!p) {
+ /* TODO: write a test-case */
+ if (f->block & FilterTypeMatchCase) {
+ if (f->matchbegin)
+ m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
+ else if (f->matchend)
+ m = strlen(f->uri) <= strlen(uri) &&
+ strcmp(&uri[strlen(uri) - strlen(f->ur…
+ else
+ m = strstr(uri, f->uri) ? 1 : 0;
+ } else {
+ if (f->matchbegin)
+ m = strncasecmp(uri, f->uri, strlen(f->uri)) =…
+ else if (f->matchend)
+ m = strlen(f->uri) <= strlen(uri) &&
+ strcasecmp(&uri[strlen(uri) - strlen(f…
+ else
+ m = strcasestr(uri, f->uri) ? 1 : 0;
+ }
+ /*m = r ? !m : m;*/
+ return m;
+ }
+#endif
+
r = snprintf(pat, sizeof(pat), "%s%s%s",
f->matchbegin ? "" : "*",
f->uri,
@@ -552,19 +586,8 @@ matchrule(struct filterrule *f, const char *uri, const cha…
return 0;
}
- r = f->domains ? 0 : 1;
- for (d = f->domains; d; d = d->next) {
- if (matchdomain(d->domain, domain)) {
- if (r && d->inverse)
- r = 0;
- else if (!r && !d->inverse)
- r = 1;
- } else if (r && !d->inverse) {
- r = 0;
- }
- }
-
- if (r && !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
+ m = 0;
+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
#if 0
for (; *type; type++) {
for (i = 0; blockstr[i]; i++) {
@@ -575,11 +598,13 @@ matchrule(struct filterrule *f, const char *uri, const ch…
}
}
}
+
return 0;
#endif
- return 1;
+ m = 1;
}
- return 0;
+ /*m = r ? !m : m;*/
+ return m;
}
static int
@@ -695,6 +720,7 @@ end:
return 1;
}
+#if 0
static void
debugrule(struct filterrule *r)
{
@@ -702,6 +728,7 @@ debugrule(struct filterrule *r)
"%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
r->isexception, r->block);
}
+#endif
static int
loadrules(FILE *fp)
@@ -775,6 +802,12 @@ documentloaded(WebKitWebPage *wp, Page *p)
printf("uri: %s\n", uri);
printf("domain: %s\n", domain);
+ /* DEBUG: timing */
+ struct timespec tp_start, tp_end, tp_diff;
+ if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+ }
+
/* site-specific CSS */
memset(&sitecss, 0, sizeof(sitecss));
for (r = rules; r; r = r->next) {
@@ -783,11 +816,38 @@ documentloaded(WebKitWebPage *wp, Page *p)
len = strlen(r->css);
if (string_append(&sitecss, r->css, len) < len)
return;
- len = sizeof("{display:none;}") -1;
- if (string_append(&sitecss, "{display:none;}", len) < len)
- return;
+
+ if (r->isexception) {
+ len = sizeof("{display:initial;}") -1;
+ if (string_append(&sitecss, "{display:initial;}", len)…
+ return;
+ } else {
+ len = sizeof("{display:none;}") -1;
+ if (string_append(&sitecss, "{display:none;}", len) < …
+ return;
+ }
+ }
+/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
+
+ /* DEBUG: timing */
+ if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
}
- printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");
+
+ tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+ tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+ if (tp_diff.tv_nsec < 0) {
+ tp_diff.tv_sec--;
+ tp_diff.tv_nsec += 1000000000L;
+ }
+
+ printf("timing: %zu sec, %.3f ms\n",
+ tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
+
+ if (globalcss.data)
+ printf("global CSS length in bytes: %zu\n", strlen(globalcss.d…
+ if (sitecss.data)
+ printf("site CSS length in bytes: %zu\n", strlen(sitecss.data)…
p->view = webkit_dom_document_get_default_view(doc);
@@ -819,6 +879,7 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
const char *s, *uri = webkit_web_page_get_uri(p->webpage),
*requri = webkit_uri_request_get_uri(req);
size_t len;
+ gboolean status = FALSE;
if (!uri || !strcmp(requri, uri) ||
(strncmp(uri, "http://", sizeof("http://") - 1) &&
@@ -830,6 +891,12 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
memcpy(domain, s, len);
domain[len] = '\0';
+ /* DEBUG: timing */
+ struct timespec tp_start, tp_end, tp_diff;
+ if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+ }
+
/* match rules */
for (r = rules; r; r = r->next) {
if (!r->css && matchrule(r, requri, "csio^", domain)) {
@@ -839,30 +906,32 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
fprintf(stderr, "blocked: %s, %s\n", domain, requri);
- return TRUE;
+ status = TRUE;
+ goto end;
}
}
- return FALSE;
-}
-
-static void
-webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
-{
- Page *np;
+end:
+ /* DEBUG: timing */
+ if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+ fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+ }
- if (!(np = newpage(p))) {
- weprintf("cannot associate webext with new page: %s\n",
- strerror(errno));
- return;
+ tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+ tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+ if (tp_diff.tv_nsec < 0) {
+ tp_diff.tv_sec--;
+ tp_diff.tv_nsec += 1000000000L;
}
- g_signal_connect(p, "document-loaded", G_CALLBACK(documentloaded), np);
- g_signal_connect(p, "send-request", G_CALLBACK(sendrequest), np);
+ printf("%s [%s] timing: %zu sec, %.3f ms\n",
+ requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.…
+
+ return status;
}
-G_MODULE_EXPORT void
-webkit_web_extension_initialize(WebKitWebExtension *ext)
+void
+init(void)
{
struct filterrule *r;
FILE *fp;
@@ -922,6 +991,106 @@ webkit_web_extension_initialize(WebKitWebExtension *ext)
return;
}
}
+}
+
+/*
+ * Handler connected to the extension's "page-created" signal.
+ * Creates the per-page state with newpage() and, when that succeeds, hooks
+ * the page's "document-loaded" and "send-request" signals to
+ * documentloaded() and sendrequest(), passing the new Page as user data.
+ * On failure a warning including strerror(errno) is printed and no signal
+ * is connected. The `e` and `unused` parameters are not used.
+ */
+static void
+webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
+{
+	Page *np = newpage(p);
+
+	if (np) {
+		g_signal_connect(p, "document-loaded",
+		                 G_CALLBACK(documentloaded), np);
+		g_signal_connect(p, "send-request",
+		                 G_CALLBACK(sendrequest), np);
+		return;
+	}
+
+	/* no page state: report why and leave the page unfiltered */
+	weprintf("cannot associate webext with new page: %s\n",
+	         strerror(errno));
+}
+G_MODULE_EXPORT void
+webkit_web_extension_initialize(WebKitWebExtension *ext)
+{
+ init();
g_signal_connect(ext, "page-created", G_CALLBACK(webpagecreated), NULL…
}
+
+#ifdef DEBUG
+/*
+ * Standalone test driver (only built with -DDEBUG, see debug.sh).
+ *
+ * Mirrors the site-specific CSS part of documentloaded() for one hard-coded
+ * test URI: calls init(), extracts the domain from the URI, walks the global
+ * `rules` list, appends the selector of every matching CSS rule to `sitecss`
+ * ("{display:initial;}" for exception rules, "{display:none;}" otherwise)
+ * and prints the result together with the elapsed time.
+ *
+ * Returns 0 on success and 1 on any failure (unsupported URI scheme, domain
+ * that does not fit the buffer, or a short string_append()).
+ */
+int
+main(void)
+{
+	char domain[256];
+	String sitecss;
+	struct filterrule *r;
+	/* zeroed so the diff below is well-defined even if clock_gettime fails */
+	struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff;
+	const char *s, *uri;
+	size_t len;
+	int status = 1;
+
+	/* TEST */
+	uri = "https://tweakers.net/";
+
+	/* cleared up front so the shared exit path can always free it */
+	memset(&sitecss, 0, sizeof(sitecss));
+
+	/* nothing has been set up yet, so a plain return is enough here */
+	if (!uri || (strncmp(uri, "http://", sizeof("http://") - 1) &&
+	    strncmp(uri, "https://", sizeof("https://") - 1)))
+		return 1;
+
+	init();
+
+	/* the scheme check above guarantees "://" is present */
+	s = strstr(uri, "://") + sizeof("://") - 1;
+	len = strcspn(s, "/");
+	if (len >= sizeof(domain)) {
+		/* would overflow domain[] (including the terminating NUL) */
+		fprintf(stderr, "domain too long: %s\n", s);
+		goto end;
+	}
+	memcpy(domain, s, len);
+	domain[len] = '\0';
+
+	printf("uri: %s\n", uri);
+	printf("domain: %s\n", domain);
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	/* site-specific CSS */
+	for (r = rules; r; r = r->next) {
+		if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+			continue;
+		len = strlen(r->css);
+		if (string_append(&sitecss, r->css, len) < len)
+			goto end;
+		if (r->isexception) {
+			len = sizeof("{display:initial;}") -1;
+			if (string_append(&sitecss, "{display:initial;}", len) < len)
+				goto end;
+		} else {
+			len = sizeof("{display:none;}") -1;
+			if (string_append(&sitecss, "{display:none;}", len) < len)
+				goto end;
+		}
+	}
+	printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+	if (tp_diff.tv_nsec < 0) {
+		/* borrow one second so the nanosecond part is non-negative */
+		tp_diff.tv_sec--;
+		tp_diff.tv_nsec += 1000000000L;
+	}
+
+	/* time_t has no printf length modifier of its own: cast explicitly */
+	printf("timing: %lld sec, %.3f ms\n",
+	       (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
+
+	if (globalcss.data)
+		printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
+	if (sitecss.data)
+		printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
+
+	status = 0;
+end:
+	/* single exit path once init() has run: release on success and failure */
+	free(sitecss.data);
+	cleanup();
+
+	return status;
+}
+#endif
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.