improvements - surf-adblock - Surf adblock web extension | |
git clone git://git.codemadness.org/surf-adblock | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 3cc61dad61ee13b47cc3b6a2931de9413c4c6176 | |
parent 84d3f064e393f5856f4bbbfb519b267ed4a5aa0a | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Mon, 5 Jun 2017 17:36:10 +0200 | |
improvements | |
- fix CARET in match() | |
- fix matchbegin rules (starts with ||). | |
Diffstat: | |
M TODO | 2 ++ | |
M adblock.c | 233 ++++++++++++++++++++---------… | |
M surf-adblock.c | 6 +++--- | |
M tests/tests.c | 36 ++++++++++++++---------------… | |
4 files changed, 175 insertions(+), 102 deletions(-) | |
--- | |
diff --git a/TODO b/TODO | |
@@ -1,3 +1,5 @@ | |
+- simplify match, there are only a few rules with multiple *. | |
+ | |
- loadrules: return struct rules* ? | |
on error free rules. | |
diff --git a/adblock.c b/adblock.c | |
@@ -35,8 +35,8 @@ struct filterrule { | |
int matchend; | |
/* is exception rule: prefix @@ for ABP or #@# for CSS */ | |
int isexception; | |
- char *css; /* if non-NULL is CSS rule / hide element rule */ | |
- char *uri; | |
+ const char *css; /* if non-NULL is CSS rule / hide element rule */ | |
+ const char *uri; | |
struct filterdomain *domains; | |
struct filterrule *next; | |
}; | |
@@ -184,14 +184,14 @@ string_append(String *s, const char *data, size_t len) | |
memcpy(s->data + s->len, data, len); | |
s->len = newlen; | |
s->data[s->len] = '\0'; | |
+ | |
return len; | |
} | |
#define END 0 | |
#define UNMATCHABLE -2 | |
-#define BRACKET -3 | |
-#define CARET -4 | |
-#define STAR -5 | |
+#define CARET -3 | |
+#define STAR -4 | |
static int | |
str_next(const char *str, size_t n, size_t *step) | |
@@ -275,11 +275,19 @@ match(const char *pat, const char *str, int fcase) | |
pat++; | |
m--; | |
break; | |
- default: | |
+ case CARET: | |
k = str_next(str, n, &sinc); | |
- /* TODO: write a test-case */ | |
- if (c == CARET && (k == '?' || k == '/' || k <= 0)) | |
+ if (k <= 0) | |
+ return (c==END) ? 0 : 1; | |
+ str += sinc; | |
+ n -= sinc; | |
+ if (k != '?' && k != '/') | |
return 1; | |
+ pat++; | |
+ m--; | |
+ break; | |
+ default: | |
+ k = str_next(str, n, &sinc); | |
if (k <= 0) | |
return (c==END) ? 0 : 1; | |
str += sinc; | |
@@ -341,9 +349,14 @@ match(const char *pat, const char *str, int fcase) | |
break; | |
} | |
s += sinc; | |
- kfold = fcase ? casefold(k) : k; | |
- if (k != c && kfold != c) | |
- return 1; | |
+ if (c == CARET) { | |
+ if (k != '/' && k != '?') | |
+ return 1; | |
+ } else { | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ return 1; | |
+ } | |
} | |
/* We're all done with the tails now, so throw them out */ | |
@@ -366,10 +379,16 @@ match(const char *pat, const char *str, int fcase) | |
k = str_next(s, endstr-s, &sinc); | |
if (!k) | |
return 1; | |
- kfold = fcase ? casefold(k) : k; | |
- if (k != c && kfold != c) | |
- break; | |
s += sinc; | |
+ if (c == CARET) { | |
+ if (k != '/' && k != '?') | |
+ break; | |
+ } else { | |
+ kfold = fcase ? casefold(k) : k; | |
+ if (k != c && kfold != c) | |
+ break; | |
+ } | |
+ | |
} | |
if (c == STAR) continue; | |
/* If we failed, advance str, by 1 char if it's a valid | |
@@ -486,17 +505,20 @@ matchdomain(const char *s, const char *domain) | |
} | |
static int | |
-matchrule(struct filterrule *f, const char *uri, const char *type, | |
- const char *domain) | |
+matchrule(struct filterrule *f, const char *fromuri, const char *fromdomain, | |
+ const char *fromrel, | |
+ const char *requri, const char *reqdomain, const char *reqrel, | |
+ const char *type) | |
{ | |
/* NOTE: order matters, see FilterType enum values */ | |
struct filterdomain *d; | |
char pat[1024]; | |
- int r, m; | |
+ const char *uri; | |
+ int len, r; | |
r = f->domains ? 0 : 1; | |
for (d = f->domains; d; d = d->next) { | |
- if (matchdomain(d->domain, domain)) { | |
+ if (matchdomain(d->domain, fromdomain)) { | |
if (r && d->inverse) | |
r = 0; | |
else if (!r && !d->inverse) | |
@@ -521,39 +543,58 @@ matchrule(struct filterrule *f, const char *uri, const ch… | |
return 0; | |
#endif | |
- r = snprintf(pat, sizeof(pat), "%s%s%s", | |
- f->matchbegin ? "" : "*", | |
- f->uri, | |
- f->matchend ? "" : "*"); | |
- if (r == -1 || (size_t)r >= sizeof(pat)) { | |
- fprintf(stderr, "warning: pattern too large, ignoring\n"); | |
- return 0; | |
- } | |
- | |
- /* DEBUG */ | |
+ /* match begin including domain */ | |
if (f->matchbegin) { | |
- printf("pat: %s, uri: %s, domain: %s\n", pat, uri, domai… | |
- } | |
+ /* TODO: match domain part of pattern */ | |
+ /* TODO: preprocess pattern if it is matchbegin? */ | |
- m = 0; | |
- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) { | |
-#if 0 | |
- for (; *type; type++) { | |
- for (i = 0; blockstr[i]; i++) { | |
- if (blockstr[i] == *type && | |
- f->block & (1 << i)) | |
- printf("block type '%c'\n", blockstr[i… | |
- return 1; | |
- } | |
+ len = strcspn(f->uri, "^/"); | |
+ | |
+ /* match domain without dot */ | |
+ r = snprintf(pat, sizeof(pat), "%.*s", | |
+ len, f->uri); | |
+ if (r == -1 || (size_t)r >= sizeof(pat)) { | |
+ fprintf(stderr, "warning: pattern too large, ignoring\… | |
+ return 0; | |
+ } | |
+ | |
+ /* TODO: block type mask */ | |
+ if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0… | |
+ /* match domain with dot */ | |
+ r = snprintf(pat, sizeof(pat), "*.%.*s", | |
+ len, f->uri); | |
+ if (r == -1 || (size_t)r >= sizeof(pat)) { | |
+ fprintf(stderr, "warning: pattern too large, i… | |
+ return 0; | |
} | |
+ | |
+ /* TODO: block type mask */ | |
+ if (match(pat, reqdomain, (f->block & FilterTypeMatchC… | |
+ return 0; | |
} | |
+ /* match on path */ | |
+ r = snprintf(pat, sizeof(pat), "*%s%s", | |
+ f->uri + len, | |
+ f->matchend ? "" : "*"); | |
+ uri = reqrel; | |
+ } else { | |
+ r = snprintf(pat, sizeof(pat), "*%s%s", | |
+ f->uri, | |
+ f->matchend ? "" : "*"); | |
+ uri = requri; | |
+ | |
+ } | |
+ if (r == -1 || (size_t)r >= sizeof(pat)) { | |
+ fprintf(stderr, "warning: pattern too large, ignoring\n"); | |
return 0; | |
-#endif | |
- m = 1; | |
} | |
- /*m = r ? !m : m;*/ | |
- return m; | |
+ | |
+ /* TODO: block type mask */ | |
+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) | |
+ return 1; | |
+ | |
+ return 0; | |
} | |
static int | |
@@ -619,6 +660,7 @@ parserule(struct filterrule *f, char *s) | |
/* has options */ | |
if (!(f->uri = westrndup(s, p - s))) | |
return -1; | |
+ | |
s = ++p; | |
/* blockmask, has options? default: allow all options, case-sensitive | |
@@ -723,27 +765,23 @@ getglobalcss(void) | |
} | |
char * | |
-getdocumentcss(const char *uri) | |
+getdocumentcss(const char *fromuri) | |
{ | |
const char *s; | |
- char domain[256]; | |
+ char fromdomain[256]; | |
String sitecss; | |
struct filterrule *r; | |
size_t len; | |
- if (!uri) | |
- return NULL; | |
- | |
- if ((s = strstr(uri, "://"))) | |
- s += sizeof("://") - 1; | |
- else | |
- s = uri; | |
- len = strcspn(s, "/"); /* TODO: ":/" */ | |
- memcpy(domain, s, len); | |
- domain[len] = '\0'; | |
+ /* skip protocol */ | |
+ if ((s = strstr(fromuri, "://"))) | |
+ fromuri = s + sizeof("://") - 1; | |
+ len = strcspn(fromuri, "/"); /* TODO: ":/" */ | |
+ memcpy(fromdomain, s, len); | |
+ fromdomain[len] = '\0'; | |
- printf("uri: %s\n", uri); | |
- printf("domain: %s\n", domain); | |
+ printf("fromuri: %s\n", fromuri); | |
+ printf("fromdomain: %s\n", fromdomain); | |
/* DEBUG: timing */ | |
struct timespec tp_start, tp_end, tp_diff; | |
@@ -754,7 +792,8 @@ getdocumentcss(const char *uri) | |
/* site-specific CSS */ | |
memset(&sitecss, 0, sizeof(sitecss)); | |
for (r = rules; r; r = r->next) { | |
- if (!r->css || !r->domains || !matchrule(r, "", "", domain)) | |
+ if (!r->css || !r->domains || | |
+ !matchrule(r, "", fromdomain, "", "", "", "", "")) | |
continue; | |
len = strlen(r->css); | |
@@ -792,28 +831,39 @@ getdocumentcss(const char *uri) | |
err: | |
free(sitecss.data); | |
+ /*memset(&sitecss, 0, sizeof(sitecss));*/ | |
+ | |
return NULL; | |
} | |
int | |
-allowrequest(const char *uri, const char *requri) | |
+allowrequest(const char *fromuri, const char *requri) | |
{ | |
- char domain[256]; | |
struct filterrule *r; | |
- const char *s; | |
+ char fromdomain[256], reqdomain[256]; | |
+ const char *s, *reqrel, *fromrel; | |
size_t len; | |
int status = 1; | |
- if (!uri || !strcmp(requri, uri)) | |
- return 1; | |
+ /* skip protocol part */ | |
+ if ((s = strstr(fromuri, "://"))) | |
+ fromuri = s + sizeof("://") - 1; | |
+ if ((s = strstr(requri, "://"))) | |
+ requri = s + sizeof("://") - 1; | |
- if ((s = strstr(uri, "://"))) | |
- s += sizeof("://") - 1; | |
- else | |
- s = uri; | |
- len = strcspn(s, "/"); /* TODO: ":/" */ | |
- memcpy(domain, s, len); | |
- domain[len] = '\0'; | |
+ len = strcspn(fromuri, ":/"); /* TODO: splitting on ':' truncates bracketed IPv6 literals such as [::1]; support IPv6 */ | |
+ memcpy(fromdomain, fromuri, len); | |
+ fromdomain[len] = '\0'; | |
+ | |
+ len = strcspn(requri, ":/"); /* TODO: splitting on ':' truncates bracketed IPv6 literals such as [::1]; support IPv6 */ | |
+ memcpy(reqdomain, requri, len); | |
+ reqdomain[len] = '\0'; | |
+ | |
+ fromrel = &fromuri[strcspn(fromuri, "/")]; | |
+ reqrel = &requri[strcspn(requri, "/")]; | |
+ | |
+ printf("req %s = %s\n", requri, reqrel); | |
+ printf("from %s = %s\n", fromuri, fromrel); | |
/* DEBUG: timing */ | |
struct timespec tp_start, tp_end, tp_diff; | |
@@ -823,12 +873,15 @@ allowrequest(const char *uri, const char *requri) | |
/* match rules */ | |
for (r = rules; r; r = r->next) { | |
- if (!r->css && matchrule(r, requri, "csio^", domain)) { | |
- printf("requri: %s\n", requri); | |
- printf("uri: %s\n", uri); | |
- printf("domain: %s\n", domain); | |
+ if (!r->css && matchrule(r, fromuri, fromdomain, | |
+ fromrel, requri, reqdomain, reqrel, "… | |
+ printf("reqrel: %s\n", reqrel); | |
+ printf("reqdomain: %s\n", reqdomain); | |
+ printf("requri: %s\n", requri); | |
+ printf("from uri: %s\n", fromuri); | |
+ printf("from domain: %s\n", fromdomain); | |
- fprintf(stderr, "blocked: %s, %s\n", domain, requri); | |
+ fprintf(stderr, "blocked: %s, %s\n", fromdomain, requr… | |
/* DEBUG: for showing the timing */ | |
status = 0; | |
@@ -851,13 +904,36 @@ end: | |
} | |
printf("%s [%s] timing: %lld sec, %.3f ms\n", | |
- requri, uri, (long long)tp_diff.tv_sec, | |
+ requri, fromuri, (long long)tp_diff.tv_sec, | |
(float)tp_diff.tv_nsec / 1000000.0f); | |
return status; | |
} | |
void | |
+cleanup(void) | |
+{ | |
+ struct filterrule *r; | |
+ struct filterdomain *d; | |
+ | |
+ free(globalcss.data); | |
+ memset(&globalcss, 0, sizeof(globalcss)); | |
+ | |
+ for (r = rules; r; r = rules) { | |
+ for (d = r->domains; d; d = r->domains) { | |
+ free(d->domain); | |
+ r->domains = d->next; | |
+ free(d); | |
+ } | |
+ free(r->css); | |
+ free(r->uri); | |
+ rules = r->next; | |
+ free(r); | |
+ } | |
+ rules = NULL; | |
+} | |
+ | |
+void | |
init(void) | |
{ | |
struct filterrule *r; | |
@@ -906,8 +982,7 @@ init(void) | |
len = strlen(r->css); | |
if (string_append(&globalcss, r->css, strlen(r->css)) < len) { | |
- weprintf("cannot load global css selectors " | |
- "in memory\n"); | |
+ weprintf("cannot load global css selectors in memory\n… | |
cleanup(); | |
return; | |
} | |
diff --git a/surf-adblock.c b/surf-adblock.c | |
@@ -67,14 +67,14 @@ static gboolean | |
sendrequest(WebKitWebPage *wp, WebKitURIRequest *req, | |
WebKitURIResponse *res, Page *p) | |
{ | |
- const char *uri, *requri; | |
+ const char *fromuri, *requri; | |
if (!webkit_uri_request_get_http_method(req)) | |
return TRUE; /* TRUE = don't handle any more events */ | |
- uri = webkit_web_page_get_uri(p->webpage); | |
+ fromuri = webkit_web_page_get_uri(p->webpage); | |
requri = webkit_uri_request_get_uri(req); | |
- return allowrequest(uri, requri) ? FALSE : TRUE; | |
+ return allowrequest(fromuri, requri) ? FALSE : TRUE; | |
} | |
static void | |
diff --git a/tests/tests.c b/tests/tests.c | |
@@ -1,25 +1,21 @@ | |
#include "../adblock.c" | |
-void | |
-cleanup(void) | |
-{ | |
- struct filterrule *r; | |
- struct filterdomain *d; | |
- | |
- free(globalcss.data); | |
- | |
- for (r = rules; r; r = rules) { | |
- for (d = r->domains; d; d = r->domains) { | |
- free(d->domain); | |
- r->domains = d->next; | |
- free(d); | |
- } | |
- free(r->css); | |
- free(r->uri); | |
- rules = r->next; | |
- free(r); | |
- } | |
-} | |
+/* | |
+ | |
+TODO: add tests: | |
+ | |
+||example.com/banner.gif will block all these addresses | |
+ | |
+ http://example.com/banner.gif | |
+ https://example.com/banner.gif | |
+ http://www.example.com/banner.gif | |
+ | |
+while not blocking: | |
+ | |
+ http://badexample.com/banner.gif | |
+ http://gooddomain.example/analyze?http://example.com/banner.gif | |
+ | |
+*/ | |
int | |
main(void) |