Introduction
Introduction Statistics Contact Development Disclaimer Help
improvements - surf-adblock - Surf adblock web extension
git clone git://git.codemadness.org/surf-adblock
Log
Files
Refs
README
LICENSE
---
commit 3cc61dad61ee13b47cc3b6a2931de9413c4c6176
parent 84d3f064e393f5856f4bbbfb519b267ed4a5aa0a
Author: Hiltjo Posthuma <[email protected]>
Date: Mon, 5 Jun 2017 17:36:10 +0200
improvements
- fix CARET in match()
- fix matchbegin rules (starts with ||).
Diffstat:
M TODO | 2 ++
M adblock.c | 233 ++++++++++++++++++++---------…
M surf-adblock.c | 6 +++---
M tests/tests.c | 36 ++++++++++++++---------------…
4 files changed, 175 insertions(+), 102 deletions(-)
---
diff --git a/TODO b/TODO
@@ -1,3 +1,5 @@
+- simplify match, there are only a few rules with multiple *.
+
- loadrules: return struct rules* ?
on error free rules.
diff --git a/adblock.c b/adblock.c
@@ -35,8 +35,8 @@ struct filterrule {
int matchend;
/* is exception rule: prefix @@ for ABP or #@# for CSS */
int isexception;
- char *css; /* if non-NULL is CSS rule / hide element rule */
- char *uri;
+ const char *css; /* if non-NULL is CSS rule / hide element rule */
+ const char *uri;
struct filterdomain *domains;
struct filterrule *next;
};
@@ -184,14 +184,14 @@ string_append(String *s, const char *data, size_t len)
memcpy(s->data + s->len, data, len);
s->len = newlen;
s->data[s->len] = '\0';
+
return len;
}
#define END 0
#define UNMATCHABLE -2
-#define BRACKET -3
-#define CARET -4
-#define STAR -5
+#define CARET -3
+#define STAR -4
static int
str_next(const char *str, size_t n, size_t *step)
@@ -275,11 +275,19 @@ match(const char *pat, const char *str, int fcase)
pat++;
m--;
break;
- default:
+ case CARET:
k = str_next(str, n, &sinc);
- /* TODO: write a test-case */
- if (c == CARET && (k == '?' || k == '/' || k <= 0))
+ if (k <= 0)
+ return (c==END) ? 0 : 1;
+ str += sinc;
+ n -= sinc;
+ if (k != '?' && k != '/')
return 1;
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
if (k <= 0)
return (c==END) ? 0 : 1;
str += sinc;
@@ -341,9 +349,14 @@ match(const char *pat, const char *str, int fcase)
break;
}
s += sinc;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- return 1;
+ if (c == CARET) {
+ if (k != '/' && k != '?')
+ return 1;
+ } else {
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ }
}
/* We're all done with the tails now, so throw them out */
@@ -366,10 +379,16 @@ match(const char *pat, const char *str, int fcase)
k = str_next(s, endstr-s, &sinc);
if (!k)
return 1;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- break;
s += sinc;
+ if (c == CARET) {
+ if (k != '/' && k != '?')
+ break;
+ } else {
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ break;
+ }
+
}
if (c == STAR) continue;
/* If we failed, advance str, by 1 char if it's a valid
@@ -486,17 +505,20 @@ matchdomain(const char *s, const char *domain)
}
static int
-matchrule(struct filterrule *f, const char *uri, const char *type,
- const char *domain)
+matchrule(struct filterrule *f, const char *fromuri, const char *fromdomain,
+ const char *fromrel,
+ const char *requri, const char *reqdomain, const char *reqrel,
+ const char *type)
{
/* NOTE: order matters, see FilterType enum values */
struct filterdomain *d;
char pat[1024];
- int r, m;
+ const char *uri;
+ int len, r;
r = f->domains ? 0 : 1;
for (d = f->domains; d; d = d->next) {
- if (matchdomain(d->domain, domain)) {
+ if (matchdomain(d->domain, fromdomain)) {
if (r && d->inverse)
r = 0;
else if (!r && !d->inverse)
@@ -521,39 +543,58 @@ matchrule(struct filterrule *f, const char *uri, const ch…
return 0;
#endif
- r = snprintf(pat, sizeof(pat), "%s%s%s",
- f->matchbegin ? "" : "*",
- f->uri,
- f->matchend ? "" : "*");
- if (r == -1 || (size_t)r >= sizeof(pat)) {
- fprintf(stderr, "warning: pattern too large, ignoring\n");
- return 0;
- }
-
- /* DEBUG */
+ /* match begin including domain */
if (f->matchbegin) {
- printf("pat: %s, uri: %s, domain: %s\n", pat, uri, domain);
- }
+ /* TODO: match domain part of pattern */
+ /* TODO: preprocess pattern if it is matchbegin? */
- m = 0;
- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
-#if 0
- for (; *type; type++) {
- for (i = 0; blockstr[i]; i++) {
- if (blockstr[i] == *type &&
- f->block & (1 << i))
- printf("block type '%c'\n", blockstr[i]);
- return 1;
- }
+ len = strcspn(f->uri, "^/");
+
+ /* match domain without dot */
+ r = snprintf(pat, sizeof(pat), "%.*s",
+ len, f->uri);
+ if (r == -1 || (size_t)r >= sizeof(pat)) {
+ fprintf(stderr, "warning: pattern too large, ignoring\n");
+ return 0;
+ }
+
+ /* TODO: block type mask */
+ if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
+ /* match domain with dot */
+ r = snprintf(pat, sizeof(pat), "*.%.*s",
+ len, f->uri);
+ if (r == -1 || (size_t)r >= sizeof(pat)) {
+ fprintf(stderr, "warning: pattern too large, ignoring\n");
+ return 0;
}
+
+ /* TODO: block type mask */
+ if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1))
+ return 0;
}
+ /* match on path */
+ r = snprintf(pat, sizeof(pat), "*%s%s",
+ f->uri + len,
+ f->matchend ? "" : "*");
+ uri = reqrel;
+ } else {
+ r = snprintf(pat, sizeof(pat), "*%s%s",
+ f->uri,
+ f->matchend ? "" : "*");
+ uri = requri;
+
+ }
+ if (r == -1 || (size_t)r >= sizeof(pat)) {
+ fprintf(stderr, "warning: pattern too large, ignoring\n");
return 0;
-#endif
- m = 1;
}
- /*m = r ? !m : m;*/
- return m;
+
+ /* TODO: block type mask */
+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1))
+ return 1;
+
+ return 0;
}
static int
@@ -619,6 +660,7 @@ parserule(struct filterrule *f, char *s)
/* has options */
if (!(f->uri = westrndup(s, p - s)))
return -1;
+
s = ++p;
/* blockmask, has options? default: allow all options, case-sensitive
@@ -723,27 +765,23 @@ getglobalcss(void)
}
char *
-getdocumentcss(const char *uri)
+getdocumentcss(const char *fromuri)
{
const char *s;
- char domain[256];
+ char fromdomain[256];
String sitecss;
struct filterrule *r;
size_t len;
- if (!uri)
- return NULL;
-
- if ((s = strstr(uri, "://")))
- s += sizeof("://") - 1;
- else
- s = uri;
- len = strcspn(s, "/"); /* TODO: ":/" */
- memcpy(domain, s, len);
- domain[len] = '\0';
+ /* skip protocol */
+ if ((s = strstr(fromuri, "://")))
+ fromuri = s + sizeof("://") - 1;
+ len = strcspn(fromuri, "/"); /* TODO: ":/" */
+ memcpy(fromdomain, s, len);
+ fromdomain[len] = '\0';
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
+ printf("fromuri: %s\n", fromuri);
+ printf("fromdomain: %s\n", fromdomain);
/* DEBUG: timing */
struct timespec tp_start, tp_end, tp_diff;
@@ -754,7 +792,8 @@ getdocumentcss(const char *uri)
/* site-specific CSS */
memset(&sitecss, 0, sizeof(sitecss));
for (r = rules; r; r = r->next) {
- if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+ if (!r->css || !r->domains ||
+ !matchrule(r, "", fromdomain, "", "", "", "", ""))
continue;
len = strlen(r->css);
@@ -792,28 +831,39 @@ getdocumentcss(const char *uri)
err:
free(sitecss.data);
+ /*memset(&sitecss, 0, sizeof(sitecss));*/
+
return NULL;
}
int
-allowrequest(const char *uri, const char *requri)
+allowrequest(const char *fromuri, const char *requri)
{
- char domain[256];
struct filterrule *r;
- const char *s;
+ char fromdomain[256], reqdomain[256];
+ const char *s, *reqrel, *fromrel;
size_t len;
int status = 1;
- if (!uri || !strcmp(requri, uri))
- return 1;
+ /* skip protocol part */
+ if ((s = strstr(fromuri, "://")))
+ fromuri = s + sizeof("://") - 1;
+ if ((s = strstr(requri, "://")))
+ requri = s + sizeof("://") - 1;
- if ((s = strstr(uri, "://")))
- s += sizeof("://") - 1;
- else
- s = uri;
- len = strcspn(s, "/"); /* TODO: ":/" */
- memcpy(domain, s, len);
- domain[len] = '\0';
+ len = strcspn(fromuri, ":/"); /* TODO: ":/", but support IPV6... */
+ memcpy(fromdomain, fromuri, len);
+ fromdomain[len] = '\0';
+
+ len = strcspn(requri, ":/"); /* TODO: ":/", but support IPV6... */
+ memcpy(reqdomain, requri, len);
+ reqdomain[len] = '\0';
+
+ fromrel = &fromuri[strcspn(fromuri, "/")];
+ reqrel = &requri[strcspn(requri, "/")];
+
+ printf("req %s = %s\n", requri, reqrel);
+ printf("from %s = %s\n", fromuri, fromrel);
/* DEBUG: timing */
struct timespec tp_start, tp_end, tp_diff;
@@ -823,12 +873,15 @@ allowrequest(const char *uri, const char *requri)
/* match rules */
for (r = rules; r; r = r->next) {
- if (!r->css && matchrule(r, requri, "csio^", domain)) {
- printf("requri: %s\n", requri);
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
+ if (!r->css && matchrule(r, fromuri, fromdomain,
+ fromrel, requri, reqdomain, reqrel, "csio^")) {
+ printf("reqrel: %s\n", reqrel);
+ printf("reqdomain: %s\n", reqdomain);
+ printf("requri: %s\n", requri);
+ printf("from uri: %s\n", fromuri);
+ printf("from domain: %s\n", fromdomain);
- fprintf(stderr, "blocked: %s, %s\n", domain, requri);
+ fprintf(stderr, "blocked: %s, %s\n", fromdomain, requri);
/* DEBUG: for showing the timing */
status = 0;
@@ -851,13 +904,36 @@ end:
}
printf("%s [%s] timing: %lld sec, %.3f ms\n",
- requri, uri, (long long)tp_diff.tv_sec,
+ requri, fromuri, (long long)tp_diff.tv_sec,
(float)tp_diff.tv_nsec / 1000000.0f);
return status;
}
void
+cleanup(void)
+{
+ struct filterrule *r;
+ struct filterdomain *d;
+
+ free(globalcss.data);
+ memset(&globalcss, 0, sizeof(globalcss));
+
+ for (r = rules; r; r = rules) {
+ for (d = r->domains; d; d = r->domains) {
+ free(d->domain);
+ r->domains = d->next;
+ free(d);
+ }
+ free(r->css);
+ free(r->uri);
+ rules = r->next;
+ free(r);
+ }
+ rules = NULL;
+}
+
+void
init(void)
{
struct filterrule *r;
@@ -906,8 +982,7 @@ init(void)
len = strlen(r->css);
if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
- weprintf("cannot load global css selectors "
- "in memory\n");
+ weprintf("cannot load global css selectors in memory\n");
cleanup();
return;
}
diff --git a/surf-adblock.c b/surf-adblock.c
@@ -67,14 +67,14 @@ static gboolean
sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
WebKitURIResponse *res, Page *p)
{
- const char *uri, *requri;
+ const char *fromuri, *requri;
if (!webkit_uri_request_get_http_method(req))
return TRUE; /* TRUE = don't handle any more events */
- uri = webkit_web_page_get_uri(p->webpage);
+ fromuri = webkit_web_page_get_uri(p->webpage);
requri = webkit_uri_request_get_uri(req);
- return allowrequest(uri, requri) ? FALSE : TRUE;
+ return allowrequest(fromuri, requri) ? FALSE : TRUE;
}
static void
diff --git a/tests/tests.c b/tests/tests.c
@@ -1,25 +1,21 @@
#include "../adblock.c"
-void
-cleanup(void)
-{
- struct filterrule *r;
- struct filterdomain *d;
-
- free(globalcss.data);
-
- for (r = rules; r; r = rules) {
- for (d = r->domains; d; d = r->domains) {
- free(d->domain);
- r->domains = d->next;
- free(d);
- }
- free(r->css);
- free(r->uri);
- rules = r->next;
- free(r);
- }
-}
+/*
+
+TODO: add tests:
+
+||example.com/banner.gif will block all these addresses
+
+ http://example.com/banner.gif
+ https://example.com/banner.gif
+ http://www.example.com/banner.gif
+
+while not blocking:
+
+ http://badexample.com/banner.gif
+ http://gooddomain.example/analyze?http://example.com/banner.gif
+
+*/
int
main(void)
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.