Ignore queries and fragments in URIs - quark - quark web server | |
git clone git://git.suckless.org/quark | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 319ba7083fdde836d6614c6b8b228bf3a9849e95 | |
parent c6a9055e5a30be570e30da8d216c39662c3a3f99 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Sat, 30 Jan 2021 12:53:00 +0100 | |
Ignore queries and fragments in URIs | |
Previously, a request for "/index.html" would yield a 200, while a | |
request for "/index.html?foo=bar" would yield a 404, as quark would | |
look for the file "index.html?foo=bar" in the serve directory. | |
To accommodate this behaviour, it's no longer sufficient to just compare | |
realuri and req->uri. Instead, we set a "dirty" flag every time we | |
change the URI in such a way that it requires a redirect. | |
According to RFC 3986 section 3, queries and fragments are there | |
to (further) "identify a resource within the scope of the URI's scheme | |
and naming authority (if any)". However, it's perfectly legitimate to | |
just ignore this further specification when the URI itself is already | |
pointing at a unique resource (i.e. "/index.html"). | |
This behaviour is consistent with dynamic web applications which usually | |
ignore parameters they don't care about. Quark is too Zen to care | |
about any parameters. This has the added bonus that you can now clone | |
repositories (read-only) via the "dumb" HTTP git-protocol, so | |
git clone https://example.org/git/project.git | |
is now possible (provided you run "git update-server-info" in the | |
post-update hook). This wouldn't work previously because git, when | |
asked to clone via HTTP, would first probe the server with a request for | |
project.git/info/refs?service=git-upload-pack | |
(i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would | |
return a 404, though, while git only gracefully "downgrades" to the | |
"dumb" HTTP git-protocol if the request succeeds but only yields a basic | |
200 response without special git-headers. | |
This way, it is now trivial to also share git-repositories (and other | |
gracefully-downgrading protocols). While the "dumb" HTTP git-protocol | |
only supports read-only-access, I don't think that's much of an overall | |
loss (to the contrary!). | |
HTTP authentication is broken and it makes much more sense to enable | |
ssh-access to contributors and make them push changes via ssh. The key | |
advantage of HTTP-cloning over git://-cloning is the fact that the git | |
protocol can be tampered with, while the HTTP-protocol can be encapsulated | |
into a secure TLS connection. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M http.c | 69 +++++++++++++++++++++++------… | |
1 file changed, 51 insertions(+), 18 deletions(-) | |
--- | |
diff --git a/http.c b/http.c | |
@@ -368,12 +368,12 @@ static int | |
normabspath(char *path) | |
{ | |
size_t len; | |
- int last = 0; | |
+ int dirty = 0, last = 0; | |
char *p, *q; | |
/* require and skip first slash */ | |
if (path[0] != '/') { | |
- return 1; | |
+ return -1; | |
} | |
p = path + 1; | |
@@ -387,7 +387,9 @@ normabspath(char *path) | |
last = 1; | |
} | |
- if (p == q || (q - p == 1 && p[0] == '.')) { | |
+ if (*p == '\0') { | |
+ break; | |
+ } else if (p == q || (q - p == 1 && p[0] == '.')) { | |
/* "/" or "./" */ | |
goto squash; | |
} else if (q - p == 2 && p[0] == '.' && p[1] == '.') { | |
@@ -412,9 +414,10 @@ squash: | |
memmove(p, q + 1, len - ((q + 1) - path) + 2); | |
len -= (q + 1) - p; | |
} | |
+ dirty = 1; | |
} | |
- return 0; | |
+ return dirty; | |
} | |
static enum status | |
@@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct res… | |
struct tm tm = { 0 }; | |
struct vhost *vhost; | |
size_t len, i; | |
- int hasport, ipv6host; | |
+ int dirty = 0, hasport, ipv6host; | |
static char realuri[PATH_MAX], tmpuri[PATH_MAX]; | |
char *p, *mime; | |
const char *targethost; | |
@@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct r… | |
/* empty all response fields */ | |
memset(res, 0, sizeof(*res)); | |
- /* make a working copy of the URI and normalize it */ | |
+ /* | |
+ * make a working copy of the URI, strip queries and fragments | |
+ * (ignorable according to RFC 3986 section 3) and normalize it | |
+ */ | |
memcpy(realuri, req->uri, sizeof(realuri)); | |
- if (normabspath(realuri)) { | |
+ | |
+ if ((p = strchr(realuri, '?'))) { | |
+ *p = '\0'; | |
+ } else if ((p = strchr(realuri, '#'))) { | |
+ *p = '\0'; | |
+ } | |
+ | |
+ switch (normabspath(realuri)) { | |
+ case -1: | |
s = S_BAD_REQUEST; | |
goto err; | |
+ case 0: | |
+ /* string is unchanged */ | |
+ break; | |
+ case 1: | |
+ /* string was changed */ | |
+ dirty = 1; | |
+ break; | |
} | |
/* match vhost */ | |
@@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct r… | |
} | |
/* if we have a vhost prefix, prepend it to the URI */ | |
- if (vhost->prefix && | |
- prepend(realuri, LEN(realuri), vhost->prefix)) { | |
- s = S_REQUEST_TOO_LARGE; | |
- goto err; | |
+ if (vhost->prefix) { | |
+ if (prepend(realuri, LEN(realuri), vhost->prefix)) { | |
+ s = S_REQUEST_TOO_LARGE; | |
+ goto err; | |
+ } | |
+ dirty = 1; | |
} | |
} | |
@@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct r… | |
s = S_REQUEST_TOO_LARGE; | |
goto err; | |
} | |
+ dirty = 1; | |
break; | |
} | |
} | |
/* normalize URI again, in case we introduced dirt */ | |
- if (normabspath(realuri)) { | |
+ switch (normabspath(realuri)) { | |
+ case -1: | |
s = S_BAD_REQUEST; | |
goto err; | |
+ case 0: | |
+ /* string is unchanged */ | |
+ break; | |
+ case 1: | |
+ /* string was changed */ | |
+ dirty = 1; | |
+ break; | |
} | |
/* stat the relative path derived from the URI */ | |
@@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct res… | |
if (len > 0 && realuri[len - 1] != '/') { | |
realuri[len] = '/'; | |
realuri[len + 1] = '\0'; | |
+ dirty = 1; | |
} | |
} | |
@@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct r… | |
} | |
/* | |
- * redirect if the original URI and the "real" URI differ or if | |
- * the requested host is non-canonical | |
+ * redirect if the URI needs to be redirected or the requested | |
+ * host is non-canonical | |
*/ | |
- if (strcmp(req->uri, realuri) || (srv->vhost && vhost && | |
+ if (dirty || (srv->vhost && vhost && | |
strcmp(req->field[REQ_HOST], vhost->chost))) { | |
res->status = S_MOVED_PERMANENTLY; | |
@@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct r… | |
* (optionally including the vhost servedir as a prefix) | |
* into the actual response-path | |
*/ | |
- if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) { | |
+ if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) { | |
s = S_REQUEST_TOO_LARGE; | |
goto err; | |
} | |
if (esnprintf(res->path, sizeof(res->path), "%s%s", | |
- vhost ? vhost->dir : "", RELPATH(req->uri))) { | |
+ vhost ? vhost->dir : "", RELPATH(realuri))) { | |
s = S_REQUEST_TOO_LARGE; | |
goto err; | |
} | |
@@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct res… | |
* the URI | |
*/ | |
if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s", | |
- req->uri, srv->docindex)) { | |
+ realuri, srv->docindex)) { | |
s = S_REQUEST_TOO_LARGE; | |
goto err; | |
} |