Introduction
Introduction Statistics Contact Development Disclaimer Help
Refactor resource-parsing and -handling heavily - quark - quark web server
git clone git://git.suckless.org/quark
Log
Files
Refs
LICENSE
---
commit 8aa213c123cbcfc2e50a77142c2206c202705e90
parent f45ca668af3d615e9215db49f190ea2833cecc18
Author: Laslo Hunhold <[email protected]>
Date: Wed, 3 Feb 2021 18:33:51 +0100
Refactor resource-parsing and -handling heavily
The previous approach of stripping query and fragment from the resource
was flawed and fixing this issue motivated a much deeper refactor that
was on my todo-list for a while.
First off, the resource-request (e.g. "/projects/index.html?123#eme")
is made up of the path ("/projects/index.html") and optionally the
query ("123") and fragment ("eme"). Instead of trying to break it up
or stripping it in the response-generation, we now store these three
things separately in the request-struct. Calling the resource-request
an "URI" was wrong, as the URI also includes the authority (i.e. the
host in this case) and the protocol-prefix (i.e. "http://"). To fix
this, the respective fields in the request- and response-structs had
to be renamed, as follows:
This commit adds a differentiation between a "path" (what is publicly
requested) and an "internal_path" (what we actually serve in the file
system). These two things can differ, e.g. with virtual hosts. The
cleanup and generation of these paths for the response-struct is heavily
refactored in http_prepare_response(), eliminating some deep bugs that
were due to the previously complicated approach.
Instead of doing everything by hand and having a very complicated logic
on when, after the path-cleanup, it was necessary to do a redirect, the
cleanup sections are exported to separate functions which indicate when
a redirect is necessary. This also makes more complex path-processing
possible, if desired, and definitely increases the readability.
In total, this makes the path-processing much more straightforward, and
fixes the problem where a redirect would strip queries and fragments.
Possible file-access problems for virtual hosts were also fixed while
also eliminating the pretty hacky RELPATH-macro, among other things.
Signed-off-by: Laslo Hunhold <[email protected]>
Diffstat:
M data.c | 6 +++---
M http.c | 447 +++++++++++++++++++----------…
M http.h | 7 +++++--
M main.c | 5 +++--
4 files changed, 290 insertions(+), 175 deletions(-)
---
diff --git a/data.c b/data.c
@@ -104,13 +104,13 @@ data_prepare_dirlisting_buf(const struct response *res,
memset(buf, 0, sizeof(*buf));
/* read directory */
- if ((dirlen = scandir(res->path, &e, NULL, compareent)) < 0) {
+ if ((dirlen = scandir(res->internal_path, &e, NULL, compareent)) < 0) {
return S_FORBIDDEN;
}
if (*progress == 0) {
/* write listing header (sizeof(esc) >= PATH_MAX) */
- html_escape(res->uri, esc, MIN(PATH_MAX, sizeof(esc)));
+ html_escape(res->path, esc, MIN(PATH_MAX, sizeof(esc)));
if (buffer_appendf(buf,
"<!DOCTYPE html>\n<html>\n\t<head>"
"<title>Index of %s</title></head>\n"
@@ -197,7 +197,7 @@ data_prepare_file_buf(const struct response *res, struct bu…
memset(buf, 0, sizeof(*buf));
/* open file */
- if (!(fp = fopen(res->path, "r"))) {
+ if (!(fp = fopen(res->internal_path, "r"))) {
s = S_FORBIDDEN;
goto cleanup;
}
diff --git a/http.c b/http.c
@@ -205,7 +205,7 @@ http_parse_header(const char *h, struct request *req)
{
struct in6_addr addr;
size_t i, mlen;
- const char *p, *q;
+ const char *p, *q, *r, *s, *t;
char *m, *n;
/* empty the request struct */
@@ -235,16 +235,94 @@ http_parse_header(const char *h, struct request *req)
/* basis for next step */
p = h + mlen + 1;
- /* TARGET */
+ /* RESOURCE */
+
+ /*
+ * path?query#fragment
+ * ^ ^ ^ ^
+ * | | | |
+ * p r s q
+ *
+ */
if (!(q = strchr(p, ' '))) {
return S_BAD_REQUEST;
}
- if (q - p + 1 > PATH_MAX) {
+
+ /* search for first '?' */
+ for (r = p; r < q; r++) {
+ if (!isprint(*r)) {
+ return S_BAD_REQUEST;
+ }
+ if (*r == '?') {
+ break;
+ }
+ }
+ if (r == q) {
+ /* not found */
+ r = NULL;
+ }
+
+ /* search for first '#' */
+ for (s = p; s < q; s++) {
+ if (!isprint(*s)) {
+ return S_BAD_REQUEST;
+ }
+ if (*s == '#') {
+ break;
+ }
+ }
+ if (s == q) {
+ /* not found */
+ s = NULL;
+ }
+
+ if (r != NULL && s != NULL && s < r) {
+ /*
+ * '#' comes before '?' and thus the '?' is literal,
+ * because the query must come before the fragment
+ */
+ r = NULL;
+ }
+
+ /* write path using temporary endpointer t */
+ if (r != NULL) {
+ /* resource contains a query, path ends at r */
+ t = r;
+ } else if (s != NULL) {
+ /* resource contains only a fragment, path ends at s */
+ t = s;
+ } else {
+ /* resource contains no queries, path ends at q */
+ t = q;
+ }
+ if ((size_t)(t - p + 1) > LEN(req->path)) {
return S_REQUEST_TOO_LARGE;
}
- memcpy(req->uri, p, q - p);
- req->uri[q - p] = '\0';
- decode(req->uri, req->uri);
+ memcpy(req->path, p, t - p);
+ req->path[t - p] = '\0';
+ decode(req->path, req->path);
+
+ /* write query if present */
+ if (r != NULL) {
+ /* query ends either at s (if fragment present) or q */
+ t = (s != NULL) ? s : q;
+
+ if ((size_t)(t - (r + 1) + 1) > LEN(req->query)) {
+ return S_REQUEST_TOO_LARGE;
+ }
+ memcpy(req->query, r + 1, t - (r + 1));
+ req->query[t - (r + 1)] = '\0';
+ }
+
+ /* write fragment if present */
+ if (s != NULL) {
+ /* the fragment always starts at s + 1 and ends at q */
+ if ((size_t)(q - (s + 1) + 1) > LEN(req->fragment)) {
+ return S_REQUEST_TOO_LARGE;
+ }
+ memcpy(req->fragment, s + 1, q - (s + 1));
+ req->fragment[q - (s + 1)] = '\0';
+ }
/* basis for next step */
p = q + 1;
@@ -304,7 +382,7 @@ http_parse_header(const char *h, struct request *req)
if (!(q = strstr(p, "\r\n"))) {
return S_BAD_REQUEST;
}
- if (q - p + 1 > FIELD_MAX) {
+ if ((size_t)(q - p + 1) > LEN(req->field[i])) {
return S_REQUEST_TOO_LARGE;
}
memcpy(req->field[i], p, q - p);
@@ -372,24 +450,24 @@ encode(const char src[PATH_MAX], char dest[PATH_MAX])
dest[i] = '\0';
}
-static int
-normabspath(char *path)
+static enum status
+path_normalize(char *uri, int *redirect)
{
size_t len;
- int dirty = 0, last = 0;
+ int last = 0;
char *p, *q;
/* require and skip first slash */
- if (path[0] != '/') {
- return -1;
+ if (uri[0] != '/') {
+ return S_BAD_REQUEST;
}
- p = path + 1;
+ p = uri + 1;
- /* get length of path */
+ /* get length of URI */
len = strlen(p);
for (; !last; ) {
- /* bound path component within (p,q) */
+ /* bound uri component within (p,q) */
if (!(q = strchr(p, '/'))) {
q = strchr(p, '\0');
last = 1;
@@ -402,9 +480,9 @@ normabspath(char *path)
goto squash;
} else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
/* "../" */
- if (p != path + 1) {
+ if (p != uri + 1) {
/* place p right after the previous / */
- for (p -= 2; p > path && *p != '/'; p--);
+ for (p -= 2; p > uri && *p != '/'; p--);
p++;
}
goto squash;
@@ -417,15 +495,90 @@ squash:
/* squash (p,q) into void */
if (last) {
*p = '\0';
- len = p - path;
+ len = p - uri;
} else {
- memmove(p, q + 1, len - ((q + 1) - path) + 2);
+ memmove(p, q + 1, len - ((q + 1) - uri) + 2);
len -= (q + 1) - p;
}
- dirty = 1;
+ if (redirect != NULL) {
+ *redirect = 1;
+ }
+ }
+
+ return 0;
+}
+
+static enum status
+path_add_vhost_prefix(char uri[PATH_MAX], int *redirect,
+ const struct server *srv, const struct response *res)
+{
+ if (srv->vhost && res->vhost && res->vhost->prefix) {
+ if (prepend(uri, PATH_MAX, res->vhost->prefix)) {
+ return S_REQUEST_TOO_LARGE;
+ }
+ if (redirect != NULL) {
+ *redirect = 1;
+ }
+ }
+
+ return 0;
+}
+
+static enum status
+path_apply_prefix_mapping(char uri[PATH_MAX], int *redirect,
+ const struct server *srv, const struct response *res)
+{
+ size_t i, len;
+
+ for (i = 0; i < srv->map_len; i++) {
+ len = strlen(srv->map[i].from);
+ if (!strncmp(uri, srv->map[i].from, len)) {
+ /*
+ * if vhosts are enabled only apply mappings
+ * defined for the current canonical host
+ */
+ if (srv->vhost && res->vhost && srv->map[i].chost &&
+ strcmp(srv->map[i].chost, res->vhost->chost)) {
+ continue;
+ }
+
+ /* swap out URI prefix */
+ memmove(uri, uri + len, strlen(uri) + 1);
+ if (prepend(uri, PATH_MAX, srv->map[i].to)) {
+ return S_REQUEST_TOO_LARGE;
+ }
+
+ if (redirect != NULL) {
+ *redirect = 1;
+ }
+
+ /* break so we don't possibly hit an infinite loop */
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static enum status
+path_ensure_dirslash(char uri[PATH_MAX], int *redirect)
+{
+ size_t len;
+
+ /* append '/' to URI if not present */
+ len = strlen(uri);
+ if (len + 1 + 1 > PATH_MAX) {
+ return S_REQUEST_TOO_LARGE;
+ }
+ if (len > 0 && uri[len - 1] != '/') {
+ uri[len] = '/';
+ uri[len + 1] = '\0';
+ if (redirect != NULL) {
+ *redirect = 1;
+ }
}
- return dirty;
+ return 0;
}
static enum status
@@ -560,60 +713,29 @@ parse_range(const char *str, size_t size, size_t *lower, …
return 0;
}
-#undef RELPATH
-#define RELPATH(x) ((!*(x) || !strcmp(x, "/")) ? "." : ((x) + 1))
-
void
http_prepare_response(const struct request *req, struct response *res,
const struct server *srv)
{
- enum status s;
+ enum status s, tmps;
struct in6_addr addr;
struct stat st;
struct tm tm = { 0 };
- struct vhost *vhost;
- size_t len, i;
- int dirty = 0, hasport, ipv6host;
- static char realuri[PATH_MAX], tmpuri[PATH_MAX];
+ size_t i;
+ int redirect, hasport, ipv6host;
+ static char tmppath[PATH_MAX];
char *p, *mime;
- const char *targethost;
/* empty all response fields */
memset(res, 0, sizeof(*res));
- /*
- * make a working copy of the URI, strip queries and fragments
- * (ignorable according to RFC 3986 section 3) and normalize it
- */
- memcpy(realuri, req->uri, sizeof(realuri));
-
- if ((p = strchr(realuri, '?'))) {
- *p = '\0';
- } else if ((p = strchr(realuri, '#'))) {
- *p = '\0';
- }
-
- switch (normabspath(realuri)) {
- case -1:
- s = S_BAD_REQUEST;
- goto err;
- case 0:
- /* string is unchanged */
- break;
- case 1:
- /* string was changed */
- dirty = 1;
- break;
- }
-
- /* match vhost */
- vhost = NULL;
+ /* determine virtual host */
if (srv->vhost) {
for (i = 0; i < srv->vhost_len; i++) {
- if (!regexec(&(srv->vhost[i].re), req->field[REQ_HOST],
- 0, NULL, 0)) {
+ if (!regexec(&(srv->vhost[i].re),
+ req->field[REQ_HOST], 0, NULL, 0)) {
/* we have a matching vhost */
- vhost = &(srv->vhost[i]);
+ res->vhost = &(srv->vhost[i]);
break;
}
}
@@ -621,100 +743,74 @@ http_prepare_response(const struct request *req, struct …
s = S_NOT_FOUND;
goto err;
}
-
- /* if we have a vhost prefix, prepend it to the URI */
- if (vhost->prefix) {
- if (prepend(realuri, LEN(realuri), vhost->prefix)) {
- s = S_REQUEST_TOO_LARGE;
- goto err;
- }
- dirty = 1;
- }
}
- /* apply URI prefix mapping */
- for (i = 0; i < srv->map_len; i++) {
- len = strlen(srv->map[i].from);
- if (!strncmp(realuri, srv->map[i].from, len)) {
- /* match canonical host if vhosts are enabled and
- * the mapping specifies a canonical host */
- if (srv->vhost && srv->map[i].chost &&
- strcmp(srv->map[i].chost, vhost->chost)) {
- continue;
- }
+ /* copy request-path to response-path and clean it up */
+ redirect = 0;
+ memcpy(res->path, req->path, MIN(sizeof(res->path), sizeof(req->path))…
+ if ((tmps = path_normalize(res->path, &redirect)) ||
+ (tmps = path_add_vhost_prefix(res->path, &redirect, srv, res)) ||
+ (tmps = path_apply_prefix_mapping(res->path, &redirect, srv, res))…
+ (tmps = path_normalize(res->path, &redirect))) {
+ s = tmps;
+ goto err;
+ }
- /* swap out URI prefix */
- memmove(realuri, realuri + len, strlen(realuri) + 1);
- if (prepend(realuri, LEN(realuri), srv->map[i].to)) {
- s = S_REQUEST_TOO_LARGE;
- goto err;
- }
- dirty = 1;
- break;
- }
+ /* redirect all non-canonical hosts to their canonical forms */
+ if (srv->vhost && res->vhost &&
+ strcmp(req->field[REQ_HOST], res->vhost->chost)) {
+ redirect = 1;
}
- /* normalize URI again, in case we introduced dirt */
- switch (normabspath(realuri)) {
- case -1:
- s = S_BAD_REQUEST;
+ /* reject all non-well-known hidden targets (see RFC 8615) */
+ if (strstr(res->path, "/.") && strncmp(res->path, "/.well-known/",
+ sizeof("/.well-known/") - 1)) {
+ s = S_FORBIDDEN;
goto err;
- case 0:
- /* string is unchanged */
- break;
- case 1:
- /* string was changed */
- dirty = 1;
- break;
}
- /* stat the relative path derived from the URI */
- if (stat(RELPATH(realuri), &st) < 0) {
+ /*
+ * generate and stat internal path based on the cleaned up request
+ * path and the virtual host while ignoring query and fragment
+ * (valid according to RFC 3986)
+ */
+ if (esnprintf(res->internal_path, sizeof(res->internal_path), "/%s/%s",
+ (srv->vhost && res->vhost) ? res->vhost->dir : "",
+ res->path)) {
+ s = S_REQUEST_TOO_LARGE;
+ goto err;
+ }
+ if ((tmps = path_normalize(res->internal_path, NULL))) {
+ s = tmps;
+ goto err;
+ }
+ if (stat(res->internal_path, &st) < 0) {
s = (errno == EACCES) ? S_FORBIDDEN : S_NOT_FOUND;
goto err;
}
+ /*
+ * if the path points at a directory, make sure both the path
+ * and internal path have a trailing slash
+ */
if (S_ISDIR(st.st_mode)) {
- /* append '/' to URI if not present */
- len = strlen(realuri);
- if (len + 1 + 1 > PATH_MAX) {
- s = S_REQUEST_TOO_LARGE;
+ if ((tmps = path_ensure_dirslash(res->path, &redirect)) ||
+ (tmps = path_ensure_dirslash(res->internal_path, NULL))) {
+ s = tmps;
goto err;
}
- if (len > 0 && realuri[len - 1] != '/') {
- realuri[len] = '/';
- realuri[len + 1] = '\0';
- dirty = 1;
- }
}
- /*
- * reject hidden targets, except if it is a well-known URI
- * according to RFC 8615
- */
- if (strstr(realuri, "/.") && strncmp(realuri,
- "/.well-known/", sizeof("/.well-known/") - 1)) {
- s = S_FORBIDDEN;
- goto err;
- }
-
- /*
- * redirect if the URI needs to be redirected or the requested
- * host is non-canonical
- */
- if (dirty || (srv->vhost && vhost &&
- strcmp(req->field[REQ_HOST], vhost->chost))) {
+ /* redirect if the path-cleanup necessitated it earlier */
+ if (redirect) {
res->status = S_MOVED_PERMANENTLY;
- /* encode realuri */
- encode(realuri, tmpuri);
+ /* encode path */
+ encode(res->path, tmppath);
/* determine target location */
- if (srv->vhost) {
+ if (srv->vhost && res->vhost) {
/* absolute redirection URL */
- targethost = req->field[REQ_HOST][0] ? vhost->chost ?
- vhost->chost : req->field[REQ_HOST] :
- srv->host ? srv->host : "localhost";
/* do we need to add a port to the Location? */
hasport = srv->port && strcmp(srv->port, "80");
@@ -722,70 +818,83 @@ http_prepare_response(const struct request *req, struct r…
/* RFC 2732 specifies to use brackets for IPv6-address…
* in URLs, so we need to check if our host is one and
* honor that later when we fill the "Location"-field …
- if ((ipv6host = inet_pton(AF_INET6, targethost,
+ if ((ipv6host = inet_pton(AF_INET6, res->vhost->chost,
&addr)) < 0) {
s = S_INTERNAL_SERVER_ERROR;
goto err;
}
- /* write location to response struct */
+ /*
+ * write location to response struct (re-including
+ * the query and fragment, if present)
+ */
if (esnprintf(res->field[RES_LOCATION],
sizeof(res->field[RES_LOCATION]),
- "//%s%s%s%s%s%s",
+ "//%s%s%s%s%s%s%s%s%s%s",
ipv6host ? "[" : "",
- targethost,
- ipv6host ? "]" : "", hasport ? ":" : "",
- hasport ? srv->port : "", tmpuri)) {
+ res->vhost->chost,
+ ipv6host ? "]" : "",
+ hasport ? ":" : "",
+ hasport ? srv->port : "",
+ tmppath,
+ req->query[0] ? "?" : "",
+ req->query,
+ req->fragment[0] ? "#" : "",
+ req->fragment)) {
s = S_REQUEST_TOO_LARGE;
goto err;
}
} else {
- /* write relative redirection URI to response struct */
+ /*
+ * write relative redirection URI to response struct
+ * (re-including the query and fragment, if present)
+ */
if (esnprintf(res->field[RES_LOCATION],
sizeof(res->field[RES_LOCATION]),
- "%s", tmpuri)) {
+ "%s%s%s%s%s",
+ tmppath,
+ req->query[0] ? "?" : "",
+ req->query,
+ req->fragment[0] ? "#" : "",
+ req->fragment)) {
s = S_REQUEST_TOO_LARGE;
goto err;
}
}
return;
- } else {
- /*
- * the URI is well-formed, we can now write the URI into
- * the response-URI and corresponding relative path
- * (optionally including the vhost servedir as a prefix)
- * into the actual response-path
- */
- if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
- s = S_REQUEST_TOO_LARGE;
- goto err;
- }
- if (esnprintf(res->path, sizeof(res->path), "%s%s",
- vhost ? vhost->dir : "", RELPATH(realuri))) {
- s = S_REQUEST_TOO_LARGE;
- goto err;
- }
}
if (S_ISDIR(st.st_mode)) {
/*
- * check if the directory index exists by appending it to
- * the URI
+ * when we serve a directory, we first check if there
+ * exists a directory index. If not, we either make
+ * a directory listing (if enabled) or send an error
+ */
+
+ /*
+ * append docindex to internal_path temporarily
+ * (internal_path is guaranteed to end with '/')
*/
- if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
- realuri, srv->docindex)) {
+ if (esnprintf(tmppath, sizeof(tmppath), "%s%s",
+ res->internal_path, srv->docindex)) {
s = S_REQUEST_TOO_LARGE;
goto err;
}
- /* stat the docindex, which must be a regular file */
- if (stat(RELPATH(tmpuri), &st) < 0 || !S_ISREG(st.st_mode)) {
+ /* stat the temporary path, which must be a regular file */
+ if (stat(tmppath, &st) < 0 || !S_ISREG(st.st_mode)) {
if (srv->listdirs) {
/* serve directory listing */
+
+ /* check if directory is accessible */
+ if (access(res->internal_path, R_OK) != 0) {
+ s = S_FORBIDDEN;
+ goto err;
+ } else {
+ res->status = S_OK;
+ }
res->type = RESTYPE_DIRLISTING;
- res->status = (access(res->path, R_OK)) ?
- S_FORBIDDEN : S_OK;
if (esnprintf(res->field[RES_CONTENT_TYPE],
sizeof(res->field[RES_CONTENT_TY…
@@ -802,8 +911,10 @@ http_prepare_response(const struct request *req, struct re…
goto err;
}
} else {
- /* docindex is valid, write tmpuri to response-path */
- if (esnprintf(res->path, sizeof(res->path), "%s", tmpu…
+ /* the docindex exists; copy tmppath to internal path …
+ if (esnprintf(res->internal_path,
+ sizeof(res->internal_path), "%s",
+ tmppath)) {
s = S_REQUEST_TOO_LARGE;
goto err;
}
@@ -847,7 +958,7 @@ http_prepare_response(const struct request *req, struct res…
/* mime */
mime = "application/octet-stream";
- if ((p = strrchr(res->path, '.'))) {
+ if ((p = strrchr(res->internal_path, '.'))) {
for (i = 0; i < LEN(mimes); i++) {
if (!strcmp(mimes[i].ext, p + 1)) {
mime = mimes[i].type;
@@ -860,7 +971,7 @@ http_prepare_response(const struct request *req, struct res…
res->type = RESTYPE_FILE;
/* check if file is readable */
- res->status = (access(res->path, R_OK)) ? S_FORBIDDEN :
+ res->status = (access(res->internal_path, R_OK)) ? S_FORBIDDEN :
(req->field[REQ_RANGE][0] != '\0') ?
S_PARTIAL_CONTENT : S_OK;
diff --git a/http.h b/http.h
@@ -27,7 +27,9 @@ extern const char *req_method_str[];
struct request {
enum req_method method;
- char uri[PATH_MAX];
+ char path[PATH_MAX];
+ char query[FIELD_MAX];
+ char fragment[FIELD_MAX];
char field[NUM_REQ_FIELDS][FIELD_MAX];
};
@@ -73,8 +75,9 @@ struct response {
enum res_type type;
enum status status;
char field[NUM_RES_FIELDS][FIELD_MAX];
- char uri[PATH_MAX];
char path[PATH_MAX];
+ char internal_path[PATH_MAX];
+ struct vhost *vhost;
struct {
size_t lower;
size_t upper;
diff --git a/main.c b/main.c
@@ -45,8 +45,9 @@ logmsg(const struct connection *c)
inaddr_str[0] = '\0';
}
- printf("%s\t%s\t%d\t%s\t%s\n", tstmp, inaddr_str, c->res.status,
- c->req.field[REQ_HOST], c->req.uri);
+ printf("%s\t%s\t%d\t%s\t%s%s%s%s%s\n", tstmp, inaddr_str, c->res.statu…
+ c->req.field[REQ_HOST], c->req.path, c->req.query[0] ? "?" : "",
+ c->req.query, c->req.fragment[0] ? "#" : "", c->req.fragment);
}
static void
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.