GopherProxy

	Ignore queries and fragments in URIs - quark - quark web server
	git clone git://git.suckless.org/quark
	Log
	Files
	Refs
	LICENSE
	---
	commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
	parent c6a9055e5a30be570e30da8d216c39662c3a3f99
	Author: Laslo Hunhold <[email protected]>
	Date: Sat, 30 Jan 2021 12:53:00 +0100

	Ignore queries and fragments in URIs

	Previously, a request for "/index.html" would yield a 200, while a
	request for "/index.html?foo=bar" would yield a 404, as quark would
	look for the file "index.html?foo=bar" in the serve directory.

	To accommodate this behaviour, it's no longer sufficient to just compare
	realuri and req->uri. Instead, we set a "dirty" flag every time we
	change the URI in such a way that it requires a redirect.

	According to RFC 3986 section 3, queries and fragments are there
	to (further) "identify a resource within the scope of the URI's scheme
	and naming authority (if any)". However, it's perfectly legitimate to
	just ignore this further specification when the URI itself is already
	pointing at a unique resource (i.e. "/index.html").

	This behaviour is consistent with dynamic web applications which usually
	ignore parameters they don't care about. Quark is too much Zen to care
	about any parameters. This has the added bonus that you can now clone
	repositories (read-only) via the "dumb" HTTP git-protocol, so

	git clone https://example.org/git/project.git

	is now possible (provided you run update-server-info during the
	post-update-hook). This wouldn't work previously because git, when
	asked to clone via HTTP, would first probe the server with a request for

	project.git/info/refs?service=git-upload-pack

	(i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
	return a 404, though, while git only gracefully "downgrades" to the
	"dumb" HTTP git-protocol if the request succeeds but only yields a basic
	200 response without special git-headers.

	This way, it is now trivial to also share git-repositories (and other
	gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
	only supports read-only-access, I don't think that's much of an overall
	loss (to the contrary!).

	HTTP authentication is broken and it makes much more sense to enable
	ssh-access to contributors and make them push changes via ssh. The key
	advantage of HTTP-cloning over git://-cloning is the fact that the git
	protocol can be tampered with, while the HTTP-protocol can be encapsulated
	into a secure TLS connection.

	Signed-off-by: Laslo Hunhold <[email protected]>

	Diffstat:
	M http.c | 69 +++++++++++++++++++++++------…

	1 file changed, 51 insertions(+), 18 deletions(-)
	---
	diff --git a/http.c b/http.c
	@@ -368,12 +368,12 @@ static int
	normabspath(char *path)
	{
	size_t len;
	- int last = 0;
	+ int dirty = 0, last = 0;
	char *p, *q;

	/* require and skip first slash */
	if (path[0] != '/') {
	- return 1;
	+ return -1;
	}
	p = path + 1;

	@@ -387,7 +387,9 @@ normabspath(char *path)
	last = 1;
	}

	- if (p == q || (q - p == 1 && p[0] == '.')) {
	+ if (*p == '\0') {
	+ break;
	+ } else if (p == q || (q - p == 1 && p[0] == '.')) {
	/* "/" or "./" */
	goto squash;
	} else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
	@@ -412,9 +414,10 @@ squash:
	memmove(p, q + 1, len - ((q + 1) - path) + 2);
	len -= (q + 1) - p;
	}
	+ dirty = 1;
	}

	- return 0;
	+ return dirty;
	}

	static enum status
	@@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct res…
	struct tm tm = { 0 };
	struct vhost *vhost;
	size_t len, i;
	- int hasport, ipv6host;
	+ int dirty = 0, hasport, ipv6host;
	static char realuri[PATH_MAX], tmpuri[PATH_MAX];
	char *p, *mime;
	const char *targethost;
	@@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct r…
	/* empty all response fields */
	memset(res, 0, sizeof(*res));

	- /* make a working copy of the URI and normalize it */
	+ /*
	+ * make a working copy of the URI, strip queries and fragments
	+ * (ignorable according to RFC 3986 section 3) and normalize it
	+ */
	memcpy(realuri, req->uri, sizeof(realuri));
	- if (normabspath(realuri)) {
	+
	+ if ((p = strchr(realuri, '?'))) {
	+ *p = '\0';
	+ } else if ((p = strchr(realuri, '#'))) {
	+ *p = '\0';
	+ }
	+
	+ switch (normabspath(realuri)) {
	+ case -1:
	s = S_BAD_REQUEST;
	goto err;
	+ case 0:
	+ /* string is unchanged */
	+ break;
	+ case 1:
	+ /* string was changed */
	+ dirty = 1;
	+ break;
	}

	/* match vhost */
	@@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct r…
	}

	/* if we have a vhost prefix, prepend it to the URI */
	- if (vhost->prefix &&
	- prepend(realuri, LEN(realuri), vhost->prefix)) {
	- s = S_REQUEST_TOO_LARGE;
	- goto err;
	+ if (vhost->prefix) {
	+ if (prepend(realuri, LEN(realuri), vhost->prefix)) {
	+ s = S_REQUEST_TOO_LARGE;
	+ goto err;
	+ }
	+ dirty = 1;
	}
	}

	@@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct r…
	s = S_REQUEST_TOO_LARGE;
	goto err;
	}
	+ dirty = 1;
	break;
	}
	}

	/* normalize URI again, in case we introduced dirt */
	- if (normabspath(realuri)) {
	+ switch (normabspath(realuri)) {
	+ case -1:
	s = S_BAD_REQUEST;
	goto err;
	+ case 0:
	+ /* string is unchanged */
	+ break;
	+ case 1:
	+ /* string was changed */
	+ dirty = 1;
	+ break;
	}

	/* stat the relative path derived from the URI */
	@@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct res…
	if (len > 0 && realuri[len - 1] != '/') {
	realuri[len] = '/';
	realuri[len + 1] = '\0';
	+ dirty = 1;
	}
	}

	@@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct r…
	}

	/*
	- * redirect if the original URI and the "real" URI differ or if
	- * the requested host is non-canonical
	+ * redirect if the URI needs to be redirected or the requested
	+ * host is non-canonical
	*/
	- if (strcmp(req->uri, realuri) || (srv->vhost && vhost &&
	+ if (dirty || (srv->vhost && vhost &&
	strcmp(req->field[REQ_HOST], vhost->chost))) {
	res->status = S_MOVED_PERMANENTLY;

	@@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct r…
	* (optionally including the vhost servedir as a prefix)
	* into the actual response-path
	*/
	- if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) {
	+ if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
	s = S_REQUEST_TOO_LARGE;
	goto err;
	}
	if (esnprintf(res->path, sizeof(res->path), "%s%s",
	- vhost ? vhost->dir : "", RELPATH(req->uri))) {
	+ vhost ? vhost->dir : "", RELPATH(realuri))) {
	s = S_REQUEST_TOO_LARGE;
	goto err;
	}
	@@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct res…
	* the URI
	*/
	if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
	- req->uri, srv->docindex)) {
	+ realuri, srv->docindex)) {
	s = S_REQUEST_TOO_LARGE;
	goto err;
	}