Introduction
Introduction Statistics Contact Development Disclaimer Help
rework URI handling - hurl - Gopher/HTTP/HTTPS file grabber
git clone git://git.codemadness.org/hurl
Log
Files
Refs
README
LICENSE
---
commit e8e1e1a7d09c614b57fac5070eb5c28822c948ba
parent 5a9951db80a5e9b9f2d5ad7ca1c6efebbd00e11f
Author: Hiltjo Posthuma <[email protected]>
Date: Fri, 12 Mar 2021 22:22:13 +0100
rework URI handling
- Parse the URI in a more correct way following the Gopher URI RFC 4266 and
General URI RFC 3986 - Uniform Resource Identifier (URI): Generic Syntax.
- A URI fragment is no longer sent to the server.
- A gopher type is now optional for an empty path, for example:
"gopher://codemadness.org".
Also, the use of strlcat() is removed, so the code should now be more portable.
Diffstat:
M hurl.c | 175 ++++++++++++++++++++---------…
1 file changed, 116 insertions(+), 59 deletions(-)
---
diff --git a/hurl.c b/hurl.c
@@ -28,12 +28,15 @@
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
#endif
-/* uri */
+/* URI */
struct uri {
- char proto[48];
+ char proto[48]; /* scheme including ":" or "://" */
+ char userinfo[256]; /* username [:password] */
char host[256];
- char path[2048];
- char port[6]; /* numeric port */
+ char port[6]; /* numeric port */
+ char path[1024];
+ char query[1024];
+ char fragment[1024];
};
char *argv0;
@@ -61,70 +64,115 @@ sighandler(int signo)
}
int
-parseuri(const char *s, struct uri *u)
+uri_parse(const char *s, struct uri *u)
{
- const char *p = s, *b;
- char *endptr = NULL;
+ const char *p = s;
+ char *endptr;
size_t i;
- unsigned long l;
+ long l;
- u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
- if (!*p)
- return 0;
+ u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
+ u->path[0] = u->query[0] = u->fragment[0] = '\0';
- /* protocol part */
- for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned cha…
- *p == '+' || *p == '-' || *p == '.'); p++)
+ /* protocol-relative */
+ if (*p == '/' && *(p + 1) == '/') {
+ p += 2; /* skip "//" */
+ goto parseauth;
+ }
+
+ /* scheme / protocol part */
+ for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
+ *p == '+' || *p == '-' || *p == '.'; p++)
;
- if (!strncmp(p, "://", 3)) {
+ /* scheme, except if empty and starts with ":" then it is a path */
+ if (*p == ':' && p != s) {
+ if (*(p + 1) == '/' && *(p + 2) == '/')
+ p += 3; /* skip "://" */
+ else
+ p++; /* skip ":" */
+
if ((size_t)(p - s) >= sizeof(u->proto))
return -1; /* protocol too long */
memcpy(u->proto, s, p - s);
u->proto[p - s] = '\0';
- p += 3; /* skip "://" */
+
+ if (*(p - 1) != '/')
+ goto parsepath;
} else {
- return -1; /* no protocol specified */
+ p = s; /* no scheme format, reset to start */
+ goto parsepath;
+ }
+
+parseauth:
+ /* userinfo (username:password) */
+ i = strcspn(p, "@/?#");
+ if (p[i] == '@') {
+ if (i >= sizeof(u->userinfo))
+ return -1; /* userinfo too long */
+ memcpy(u->userinfo, p, i);
+ u->userinfo[i] = '\0';
+ p += i + 1;
}
/* IPv6 address */
if (*p == '[') {
- /* bracket not found or host too long */
- if (!(b = strchr(p, ']')) || (size_t)(b - p) >= (ssize_t)sizeo…
+ /* bracket not found, host too short or too long */
+ i = strcspn(p, "]");
+ if (p[i] != ']' || i < 3)
return -1;
- memcpy(u->host, p + 1, b - p - 1);
- u->host[b - p - 1] = '\0';
- p = b + 1;
+ i++; /* including "]" */
} else {
/* domain / host part, skip until port, path or end. */
- if ((i = strcspn(p, ":/")) >= sizeof(u->host))
- return -1; /* host too long */
- memcpy(u->host, p, i);
- u->host[i] = '\0';
- p = &p[i];
+ i = strcspn(p, ":/?#");
}
+ if (i >= sizeof(u->host))
+ return -1; /* host too long */
+ memcpy(u->host, p, i);
+ u->host[i] = '\0';
+ p += i;
+
/* port */
if (*p == ':') {
- if ((i = strcspn(++p, "/")) >= sizeof(u->port))
+ p++;
+ if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
return -1; /* port too long */
memcpy(u->port, p, i);
u->port[i] = '\0';
- /* check for valid port: range 1 - 65535 */
+ /* check for valid port: range 1 - 65535, may be empty */
errno = 0;
- l = strtoul(u->port, &endptr, 10);
- if (errno || u->port[0] == '\0' || *endptr ||
- !l || l > 65535)
+ l = strtol(u->port, &endptr, 10);
+ if (i && (errno || *endptr || l <= 0 || l > 65535))
return -1;
- p = &p[i];
+ p += i;
}
- if (u->host[0]) {
- p = &p[strspn(p, "/")];
- memcpy(u->path, "/", 2);
- } else {
- return -1;
+
+parsepath:
+ /* path */
+ if ((i = strcspn(p, "?#")) >= sizeof(u->path))
+ return -1; /* path too long */
+ memcpy(u->path, p, i);
+ u->path[i] = '\0';
+ p += i;
+
+ /* query */
+ if (*p == '?') {
+ p++;
+ if ((i = strcspn(p, "#")) >= sizeof(u->query))
+ return -1; /* query too long */
+ memcpy(u->query, p, i);
+ u->query[i] = '\0';
+ p += i;
}
- /* treat truncation as an error */
- if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path))
- return -1;
+
+ /* fragment */
+ if (*p == '#') {
+ p++;
+ if ((i = strlen(p)) >= sizeof(u->fragment))
+ return -1; /* fragment too long */
+ memcpy(u->fragment, p, i);
+ u->fragment[i] = '\0';
+ }
+
return 0;
}
@@ -206,11 +254,14 @@ https_request(void)
/* create and send HTTP header */
r = snprintf(buf, sizeof(buf),
- "GET %s HTTP/1.0\r\n"
+ "GET %s%s%s HTTP/1.0\r\n"
"Host: %s%s%s\r\n"
"Connection: close\r\n"
"%s%s"
- "\r\n", u.path, u.host,
+ "\r\n",
+ u.path[0] ? u.path : "/",
+ u.query[0] ? "?" : "", u.query,
+ u.host,
stdport ? "" : ":",
stdport ? "" : u.port,
config_headers, config_headers[0] ? "\r\n" : "");
@@ -334,11 +385,14 @@ http_request(void)
/* create and send HTTP header */
r = snprintf(buf, sizeof(buf),
- "GET %s HTTP/1.0\r\n"
+ "GET %s%s%s HTTP/1.0\r\n"
"Host: %s%s%s\r\n"
"Connection: close\r\n"
"%s%s"
- "\r\n", u.path, u.host,
+ "\r\n",
+ u.path[0] ? u.path : "/",
+ u.query[0] ? "?" : "", u.query,
+ u.host,
stdport ? "" : ":",
stdport ? "" : u.port,
config_headers, config_headers[0] ? "\r\n" : "");
@@ -427,7 +481,7 @@ int
gopher_request(void)
{
char buf[READ_BUF_SIZ], *p;
- const char *errstr;
+ const char *errstr, *path;
size_t len = 0;
ssize_t r;
int fd = -1, ret = 1;
@@ -440,8 +494,13 @@ gopher_request(void)
if (pledge("stdio", NULL) == -1)
err(1, "pledge");
- /* create and send path, skip type part */
- r = snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2);
+ /* create and send path, skip type part, empty path is allowed,
+ see RFC 4266 The gopher URI Scheme - section 2.1 */
+ path = u.path;
+ if (*path == '/' && *path++)
+ path++;
+ r = snprintf(buf, sizeof(buf), "%s%s%s\r\n",
+ path, u.query[0] ? "?" : "", u.query);
if (r < 0 || (size_t)r >= sizeof(buf)) {
fprintf(stderr, "not writing header because it is truncated");
goto err;
@@ -623,8 +682,10 @@ main(int argc, char **argv)
usage();
url = argv[0];
- if (parseuri(url, &u) == -1)
- errx(1, "invalid url: %s", url);
+ if (uri_parse(url, &u) == -1)
+ errx(1, "invalid URL: %s", url);
+ if (u.userinfo[0])
+ errx(1, "userinfo field not supported in the URL: %s", url);
if (config_timeout > 0) {
signal(SIGALRM, sighandler);
@@ -632,7 +693,7 @@ main(int argc, char **argv)
err(1, "alarm");
}
- if (!strcmp(u.proto, "https")) {
+ if (!strcmp(u.proto, "https://")) {
if (tls_init())
errx(1, "tls_init failed");
if (!(tls_config = tls_config_new()))
@@ -643,22 +704,18 @@ main(int argc, char **argv)
errx(1, "tls set ciphers failed: %s",
tls_config_error(tls_config));
}
- if (!u.port[0] && !strcmp(u.proto, "https"))
+ if (!u.port[0])
memcpy(u.port, "443", 4);
statuscode = https_request();
- } else if (!strcmp(u.proto, "http")) {
+ } else if (!strcmp(u.proto, "http://")) {
if (!u.port[0])
memcpy(u.port, "80", 3);
statuscode = http_request();
- } else if (!strcmp(u.proto, "gopher")) {
+ } else if (!strcmp(u.proto, "gopher://")) {
if (!u.port[0])
memcpy(u.port, "70", 3);
-
- if (u.path[0] != '/' || u.path[1] == '\0')
- errx(1, "must specify type");
-
statuscode = gopher_request();
- } else if (!strcmp(u.proto, "gophers")) {
+ } else if (!strcmp(u.proto, "gophers://")) {
if (tls_init())
errx(1, "tls_init failed");
if (!(tls_config = tls_config_new()))
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.