rework URI handling - hurl - Gopher/HTTP/HTTPS file grabber | |
git clone git://git.codemadness.org/hurl | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit e8e1e1a7d09c614b57fac5070eb5c28822c948ba | |
parent 5a9951db80a5e9b9f2d5ad7ca1c6efebbd00e11f | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Fri, 12 Mar 2021 22:22:13 +0100 | |
rework URI handling | |
- Parse the URI more correctly, following RFC 4266 (The gopher URI Scheme) and | |
RFC 3986 (Uniform Resource Identifier (URI): Generic Syntax). | |
- A URI fragment is no longer sent to the server. | |
- A gopher type is now optional when the path is empty, for example: | |
"gopher://codemadness.org". | |
Also, the use of strlcat() is removed, so the code should now be more portable. | |
Diffstat: | |
M hurl.c | 175 ++++++++++++++++++++---------… | |
1 file changed, 116 insertions(+), 59 deletions(-) | |
--- | |
diff --git a/hurl.c b/hurl.c | |
@@ -28,12 +28,15 @@ | |
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem" | |
#endif | |
-/* uri */ | |
+/* URI */ | |
struct uri { | |
- char proto[48]; | |
+ char proto[48]; /* scheme including ":" or "://" */ | |
+ char userinfo[256]; /* username [:password] */ | |
char host[256]; | |
- char path[2048]; | |
- char port[6]; /* numeric port */ | |
+ char port[6]; /* numeric port */ | |
+ char path[1024]; | |
+ char query[1024]; | |
+ char fragment[1024]; | |
}; | |
char *argv0; | |
@@ -61,70 +64,115 @@ sighandler(int signo) | |
} | |
int | |
-parseuri(const char *s, struct uri *u) | |
+uri_parse(const char *s, struct uri *u) | |
{ | |
- const char *p = s, *b; | |
- char *endptr = NULL; | |
+ const char *p = s; | |
+ char *endptr; | |
size_t i; | |
- unsigned long l; | |
+ long l; | |
- u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0'; | |
- if (!*p) | |
- return 0; | |
+ u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; | |
+ u->path[0] = u->query[0] = u->fragment[0] = '\0'; | |
- /* protocol part */ | |
- for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned cha… | |
- *p == '+' || *p == '-' || *p == '.'); p++) | |
+ /* protocol-relative */ | |
+ if (*p == '/' && *(p + 1) == '/') { | |
+ p += 2; /* skip "//" */ | |
+ goto parseauth; | |
+ } | |
+ | |
+ /* scheme / protocol part */ | |
+ for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || | |
+ *p == '+' || *p == '-' || *p == '.'; p++) | |
; | |
- if (!strncmp(p, "://", 3)) { | |
+ /* scheme, except if empty and starts with ":" then it is a path */ | |
+ if (*p == ':' && p != s) { | |
+ if (*(p + 1) == '/' && *(p + 2) == '/') | |
+ p += 3; /* skip "://" */ | |
+ else | |
+ p++; /* skip ":" */ | |
+ | |
if ((size_t)(p - s) >= sizeof(u->proto)) | |
return -1; /* protocol too long */ | |
memcpy(u->proto, s, p - s); | |
u->proto[p - s] = '\0'; | |
- p += 3; /* skip "://" */ | |
+ | |
+ if (*(p - 1) != '/') | |
+ goto parsepath; | |
} else { | |
- return -1; /* no protocol specified */ | |
+ p = s; /* no scheme format, reset to start */ | |
+ goto parsepath; | |
+ } | |
+ | |
+parseauth: | |
+ /* userinfo (username:password) */ | |
+ i = strcspn(p, "@/?#"); | |
+ if (p[i] == '@') { | |
+ if (i >= sizeof(u->userinfo)) | |
+ return -1; /* userinfo too long */ | |
+ memcpy(u->userinfo, p, i); | |
+ u->userinfo[i] = '\0'; | |
+ p += i + 1; | |
} | |
/* IPv6 address */ | |
if (*p == '[') { | |
- /* bracket not found or host too long */ | |
- if (!(b = strchr(p, ']')) || (size_t)(b - p) >= (ssize_t)sizeo… | |
+ /* bracket not found, host too short or too long */ | |
+ i = strcspn(p, "]"); | |
+ if (p[i] != ']' || i < 3) | |
return -1; | |
- memcpy(u->host, p + 1, b - p - 1); | |
- u->host[b - p - 1] = '\0'; | |
- p = b + 1; | |
+ i++; /* including "]" */ | |
} else { | |
/* domain / host part, skip until port, path or end. */ | |
- if ((i = strcspn(p, ":/")) >= sizeof(u->host)) | |
- return -1; /* host too long */ | |
- memcpy(u->host, p, i); | |
- u->host[i] = '\0'; | |
- p = &p[i]; | |
+ i = strcspn(p, ":/?#"); | |
} | |
+ if (i >= sizeof(u->host)) | |
+ return -1; /* host too long */ | |
+ memcpy(u->host, p, i); | |
+ u->host[i] = '\0'; | |
+ p += i; | |
+ | |
/* port */ | |
if (*p == ':') { | |
- if ((i = strcspn(++p, "/")) >= sizeof(u->port)) | |
+ p++; | |
+ if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) | |
return -1; /* port too long */ | |
memcpy(u->port, p, i); | |
u->port[i] = '\0'; | |
- /* check for valid port: range 1 - 65535 */ | |
+ /* check for valid port: range 1 - 65535, may be empty */ | |
errno = 0; | |
- l = strtoul(u->port, &endptr, 10); | |
- if (errno || u->port[0] == '\0' || *endptr || | |
- !l || l > 65535) | |
+ l = strtol(u->port, &endptr, 10); | |
+ if (i && (errno || *endptr || l <= 0 || l > 65535)) | |
return -1; | |
- p = &p[i]; | |
+ p += i; | |
} | |
- if (u->host[0]) { | |
- p = &p[strspn(p, "/")]; | |
- memcpy(u->path, "/", 2); | |
- } else { | |
- return -1; | |
+ | |
+parsepath: | |
+ /* path */ | |
+ if ((i = strcspn(p, "?#")) >= sizeof(u->path)) | |
+ return -1; /* path too long */ | |
+ memcpy(u->path, p, i); | |
+ u->path[i] = '\0'; | |
+ p += i; | |
+ | |
+ /* query */ | |
+ if (*p == '?') { | |
+ p++; | |
+ if ((i = strcspn(p, "#")) >= sizeof(u->query)) | |
+ return -1; /* query too long */ | |
+ memcpy(u->query, p, i); | |
+ u->query[i] = '\0'; | |
+ p += i; | |
} | |
- /* treat truncation as an error */ | |
- if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path)) | |
- return -1; | |
+ | |
+ /* fragment */ | |
+ if (*p == '#') { | |
+ p++; | |
+ if ((i = strlen(p)) >= sizeof(u->fragment)) | |
+ return -1; /* fragment too long */ | |
+ memcpy(u->fragment, p, i); | |
+ u->fragment[i] = '\0'; | |
+ } | |
+ | |
return 0; | |
} | |
@@ -206,11 +254,14 @@ https_request(void) | |
/* create and send HTTP header */ | |
r = snprintf(buf, sizeof(buf), | |
- "GET %s HTTP/1.0\r\n" | |
+ "GET %s%s%s HTTP/1.0\r\n" | |
"Host: %s%s%s\r\n" | |
"Connection: close\r\n" | |
"%s%s" | |
- "\r\n", u.path, u.host, | |
+ "\r\n", | |
+ u.path[0] ? u.path : "/", | |
+ u.query[0] ? "?" : "", u.query, | |
+ u.host, | |
stdport ? "" : ":", | |
stdport ? "" : u.port, | |
config_headers, config_headers[0] ? "\r\n" : ""); | |
@@ -334,11 +385,14 @@ http_request(void) | |
/* create and send HTTP header */ | |
r = snprintf(buf, sizeof(buf), | |
- "GET %s HTTP/1.0\r\n" | |
+ "GET %s%s%s HTTP/1.0\r\n" | |
"Host: %s%s%s\r\n" | |
"Connection: close\r\n" | |
"%s%s" | |
- "\r\n", u.path, u.host, | |
+ "\r\n", | |
+ u.path[0] ? u.path : "/", | |
+ u.query[0] ? "?" : "", u.query, | |
+ u.host, | |
stdport ? "" : ":", | |
stdport ? "" : u.port, | |
config_headers, config_headers[0] ? "\r\n" : ""); | |
@@ -427,7 +481,7 @@ int | |
gopher_request(void) | |
{ | |
char buf[READ_BUF_SIZ], *p; | |
- const char *errstr; | |
+ const char *errstr, *path; | |
size_t len = 0; | |
ssize_t r; | |
int fd = -1, ret = 1; | |
@@ -440,8 +494,13 @@ gopher_request(void) | |
if (pledge("stdio", NULL) == -1) | |
err(1, "pledge"); | |
- /* create and send path, skip type part */ | |
- r = snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2); | |
+ /* create and send path, skip type part, empty path is allowed, | |
+ see RFC 4266 The gopher URI Scheme - section 2.1 */ | |
+ path = u.path; | |
+ if (*path == '/' && *path++) | |
+ path++; | |
+ r = snprintf(buf, sizeof(buf), "%s%s%s\r\n", | |
+ path, u.query[0] ? "?" : "", u.query); | |
if (r < 0 || (size_t)r >= sizeof(buf)) { | |
fprintf(stderr, "not writing header because it is truncated"); | |
goto err; | |
@@ -623,8 +682,10 @@ main(int argc, char **argv) | |
usage(); | |
url = argv[0]; | |
- if (parseuri(url, &u) == -1) | |
- errx(1, "invalid url: %s", url); | |
+ if (uri_parse(url, &u) == -1) | |
+ errx(1, "invalid URL: %s", url); | |
+ if (u.userinfo[0]) | |
+ errx(1, "userinfo field not supported in the URL: %s", url); | |
if (config_timeout > 0) { | |
signal(SIGALRM, sighandler); | |
@@ -632,7 +693,7 @@ main(int argc, char **argv) | |
err(1, "alarm"); | |
} | |
- if (!strcmp(u.proto, "https")) { | |
+ if (!strcmp(u.proto, "https://")) { | |
if (tls_init()) | |
errx(1, "tls_init failed"); | |
if (!(tls_config = tls_config_new())) | |
@@ -643,22 +704,18 @@ main(int argc, char **argv) | |
errx(1, "tls set ciphers failed: %s", | |
tls_config_error(tls_config)); | |
} | |
- if (!u.port[0] && !strcmp(u.proto, "https")) | |
+ if (!u.port[0]) | |
memcpy(u.port, "443", 4); | |
statuscode = https_request(); | |
- } else if (!strcmp(u.proto, "http")) { | |
+ } else if (!strcmp(u.proto, "http://")) { | |
if (!u.port[0]) | |
memcpy(u.port, "80", 3); | |
statuscode = http_request(); | |
- } else if (!strcmp(u.proto, "gopher")) { | |
+ } else if (!strcmp(u.proto, "gopher://")) { | |
if (!u.port[0]) | |
memcpy(u.port, "70", 3); | |
- | |
- if (u.path[0] != '/' || u.path[1] == '\0') | |
- errx(1, "must specify type"); | |
- | |
statuscode = gopher_request(); | |
- } else if (!strcmp(u.proto, "gophers")) { | |
+ } else if (!strcmp(u.proto, "gophers://")) { | |
if (tls_init()) | |
errx(1, "tls_init failed"); | |
if (!(tls_config = tls_config_new())) |