initial repo - uriparser - URI parser | |
git clone git://git.codemadness.org/uriparser | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit f003f99bc853675e14235c2750a31571c988543b | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Wed, 22 Nov 2023 19:21:03 +0100 | |
initial repo | |
Diffstat: | |
A LICENSE | 15 +++++++++++++++ | |
A Makefile | 5 +++++ | |
A README | 1 + | |
A example.c | 65 +++++++++++++++++++++++++++++… | |
A strlcat.c | 54 +++++++++++++++++++++++++++++… | |
A strlcpy.c | 49 +++++++++++++++++++++++++++++… | |
A util.c | 207 ++++++++++++++++++++++++++++++ | |
A util.h | 26 ++++++++++++++++++++++++++ | |
8 files changed, 422 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/LICENSE b/LICENSE | |
@@ -0,0 +1,15 @@ | |
+ISC License | |
+ | |
+Copyright (c) 2023 Hiltjo Posthuma <[email protected]> | |
+ | |
+Permission to use, copy, modify, and/or distribute this software for any | |
+purpose with or without fee is hereby granted, provided that the above | |
+copyright notice and this permission notice appear in all copies. | |
+ | |
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
diff --git a/Makefile b/Makefile | |
@@ -0,0 +1,5 @@ | |
# Build the example program from scratch.
# CC, CFLAGS and LDFLAGS may be overridden from the environment or the
# make command line; -Wall is always passed.
# Neither target produces a file of its own name, so both are declared
# phony: a stray file called "build" or "clean" must not disable them.
.PHONY: build clean

build: clean
	${CC} ${CFLAGS} -Wall -o example example.c util.c strlcat.c strlcpy.c ${LDFLAGS}

clean:
	rm -f example
diff --git a/README b/README | |
@@ -0,0 +1 @@ | |
+Small URI parser written in C | |
diff --git a/example.c b/example.c | |
@@ -0,0 +1,65 @@ | |
+#include <stdio.h> | |
+ | |
+#include "util.h" | |
+ | |
+void | |
+printfields(struct uri *u) | |
+{ | |
+ printf("* proto: %s\n", u->proto); | |
+ printf("* userinfo: %s\n", u->userinfo); | |
+ printf("* host: %s\n", u->host); | |
+ printf("* port: %s\n", u->port); | |
+ printf("* path: %s\n", u->path); | |
+ printf("* query: %s\n", u->query); | |
+ printf("* fragment: %s\n", u->fragment); | |
+} | |
+ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+ struct uri b, u, abs; | |
+ char buf[4096]; | |
+ int r; | |
+ | |
+ if (argc < 2) { | |
+ fprintf(stderr, "usage: %s <url> [baseurl]\n", argv[0]); | |
+ return 1; | |
+ } | |
+ | |
+ r = uri_parse(argv[1], &u); | |
+ printf("uri_parse() for %s returned: %d\n", argv[1], r); | |
+ if (r != -1) { | |
+ printf("success!\n\nfields for %s:\n", argv[1]); | |
+ printfields(&u); | |
+ printf("\n"); | |
+ | |
+ if (argc > 2) { | |
+ r = uri_parse(argv[2], &b); | |
+ printf("uri_parse() for %s returned: %d\n", argv[2], r… | |
+ if (r != -1) { | |
+ printf("success!\n\nfields for %s:\n", argv[2]… | |
+ printfields(&b); | |
+ printf("\n"); | |
+ | |
+ r = uri_makeabs(&abs, &u, &b); | |
+ printf("uri_makeabs() for %s and %s returned: … | |
+ if (r != -1) { | |
+ printf("success!\n\nfields for %s and … | |
+ printfields(&abs); | |
+ r = uri_format(buf, sizeof(buf), &abs); | |
+ printf("uri_format() for absolute URI … | |
+ if (r > 0 && r < sizeof(buf)) | |
+ printf("formatted URI: %s\n", … | |
+ } else { | |
+ printf("failure!\n"); | |
+ } | |
+ } else { | |
+ printf("failure!\n"); | |
+ } | |
+ } | |
+ } else { | |
+ printf("failure!\n"); | |
+ } | |
+ | |
+ return 0; | |
+} | |
diff --git a/strlcat.c b/strlcat.c | |
@@ -0,0 +1,54 @@ | |
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ … | |
+ | |
+/* | |
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]> | |
+ * | |
+ * Permission to use, copy, modify, and distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+#include <string.h> | |
+ | |
/*
 * Append the string src to the string held in dst, where dsize is the
 * total size of the dst buffer (not the remaining space, as with strncat).
 * The result never holds more than dsize-1 characters and is always
 * NUL-terminated, unless dst contains no NUL within its first dsize bytes,
 * in which case dst is left untouched.
 * Returns strlen(src) + MIN(dsize, strlen(initial dst)); a return value
 * >= dsize means the result was truncated.
 */
size_t
strlcat(char *dst, const char *src, size_t dsize)
{
	size_t dlen, slen, room;

	/* Length of the existing string, never scanning past the buffer. */
	for (dlen = 0; dlen < dsize && dst[dlen] != '\0'; dlen++)
		;
	slen = strlen(src);

	/* No terminator inside the buffer: there is nowhere to append. */
	if (dlen == dsize)
		return dlen + slen;

	/* Bytes that fit while keeping one byte for the terminating NUL. */
	room = dsize - dlen - 1;
	if (slen < room)
		room = slen;

	memcpy(dst + dlen, src, room);
	dst[dlen + room] = '\0';

	return dlen + slen; /* count does not include NUL */
}
diff --git a/strlcpy.c b/strlcpy.c | |
@@ -0,0 +1,49 @@ | |
+/* $OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $ … | |
+ | |
+/* | |
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]> | |
+ * | |
+ * Permission to use, copy, modify, and distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+#include <string.h> | |
+ | |
/*
 * Copy the string src into the buffer dst of size dsize.  No more than
 * dsize-1 characters are copied and the result is always NUL-terminated,
 * except when dsize is 0, in which case dst is not written at all.
 * Returns strlen(src); a return value >= dsize means truncation occurred.
 */
size_t
strlcpy(char *dst, const char *src, size_t dsize)
{
	size_t slen = strlen(src);
	size_t ncopy;

	if (dsize != 0) {
		/* Copy what fits, always leaving room for the terminator. */
		ncopy = slen < dsize ? slen : dsize - 1;
		memcpy(dst, src, ncopy);
		dst[ncopy] = '\0';
	}

	return slen; /* count does not include NUL */
}
diff --git a/util.c b/util.c | |
@@ -0,0 +1,207 @@ | |
+#include <errno.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+#include "util.h" | |
+ | |
/* Report whether `s` begins with a non-empty scheme / protocol part:
   a run of ASCII letters, digits, '+', '-' or '.' directly followed by ':'.
   Returns 1 if so, otherwise 0. */
int
uri_hasscheme(const char *s)
{
	size_t len = 0;
	unsigned c;

	for (;; len++) {
		c = (unsigned char)s[len];
		/* ASCII letter (case folded via | 32) or ASCII digit */
		if ((c | 32) - 'a' < 26 || c - '0' < 10)
			continue;
		if (c == '+' || c == '-' || c == '.')
			continue;
		break;
	}

	/* an empty run followed by ":" is a path, not a scheme */
	return len > 0 && s[len] == ':';
}
+ | |
+/* Parse URI string `s` into an uri structure `u`. | |
+ Returns 0 on success or -1 on failure */ | |
+int | |
+uri_parse(const char *s, struct uri *u) | |
+{ | |
+ const char *p = s; | |
+ char *endptr; | |
+ size_t i; | |
+ long l; | |
+ | |
+ u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; | |
+ u->path[0] = u->query[0] = u->fragment[0] = '\0'; | |
+ | |
+ /* protocol-relative */ | |
+ if (*p == '/' && *(p + 1) == '/') { | |
+ p += 2; /* skip "//" */ | |
+ goto parseauth; | |
+ } | |
+ | |
+ /* scheme / protocol part */ | |
+ for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || | |
+ *p == '+' || *p == '-' || *p == '.'; p++) | |
+ ; | |
+ /* scheme, except if empty and starts with ":" then it is a path */ | |
+ if (*p == ':' && p != s) { | |
+ if (*(p + 1) == '/' && *(p + 2) == '/') | |
+ p += 3; /* skip "://" */ | |
+ else | |
+ p++; /* skip ":" */ | |
+ | |
+ if ((size_t)(p - s) >= sizeof(u->proto)) | |
+ return -1; /* protocol too long */ | |
+ memcpy(u->proto, s, p - s); | |
+ u->proto[p - s] = '\0'; | |
+ | |
+ if (*(p - 1) != '/') | |
+ goto parsepath; | |
+ } else { | |
+ p = s; /* no scheme format, reset to start */ | |
+ goto parsepath; | |
+ } | |
+ | |
+parseauth: | |
+ /* userinfo (username:password) */ | |
+ i = strcspn(p, "@/?#"); | |
+ if (p[i] == '@') { | |
+ if (i >= sizeof(u->userinfo)) | |
+ return -1; /* userinfo too long */ | |
+ memcpy(u->userinfo, p, i); | |
+ u->userinfo[i] = '\0'; | |
+ p += i + 1; | |
+ } | |
+ | |
+ /* IPv6 address */ | |
+ if (*p == '[') { | |
+ /* bracket not found, host too short or too long */ | |
+ i = strcspn(p, "]"); | |
+ if (p[i] != ']' || i < 3) | |
+ return -1; | |
+ i++; /* including "]" */ | |
+ } else { | |
+ /* domain / host part, skip until port, path or end. */ | |
+ i = strcspn(p, ":/?#"); | |
+ } | |
+ if (i >= sizeof(u->host)) | |
+ return -1; /* host too long */ | |
+ memcpy(u->host, p, i); | |
+ u->host[i] = '\0'; | |
+ p += i; | |
+ | |
+ /* port */ | |
+ if (*p == ':') { | |
+ p++; | |
+ if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) | |
+ return -1; /* port too long */ | |
+ memcpy(u->port, p, i); | |
+ u->port[i] = '\0'; | |
+ /* check for valid port: range 1 - 65535, may be empty */ | |
+ errno = 0; | |
+ l = strtol(u->port, &endptr, 10); | |
+ if (i && (errno || *endptr || l <= 0 || l > 65535)) | |
+ return -1; | |
+ p += i; | |
+ } | |
+ | |
+parsepath: | |
+ /* path */ | |
+ if ((i = strcspn(p, "?#")) >= sizeof(u->path)) | |
+ return -1; /* path too long */ | |
+ memcpy(u->path, p, i); | |
+ u->path[i] = '\0'; | |
+ p += i; | |
+ | |
+ /* query */ | |
+ if (*p == '?') { | |
+ p++; | |
+ if ((i = strcspn(p, "#")) >= sizeof(u->query)) | |
+ return -1; /* query too long */ | |
+ memcpy(u->query, p, i); | |
+ u->query[i] = '\0'; | |
+ p += i; | |
+ } | |
+ | |
+ /* fragment */ | |
+ if (*p == '#') { | |
+ p++; | |
+ if ((i = strlen(p)) >= sizeof(u->fragment)) | |
+ return -1; /* fragment too long */ | |
+ memcpy(u->fragment, p, i); | |
+ u->fragment[i] = '\0'; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+/* Transform and try to make the URI `u` absolute using base URI `b` into `a`. | |
+ Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". | |
+ Returns 0 on success, -1 on error or truncation. */ | |
+int | |
+uri_makeabs(struct uri *a, struct uri *u, struct uri *b) | |
+{ | |
+ char *p; | |
+ int c; | |
+ | |
+ strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); | |
+ | |
+ if (u->proto[0] || u->host[0]) { | |
+ strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a-… | |
+ strlcpy(a->host, u->host, sizeof(a->host)); | |
+ strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); | |
+ strlcpy(a->host, u->host, sizeof(a->host)); | |
+ strlcpy(a->port, u->port, sizeof(a->port)); | |
+ strlcpy(a->path, u->path, sizeof(a->path)); | |
+ strlcpy(a->query, u->query, sizeof(a->query)); | |
+ return 0; | |
+ } | |
+ | |
+ strlcpy(a->proto, b->proto, sizeof(a->proto)); | |
+ strlcpy(a->host, b->host, sizeof(a->host)); | |
+ strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); | |
+ strlcpy(a->host, b->host, sizeof(a->host)); | |
+ strlcpy(a->port, b->port, sizeof(a->port)); | |
+ | |
+ if (!u->path[0]) { | |
+ strlcpy(a->path, b->path, sizeof(a->path)); | |
+ } else if (u->path[0] == '/') { | |
+ strlcpy(a->path, u->path, sizeof(a->path)); | |
+ } else { | |
+ a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0'; | |
+ a->path[1] = '\0'; | |
+ | |
+ if ((p = strrchr(b->path, '/'))) { | |
+ c = *(++p); | |
+ *p = '\0'; /* temporary NUL-terminate */ | |
+ if (strlcat(a->path, b->path, sizeof(a->path)) >= size… | |
+ return -1; | |
+ *p = c; /* restore */ | |
+ } | |
+ if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->pa… | |
+ return -1; | |
+ } | |
+ | |
+ if (u->path[0] || u->query[0]) | |
+ strlcpy(a->query, u->query, sizeof(a->query)); | |
+ else | |
+ strlcpy(a->query, b->query, sizeof(a->query)); | |
+ | |
+ return 0; | |
+} | |
+ | |
+int | |
+uri_format(char *buf, size_t bufsiz, struct uri *u) | |
+{ | |
+ return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", | |
+ u->proto, | |
+ u->userinfo[0] ? u->userinfo : "", | |
+ u->userinfo[0] ? "@" : "", | |
+ u->host, | |
+ u->port[0] ? ":" : "", | |
+ u->port, | |
+ u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", | |
+ u->path, | |
+ u->query[0] ? "?" : "", | |
+ u->query, | |
+ u->fragment[0] ? "#" : "", | |
+ u->fragment); | |
+} | |
diff --git a/util.h b/util.h | |
@@ -0,0 +1,26 @@ | |
/* Shared declarations for the URI parser: character classification macros,
   the strlcat/strlcpy prototypes and the URI structure with its functions. */
#ifndef UTIL_H
#define UTIL_H

#include <stdio.h>

/* ctype-like macros, but always compatible with ASCII / UTF-8.
   The argument is evaluated once and is fully parenthesized, so any
   expression may be passed. Both rely on unsigned wrap-around: values
   below the range start wrap to a huge number and fail the "<" test.
   ISALPHA folds upper case to lower case with "| 32" before comparing. */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)

/* the #undef guards against a libc that provides these as macros */
#undef strlcat
size_t strlcat(char *dst, const char *src, size_t dsize);
#undef strlcpy
size_t strlcpy(char *dst, const char *src, size_t dsize);

/* URI: every component is a NUL-terminated string, empty if not present */
struct uri {
	char proto[48];     /* scheme including ":" or "://" */
	char userinfo[256]; /* username [:password] */
	char host[256];
	char port[6];       /* numeric port */
	char path[1024];
	char query[1024];
	char fragment[1024];
};

/* format `u` into `buf`, returns the result of snprintf() */
int uri_format(char *buf, size_t bufsiz, struct uri *u);
/* returns non-zero if `s` starts with a non-empty scheme followed by ":" */
int uri_hasscheme(const char *s);
/* make `u` absolute against base `b`, result in `a`; 0 on success, -1 on error */
int uri_makeabs(struct uri *a, struct uri *u, struct uri *b);
/* parse `s` into `u`; 0 on success, -1 on failure */
int uri_parse(const char *s, struct uri *u);

#endif /* UTIL_H */