Introduction
Introduction Statistics Contact Development Disclaimer Help
initial repo - uriparser - URI parser
git clone git://git.codemadness.org/uriparser
Log
Files
Refs
README
LICENSE
---
commit f003f99bc853675e14235c2750a31571c988543b
Author: Hiltjo Posthuma <[email protected]>
Date: Wed, 22 Nov 2023 19:21:03 +0100
initial repo
Diffstat:
A LICENSE | 15 +++++++++++++++
A Makefile | 5 +++++
A README | 1 +
A example.c | 65 +++++++++++++++++++++++++++++…
A strlcat.c | 54 +++++++++++++++++++++++++++++…
A strlcpy.c | 49 +++++++++++++++++++++++++++++…
A util.c | 207 ++++++++++++++++++++++++++++++
A util.h | 26 ++++++++++++++++++++++++++
8 files changed, 422 insertions(+), 0 deletions(-)
---
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2023 Hiltjo Posthuma <[email protected]>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+build: clean # depends on clean, so the old binary is removed before every build
+ ${CC} -o example example.c util.c strlcat.c strlcpy.c -Wall
+
+clean: # remove the example binary; rm -f does not fail when it is missing
+ rm -f example
diff --git a/README b/README
@@ -0,0 +1 @@
+Small URI parser written in C
diff --git a/example.c b/example.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+
+#include "util.h"
+
+/* Print every component of the URI `u` to stdout, one labelled line per
+   field. The URI is only read, hence the const pointer. The helper is
+   private to this example program, hence static. */
+static void
+printfields(const struct uri *u)
+{
+	printf("* proto: %s\n", u->proto);
+	printf("* userinfo: %s\n", u->userinfo);
+	printf("* host: %s\n", u->host);
+	printf("* port: %s\n", u->port);
+	printf("* path: %s\n", u->path);
+	printf("* query: %s\n", u->query);
+	printf("* fragment: %s\n", u->fragment);
+}
+
+int
+main(int argc, char *argv[]) /* demo: parse argv[1]; if argv[2] is given, parse it as base URI, resolve argv[1] against it and format the result */
+{
+ struct uri b, u, abs; /* b: base URI, u: URI from argv[1], abs: u made absolute using b */
+ char buf[4096]; /* receives the string written by uri_format() */
+ int r; /* return value of the most recent uri_*() call */
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <url> [baseurl]\n", argv[0]);
+ return 1;
+ }
+
+ r = uri_parse(argv[1], &u);
+ printf("uri_parse() for %s returned: %d\n", argv[1], r);
+ if (r != -1) { /* -1 is the failure value of uri_parse() */
+ printf("success!\n\nfields for %s:\n", argv[1]);
+ printfields(&u);
+ printf("\n");
+
+ if (argc > 2) { /* optional base URI was given */
+ r = uri_parse(argv[2], &b);
+ printf("uri_parse() for %s returned: %d\n", argv[2], r…
+ if (r != -1) {
+ printf("success!\n\nfields for %s:\n", argv[2]…
+ printfields(&b);
+ printf("\n");
+
+ r = uri_makeabs(&abs, &u, &b);
+ printf("uri_makeabs() for %s and %s returned: …
+ if (r != -1) {
+ printf("success!\n\nfields for %s and …
+ printfields(&abs);
+ r = uri_format(buf, sizeof(buf), &abs);
+ printf("uri_format() for absolute URI …
+ if (r > 0 && r < sizeof(buf)) /* print only a non-empty result that fit into buf completely */
+ printf("formatted URI: %s\n", …
+ } else {
+ printf("failure!\n");
+ }
+ } else {
+ printf("failure!\n");
+ }
+ }
+ } else {
+ printf("failure!\n");
+ }
+
+ return 0; /* parse failures are only reported on stdout; the exit status stays 0 */
+}
diff --git a/strlcat.c b/strlcat.c
@@ -0,0 +1,54 @@
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Appends src to string dst of size dsize (unlike strncat, dsize is the
+ * full size of dst, not space left). At most dsize-1 characters
+ * will be copied. Always NUL terminates (unless dsize <= strlen(dst)).
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
+ * If retval >= dsize, truncation occurred.
+ *
+ * dst:   destination buffer; only its first dsize bytes are examined
+ * src:   NUL-terminated string to append; it is always read to its end
+ * dsize: total size of the dst buffer in bytes
+ *
+ * If no NUL byte is found within the first dsize bytes of dst, dst is
+ * left untouched and dsize + strlen(src) is returned.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t dsize)
+{
+ const char *odst = dst;
+ const char *osrc = src;
+ size_t n = dsize;
+ size_t dlen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end. */
+ while (n-- != 0 && *dst != '\0')
+ dst++;
+ dlen = dst - odst; /* length of the existing string, at most dsize */
+ n = dsize - dlen; /* bytes left in dst, including room for the NUL */
+
+ if (n-- == 0) /* no room at all: nothing is written; otherwise one byte is now reserved for the NUL */
+ return(dlen + strlen(src));
+ while (*src != '\0') {
+ if (n != 0) { /* copy only while room remains; src is still walked to its end for the return value */
+ *dst++ = *src;
+ n--;
+ }
+ src++;
+ }
+ *dst = '\0';
+
+ return(dlen + (src - osrc)); /* count does not include NUL */
+}
diff --git a/strlcpy.c b/strlcpy.c
@@ -0,0 +1,49 @@
+/* $OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Copy string src to buffer dst of size dsize. At most dsize-1
+ * chars will be copied. Always NUL terminates (unless dsize == 0).
+ * Returns strlen(src); if retval >= dsize, truncation occurred.
+ *
+ * dst:   destination buffer; nothing is written when dsize is 0
+ * src:   NUL-terminated string to copy; it is always read to its end
+ * dsize: total size of the dst buffer in bytes
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t dsize)
+{
+ const char *osrc = src;
+ size_t nleft = dsize;
+
+ /* Copy as many bytes as will fit. */
+ if (nleft != 0) {
+ while (--nleft != 0) { /* the pre-decrement keeps one byte in reserve for the NUL */
+ if ((*dst++ = *src++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src. */
+ if (nleft == 0) { /* also taken when dsize was 0 */
+ if (dsize != 0)
+ *dst = '\0'; /* NUL-terminate dst */
+ while (*src++) /* advance past the NUL of src so its length can be computed */
+ ;
+ }
+
+ return(src - osrc - 1); /* count does not include NUL */
+}
diff --git a/util.c b/util.c
@@ -0,0 +1,207 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+/* Report whether the string `s` begins with a scheme / protocol part: one
+   or more of the characters a-z, A-Z, 0-9, '+', '-' and '.', directly
+   followed by ':'.
+   Returns 1 if it does, otherwise 0. */
+int
+uri_hasscheme(const char *s)
+{
+	const char *end = s;
+	unsigned char c;
+
+	/* move `end` to the first character that cannot be part of a scheme */
+	for (;;) {
+		c = (unsigned char)*end;
+		if (!ISALPHA(c) && !ISDIGIT(c) &&
+		    c != '+' && c != '-' && c != '.')
+			break;
+		end++;
+	}
+
+	/* nothing consumed: a leading ":" belongs to a path, not a scheme */
+	if (end == s)
+		return 0;
+
+	return *end == ':';
+}
+
+/* Parse URI string `s` into an uri structure `u`.
+   All fields of `u` are first reset to the empty string and then filled
+   with the components found in `s`:
+   - proto keeps its delimiter (":" or "://"),
+   - userinfo is stored without the trailing "@",
+   - host keeps the brackets of an IPv6 literal,
+   - port is empty or a decimal number in the range 1 - 65535,
+   - query and fragment are stored without the leading "?" and "#".
+   A component that does not fit into its fixed-size field is an error, it
+   is never truncated. On failure `u` may hold the components parsed up to
+   the point of the error.
+   Returns 0 on success or -1 on failure */
+int
+uri_parse(const char *s, struct uri *u)
+{
+	const char *p = s;
+	char *endptr;
+	size_t i;
+	long l;
+
+	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
+	u->path[0] = u->query[0] = u->fragment[0] = '\0';
+
+	/* protocol-relative */
+	if (*p == '/' && *(p + 1) == '/') {
+		p += 2; /* skip "//" */
+		goto parseauth;
+	}
+
+	/* scheme / protocol part */
+	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
+	    *p == '+' || *p == '-' || *p == '.'; p++)
+		;
+	/* scheme, except if empty and starts with ":" then it is a path */
+	if (*p == ':' && p != s) {
+		if (*(p + 1) == '/' && *(p + 2) == '/')
+			p += 3; /* skip "://" */
+		else
+			p++; /* skip ":" */
+
+		if ((size_t)(p - s) >= sizeof(u->proto))
+			return -1; /* protocol too long */
+		memcpy(u->proto, s, p - s);
+		u->proto[p - s] = '\0';
+
+		/* only "scheme://" is followed by an authority part */
+		if (*(p - 1) != '/')
+			goto parsepath;
+	} else {
+		p = s; /* no scheme format, reset to start */
+		goto parsepath;
+	}
+
+parseauth:
+	/* userinfo (username:password) */
+	i = strcspn(p, "@/?#");
+	if (p[i] == '@') {
+		if (i >= sizeof(u->userinfo))
+			return -1; /* userinfo too long */
+		memcpy(u->userinfo, p, i);
+		u->userinfo[i] = '\0';
+		p += i + 1;
+	}
+
+	/* IPv6 address */
+	if (*p == '[') {
+		/* bracket not found, host too short or too long */
+		i = strcspn(p, "]");
+		if (p[i] != ']' || i < 3)
+			return -1;
+		i++; /* including "]" */
+	} else {
+		/* domain / host part, skip until port, path or end. */
+		i = strcspn(p, ":/?#");
+	}
+	if (i >= sizeof(u->host))
+		return -1; /* host too long */
+	memcpy(u->host, p, i);
+	u->host[i] = '\0';
+	p += i;
+
+	/* port */
+	if (*p == ':') {
+		p++;
+		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
+			return -1; /* port too long */
+		memcpy(u->port, p, i);
+		u->port[i] = '\0';
+		/* check for valid port: range 1 - 65535, may be empty */
+		if (i) {
+			/* strtol() skips leading whitespace and accepts a
+			   sign, so " 80" or "+80" would pass the numeric
+			   check below: require a leading digit first */
+			if (!ISDIGIT((unsigned char)u->port[0]))
+				return -1;
+			errno = 0;
+			l = strtol(u->port, &endptr, 10);
+			if (errno || *endptr || l <= 0 || l > 65535)
+				return -1;
+		}
+		p += i;
+	}
+
+parsepath:
+	/* path */
+	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
+		return -1; /* path too long */
+	memcpy(u->path, p, i);
+	u->path[i] = '\0';
+	p += i;
+
+	/* query */
+	if (*p == '?') {
+		p++;
+		if ((i = strcspn(p, "#")) >= sizeof(u->query))
+			return -1; /* query too long */
+		memcpy(u->query, p, i);
+		u->query[i] = '\0';
+		p += i;
+	}
+
+	/* fragment */
+	if (*p == '#') {
+		p++;
+		if ((i = strlen(p)) >= sizeof(u->fragment))
+			return -1; /* fragment too long */
+		memcpy(u->fragment, p, i);
+		u->fragment[i] = '\0';
+	}
+
+	return 0;
+}
+
+/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
+   Follows some of the logic from "RFC 3986 - 5.2.2. Transform References":
+   - the fragment always comes from `u`,
+   - if `u` has a scheme or a host, every component is taken from `u`;
+     only a missing scheme is borrowed from `b`,
+   - otherwise scheme, userinfo, host and port come from `b`; an empty
+     path in `u` keeps the path of `b`, a path starting with "/" replaces
+     it and any other path is appended to the path of `b` up to and
+     including its last "/",
+   - the query comes from `u`, unless `u` has neither a path nor a query,
+     then it comes from `b`.
+   Dot segments ("." and "..") are not removed from the merged path.
+   `u` and `b` are only read, also when an error is returned.
+   Returns 0 on success, -1 on error or truncation. */
+int
+uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
+{
+	const char *slash;
+	size_t off, dirlen;
+
+	/* each field of `a` has the size of the field it is copied from, so
+	   the plain strlcpy() calls below cannot truncate */
+	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
+
+	if (u->proto[0] || u->host[0]) {
+		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto,
+		    sizeof(a->proto));
+		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
+		strlcpy(a->host, u->host, sizeof(a->host));
+		strlcpy(a->port, u->port, sizeof(a->port));
+		strlcpy(a->path, u->path, sizeof(a->path));
+		strlcpy(a->query, u->query, sizeof(a->query));
+		return 0;
+	}
+
+	strlcpy(a->proto, b->proto, sizeof(a->proto));
+	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
+	strlcpy(a->host, b->host, sizeof(a->host));
+	strlcpy(a->port, b->port, sizeof(a->port));
+
+	if (!u->path[0]) {
+		strlcpy(a->path, b->path, sizeof(a->path));
+	} else if (u->path[0] == '/') {
+		strlcpy(a->path, u->path, sizeof(a->path));
+	} else {
+		/* start with "/" if the base has a host but its path does
+		   not begin with one, otherwise start empty */
+		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
+		a->path[1] = '\0';
+
+		/* append the base path up to and including its last "/";
+		   copy by length so the base URI is never written to */
+		if ((slash = strrchr(b->path, '/'))) {
+			off = strlen(a->path);
+			dirlen = (size_t)(slash - b->path) + 1;
+			if (off + dirlen >= sizeof(a->path))
+				return -1;
+			memcpy(a->path + off, b->path, dirlen);
+			a->path[off + dirlen] = '\0';
+		}
+		if (strlcat(a->path, u->path, sizeof(a->path)) >=
+		    sizeof(a->path))
+			return -1;
+	}
+
+	if (u->path[0] || u->query[0])
+		strlcpy(a->query, u->query, sizeof(a->query));
+	else
+		strlcpy(a->query, b->query, sizeof(a->query));
+
+	return 0;
+}
+
+/* Format the URI `u` as a string into `buf`, which holds `bufsiz` bytes.
+   "@" is written after a non-empty userinfo, ":" before a non-empty port,
+   "?" before a non-empty query and "#" before a non-empty fragment. A "/"
+   is inserted between host and path when both are set and the path does
+   not already start with one.
+   Returns the value of snprintf(): the length of the complete string, not
+   counting the NUL byte, so a value >= `bufsiz` means the output was
+   truncated. A negative value signals an output error. */
+int
+uri_format(char *buf, size_t bufsiz, struct uri *u)
+{
+	const char *at = "", *colon = "", *slash = "";
+	const char *qmark = "", *hash = "";
+
+	/* pick the separators that are needed for the components present */
+	if (u->userinfo[0])
+		at = "@";
+	if (u->port[0])
+		colon = ":";
+	if (u->host[0] && u->path[0] && u->path[0] != '/')
+		slash = "/";
+	if (u->query[0])
+		qmark = "?";
+	if (u->fragment[0])
+		hash = "#";
+
+	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
+	    u->proto,
+	    u->userinfo, at,
+	    u->host,
+	    colon, u->port,
+	    slash, u->path,
+	    qmark, u->query,
+	    hash, u->fragment);
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,26 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdio.h> /* provides size_t */
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8.
+   The argument is converted to unsigned, so a value below the start of
+   the range wraps around to a large number and a single comparison checks
+   both bounds. ISALPHA sets bit 5 (| 32) first, which maps 'A'-'Z' onto
+   'a'-'z'. The argument is parenthesized so any expression can be passed. */
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
+#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
+
+/* prototypes for the bundled strlcat.c and strlcpy.c; the #undef removes a
+   macro of the same name, presumably in case a system header defines one */
+#undef strlcat
+size_t strlcat(char *dst, const char *src, size_t dsize);
+#undef strlcpy
+size_t strlcpy(char *dst, const char *src, size_t dsize);
+
+/* URI split into its components. Every field is a NUL-terminated string of
+   fixed capacity; an absent component is the empty string. */
+struct uri {
+	char proto[48];     /* scheme including ":" or "://" */
+	char userinfo[256]; /* username [:password] */
+	char host[256];     /* an IPv6 literal keeps its brackets */
+	char port[6];       /* numeric port: up to 5 digits and the NUL */
+	char path[1024];
+	char query[1024];   /* without the leading "?" */
+	char fragment[1024]; /* without the leading "#" */
+};
+
+/* write `u` as a string into buf; returns the snprintf() result */
+int uri_format(char *buf, size_t bufsiz, struct uri *u);
+/* 1 if `s` starts with a non-empty scheme followed by ":", else 0 */
+int uri_hasscheme(const char *s);
+/* resolve `u` against base `b` into `a`; 0 on success, -1 on error */
+int uri_makeabs(struct uri *a, struct uri *u, struct uri *b);
+/* parse `s` into `u`; 0 on success, -1 on failure */
+int uri_parse(const char *s, struct uri *u);
+
+#endif /* UTIL_H */
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.