util.c - uriparser - URI parser | |
git clone git://git.codemadness.org/uriparser | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
util.c (5093B) | |
--- | |
1 #include <errno.h> | |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 | |
6 #include "util.h" | |
7 | |
/* Report whether string `s` starts with a non-empty scheme / protocol part.
   The candidate scheme is the leading run of letters, digits, '+', '-' and
   '.'; it only counts as a scheme when it is followed by ':'.
   Returns 1 when a scheme is present, otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *end = s;
	char c;

	/* advance over every character that may be part of a scheme */
	for (;;) {
		c = *end;
		if (!(ISALPHA((unsigned char)c) || ISDIGIT((unsigned char)c) ||
		    c == '+' || c == '-' || c == '.'))
			break;
		end++;
	}

	/* nothing before the delimiter: a leading ":" is a path, not a scheme */
	if (end == s)
		return 0;

	return *end == ':';
}
20 | |
21 /* Parse URI string `s` into an uri structure `u`. | |
22 Returns 0 on success or -1 on failure */ | |
23 int | |
24 uri_parse(const char *s, struct uri *u) | |
25 { | |
26 const char *p = s; | |
27 char *endptr; | |
28 size_t i; | |
29 long l; | |
30 | |
31 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; | |
32 u->path[0] = u->query[0] = u->fragment[0] = '\0'; | |
33 | |
34 /* protocol-relative */ | |
35 if (*p == '/' && *(p + 1) == '/') { | |
36 p += 2; /* skip "//" */ | |
37 goto parseauth; | |
38 } | |
39 | |
40 /* scheme / protocol part */ | |
41 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) … | |
42 *p == '+' || *p == '-' || *p == '.'; p++) | |
43 ; | |
44 /* scheme, except if empty and starts with ":" then it is a path… | |
45 if (*p == ':' && p != s) { | |
46 if (*(p + 1) == '/' && *(p + 2) == '/') | |
47 p += 3; /* skip "://" */ | |
48 else | |
49 p++; /* skip ":" */ | |
50 | |
51 if ((size_t)(p - s) >= sizeof(u->proto)) | |
52 return -1; /* protocol too long */ | |
53 memcpy(u->proto, s, p - s); | |
54 u->proto[p - s] = '\0'; | |
55 | |
56 if (*(p - 1) != '/') | |
57 goto parsepath; | |
58 } else { | |
59 p = s; /* no scheme format, reset to start */ | |
60 goto parsepath; | |
61 } | |
62 | |
63 parseauth: | |
64 /* userinfo (username:password) */ | |
65 i = strcspn(p, "@/?#"); | |
66 if (p[i] == '@') { | |
67 if (i >= sizeof(u->userinfo)) | |
68 return -1; /* userinfo too long */ | |
69 memcpy(u->userinfo, p, i); | |
70 u->userinfo[i] = '\0'; | |
71 p += i + 1; | |
72 } | |
73 | |
74 /* IPv6 address */ | |
75 if (*p == '[') { | |
76 /* bracket not found, host too short or too long */ | |
77 i = strcspn(p, "]"); | |
78 if (p[i] != ']' || i < 3) | |
79 return -1; | |
80 i++; /* including "]" */ | |
81 } else { | |
82 /* domain / host part, skip until port, path or end. */ | |
83 i = strcspn(p, ":/?#"); | |
84 } | |
85 if (i >= sizeof(u->host)) | |
86 return -1; /* host too long */ | |
87 memcpy(u->host, p, i); | |
88 u->host[i] = '\0'; | |
89 p += i; | |
90 | |
91 /* port */ | |
92 if (*p == ':') { | |
93 p++; | |
94 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) | |
95 return -1; /* port too long */ | |
96 memcpy(u->port, p, i); | |
97 u->port[i] = '\0'; | |
98 /* check for valid port: range 1 - 65535, may be empty */ | |
99 errno = 0; | |
100 l = strtol(u->port, &endptr, 10); | |
101 if (i && (errno || *endptr || l <= 0 || l > 65535)) | |
102 return -1; | |
103 p += i; | |
104 } | |
105 | |
106 parsepath: | |
107 /* path */ | |
108 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) | |
109 return -1; /* path too long */ | |
110 memcpy(u->path, p, i); | |
111 u->path[i] = '\0'; | |
112 p += i; | |
113 | |
114 /* query */ | |
115 if (*p == '?') { | |
116 p++; | |
117 if ((i = strcspn(p, "#")) >= sizeof(u->query)) | |
118 return -1; /* query too long */ | |
119 memcpy(u->query, p, i); | |
120 u->query[i] = '\0'; | |
121 p += i; | |
122 } | |
123 | |
124 /* fragment */ | |
125 if (*p == '#') { | |
126 p++; | |
127 if ((i = strlen(p)) >= sizeof(u->fragment)) | |
128 return -1; /* fragment too long */ | |
129 memcpy(u->fragment, p, i); | |
130 u->fragment[i] = '\0'; | |
131 } | |
132 | |
133 return 0; | |
134 } | |
135 | |
136 /* Transform and try to make the URI `u` absolute using base URI `b` int… | |
137 Follows some of the logic from "RFC 3986 - 5.2.2. Transform Reference… | |
138 Returns 0 on success, -1 on error or truncation. */ | |
139 int | |
140 uri_makeabs(struct uri *a, struct uri *u, struct uri *b) | |
141 { | |
142 char *p; | |
143 int c; | |
144 | |
145 strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); | |
146 | |
147 if (u->proto[0] || u->host[0]) { | |
148 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, siz… | |
149 strlcpy(a->host, u->host, sizeof(a->host)); | |
150 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); | |
151 strlcpy(a->host, u->host, sizeof(a->host)); | |
152 strlcpy(a->port, u->port, sizeof(a->port)); | |
153 strlcpy(a->path, u->path, sizeof(a->path)); | |
154 strlcpy(a->query, u->query, sizeof(a->query)); | |
155 return 0; | |
156 } | |
157 | |
158 strlcpy(a->proto, b->proto, sizeof(a->proto)); | |
159 strlcpy(a->host, b->host, sizeof(a->host)); | |
160 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); | |
161 strlcpy(a->host, b->host, sizeof(a->host)); | |
162 strlcpy(a->port, b->port, sizeof(a->port)); | |
163 | |
164 if (!u->path[0]) { | |
165 strlcpy(a->path, b->path, sizeof(a->path)); | |
166 } else if (u->path[0] == '/') { | |
167 strlcpy(a->path, u->path, sizeof(a->path)); | |
168 } else { | |
169 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '… | |
170 a->path[1] = '\0'; | |
171 | |
172 if ((p = strrchr(b->path, '/'))) { | |
173 c = *(++p); | |
174 *p = '\0'; /* temporary NUL-terminate */ | |
175 if (strlcat(a->path, b->path, sizeof(a->path)) >… | |
176 return -1; | |
177 *p = c; /* restore */ | |
178 } | |
179 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof… | |
180 return -1; | |
181 } | |
182 | |
183 if (u->path[0] || u->query[0]) | |
184 strlcpy(a->query, u->query, sizeof(a->query)); | |
185 else | |
186 strlcpy(a->query, b->query, sizeof(a->query)); | |
187 | |
188 return 0; | |
189 } | |
190 | |
191 int | |
192 uri_format(char *buf, size_t bufsiz, struct uri *u) | |
193 { | |
194 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", | |
195 u->proto, | |
196 u->userinfo[0] ? u->userinfo : "", | |
197 u->userinfo[0] ? "@" : "", | |
198 u->host, | |
199 u->port[0] ? ":" : "", | |
200 u->port, | |
201 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", | |
202 u->path, | |
203 u->query[0] ? "?" : "", | |
204 u->query, | |
205 u->fragment[0] ? "#" : "", | |
206 u->fragment); | |
207 } |