/* util.c - sfeed - RSS and Atom parser
 * Source repository: git clone git://git.codemadness.org/sfeed
 * (text extracted from the repository web listing; util.c is 9301 bytes) */
1 #include <errno.h> | |
2 #include <stdarg.h> | |
3 #include <stdio.h> | |
4 #include <stdlib.h> | |
5 #include <string.h> | |
6 #include <wchar.h> | |
7 | |
8 #include "util.h" | |
9 | |
10 /* print to stderr, print error message of errno and exit(). | |
11 * Unlike BSD err() it does not prefix __progname */ | |
12 __dead void | |
13 err(int exitstatus, const char *fmt, ...) | |
14 { | |
15 va_list ap; | |
16 int saved_errno; | |
17 | |
18 saved_errno = errno; | |
19 | |
20 if (fmt) { | |
21 va_start(ap, fmt); | |
22 vfprintf(stderr, fmt, ap); | |
23 va_end(ap); | |
24 fputs(": ", stderr); | |
25 } | |
26 fprintf(stderr, "%s\n", strerror(saved_errno)); | |
27 | |
28 exit(exitstatus); | |
29 } | |
30 | |
31 /* print to stderr and exit(). | |
32 * Unlike BSD errx() it does not prefix __progname */ | |
33 __dead void | |
34 errx(int exitstatus, const char *fmt, ...) | |
35 { | |
36 va_list ap; | |
37 | |
38 if (fmt) { | |
39 va_start(ap, fmt); | |
40 vfprintf(stderr, fmt, ap); | |
41 va_end(ap); | |
42 } | |
43 fputs("\n", stderr); | |
44 | |
45 exit(exitstatus); | |
46 } | |
47 | |
/* Handle read or write errors for a FILE * stream.
 * mode 'r': exit through errx() if the error indicator of `fp` is set.
 * mode 'w': flush `fp` first and exit through errx() if the flush fails or
 *           the error indicator is set.
 * Any other mode does nothing. `name` is only used in the error message. */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	if (mode == 'r' && ferror(fp))
		errx(1, "read error: %s", name);
	else if (mode == 'w' && (fflush(fp) || ferror(fp)))
		errx(1, "write error: %s", name);
}
57 | |
/* strcasestr() included for portability.
 * Find the first occurrence of `n` (needle) in `h` (haystack), comparing
 * bytes through the TOLOWER() macro. Returns a pointer into `h` at the start
 * of the match, `h` itself when `n` is empty, or NULL when there is no
 * match. */
char *
strcasestr(const char *h, const char *n)
{
	size_t i;

	if (!n[0])
		return (char *)h;

	for (; *h; ++h) {
		/* the NUL terminator of `h` never equals a non-NUL needle
		 * byte, so the comparison cannot run past the end of `h` */
		for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
		     TOLOWER((unsigned char)h[i]); ++i)
			;
		if (n[i] == '\0')
			return (char *)h; /* whole needle matched */
	}

	return NULL;
}
77 | |
/* Check if string has a non-empty scheme / protocol part.
 * The scheme is the leading run of letters, digits, '+', '-' and '.'.
 * Returns 1 if that run is non-empty and directly followed by ':',
 * otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *p = s;

	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
		       *p == '+' || *p == '-' || *p == '.'; p++)
		;
	/* scheme, except if empty and starts with ":" then it is a path */
	return (*p == ':' && p != s);
}
90 | |
91 /* Parse URI string `s` into an uri structure `u`. | |
92 * Returns 0 on success or -1 on failure */ | |
93 int | |
94 uri_parse(const char *s, struct uri *u) | |
95 { | |
96 const char *p = s; | |
97 char *endptr; | |
98 size_t i; | |
99 long l; | |
100 | |
101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; | |
102 u->path[0] = u->query[0] = u->fragment[0] = '\0'; | |
103 | |
104 /* protocol-relative */ | |
105 if (*p == '/' && *(p + 1) == '/') { | |
106 p += 2; /* skip "//" */ | |
107 goto parseauth; | |
108 } | |
109 | |
110 /* scheme / protocol part */ | |
111 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) … | |
112 *p == '+' || *p == '-' || *p == '.'; p++) | |
113 ; | |
114 /* scheme, except if empty and starts with ":" then it is a path… | |
115 if (*p == ':' && p != s) { | |
116 if (*(p + 1) == '/' && *(p + 2) == '/') | |
117 p += 3; /* skip "://" */ | |
118 else | |
119 p++; /* skip ":" */ | |
120 | |
121 if ((size_t)(p - s) >= sizeof(u->proto)) | |
122 return -1; /* protocol too long */ | |
123 memcpy(u->proto, s, p - s); | |
124 u->proto[p - s] = '\0'; | |
125 | |
126 if (*(p - 1) != '/') | |
127 goto parsepath; | |
128 } else { | |
129 p = s; /* no scheme format, reset to start */ | |
130 goto parsepath; | |
131 } | |
132 | |
133 parseauth: | |
134 /* userinfo (username:password) */ | |
135 i = strcspn(p, "@/?#"); | |
136 if (p[i] == '@') { | |
137 if (i >= sizeof(u->userinfo)) | |
138 return -1; /* userinfo too long */ | |
139 memcpy(u->userinfo, p, i); | |
140 u->userinfo[i] = '\0'; | |
141 p += i + 1; | |
142 } | |
143 | |
144 /* IPv6 address */ | |
145 if (*p == '[') { | |
146 /* bracket not found, host too short or too long */ | |
147 i = strcspn(p, "]"); | |
148 if (p[i] != ']' || i < 3) | |
149 return -1; | |
150 i++; /* including "]" */ | |
151 } else { | |
152 /* domain / host part, skip until port, path or end. */ | |
153 i = strcspn(p, ":/?#"); | |
154 } | |
155 if (i >= sizeof(u->host)) | |
156 return -1; /* host too long */ | |
157 memcpy(u->host, p, i); | |
158 u->host[i] = '\0'; | |
159 p += i; | |
160 | |
161 /* port */ | |
162 if (*p == ':') { | |
163 p++; | |
164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) | |
165 return -1; /* port too long */ | |
166 memcpy(u->port, p, i); | |
167 u->port[i] = '\0'; | |
168 /* check for valid port: range 1 - 65535, may be empty */ | |
169 errno = 0; | |
170 l = strtol(u->port, &endptr, 10); | |
171 if (i && (errno || *endptr || l <= 0 || l > 65535)) | |
172 return -1; | |
173 p += i; | |
174 } | |
175 | |
176 parsepath: | |
177 /* path */ | |
178 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) | |
179 return -1; /* path too long */ | |
180 memcpy(u->path, p, i); | |
181 u->path[i] = '\0'; | |
182 p += i; | |
183 | |
184 /* query */ | |
185 if (*p == '?') { | |
186 p++; | |
187 if ((i = strcspn(p, "#")) >= sizeof(u->query)) | |
188 return -1; /* query too long */ | |
189 memcpy(u->query, p, i); | |
190 u->query[i] = '\0'; | |
191 p += i; | |
192 } | |
193 | |
194 /* fragment */ | |
195 if (*p == '#') { | |
196 p++; | |
197 if ((i = strlen(p)) >= sizeof(u->fragment)) | |
198 return -1; /* fragment too long */ | |
199 memcpy(u->fragment, p, i); | |
200 u->fragment[i] = '\0'; | |
201 } | |
202 | |
203 return 0; | |
204 } | |
205 | |
206 /* Transform and try to make the URI `u` absolute using base URI `b` int… | |
207 * Follows some of the logic from "RFC 3986 - 5.2.2. Transform Reference… | |
208 * Returns 0 on success, -1 on error or truncation. */ | |
209 int | |
210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b) | |
211 { | |
212 char *p; | |
213 int c; | |
214 | |
215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); | |
216 | |
217 if (u->proto[0] || u->host[0]) { | |
218 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, siz… | |
219 strlcpy(a->host, u->host, sizeof(a->host)); | |
220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); | |
221 strlcpy(a->host, u->host, sizeof(a->host)); | |
222 strlcpy(a->port, u->port, sizeof(a->port)); | |
223 strlcpy(a->path, u->path, sizeof(a->path)); | |
224 strlcpy(a->query, u->query, sizeof(a->query)); | |
225 return 0; | |
226 } | |
227 | |
228 strlcpy(a->proto, b->proto, sizeof(a->proto)); | |
229 strlcpy(a->host, b->host, sizeof(a->host)); | |
230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); | |
231 strlcpy(a->host, b->host, sizeof(a->host)); | |
232 strlcpy(a->port, b->port, sizeof(a->port)); | |
233 | |
234 if (!u->path[0]) { | |
235 strlcpy(a->path, b->path, sizeof(a->path)); | |
236 } else if (u->path[0] == '/') { | |
237 strlcpy(a->path, u->path, sizeof(a->path)); | |
238 } else { | |
239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '… | |
240 a->path[1] = '\0'; | |
241 | |
242 if ((p = strrchr(b->path, '/'))) { | |
243 c = *(++p); | |
244 *p = '\0'; /* temporary NUL-terminate */ | |
245 if (strlcat(a->path, b->path, sizeof(a->path)) >… | |
246 return -1; | |
247 *p = c; /* restore */ | |
248 } | |
249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof… | |
250 return -1; | |
251 } | |
252 | |
253 if (u->path[0] || u->query[0]) | |
254 strlcpy(a->query, u->query, sizeof(a->query)); | |
255 else | |
256 strlcpy(a->query, b->query, sizeof(a->query)); | |
257 | |
258 return 0; | |
259 } | |
260 | |
261 int | |
262 uri_format(char *buf, size_t bufsiz, struct uri *u) | |
263 { | |
264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", | |
265 u->proto, | |
266 u->userinfo[0] ? u->userinfo : "", | |
267 u->userinfo[0] ? "@" : "", | |
268 u->host, | |
269 u->port[0] ? ":" : "", | |
270 u->port, | |
271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", | |
272 u->path, | |
273 u->query[0] ? "?" : "", | |
274 u->query, | |
275 u->fragment[0] ? "#" : "", | |
276 u->fragment); | |
277 } | |
278 | |
279 /* Splits fields in the line buffer by replacing TAB separators with NUL… | |
280 * terminators and assign these fields as pointers. If there are less fi… | |
281 * than expected then the field is an empty string constant. */ | |
282 void | |
283 parseline(char *line, char *fields[FieldLast]) | |
284 { | |
285 char *prev, *s; | |
286 size_t i; | |
287 | |
288 for (prev = line, i = 0; | |
289 (s = strchr(prev, '\t')) && i < FieldLast - 1; | |
290 i++) { | |
291 *s = '\0'; | |
292 fields[i] = prev; | |
293 prev = s + 1; | |
294 } | |
295 fields[i++] = prev; | |
296 /* make non-parsed fields empty. */ | |
297 for (; i < FieldLast; i++) | |
298 fields[i] = ""; | |
299 } | |
300 | |
/* Parse a decimal number of seconds in string `s` to time_t, assumes time_t
 * is signed. The whole string must be consumed by strtoll(): an empty string,
 * trailing characters (including a fraction such as ".5") or a value out of
 * the range of long long are errors.
 * On success the value is stored in `*t` (if `t` is not NULL) and 0 is
 * returned. On failure -1 is returned and `*t` is not modified. */
int
strtotime(const char *s, time_t *t)
{
	long long l;
	char *e;

	errno = 0;
	l = strtoll(s, &e, 10);
	if (errno || *s == '\0' || *e)
		return -1;

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	 * 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t)
		*t = (time_t)l;

	return 0;
}
320 | |
/* Get the time to compare against: the current time minus an age in seconds.
 * The age is read from the environment variable SFEED_NEW_AGE (parsed with
 * strtotime()), if it is not set the age is 86400 seconds (1 day).
 * Returns (time_t)-1 if time() fails or if SFEED_NEW_AGE is set but is not
 * a valid number. */
time_t
getcomparetime(void)
{
	time_t now, t;
	char *p;

	if ((now = time(NULL)) == (time_t)-1)
		return (time_t)-1;

	if ((p = getenv("SFEED_NEW_AGE"))) {
		if (strtotime(p, &t) == -1)
			return (time_t)-1;
		return now - t;
	}

	return now - 86400; /* 1 day is old news */
}
338 | |
/* Escape characters below as HTML 2.0 / XML 1.0 and write the result to `fp`:
 *   '<' -> "&lt;", '>' -> "&gt;", '\'' -> "&#39;", '&' -> "&amp;",
 *   '"' -> "&quot;".
 * All other bytes of `s` are written unchanged. Write errors are not checked
 * here: they remain visible through the error indicator of `fp`. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; ++s) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   putc(*s, fp); break;
		}
	}
}
354 | |
355 /* print `len` columns of characters. If string is shorter pad the rest … | |
356 * characters `pad`. */ | |
357 void | |
358 printutf8pad(FILE *fp, const char *s, size_t len, int pad) | |
359 { | |
360 wchar_t wc; | |
361 size_t col = 0, i, slen; | |
362 int inc, rl, w; | |
363 | |
364 if (!len) | |
365 return; | |
366 | |
367 slen = strlen(s); | |
368 for (i = 0; i < slen; i += inc) { | |
369 inc = 1; /* next byte */ | |
370 if ((unsigned char)s[i] < 32) { | |
371 continue; /* skip control characters */ | |
372 } else if ((unsigned char)s[i] >= 127) { | |
373 rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i … | |
374 inc = rl; | |
375 if (rl < 0) { | |
376 mbtowc(NULL, NULL, 0); /* reset state */ | |
377 inc = 1; /* invalid, seek next byte */ | |
378 w = 1; /* replacement char is one width … | |
379 } else if ((w = wcwidth(wc)) == -1) { | |
380 continue; | |
381 } | |
382 | |
383 if (col + w > len || (col + w == len && s[i + in… | |
384 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip… | |
385 col++; | |
386 break; | |
387 } else if (rl < 0) { | |
388 fputs(UTF_INVALID_SYMBOL, fp); /* replac… | |
389 col++; | |
390 continue; | |
391 } | |
392 fwrite(&s[i], 1, rl, fp); | |
393 col += w; | |
394 } else { | |
395 /* optimization: simple ASCII character */ | |
396 if (col + 1 > len || (col + 1 == len && s[i + 1]… | |
397 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip… | |
398 col++; | |
399 break; | |
400 } | |
401 putc(s[i], fp); | |
402 col++; | |
403 } | |
404 | |
405 } | |
406 for (; col < len; ++col) | |
407 putc(pad, fp); | |
408 } |