Introduction
Introduction Statistics Contact Development Disclaimer Help
util.c - sfeed - RSS and Atom parser
git clone git://git.codemadness.org/sfeed
Log
Files
Refs
README
LICENSE
---
util.c (9301B)
---
1 #include <errno.h>
2 #include <stdarg.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <wchar.h>
7
8 #include "util.h"
9
10 /* print to stderr, print error message of errno and exit().
11 * Unlike BSD err() it does not prefix __progname */
12 __dead void
13 err(int exitstatus, const char *fmt, ...)
14 {
15 va_list ap;
16 int saved_errno;
17
18 saved_errno = errno;
19
20 if (fmt) {
21 va_start(ap, fmt);
22 vfprintf(stderr, fmt, ap);
23 va_end(ap);
24 fputs(": ", stderr);
25 }
26 fprintf(stderr, "%s\n", strerror(saved_errno));
27
28 exit(exitstatus);
29 }
30
31 /* print to stderr and exit().
32 * Unlike BSD errx() it does not prefix __progname */
33 __dead void
34 errx(int exitstatus, const char *fmt, ...)
35 {
36 va_list ap;
37
38 if (fmt) {
39 va_start(ap, fmt);
40 vfprintf(stderr, fmt, ap);
41 va_end(ap);
42 }
43 fputs("\n", stderr);
44
45 exit(exitstatus);
46 }
47
/* Handle read or write errors for a FILE * stream.
 * mode 'r': exit through errx() if the stream error indicator is set.
 * mode 'w': flush the stream first, exit through errx() if flushing fails
 *           or the error indicator is set.
 * Any other mode does nothing. `name` is only used in the error message. */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
57
/* strcasestr() included for portability.
 * Find the first occurrence of needle `n` in haystack `h`, comparing bytes
 * after folding both through TOLOWER(). Returns a pointer into `h`, `h`
 * itself when `n` is empty, or NULL when there is no match. */
char *
strcasestr(const char *h, const char *n)
{
	const char *hp, *np;

	if (*n == '\0')
		return (char *)h;

	while (*h != '\0') {
		/* try to match the whole needle at this haystack position */
		hp = h;
		np = n;
		while (*np != '\0' &&
		       TOLOWER((unsigned char)*np) == TOLOWER((unsigned char)*hp)) {
			np++;
			hp++;
		}
		if (*np == '\0')
			return (char *)h;
		h++;
	}

	return NULL;
}
77
/* Check if string has a non-empty scheme / protocol part.
 * Skips over the leading run of characters accepted by ISALPHA() or
 * ISDIGIT(), or equal to '+', '-' or '.'. Returns 1 when that run is
 * non-empty and directly followed by ':', otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *p = s;

	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
	       *p == '+' || *p == '-' || *p == '.'; p++)
		;
	/* scheme, except if empty and starts with ":" then it is a path */
	return (*p == ':' && p != s);
}
90
91 /* Parse URI string `s` into an uri structure `u`.
92 * Returns 0 on success or -1 on failure */
93 int
94 uri_parse(const char *s, struct uri *u)
95 {
96 const char *p = s;
97 char *endptr;
98 size_t i;
99 long l;
100
101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
102 u->path[0] = u->query[0] = u->fragment[0] = '\0';
103
104 /* protocol-relative */
105 if (*p == '/' && *(p + 1) == '/') {
106 p += 2; /* skip "//" */
107 goto parseauth;
108 }
109
110 /* scheme / protocol part */
111 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) …
112 *p == '+' || *p == '-' || *p == '.'; p++)
113 ;
114 /* scheme, except if empty and starts with ":" then it is a path…
115 if (*p == ':' && p != s) {
116 if (*(p + 1) == '/' && *(p + 2) == '/')
117 p += 3; /* skip "://" */
118 else
119 p++; /* skip ":" */
120
121 if ((size_t)(p - s) >= sizeof(u->proto))
122 return -1; /* protocol too long */
123 memcpy(u->proto, s, p - s);
124 u->proto[p - s] = '\0';
125
126 if (*(p - 1) != '/')
127 goto parsepath;
128 } else {
129 p = s; /* no scheme format, reset to start */
130 goto parsepath;
131 }
132
133 parseauth:
134 /* userinfo (username:password) */
135 i = strcspn(p, "@/?#");
136 if (p[i] == '@') {
137 if (i >= sizeof(u->userinfo))
138 return -1; /* userinfo too long */
139 memcpy(u->userinfo, p, i);
140 u->userinfo[i] = '\0';
141 p += i + 1;
142 }
143
144 /* IPv6 address */
145 if (*p == '[') {
146 /* bracket not found, host too short or too long */
147 i = strcspn(p, "]");
148 if (p[i] != ']' || i < 3)
149 return -1;
150 i++; /* including "]" */
151 } else {
152 /* domain / host part, skip until port, path or end. */
153 i = strcspn(p, ":/?#");
154 }
155 if (i >= sizeof(u->host))
156 return -1; /* host too long */
157 memcpy(u->host, p, i);
158 u->host[i] = '\0';
159 p += i;
160
161 /* port */
162 if (*p == ':') {
163 p++;
164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
165 return -1; /* port too long */
166 memcpy(u->port, p, i);
167 u->port[i] = '\0';
168 /* check for valid port: range 1 - 65535, may be empty */
169 errno = 0;
170 l = strtol(u->port, &endptr, 10);
171 if (i && (errno || *endptr || l <= 0 || l > 65535))
172 return -1;
173 p += i;
174 }
175
176 parsepath:
177 /* path */
178 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
179 return -1; /* path too long */
180 memcpy(u->path, p, i);
181 u->path[i] = '\0';
182 p += i;
183
184 /* query */
185 if (*p == '?') {
186 p++;
187 if ((i = strcspn(p, "#")) >= sizeof(u->query))
188 return -1; /* query too long */
189 memcpy(u->query, p, i);
190 u->query[i] = '\0';
191 p += i;
192 }
193
194 /* fragment */
195 if (*p == '#') {
196 p++;
197 if ((i = strlen(p)) >= sizeof(u->fragment))
198 return -1; /* fragment too long */
199 memcpy(u->fragment, p, i);
200 u->fragment[i] = '\0';
201 }
202
203 return 0;
204 }
205
206 /* Transform and try to make the URI `u` absolute using base URI `b` int…
207 * Follows some of the logic from "RFC 3986 - 5.2.2. Transform Reference…
208 * Returns 0 on success, -1 on error or truncation. */
209 int
210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
211 {
212 char *p;
213 int c;
214
215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
216
217 if (u->proto[0] || u->host[0]) {
218 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, siz…
219 strlcpy(a->host, u->host, sizeof(a->host));
220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
221 strlcpy(a->host, u->host, sizeof(a->host));
222 strlcpy(a->port, u->port, sizeof(a->port));
223 strlcpy(a->path, u->path, sizeof(a->path));
224 strlcpy(a->query, u->query, sizeof(a->query));
225 return 0;
226 }
227
228 strlcpy(a->proto, b->proto, sizeof(a->proto));
229 strlcpy(a->host, b->host, sizeof(a->host));
230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
231 strlcpy(a->host, b->host, sizeof(a->host));
232 strlcpy(a->port, b->port, sizeof(a->port));
233
234 if (!u->path[0]) {
235 strlcpy(a->path, b->path, sizeof(a->path));
236 } else if (u->path[0] == '/') {
237 strlcpy(a->path, u->path, sizeof(a->path));
238 } else {
239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '…
240 a->path[1] = '\0';
241
242 if ((p = strrchr(b->path, '/'))) {
243 c = *(++p);
244 *p = '\0'; /* temporary NUL-terminate */
245 if (strlcat(a->path, b->path, sizeof(a->path)) >…
246 return -1;
247 *p = c; /* restore */
248 }
249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof…
250 return -1;
251 }
252
253 if (u->path[0] || u->query[0])
254 strlcpy(a->query, u->query, sizeof(a->query));
255 else
256 strlcpy(a->query, b->query, sizeof(a->query));
257
258 return 0;
259 }
260
261 int
262 uri_format(char *buf, size_t bufsiz, struct uri *u)
263 {
264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
265 u->proto,
266 u->userinfo[0] ? u->userinfo : "",
267 u->userinfo[0] ? "@" : "",
268 u->host,
269 u->port[0] ? ":" : "",
270 u->port,
271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
272 u->path,
273 u->query[0] ? "?" : "",
274 u->query,
275 u->fragment[0] ? "#" : "",
276 u->fragment);
277 }
278
279 /* Splits fields in the line buffer by replacing TAB separators with NUL…
280 * terminators and assign these fields as pointers. If there are less fi…
281 * than expected then the field is an empty string constant. */
282 void
283 parseline(char *line, char *fields[FieldLast])
284 {
285 char *prev, *s;
286 size_t i;
287
288 for (prev = line, i = 0;
289 (s = strchr(prev, '\t')) && i < FieldLast - 1;
290 i++) {
291 *s = '\0';
292 fields[i] = prev;
293 prev = s + 1;
294 }
295 fields[i++] = prev;
296 /* make non-parsed fields empty. */
297 for (; i < FieldLast; i++)
298 fields[i] = "";
299 }
300
/* Parse the base-10 integer string `s` to a time_t, assumes time_t is signed.
 * The whole string must be consumed by strtoll(): an empty string, a value
 * out of range for long long or any trailing characters (such as a
 * fractional part) is an error.
 * On success the value is stored in `*t` when `t` is not NULL.
 * Returns 0 on success or -1 on failure. */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1; /* conversion error, e.g. out of range */
	if (*s == '\0' || *end != '\0')
		return -1; /* empty input or trailing data */

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	 * 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
320
/* Return the current time minus a maximum age in seconds. The age is read
 * from the environment variable SFEED_NEW_AGE when it is set, otherwise it
 * is 86400 (1 day).
 * Returns (time_t)-1 when time() fails or when SFEED_NEW_AGE cannot be
 * parsed by strtotime(). */
time_t
getcomparetime(void)
{
	const char *env;
	time_t current, age = 86400; /* 1 day is old news */

	current = time(NULL);
	if (current == (time_t)-1)
		return (time_t)-1;

	env = getenv("SFEED_NEW_AGE");
	if (env != NULL && strtotime(env, &age) == -1)
		return (time_t)-1;

	return current - age;
}
338
/* Write string `s` to `fp`, escaping the characters below as HTML 2.0 /
 * XML 1.0 entities: < > ' & and ". All other bytes are written unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	const char *entity;
	char ch;

	while ((ch = *s++) != '\0') {
		switch (ch) {
		case '<':  entity = "&lt;";   break;
		case '>':  entity = "&gt;";   break;
		case '\'': entity = "&#39;";  break;
		case '&':  entity = "&amp;";  break;
		case '"':  entity = "&quot;"; break;
		default:   entity = NULL;     break;
		}

		if (entity != NULL)
			fputs(entity, fp);
		else
			putc(ch, fp);
	}
}
354
355 /* print `len` columns of characters. If string is shorter pad the rest …
356 * characters `pad`. */
357 void
358 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
359 {
360 wchar_t wc;
361 size_t col = 0, i, slen;
362 int inc, rl, w;
363
364 if (!len)
365 return;
366
367 slen = strlen(s);
368 for (i = 0; i < slen; i += inc) {
369 inc = 1; /* next byte */
370 if ((unsigned char)s[i] < 32) {
371 continue; /* skip control characters */
372 } else if ((unsigned char)s[i] >= 127) {
373 rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i …
374 inc = rl;
375 if (rl < 0) {
376 mbtowc(NULL, NULL, 0); /* reset state */
377 inc = 1; /* invalid, seek next byte */
378 w = 1; /* replacement char is one width …
379 } else if ((w = wcwidth(wc)) == -1) {
380 continue;
381 }
382
383 if (col + w > len || (col + w == len && s[i + in…
384 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip…
385 col++;
386 break;
387 } else if (rl < 0) {
388 fputs(UTF_INVALID_SYMBOL, fp); /* replac…
389 col++;
390 continue;
391 }
392 fwrite(&s[i], 1, rl, fp);
393 col += w;
394 } else {
395 /* optimization: simple ASCII character */
396 if (col + 1 > len || (col + 1 == len && s[i + 1]…
397 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip…
398 col++;
399 break;
400 }
401 putc(s[i], fp);
402 col++;
403 }
404
405 }
406 for (; col < len; ++col)
407 putc(pad, fp);
408 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.