GopherProxy

	util.c - sfeed - RSS and Atom parser
	git clone git://git.codemadness.org/sfeed
	Log
	Files
	Refs
	README
	LICENSE
	---
	util.c (9301B)
	---
	1 #include <errno.h>
	2 #include <stdarg.h>
	3 #include <stdio.h>
	4 #include <stdlib.h>
	5 #include <string.h>
	6 #include <wchar.h>
	7
	8 #include "util.h"
	9
	10 /* print to stderr, print error message of errno and exit().
	11 * Unlike BSD err() it does not prefix __progname */
	12 __dead void
	13 err(int exitstatus, const char *fmt, ...)
	14 {
	15 va_list ap;
	16 int saved_errno;
	17
	18 saved_errno = errno;
	19
	20 if (fmt) {
	21 va_start(ap, fmt);
	22 vfprintf(stderr, fmt, ap);
	23 va_end(ap);
	24 fputs(": ", stderr);
	25 }
	26 fprintf(stderr, "%s\n", strerror(saved_errno));
	27
	28 exit(exitstatus);
	29 }
	30
	31 /* print to stderr and exit().
	32 * Unlike BSD errx() it does not prefix __progname */
	33 __dead void
	34 errx(int exitstatus, const char *fmt, ...)
	35 {
	36 va_list ap;
	37
	38 if (fmt) {
	39 va_start(ap, fmt);
	40 vfprintf(stderr, fmt, ap);
	41 va_end(ap);
	42 }
	43 fputs("\n", stderr);
	44
	45 exit(exitstatus);
	46 }
	47
	/* Handle read or write errors for a FILE * stream.
	 * `name` is only used in the error message. `mode` selects the check:
	 *   'r'  exit with status 1 if the error indicator of `fp` is set.
	 *   'w'  flush `fp` first so buffered write failures are detected, then
	 *        exit with status 1 if the flush failed or the indicator is set.
	 * Any other `mode` does nothing. */
	void
	checkfileerror(FILE *fp, const char *name, int mode)
	{
		if (mode == 'r' && ferror(fp))
			errx(1, "read error: %s", name);
		else if (mode == 'w' && (fflush(fp) || ferror(fp)))
			errx(1, "write error: %s", name);
	}
	57
	/* strcasestr() included for portability.
	 * Find the first occurrence of `n` (needle) in `h` (haystack), comparing
	 * bytes through TOLOWER(). Returns a pointer into `h` at the start of the
	 * match, `h` itself when `n` is empty, or NULL when there is no match.
	 * NOTE(review): TOLOWER is not defined in this file, presumably it comes
	 * from util.h. */
	char *
	strcasestr(const char *h, const char *n)
	{
		size_t i;

		if (!n[0])
			return (char *)h;

		/* try every start position in the haystack */
		for (; *h; ++h) {
			for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
			            TOLOWER((unsigned char)h[i]); ++i)
				;
			/* the whole needle was consumed: match at `h` */
			if (n[i] == '\0')
				return (char *)h;
		}

		return NULL;
	}
	77
	/* Check if string has a non-empty scheme / protocol part.
	 * Returns non-zero when `s` starts with one or more characters out of
	 * letters, digits, '+', '-' and '.', directly followed by ':'.
	 * Returns 0 otherwise. */
	int
	uri_hasscheme(const char *s)
	{
		const char *p = s;

		/* skip over all characters that are allowed in a scheme */
		for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
		       *p == '+' || *p == '-' || *p == '.'; p++)
			;
		/* scheme, except if empty and starts with ":" then it is a path */
		return (*p == ':' && p != s);
	}
	90
	91 /* Parse URI string `s` into an uri structure `u`.
	92 * Returns 0 on success or -1 on failure */
	93 int
	94 uri_parse(const char s, struct uri u)
	95 {
	96 const char *p = s;
	97 char *endptr;
	98 size_t i;
	99 long l;
	100
	101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
	102 u->path[0] = u->query[0] = u->fragment[0] = '\0';
	103
	104 /* protocol-relative */
	105 if (p == '/' && (p + 1) == '/') {
	106 p += 2; /* skip "//" */
	107 goto parseauth;
	108 }
	109
	110 /* scheme / protocol part */
	111 for (; ISALPHA((unsigned char)p) \|\| ISDIGIT((unsigned char)p) …
	112 p == '+' \|\| p == '-' \|\| *p == '.'; p++)
	113 ;
	114 /* scheme, except if empty and starts with ":" then it is a path…
	115 if (*p == ':' && p != s) {
	116 if ((p + 1) == '/' && (p + 2) == '/')
	117 p += 3; /* skip "://" */
	118 else
	119 p++; /* skip ":" */
	120
	121 if ((size_t)(p - s) >= sizeof(u->proto))
	122 return -1; /* protocol too long */
	123 memcpy(u->proto, s, p - s);
	124 u->proto[p - s] = '\0';
	125
	126 if (*(p - 1) != '/')
	127 goto parsepath;
	128 } else {
	129 p = s; /* no scheme format, reset to start */
	130 goto parsepath;
	131 }
	132
	133 parseauth:
	134 /* userinfo (username:password) */
	135 i = strcspn(p, "@/?#");
	136 if (p[i] == '@') {
	137 if (i >= sizeof(u->userinfo))
	138 return -1; /* userinfo too long */
	139 memcpy(u->userinfo, p, i);
	140 u->userinfo[i] = '\0';
	141 p += i + 1;
	142 }
	143
	144 /* IPv6 address */
	145 if (*p == '[') {
	146 /* bracket not found, host too short or too long */
	147 i = strcspn(p, "]");
	148 if (p[i] != ']' \|\| i < 3)
	149 return -1;
	150 i++; /* including "]" */
	151 } else {
	152 /* domain / host part, skip until port, path or end. */
	153 i = strcspn(p, ":/?#");
	154 }
	155 if (i >= sizeof(u->host))
	156 return -1; /* host too long */
	157 memcpy(u->host, p, i);
	158 u->host[i] = '\0';
	159 p += i;
	160
	161 /* port */
	162 if (*p == ':') {
	163 p++;
	164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
	165 return -1; /* port too long */
	166 memcpy(u->port, p, i);
	167 u->port[i] = '\0';
	168 /* check for valid port: range 1 - 65535, may be empty */
	169 errno = 0;
	170 l = strtol(u->port, &endptr, 10);
	171 if (i && (errno \|\| *endptr \|\| l <= 0 \|\| l > 65535))
	172 return -1;
	173 p += i;
	174 }
	175
	176 parsepath:
	177 /* path */
	178 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
	179 return -1; /* path too long */
	180 memcpy(u->path, p, i);
	181 u->path[i] = '\0';
	182 p += i;
	183
	184 /* query */
	185 if (*p == '?') {
	186 p++;
	187 if ((i = strcspn(p, "#")) >= sizeof(u->query))
	188 return -1; /* query too long */
	189 memcpy(u->query, p, i);
	190 u->query[i] = '\0';
	191 p += i;
	192 }
	193
	194 /* fragment */
	195 if (*p == '#') {
	196 p++;
	197 if ((i = strlen(p)) >= sizeof(u->fragment))
	198 return -1; /* fragment too long */
	199 memcpy(u->fragment, p, i);
	200 u->fragment[i] = '\0';
	201 }
	202
	203 return 0;
	204 }
	205
	206 /* Transform and try to make the URI `u` absolute using base URI `b` int…
	207 * Follows some of the logic from "RFC 3986 - 5.2.2. Transform Reference…
	208 * Returns 0 on success, -1 on error or truncation. */
	209 int
	210 uri_makeabs(struct uri a, struct uri u, struct uri *b)
	211 {
	212 char *p;
	213 int c;
	214
	215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
	216
	217 if (u->proto[0] \|\| u->host[0]) {
	218 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, siz…
	219 strlcpy(a->host, u->host, sizeof(a->host));
	220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
	221 strlcpy(a->host, u->host, sizeof(a->host));
	222 strlcpy(a->port, u->port, sizeof(a->port));
	223 strlcpy(a->path, u->path, sizeof(a->path));
	224 strlcpy(a->query, u->query, sizeof(a->query));
	225 return 0;
	226 }
	227
	228 strlcpy(a->proto, b->proto, sizeof(a->proto));
	229 strlcpy(a->host, b->host, sizeof(a->host));
	230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
	231 strlcpy(a->host, b->host, sizeof(a->host));
	232 strlcpy(a->port, b->port, sizeof(a->port));
	233
	234 if (!u->path[0]) {
	235 strlcpy(a->path, b->path, sizeof(a->path));
	236 } else if (u->path[0] == '/') {
	237 strlcpy(a->path, u->path, sizeof(a->path));
	238 } else {
	239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '…
	240 a->path[1] = '\0';
	241
	242 if ((p = strrchr(b->path, '/'))) {
	243 c = *(++p);
	244 p = '\0'; / temporary NUL-terminate */
	245 if (strlcat(a->path, b->path, sizeof(a->path)) >…
	246 return -1;
	247 p = c; / restore */
	248 }
	249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof…
	250 return -1;
	251 }
	252
	253 if (u->path[0] \|\| u->query[0])
	254 strlcpy(a->query, u->query, sizeof(a->query));
	255 else
	256 strlcpy(a->query, b->query, sizeof(a->query));
	257
	258 return 0;
	259 }
	260
	261 int
	262 uri_format(char buf, size_t bufsiz, struct uri u)
	263 {
	264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
	265 u->proto,
	266 u->userinfo[0] ? u->userinfo : "",
	267 u->userinfo[0] ? "@" : "",
	268 u->host,
	269 u->port[0] ? ":" : "",
	270 u->port,
	271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
	272 u->path,
	273 u->query[0] ? "?" : "",
	274 u->query,
	275 u->fragment[0] ? "#" : "",
	276 u->fragment);
	277 }
	278
	279 /* Splits fields in the line buffer by replacing TAB separators with NUL…
	280 * terminators and assign these fields as pointers. If there are less fi…
	281 * than expected then the field is an empty string constant. */
	282 void
	283 parseline(char line, char fields[FieldLast])
	284 {
	285 char prev, s;
	286 size_t i;
	287
	288 for (prev = line, i = 0;
	289 (s = strchr(prev, '\t')) && i < FieldLast - 1;
	290 i++) {
	291 *s = '\0';
	292 fields[i] = prev;
	293 prev = s + 1;
	294 }
	295 fields[i++] = prev;
	296 /* make non-parsed fields empty. */
	297 for (; i < FieldLast; i++)
	298 fields[i] = "";
	299 }
	300
	/* Parse time to time_t, assumes time_t is signed.
	 * `s` must consist of a base-10 integer in the form accepted by strtoll()
	 * and nothing else: an empty string, trailing characters (including a
	 * fractional part such as ".5") or a value out of range for long long are
	 * errors. The result is stored in `*t` when `t` is not NULL.
	 * Returns 0 on success or -1 on failure, `*t` is untouched on failure. */
	int
	strtotime(const char *s, time_t *t)
	{
		long long l;
		char *e;

		errno = 0;
		l = strtoll(s, &e, 10);
		/* range error, empty input or unparsed characters left */
		if (errno || *s == '\0' || *e)
			return -1;

		/* NOTE: the type long long supports the 64-bit range. If time_t is
		 * 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
		if (t)
			*t = (time_t)l;

		return 0;
	}
	320
	/* Return the current time minus an age in seconds. The age is read from
	 * the environment variable SFEED_NEW_AGE when it is set, otherwise it is
	 * 86400 (1 day).
	 * Returns (time_t)-1 if the current time could not be retrieved or if
	 * SFEED_NEW_AGE could not be parsed by strtotime(). */
	time_t
	getcomparetime(void)
	{
		const char *env;
		time_t now, age;

		now = time(NULL);
		if (now == (time_t)-1)
			return (time_t)-1;

		env = getenv("SFEED_NEW_AGE");
		if (env == NULL)
			return now - 86400; /* 1 day is old news */

		if (strtotime(env, &age) == -1)
			return (time_t)-1;

		return now - age;
	}
	338
	/* Escape characters below as HTML 2.0 / XML 1.0.
	 * Writes the string `s` to the stream `fp`, replacing the characters
	 * '<', '>', '\'', '&' and '"' with an entity. All other bytes are written
	 * unchanged. The apostrophe is written as the numeric reference "&#39;"
	 * because the named entity "&apos;" is not part of HTML 2.0.
	 * Write errors are not checked here. */
	void
	xmlencode(const char *s, FILE *fp)
	{
		for (; *s; ++s) {
			switch (*s) {
			case '<':  fputs("&lt;",   fp); break;
			case '>':  fputs("&gt;",   fp); break;
			case '\'': fputs("&#39;",  fp); break;
			case '&':  fputs("&amp;",  fp); break;
			case '"':  fputs("&quot;", fp); break;
			default:   putc(*s, fp);
			}
		}
	}
	354
	355 /* print `len` columns of characters. If string is shorter pad the rest …
	356 * characters `pad`. */
	357 void
	358 printutf8pad(FILE fp, const char s, size_t len, int pad)
	359 {
	360 wchar_t wc;
	361 size_t col = 0, i, slen;
	362 int inc, rl, w;
	363
	364 if (!len)
	365 return;
	366
	367 slen = strlen(s);
	368 for (i = 0; i < slen; i += inc) {
	369 inc = 1; /* next byte */
	370 if ((unsigned char)s[i] < 32) {
	371 continue; /* skip control characters */
	372 } else if ((unsigned char)s[i] >= 127) {
	373 rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i …
	374 inc = rl;
	375 if (rl < 0) {
	376 mbtowc(NULL, NULL, 0); /* reset state */
	377 inc = 1; /* invalid, seek next byte */
	378 w = 1; /* replacement char is one width …
	379 } else if ((w = wcwidth(wc)) == -1) {
	380 continue;
	381 }
	382
	383 if (col + w > len \|\| (col + w == len && s[i + in…
	384 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip…
	385 col++;
	386 break;
	387 } else if (rl < 0) {
	388 fputs(UTF_INVALID_SYMBOL, fp); /* replac…
	389 col++;
	390 continue;
	391 }
	392 fwrite(&s[i], 1, rl, fp);
	393 col += w;
	394 } else {
	395 /* optimization: simple ASCII character */
	396 if (col + 1 > len \|\| (col + 1 == len && s[i + 1]…
	397 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellip…
	398 col++;
	399 break;
	400 }
	401 putc(s[i], fp);
	402 col++;
	403 }
	404
	405 }
	406 for (; col < len; ++col)
	407 putc(pad, fp);
	408 }