/*
 * gopher-validator.c - gopher-validator - Gopher validator and happy helper
 *
 * git clone git://git.codemadness.org/gopher-validator
 *
 * (repository page navigation: Log | Files | Refs | README | LICENSE)
 *
 * gopher-validator.c (12643B)
 */
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
17 | |
/* Send and receive timeout applied to the remote socket, in seconds. */
#define MAX_RESPONSETIMEOUT 10

/* pledge(2) is OpenBSD-only; on other systems turn the call into a constant
   0 (success) so the call sites need no #ifdef of their own. */
#ifndef __OpenBSD__
#define pledge(a,b) 0
#endif
23 | |
/* Components of a gopher URI as split by parseuri(); all NUL-terminated. */
struct uri {
	char host[256];  /* hostname, or IPv6 literal without its brackets */
	char port[8];    /* port as text; empty when the URI names none */
	char path[1024]; /* remainder starting at the first '/'; may be empty */
};
29 | |
/* One parsed line (item) of a gopher directory listing. */
struct visited {
	int _type;           /* item type: the first byte of the line */
	char username[1024]; /* display string, the "User_Name" field */
	char path[1024];     /* selector */
	char host[256];      /* host field, checked with isvalidhost() */
	char port[8];        /* port field as text */
};
37 | |
/* check valid types with extension in path: maps one filename extension
   to the item types that are accepted for it */
struct gophertype {
	const char *ext;   /* filename extension, without the leading dot */
	const char *allow; /* allowed types for this extension, one char each */
};
43 | |
44 /* must be sorted alphabetically by extension */ | |
45 struct gophertype types[] = { | |
46 { .ext = "asc", "0" }, | |
47 { .ext = "avi", "9" }, | |
48 { .ext = "bz2", "9" }, | |
49 { .ext = "c", "0" }, | |
50 { .ext = "dcgi", "17" }, | |
51 { .ext = "doc", "9" }, | |
52 { .ext = "exe", "9" }, | |
53 { .ext = "gif", "gI" }, | |
54 { .ext = "go", "0" }, | |
55 { .ext = "gph", "1" }, | |
56 { .ext = "gz", "9" }, | |
57 { .ext = "h", "0" }, | |
58 { .ext = "htm", "0h" }, | |
59 { .ext = "html", "0h" }, | |
60 { .ext = "iso", "9" }, | |
61 { .ext = "jpeg", "I" }, | |
62 { .ext = "jpg", "I" }, | |
63 { .ext = "json", "0" }, | |
64 { .ext = "lzma", "9" }, | |
65 { .ext = "m3u", "0" }, | |
66 { .ext = "md", "0" }, | |
67 { .ext = "md5", "0" }, | |
68 { .ext = "md5sum", "0" }, | |
69 { .ext = "mkv", "9" }, | |
70 { .ext = "mp3", "9" }, | |
71 { .ext = "mp4", "9" }, | |
72 { .ext = "ogg", "9" }, | |
73 { .ext = "ogv", "9" }, | |
74 { .ext = "pdf", "9" }, | |
75 { .ext = "png", "I" }, | |
76 { .ext = "rss", "0" }, | |
77 { .ext = "sh", "0" }, | |
78 { .ext = "sha1", "0" }, | |
79 { .ext = "sha1sum", "0" }, | |
80 { .ext = "sha256", "0" }, | |
81 { .ext = "sha256sum", "0" }, | |
82 { .ext = "sha512", "0" }, | |
83 { .ext = "sha512sum", "0" }, | |
84 { .ext = "srt", "0" }, | |
85 { .ext = "tgz", "9" }, | |
86 { .ext = "txt", "0" }, | |
87 { .ext = "wav", "9" }, | |
88 { .ext = "xml", "0" }, | |
89 { .ext = "xz", "9" }, | |
90 }; | |
91 | |
/* Process exit status; error() raises it to 1 when a validation error is
   reported. */
int exitcode = 0;
/* errfp receives fatal messages from die(), outfp receives the output of
   error() and warning(); main() assigns both before they are used. */
FILE *errfp, *outfp;
94 | |
95 void | |
96 die(const char *fmt, ...) | |
97 { | |
98 va_list ap; | |
99 | |
100 fputs("fatal: ", errfp); | |
101 | |
102 va_start(ap, fmt); | |
103 vfprintf(errfp, fmt, ap); | |
104 va_end(ap); | |
105 | |
106 exit(2); | |
107 } | |
108 | |
109 void | |
110 error(const char *fmt, ...) | |
111 { | |
112 va_list ap; | |
113 | |
114 fputs("error: ", outfp); | |
115 | |
116 va_start(ap, fmt); | |
117 vfprintf(outfp, fmt, ap); | |
118 va_end(ap); | |
119 | |
120 exitcode = 1; | |
121 } | |
122 | |
123 void | |
124 warning(const char *fmt, ...) | |
125 { | |
126 va_list ap; | |
127 | |
128 fputs("warning: ", outfp); | |
129 | |
130 va_start(ap, fmt); | |
131 vfprintf(outfp, fmt, ap); | |
132 va_end(ap); | |
133 } | |
134 | |
135 int | |
136 gophertypecmp(const void *v1, const void *v2) | |
137 { | |
138 return strcasecmp(((struct gophertype *)v1)->ext, | |
139 ((struct gophertype *)v2)->ext); | |
140 } | |
141 | |
/*
 * Syntactic check of a host field.
 *
 * Accepts either
 *  - a bracketed IPv6 literal: '[', then only hex digits, ':' and '.',
 *    with at least two colons, then ']' and nothing after it, or
 *  - a non-empty name made only of ASCII-style letters, digits, '-'
 *    and '.'.
 *
 * Returns 1 when s has one of these forms, 0 otherwise. No name lookup is
 * performed.
 */
int
isvalidhost(const char *s)
{
	int colons = 0;

	/* IPv6 */
	if (*s == '[') {
		for (s++; *s && *s != ']'; s++) {
			if (*s == ':')
				colons++;
			else if (!isxdigit((unsigned char)*s) && *s != '.')
				return 0;
		}
		/* the closing bracket must be present and must end the field:
		   text after ']' is not part of a host */
		return colons >= 2 && s[0] == ']' && s[1] == '\0';
	}

	if (!*s)
		return 0;
	for (; *s; s++) {
		if (!isalnum((unsigned char)*s) && *s != '-' && *s != '.')
			return 0;
	}

	return 1;
}
176 | |
/*
 * Connect to host:port over TCP (IPv4 or IPv6) or die trying.
 *
 * port must be numeric (AI_NUMERICSERV). Every address returned by
 * getaddrinfo() is tried in order; the first successful connect() wins.
 * The socket gets send and receive timeouts of MAX_RESPONSETIMEOUT seconds.
 *
 * Returns the connected socket descriptor. Resolution failure, a
 * setsockopt() failure, or failure to connect to any address is fatal
 * (die()).
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	struct timeval timeout;
	/* never NULL, so it is always safe to print with %s below */
	const char *cause = "getaddrinfo";
	/* named gaierr so it does not shadow this file's error() function */
	int gaierr, save_errno, s;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((gaierr = getaddrinfo(host, port, &hints, &res0)))
		die("%s: %s: %s:%s\n", __func__, gai_strerror(gaierr), host, port);

	/* same value for both directions; setsockopt() only reads it */
	timeout.tv_sec = MAX_RESPONSETIMEOUT;
	timeout.tv_usec = 0;

	s = -1;
	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			continue;
		}

		if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));
		if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			/* close() may clobber errno; keep connect()'s value */
			save_errno = errno;
			close(s);
			errno = save_errno;
			s = -1;
			continue;
		}
		break;
	}
	if (s == -1)
		die("%s: %s: %s:%s\n", __func__, cause, host, port);
	freeaddrinfo(res0);

	return s;
}
226 | |
/*
 * checkdir: validate a gopher directory listing (menu) read from fp.
 *
 * Lines are read with fgets() into a 1024-byte buffer; a line without a
 * terminating newline is fatal (reported as line too long). Each line is
 * expected to end in CRLF and is split on TAB into type + username, path
 * (selector), host and port. A missing field, or a field that does not fit
 * its struct visited buffer, is reported with error() and the rest of that
 * line is skipped. For complete lines the function then checks: the item
 * type, the type against the extension table types[] (bsearch with
 * gophertypecmp), the host via isvalidhost(), the port (strtoll, range
 * 0..65535), the selector length (more than 255 is an error), the username
 * decoded with mbtowc() (invalid sequence: warning, non-printable
 * character: error), and relative .. components in the path.
 * Reading stops at a line consisting of a single dot; if none was seen an
 * error is reported at the end. A read error on fp is fatal.
 *
 * Findings are reported through error() and warning(); nothing is returned.
 *
 * NOTE(review): many lines of this listing end in an ellipsis, i.e. they
 * were truncated when the page was extracted, and the first bsearch()
 * argument reads > where the address of gt was presumably meant.
 * Restore the block from the upstream repository before compiling.
 *
 * NOTE(review): the continue taken when the extension is not found in
 * types[] also skips the host, port, selector-length and username checks
 * for that line - confirm that this is intended.
 */
227 void | |
228 checkdir(FILE *fp) | |
229 { | |
230 struct gophertype gt, *rgt; | |
231 struct visited v; | |
232 char line[1024], *end, *s; | |
233 size_t linenr; | |
234 ssize_t n; | |
235 long long l; | |
236 int i, r, len, hasdotend = 0, c, primarytype = 0, wc, col; | |
237 wchar_t w; | |
238 | |
/* On OpenBSD reduce the process to the stdio promise; on other systems
   pledge() is the constant-0 macro defined near the top of the file. */
239 if (pledge("stdio", NULL) == -1) | |
240 die("pledge: %s\n", strerror(errno)); | |
241 | |
242 for (linenr = 1; fgets(line, sizeof(line), fp); linenr++) { | |
/* n is the length up to, not including, the newline. A missing newline
   means the buffer filled up or the input ended without one; both cases
   are treated as fatal. */
243 n = strcspn(line, "\n"); | |
244 if (line[n] != '\n') | |
245 die("%zu: line too long\n", linenr); /* fatal */ | |
246 if (n && line[n] == '\n') | |
247 line[n] = '\0'; | |
248 if (n && line[n - 1] == '\r') | |
249 line[--n] = '\0'; | |
250 else | |
251 error("%zu: invalid line-ending, not CRLF (\\r\\… | |
252 if (n == 1 && line[0] == '.') { | |
253 hasdotend = 1; | |
254 break; | |
255 } | |
256 | |
257 memset(&v, 0, sizeof(v)); | |
258 | |
259 v._type = line[0]; | |
260 | |
261 /* "username" */ | |
262 i = 1; | |
263 len = strcspn(line + i, "\t"); | |
264 if (len + 1 < sizeof(v.username)) { | |
265 memcpy(v.username, line + i, len); | |
266 v.username[len] = '\0'; | |
267 } else { | |
268 error("%zu: username field too long\n", linenr); | |
269 continue; | |
270 } | |
271 if (line[i + len] == '\t') { | |
272 i += len + 1; | |
273 } else { | |
274 error("%zu: invalid line / field count\n", linen… | |
275 continue; | |
276 } | |
277 | |
278 /* selector / path */ | |
279 len = strcspn(line + i, "\t"); | |
280 if (len + 1 < sizeof(v.path)) { | |
281 memcpy(v.path, line + i, len); | |
282 v.path[len] = '\0'; | |
283 } else { | |
284 error("%zu: path field too long\n", linenr); | |
285 continue; | |
286 } | |
287 if (line[i + len] == '\t') { | |
288 i += len + 1; | |
289 } else { | |
290 error("%zu: invalid line / field count\n", linen… | |
291 continue; | |
292 } | |
293 | |
294 /* host */ | |
295 len = strcspn(line + i, "\t"); | |
296 if (len + 1 < sizeof(v.host)) { | |
297 memcpy(v.host, line + i, len); | |
298 v.host[len] = '\0'; | |
299 } else { | |
300 error("%zu: host field too long\n", linenr); | |
301 continue; | |
302 } | |
303 if (line[i + len] == '\t') { | |
304 i += len + 1; | |
305 } else { | |
306 error("%zu: invalid line / field count\n", linen… | |
307 continue; | |
308 } | |
309 | |
310 /* port */ | |
311 len = strcspn(line + i, "\t"); | |
312 if (len + 1 < sizeof(v.port)) { | |
313 memcpy(v.port, line + i, len); | |
314 v.port[len] = '\0'; | |
315 } else { | |
316 error("%zu: port field too long\n", linenr); | |
317 continue; | |
318 } | |
319 | |
320 /* check non-standard types */ | |
321 c = v._type; | |
322 if (v._type == '+' && !primarytype) | |
323 error("%zu: mirror type used, but no previous ty… | |
324 if (v._type != '+') | |
325 primarytype = v._type; | |
326 | |
327 if (!(isdigit(c) || c == 'g' || c == 'I' || c == 'T' || … | |
328 /* common-used */ | |
329 if (c == 'i' || c == 'h') { | |
330 #if 0 | |
331 warning("%zu: non-standard, but common-u… | |
332 linenr, c); | |
333 #endif | |
334 } else { | |
335 /* 3.8: "Characters '0' through 'Z' are … | |
336 experiments should use other characte… | |
337 Machine-specific extensions are not e… | |
338 if (c >= '0' && c <= 'Z') | |
339 error("%zu: unknown / non-standa… | |
340 linenr, c); | |
341 } | |
342 } | |
343 | |
344 /* check type with file extension, unless it is the HTML… | |
345 type with a "URL:" prefix */ | |
346 if ((s = strrchr(v.path, '.')) && !strchr(s, '/') && | |
347 !(primarytype == 'h' && !strncmp(v.path, "URL:", siz… | |
348 gt.ext = ++s; | |
349 if (!(rgt = bsearch(>, &types, sizeof(types) /… | |
350 sizeof(types[0]), &gophertypecmp))) | |
351 continue; | |
352 | |
353 if (!strchr(rgt->allow, primarytype)) | |
354 warning("%zu: invalid type '%c' for exte… | |
355 linenr, primarytype, rgt->ext, r… | |
356 } | |
357 | |
358 if (!isvalidhost(v.host)) | |
359 error("%zu: invalid host: %s\n", linenr, v.host); | |
360 | |
361 /* check port, must be numeric and in range, port 0 is a… | |
362 "Appendix: | |
363 Note: Port corresponds the the TCP Port Number, its valu… | |
364 be in the range [0..65535]; port 70 is officially … | |
365 to gopher." */ | |
366 | |
367 errno = 0; | |
368 l = strtoll(v.port, &end, 10); | |
369 if (errno || v.port == end || *end || l < 0 || l > 65535… | |
370 error("%zu: invalid port: %s\n", linenr, v.port); | |
371 } else { | |
372 #if 0 | |
373 if (l != 70) | |
374 warning("%zu: non-standard gopher port: … | |
375 linenr, l); | |
376 #endif | |
377 } | |
378 | |
379 /* RFC "Notes": "The Selector string should be no longer… | |
380 255 characters." */ | |
381 if ((len = strlen(v.path)) > 255) | |
382 error("%zu: selector should not be longer than 2… | |
383 linenr, len); | |
384 | |
385 /* decode UTF-8 (text-encoding is ASCII/Latin1 in the RF… | |
386 Latin1 sucks, recommend UTF-8 instead. | |
387 Check column length as recommended as described in th… | |
388 in section 3.9. */ | |
389 s = v.username; | |
390 len = strlen(s); | |
391 col = 0; | |
392 for (i = 0; i < len; i += r) { | |
393 r = mbtowc(&w, &s[i], len - i < 4 ? len - i : 4); | |
394 if (r == 0) | |
395 break; | |
396 if (r == -1) { | |
397 mbtowc(NULL, NULL, 0); /* reset state */ | |
398 warning("%zu:%d: username: first invalid… | |
399 linenr, i + 1); | |
400 break; | |
401 } | |
402 if ((wc = wcwidth(w)) == -1) | |
403 wc = 1; | |
404 col += (size_t)wc; | |
405 | |
406 /* RFC "Notes": "It is *highly* recommended that… | |
407 User_Name field contain only printable charac… | |
408 if (!iswprint(w)) { | |
409 error("%zu:%d: first non-printable chara… | |
410 linenr, i + 1); | |
411 break; | |
412 } | |
413 } | |
414 #if 0 | |
415 /* instead of 70 check 79 */ | |
416 if (col > 79) | |
417 warning("%zu: username column length is > 79 (%d… | |
418 linenr, col); | |
419 #endif | |
420 | |
421 if (!strcmp(v.path, "..") || strstr(v.path, "../")) | |
422 warning("%zu: found ../ in path: don't use relat… | |
423 } | |
424 if (ferror(fp)) | |
425 die("fgets: %s\n", strerror(errno)); | |
426 | |
427 if (!hasdotend) | |
428 error("no .\\r\\n end\n"); | |
429 } | |
430 | |
/*
 * Fetch a directory listing from a gopher server and validate it.
 *
 * Connects with edial(), sends the request line - path, followed by a TAB
 * and param when param is non-empty, terminated by CRLF - and hands the
 * response stream to checkdir(). The connection is closed afterwards.
 *
 * host, port: server to contact (port as numeric text).
 * path:       selector to request.
 * param:      search string; pass "" for none (must not be NULL).
 *
 * A failed write or fdopen() is fatal (die()).
 */
void
checkremote(const char *host, const char *port, const char *path, const char *param)
{
	FILE *fp;
	int fd, r;

	fd = edial(host, port);

	if (param[0])
		r = dprintf(fd, "%s\t%s\r\n", path, param);
	else
		r = dprintf(fd, "%s\r\n", path);
	/* dprintf() reports failure with a negative value */
	if (r < 0)
		die("write: %s\n", strerror(errno));

	if (!(fp = fdopen(fd, "rb+")))
		die("fdopen: %s\n", strerror(errno));
	checkdir(fp);
	fclose(fp); /* also closes fd */
}
451 | |
452 int | |
453 parseuri(const char *str, struct uri *u) | |
454 { | |
455 const char *s, *e; | |
456 | |
457 memset(u, 0, sizeof(struct uri)); | |
458 | |
459 s = str; | |
460 | |
461 /* IPv6 */ | |
462 if (*s == '[') { | |
463 s++; | |
464 e = strchr(s, ']'); | |
465 if (!e || e - s + 1 >= sizeof(u->host)) | |
466 return 0; | |
467 memcpy(u->host, s, e - s); | |
468 u->host[e - s] = '\0'; | |
469 e++; | |
470 } else { | |
471 e = &s[strcspn(s, ":/")]; | |
472 if (e - s + 1 >= sizeof(u->host)) | |
473 return 0; | |
474 memcpy(u->host, s, e - s); | |
475 u->host[e - s] = '\0'; | |
476 } | |
477 | |
478 if (*e == ':') { | |
479 s = e + 1; | |
480 e = &s[strcspn(s, "/")]; | |
481 | |
482 if (e - s + 1 >= sizeof(u->port)) | |
483 return 0; | |
484 memcpy(u->port, s, e - s); | |
485 u->port[e - s] = '\0'; | |
486 } | |
487 if (*e && *e != '/') | |
488 return 0; /* invalid path */ | |
489 | |
490 s = e; | |
491 e = s + strlen(s); | |
492 | |
493 if (e - s + 1 >= sizeof(u->path)) | |
494 return 0; | |
495 memcpy(u->path, s, e - s); | |
496 u->path[e - s] = '\0'; | |
497 | |
498 return 1; | |
499 } | |
500 | |
501 int | |
502 main(int argc, char **argv) | |
503 { | |
504 struct uri u; | |
505 const char *path, *uri = "", *param = "", *s; | |
506 int _type = '1'; | |
507 | |
508 setlocale(LC_CTYPE, ""); | |
509 | |
510 outfp = stdout; | |
511 errfp = stderr; | |
512 | |
513 /* CGI-mode or stand-alone */ | |
514 if ((s = getenv("QUERY_STRING"))) { | |
515 uri = s; | |
516 param = ""; | |
517 errfp = stdout; /* output errors to stdout also in CGI m… | |
518 } else { | |
519 switch (argc) { | |
520 case 3: | |
521 param = argv[2]; | |
522 case 2: | |
523 uri = argv[1]; | |
524 break; | |
525 case 1: | |
526 checkdir(stdin); | |
527 return exitcode; | |
528 default: | |
529 fprintf(errfp, "usage: %s [uri] [param]\n", argv… | |
530 return 1; | |
531 } | |
532 } | |
533 | |
534 if (pledge("stdio inet dns", NULL) == -1) | |
535 die("pledge: %s\n", strerror(errno)); | |
536 | |
537 if (!strncmp(uri, "gopher://", sizeof("gopher://") - 1)) | |
538 uri += sizeof("gopher://") - 1; | |
539 else if (!strncmp(uri, "gophers://", sizeof("gophers://") - 1)) | |
540 uri += sizeof("gophers://") - 1; | |
541 | |
542 if (!parseuri(uri, &u)) | |
543 die("Invalid URI\n"); | |
544 if (u.host[0] == '\0') | |
545 die("Invalid hostname\n"); | |
546 | |
547 if (u.path[0] == '\0') | |
548 memcpy(u.path, "/", 2); | |
549 if (u.port[0] == '\0') | |
550 memcpy(u.port, "70", 3); | |
551 | |
552 path = u.path; | |
553 if (path[0] == '/') { | |
554 path++; | |
555 if (*path) { | |
556 _type = *path; | |
557 path++; | |
558 } | |
559 } else { | |
560 path = ""; | |
561 } | |
562 | |
563 switch (_type) { | |
564 case '1': | |
565 case '7': | |
566 break; /* handled below */ | |
567 default: /* these types are not validated */ | |
568 fprintf(errfp, "only types 1 (dir) and 7 (search) are va… | |
569 return 1; | |
570 } | |
571 | |
572 if (_type != '7') | |
573 param = ""; | |
574 | |
575 checkremote(u.host, u.port, path, param); | |
576 | |
577 return exitcode; | |
578 } |