Introduction
Introduction Statistics Contact Development Disclaimer Help
gopher-validator.c - gopher-validator - Gopher validator and happy helper
git clone git://git.codemadness.org/gopher-validator
Log
Files
Refs
README
LICENSE
---
gopher-validator.c (12643B)
---
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
17
/* Socket send/receive timeout, in seconds. */
#define MAX_RESPONSETIMEOUT 10

/* pledge() is only used as-is on OpenBSD; on every other system the call
 * is replaced by a constant 0, so "== -1" failure checks never trigger. */
#if !defined(__OpenBSD__)
#define pledge(promises, execpromises) 0
#endif
23
/*
 * Pieces of a URI as split up by parseuri().
 * Every member is a NUL-terminated string; an empty string means the
 * component was absent.
 */
struct uri {
	char host[256];		/* host name, or IPv6 address without the brackets */
	char port[8];		/* port text following ':' (not validated here) */
	char path[1024];	/* remainder of the URI, starting at the first '/' */
};
29
/*
 * One menu line split into its tab-separated fields.
 * The string members are NUL-terminated copies of the corresponding field.
 */
struct visited {
	int _type;		/* first byte of the line: the item type character */
	char username[1024];	/* display text that follows the type character */
	char path[1024];	/* selector */
	char host[256];		/* host field */
	char port[8];		/* port field, kept as text */
};
37
/*
 * Maps a filename extension (taken from the selector) to the item type
 * characters that are considered acceptable for it.
 */
struct gophertype {
	const char *ext;	/* filename extension, without the leading dot */
	const char *allow;	/* every type character accepted for this extension */
};

/*
 * Known extensions. The table is searched with bsearch(), so entries MUST
 * stay sorted by extension (case-insensitive order).
 */
struct gophertype types[] = {
	{ .ext = "asc",       .allow = "0"  },
	{ .ext = "avi",       .allow = "9"  },
	{ .ext = "bz2",       .allow = "9"  },
	{ .ext = "c",         .allow = "0"  },
	{ .ext = "dcgi",      .allow = "17" },
	{ .ext = "doc",       .allow = "9"  },
	{ .ext = "exe",       .allow = "9"  },
	{ .ext = "gif",       .allow = "gI" },
	{ .ext = "go",        .allow = "0"  },
	{ .ext = "gph",       .allow = "1"  },
	{ .ext = "gz",        .allow = "9"  },
	{ .ext = "h",         .allow = "0"  },
	{ .ext = "htm",       .allow = "0h" },
	{ .ext = "html",      .allow = "0h" },
	{ .ext = "iso",       .allow = "9"  },
	{ .ext = "jpeg",      .allow = "I"  },
	{ .ext = "jpg",       .allow = "I"  },
	{ .ext = "json",      .allow = "0"  },
	{ .ext = "lzma",      .allow = "9"  },
	{ .ext = "m3u",       .allow = "0"  },
	{ .ext = "md",        .allow = "0"  },
	{ .ext = "md5",       .allow = "0"  },
	{ .ext = "md5sum",    .allow = "0"  },
	{ .ext = "mkv",       .allow = "9"  },
	{ .ext = "mp3",       .allow = "9"  },
	{ .ext = "mp4",       .allow = "9"  },
	{ .ext = "ogg",       .allow = "9"  },
	{ .ext = "ogv",       .allow = "9"  },
	{ .ext = "pdf",       .allow = "9"  },
	{ .ext = "png",       .allow = "I"  },
	{ .ext = "rss",       .allow = "0"  },
	{ .ext = "sh",        .allow = "0"  },
	{ .ext = "sha1",      .allow = "0"  },
	{ .ext = "sha1sum",   .allow = "0"  },
	{ .ext = "sha256",    .allow = "0"  },
	{ .ext = "sha256sum", .allow = "0"  },
	{ .ext = "sha512",    .allow = "0"  },
	{ .ext = "sha512sum", .allow = "0"  },
	{ .ext = "srt",       .allow = "0"  },
	{ .ext = "tgz",       .allow = "9"  },
	{ .ext = "txt",       .allow = "0"  },
	{ .ext = "wav",       .allow = "9"  },
	{ .ext = "xml",       .allow = "0"  },
	{ .ext = "xz",        .allow = "9"  },
};
91
/* Exit status returned by main(); error() raises it to 1. */
int exitcode = 0;
/* Stream die() writes fatal messages to. */
FILE *errfp;
/* Stream error() and warning() write their reports to. */
FILE *outfp;
94
95 void
96 die(const char *fmt, ...)
97 {
98 va_list ap;
99
100 fputs("fatal: ", errfp);
101
102 va_start(ap, fmt);
103 vfprintf(errfp, fmt, ap);
104 va_end(ap);
105
106 exit(2);
107 }
108
109 void
110 error(const char *fmt, ...)
111 {
112 va_list ap;
113
114 fputs("error: ", outfp);
115
116 va_start(ap, fmt);
117 vfprintf(outfp, fmt, ap);
118 va_end(ap);
119
120 exitcode = 1;
121 }
122
123 void
124 warning(const char *fmt, ...)
125 {
126 va_list ap;
127
128 fputs("warning: ", outfp);
129
130 va_start(ap, fmt);
131 vfprintf(outfp, fmt, ap);
132 va_end(ap);
133 }
134
135 int
136 gophertypecmp(const void *v1, const void *v2)
137 {
138 return strcasecmp(((struct gophertype *)v1)->ext,
139 ((struct gophertype *)v2)->ext);
140 }
141
/*
 * Check whether s is an acceptable host string.
 *
 * Two forms are accepted:
 *  - a bracketed IPv6 literal: '[', then only hexadecimal digits, ':' and
 *    '.', containing at least two ':', then ']' as the LAST character;
 *  - otherwise a non-empty string consisting only of characters for which
 *    isalpha() or isdigit() is true, '-' and '.'.
 *
 * Returns 1 when s is acceptable, 0 otherwise.
 */
int
isvalidhost(const char *s)
{
	/* walk the bytes as unsigned char so they are valid <ctype.h> input */
	const unsigned char *p = (const unsigned char *)s;
	int colons = 0;

	/* IPv6 */
	if (*p == '[') {
		for (p++; *p && *p != ']'; p++) {
			if (*p == ':')
				colons++;
			else if (!isxdigit(*p) && *p != '.')
				return 0;
		}
		/* the bracket must be closed and nothing may follow it,
		   otherwise "[::1]junk" would pass as a valid host */
		return *p == ']' && p[1] == '\0' && colons >= 2;
	}

	if (!*p)
		return 0;
	for (; *p; p++) {
		/* isalnum() is true exactly when isalpha() or isdigit() is */
		if (!isalnum(*p) && *p != '-' && *p != '.')
			return 0;
	}

	return 1;
}
176
/*
 * Open a TCP connection to host:port, or die.
 *
 * host is resolved with getaddrinfo() for any address family; port must be
 * numeric (AI_NUMERICSERV). Each returned address is tried in order until
 * one connects. Before connecting, both the send and the receive timeout
 * of the socket are set to MAX_RESPONSETIMEOUT seconds.
 *
 * Returns the connected socket descriptor. Resolution failure, a
 * setsockopt() failure, or failing to connect to every address ends the
 * process through die().
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *list, *ai;
	struct timeval tv;
	const char *cause = NULL;
	int gaierr, saved, fd;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */

	gaierr = getaddrinfo(host, port, &hints, &list);
	if (gaierr != 0)
		die("%s: %s: %s:%s\n", __func__, gai_strerror(gaierr), host, port);

	fd = -1;
	for (ai = list; ai != NULL; ai = ai->ai_next) {
		fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (fd == -1) {
			cause = "socket";
			continue;
		}

		/* send timeout */
		tv.tv_sec = MAX_RESPONSETIMEOUT;
		tv.tv_usec = 0;
		if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));

		/* receive timeout */
		tv.tv_sec = MAX_RESPONSETIMEOUT;
		tv.tv_usec = 0;
		if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));

		if (connect(fd, ai->ai_addr, ai->ai_addrlen) != -1)
			break; /* connected */

		/* keep connect()'s errno across close() and try the next address */
		cause = "connect";
		saved = errno;
		close(fd);
		errno = saved;
		fd = -1;
	}
	if (fd == -1)
		die("%s: %s: %s:%s\n", __func__, cause, host, port);
	freeaddrinfo(list);

	return fd;
}
226
227 void
228 checkdir(FILE *fp)
229 {
230 struct gophertype gt, *rgt;
231 struct visited v;
232 char line[1024], *end, *s;
233 size_t linenr;
234 ssize_t n;
235 long long l;
236 int i, r, len, hasdotend = 0, c, primarytype = 0, wc, col;
237 wchar_t w;
238
239 if (pledge("stdio", NULL) == -1)
240 die("pledge: %s\n", strerror(errno));
241
242 for (linenr = 1; fgets(line, sizeof(line), fp); linenr++) {
243 n = strcspn(line, "\n");
244 if (line[n] != '\n')
245 die("%zu: line too long\n", linenr); /* fatal */
246 if (n && line[n] == '\n')
247 line[n] = '\0';
248 if (n && line[n - 1] == '\r')
249 line[--n] = '\0';
250 else
251 error("%zu: invalid line-ending, not CRLF (\\r\\…
252 if (n == 1 && line[0] == '.') {
253 hasdotend = 1;
254 break;
255 }
256
257 memset(&v, 0, sizeof(v));
258
259 v._type = line[0];
260
261 /* "username" */
262 i = 1;
263 len = strcspn(line + i, "\t");
264 if (len + 1 < sizeof(v.username)) {
265 memcpy(v.username, line + i, len);
266 v.username[len] = '\0';
267 } else {
268 error("%zu: username field too long\n", linenr);
269 continue;
270 }
271 if (line[i + len] == '\t') {
272 i += len + 1;
273 } else {
274 error("%zu: invalid line / field count\n", linen…
275 continue;
276 }
277
278 /* selector / path */
279 len = strcspn(line + i, "\t");
280 if (len + 1 < sizeof(v.path)) {
281 memcpy(v.path, line + i, len);
282 v.path[len] = '\0';
283 } else {
284 error("%zu: path field too long\n", linenr);
285 continue;
286 }
287 if (line[i + len] == '\t') {
288 i += len + 1;
289 } else {
290 error("%zu: invalid line / field count\n", linen…
291 continue;
292 }
293
294 /* host */
295 len = strcspn(line + i, "\t");
296 if (len + 1 < sizeof(v.host)) {
297 memcpy(v.host, line + i, len);
298 v.host[len] = '\0';
299 } else {
300 error("%zu: host field too long\n", linenr);
301 continue;
302 }
303 if (line[i + len] == '\t') {
304 i += len + 1;
305 } else {
306 error("%zu: invalid line / field count\n", linen…
307 continue;
308 }
309
310 /* port */
311 len = strcspn(line + i, "\t");
312 if (len + 1 < sizeof(v.port)) {
313 memcpy(v.port, line + i, len);
314 v.port[len] = '\0';
315 } else {
316 error("%zu: port field too long\n", linenr);
317 continue;
318 }
319
320 /* check non-standard types */
321 c = v._type;
322 if (v._type == '+' && !primarytype)
323 error("%zu: mirror type used, but no previous ty…
324 if (v._type != '+')
325 primarytype = v._type;
326
327 if (!(isdigit(c) || c == 'g' || c == 'I' || c == 'T' || …
328 /* common-used */
329 if (c == 'i' || c == 'h') {
330 #if 0
331 warning("%zu: non-standard, but common-u…
332 linenr, c);
333 #endif
334 } else {
335 /* 3.8: "Characters '0' through 'Z' are …
336 experiments should use other characte…
337 Machine-specific extensions are not e…
338 if (c >= '0' && c <= 'Z')
339 error("%zu: unknown / non-standa…
340 linenr, c);
341 }
342 }
343
344 /* check type with file extension, unless it is the HTML…
345 type with a "URL:" prefix */
346 if ((s = strrchr(v.path, '.')) && !strchr(s, '/') &&
347 !(primarytype == 'h' && !strncmp(v.path, "URL:", siz…
348 gt.ext = ++s;
349 if (!(rgt = bsearch(&gt, &types, sizeof(types) /…
350 sizeof(types[0]), &gophertypecmp)))
351 continue;
352
353 if (!strchr(rgt->allow, primarytype))
354 warning("%zu: invalid type '%c' for exte…
355 linenr, primarytype, rgt->ext, r…
356 }
357
358 if (!isvalidhost(v.host))
359 error("%zu: invalid host: %s\n", linenr, v.host);
360
361 /* check port, must be numeric and in range, port 0 is a…
362 "Appendix:
363 Note: Port corresponds the the TCP Port Number, its valu…
364 be in the range [0..65535]; port 70 is officially …
365 to gopher." */
366
367 errno = 0;
368 l = strtoll(v.port, &end, 10);
369 if (errno || v.port == end || *end || l < 0 || l > 65535…
370 error("%zu: invalid port: %s\n", linenr, v.port);
371 } else {
372 #if 0
373 if (l != 70)
374 warning("%zu: non-standard gopher port: …
375 linenr, l);
376 #endif
377 }
378
379 /* RFC "Notes": "The Selector string should be no longer…
380 255 characters." */
381 if ((len = strlen(v.path)) > 255)
382 error("%zu: selector should not be longer than 2…
383 linenr, len);
384
385 /* decode UTF-8 (text-encoding is ASCII/Latin1 in the RF…
386 Latin1 sucks, recommend UTF-8 instead.
387 Check column length as recommended as described in th…
388 in section 3.9. */
389 s = v.username;
390 len = strlen(s);
391 col = 0;
392 for (i = 0; i < len; i += r) {
393 r = mbtowc(&w, &s[i], len - i < 4 ? len - i : 4);
394 if (r == 0)
395 break;
396 if (r == -1) {
397 mbtowc(NULL, NULL, 0); /* reset state */
398 warning("%zu:%d: username: first invalid…
399 linenr, i + 1);
400 break;
401 }
402 if ((wc = wcwidth(w)) == -1)
403 wc = 1;
404 col += (size_t)wc;
405
406 /* RFC "Notes": "It is *highly* recommended that…
407 User_Name field contain only printable charac…
408 if (!iswprint(w)) {
409 error("%zu:%d: first non-printable chara…
410 linenr, i + 1);
411 break;
412 }
413 }
414 #if 0
415 /* instead of 70 check 79 */
416 if (col > 79)
417 warning("%zu: username column length is > 79 (%d…
418 linenr, col);
419 #endif
420
421 if (!strcmp(v.path, "..") || strstr(v.path, "../"))
422 warning("%zu: found ../ in path: don't use relat…
423 }
424 if (ferror(fp))
425 die("fgets: %s\n", strerror(errno));
426
427 if (!hasdotend)
428 error("no .\\r\\n end\n");
429 }
430
/*
 * Fetch a menu from a remote server and validate it.
 *
 * Connects to host:port with edial(), sends the request line
 * ("path\r\n", or "path<TAB>param\r\n" when param is non-empty), then
 * hands the response stream to checkdir() and closes it afterwards.
 * A failed write or fdopen() ends the process through die().
 */
void
checkremote(const char *host, const char *port, const char *path, const char *param)
{
	FILE *stream;
	int sock, written;

	sock = edial(host, port);

	written = (param[0] != '\0')
	    ? dprintf(sock, "%s\t%s\r\n", path, param)
	    : dprintf(sock, "%s\r\n", path);
	if (written == -1)
		die("write: %s\n", strerror(errno));

	stream = fdopen(sock, "rb+");
	if (stream == NULL)
		die("fdopen: %s\n", strerror(errno));

	checkdir(stream);
	fclose(stream);
}
451
452 int
453 parseuri(const char *str, struct uri *u)
454 {
455 const char *s, *e;
456
457 memset(u, 0, sizeof(struct uri));
458
459 s = str;
460
461 /* IPv6 */
462 if (*s == '[') {
463 s++;
464 e = strchr(s, ']');
465 if (!e || e - s + 1 >= sizeof(u->host))
466 return 0;
467 memcpy(u->host, s, e - s);
468 u->host[e - s] = '\0';
469 e++;
470 } else {
471 e = &s[strcspn(s, ":/")];
472 if (e - s + 1 >= sizeof(u->host))
473 return 0;
474 memcpy(u->host, s, e - s);
475 u->host[e - s] = '\0';
476 }
477
478 if (*e == ':') {
479 s = e + 1;
480 e = &s[strcspn(s, "/")];
481
482 if (e - s + 1 >= sizeof(u->port))
483 return 0;
484 memcpy(u->port, s, e - s);
485 u->port[e - s] = '\0';
486 }
487 if (*e && *e != '/')
488 return 0; /* invalid path */
489
490 s = e;
491 e = s + strlen(s);
492
493 if (e - s + 1 >= sizeof(u->path))
494 return 0;
495 memcpy(u->path, s, e - s);
496 u->path[e - s] = '\0';
497
498 return 1;
499 }
500
501 int
502 main(int argc, char **argv)
503 {
504 struct uri u;
505 const char *path, *uri = "", *param = "", *s;
506 int _type = '1';
507
508 setlocale(LC_CTYPE, "");
509
510 outfp = stdout;
511 errfp = stderr;
512
513 /* CGI-mode or stand-alone */
514 if ((s = getenv("QUERY_STRING"))) {
515 uri = s;
516 param = "";
517 errfp = stdout; /* output errors to stdout also in CGI m…
518 } else {
519 switch (argc) {
520 case 3:
521 param = argv[2];
522 case 2:
523 uri = argv[1];
524 break;
525 case 1:
526 checkdir(stdin);
527 return exitcode;
528 default:
529 fprintf(errfp, "usage: %s [uri] [param]\n", argv…
530 return 1;
531 }
532 }
533
534 if (pledge("stdio inet dns", NULL) == -1)
535 die("pledge: %s\n", strerror(errno));
536
537 if (!strncmp(uri, "gopher://", sizeof("gopher://") - 1))
538 uri += sizeof("gopher://") - 1;
539 else if (!strncmp(uri, "gophers://", sizeof("gophers://") - 1))
540 uri += sizeof("gophers://") - 1;
541
542 if (!parseuri(uri, &u))
543 die("Invalid URI\n");
544 if (u.host[0] == '\0')
545 die("Invalid hostname\n");
546
547 if (u.path[0] == '\0')
548 memcpy(u.path, "/", 2);
549 if (u.port[0] == '\0')
550 memcpy(u.port, "70", 3);
551
552 path = u.path;
553 if (path[0] == '/') {
554 path++;
555 if (*path) {
556 _type = *path;
557 path++;
558 }
559 } else {
560 path = "";
561 }
562
563 switch (_type) {
564 case '1':
565 case '7':
566 break; /* handled below */
567 default: /* these types are not validated */
568 fprintf(errfp, "only types 1 (dir) and 7 (search) are va…
569 return 1;
570 }
571
572 if (_type != '7')
573 param = "";
574
575 checkremote(u.host, u.port, path, param);
576
577 return exitcode;
578 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.