hurl.c - hurl - Gopher/HTTP/HTTPS file grabber | |
git clone git://git.codemadness.org/hurl | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
hurl.c (13950B) | |
--- | |
1 #include <sys/socket.h> | |
2 #include <sys/time.h> | |
3 | |
4 #include <ctype.h> | |
5 #include <err.h> | |
6 #include <errno.h> | |
7 #include <netdb.h> | |
8 #include <locale.h> | |
9 #include <signal.h> | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <string.h> | |
13 #include <time.h> | |
14 #include <unistd.h> | |
15 | |
16 #include <tls.h> | |
17 | |
18 #include "arg.h" | |
19 | |
20 #define READ_BUF_SIZ 16384 | |
21 | |
22 #ifndef __OpenBSD__ | |
23 #define pledge(p1,p2) 0 | |
24 #define unveil(p1,p2) 0 | |
25 #endif | |
26 | |
27 #ifndef TLS_CA_CERT_FILE | |
28 #define TLS_CA_CERT_FILE "/etc/ssl/cert.pem" | |
29 #endif | |
30 | |
31 /* URI split into its components by uri_parse(); each field is a NUL-terminated string that is empty when the part is absent */ | |
32 struct uri { | |
33 char proto[48]; /* scheme including ":" or "://" */ | |
34 char userinfo[256]; /* username [:password], without the trailing "@" */ | |
/* host name; an IPv6 literal is stored together with its "[" and "]" */
35 char host[256]; | |
36 char port[6]; /* numeric port, 1 - 65535, may be empty */ | |
/* path: everything up to the first "?" or "#" */
37 char path[1024]; | |
/* query: text between "?" and "#", the "?" itself is not stored */
38 char query[1024]; | |
/* fragment: text after "#", the "#" itself is not stored */
39 char fragment[1024]; | |
40 }; | |
41 | |
42 char *argv0; | |
43 | |
44 /* raw header(s) to add */ | |
45 static const char *config_headers = ""; | |
46 /* max response size in bytes, 0 is unlimited */ | |
47 static size_t config_maxresponsesiz = 0; | |
48 /* time-out in seconds */ | |
49 static long long config_timeout = 0; | |
50 /* legacy ciphers? */ | |
51 static int config_legacy = 0; | |
52 /* TLS CA file */ | |
53 static char *config_ca_file; | |
54 /* parsed URI */ | |
55 static struct uri u; | |
56 /* socket fd */ | |
57 static int sock = -1; | |
58 /* raw command-line argument */ | |
59 static char *url; | |
60 /* TLS context */ | |
61 static struct tls *t; | |
62 /* TLS config */ | |
63 static struct tls_config *tls_config; | |
64 | |
65 /* protocol handlers */ | |
66 int gopher_request(void); | |
67 int http_request(void); | |
68 | |
/* One supported URL scheme: the request function that serves it, the
   scheme prefix matched against the parsed URI, the port used when the
   URL names none and whether the connection is wrapped in TLS. */
69 struct handler { | |
70 int (*handler)(void); /* function / handler / callback */ | |
71 const char *proto; /* protocol / scheme, "gopher://" */ | |
72 const char *port; /* default port */ | |
73 int usetls; /* setup TLS (=1) or plain connection (=0) */ | |
74 }; | |
75 | |
/* Table of supported schemes. main() walks it in order and uses the first
   entry whose proto equals the scheme parsed from the URL. */
76 static const struct handler handlers[] = { | |
77 { .handler = gopher_request, .proto = "gopher://", .port = "70… | |
78 { .handler = gopher_request, .proto = "gophers://", .port = "70… | |
79 { .handler = http_request, .proto = "http://", .port = "80… | |
80 { .handler = http_request, .proto = "https://", .port = "44… | |
81 }; | |
82 | |
/* I/O functions of the active connection. main() points them at the tls_*
   or plain_* variants before the protocol handler runs; both return -1 on
   error. */
83 ssize_t (*readbuf)(char *, size_t); | |
84 ssize_t (*writebuf)(const char *, size_t); | |
85 | |
/* Signal handler: terminate the process at once with status 2 when
   SIGALRM is delivered; any other signal number is ignored. */
void
sighandler(int signo)
{
	if (signo != SIGALRM)
		return;

	/* _exit() rather than exit(): no stdio flushing or atexit handlers
	   run from inside the signal handler */
	_exit(2);
}
92 | |
/* Case-insensitive substring search, a portable stand-in for the
   non-standard strcasestr(3). Returns a pointer to the first occurrence
   of needle in haystack, or NULL when there is none. */
static const char *
find_nocase(const char *haystack, const char *needle)
{
	size_t i;

	if (!*needle)
		return haystack;

	for (; *haystack; haystack++) {
		/* a NUL in haystack never equals a needle byte, so the
		   comparison stops before reading past the terminator */
		for (i = 0; needle[i]; i++) {
			if (tolower((unsigned char)haystack[i]) !=
			    tolower((unsigned char)needle[i]))
				break;
		}
		if (!needle[i])
			return haystack;
	}
	return NULL;
}

/* Find the Content-Length field in the HTTP header text s and store its
   value in *length.
   The field name is matched case-insensitively and must start a line that
   follows a CRLF (so it can not be the first line). The value has to be a
   non-negative decimal number directly followed by CR or the end of the
   string, and it must fit in a size_t.
   Returns 0 on success, -1 when the field is missing or malformed; *length
   is left untouched on failure. */
int
parse_content_length(const char *s, size_t *length)
{
	static const char field[] = "\r\nContent-Length:";
	const char *p;
	char *end;
	long long l;

	if (!(p = find_nocase(s, field)))
		return -1;

	p += sizeof(field) - 1;
	p += strspn(p, " \t"); /* optional whitespace before the value */

	/* reject a sign or other junk that strtoll() would accept */
	if (!isdigit((unsigned char)*p))
		return -1;

	errno = 0;
	l = strtoll(p, &end, 10);
	if (errno || p == end || (*end != '\0' && *end != '\r') || l < 0)
		return -1;
	/* do not silently truncate where size_t is narrower than long long */
	if ((unsigned long long)l > (size_t)-1)
		return -1;

	*length = (size_t)l;

	return 0;
}
118 | |
119 int | |
120 uri_parse(const char *s, struct uri *u) | |
121 { | |
122 const char *p = s; | |
123 char *endptr; | |
124 size_t i; | |
125 long l; | |
126 | |
127 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; | |
128 u->path[0] = u->query[0] = u->fragment[0] = '\0'; | |
129 | |
130 /* protocol-relative */ | |
131 if (*p == '/' && *(p + 1) == '/') { | |
132 p += 2; /* skip "//" */ | |
133 goto parseauth; | |
134 } | |
135 | |
136 /* scheme / protocol part */ | |
137 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) … | |
138 *p == '+' || *p == '-' || *p == '.'; p++) | |
139 ; | |
140 /* scheme, except if empty and starts with ":" then it is a path… | |
141 if (*p == ':' && p != s) { | |
142 if (*(p + 1) == '/' && *(p + 2) == '/') | |
143 p += 3; /* skip "://" */ | |
144 else | |
145 p++; /* skip ":" */ | |
146 | |
147 if ((size_t)(p - s) >= sizeof(u->proto)) | |
148 return -1; /* protocol too long */ | |
149 memcpy(u->proto, s, p - s); | |
150 u->proto[p - s] = '\0'; | |
151 | |
152 if (*(p - 1) != '/') | |
153 goto parsepath; | |
154 } else { | |
155 p = s; /* no scheme format, reset to start */ | |
156 goto parsepath; | |
157 } | |
158 | |
159 parseauth: | |
160 /* userinfo (username:password) */ | |
161 i = strcspn(p, "@/?#"); | |
162 if (p[i] == '@') { | |
163 if (i >= sizeof(u->userinfo)) | |
164 return -1; /* userinfo too long */ | |
165 memcpy(u->userinfo, p, i); | |
166 u->userinfo[i] = '\0'; | |
167 p += i + 1; | |
168 } | |
169 | |
170 /* IPv6 address */ | |
171 if (*p == '[') { | |
172 /* bracket not found, host too short or too long */ | |
173 i = strcspn(p, "]"); | |
174 if (p[i] != ']' || i < 3) | |
175 return -1; | |
176 i++; /* including "]" */ | |
177 } else { | |
178 /* domain / host part, skip until port, path or end. */ | |
179 i = strcspn(p, ":/?#"); | |
180 } | |
181 if (i >= sizeof(u->host)) | |
182 return -1; /* host too long */ | |
183 memcpy(u->host, p, i); | |
184 u->host[i] = '\0'; | |
185 p += i; | |
186 | |
187 /* port */ | |
188 if (*p == ':') { | |
189 p++; | |
190 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) | |
191 return -1; /* port too long */ | |
192 memcpy(u->port, p, i); | |
193 u->port[i] = '\0'; | |
194 /* check for valid port: range 1 - 65535, may be empty */ | |
195 errno = 0; | |
196 l = strtol(u->port, &endptr, 10); | |
197 if (i && (errno || *endptr || l <= 0 || l > 65535)) | |
198 return -1; | |
199 p += i; | |
200 } | |
201 | |
202 parsepath: | |
203 /* path */ | |
204 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) | |
205 return -1; /* path too long */ | |
206 memcpy(u->path, p, i); | |
207 u->path[i] = '\0'; | |
208 p += i; | |
209 | |
210 /* query */ | |
211 if (*p == '?') { | |
212 p++; | |
213 if ((i = strcspn(p, "#")) >= sizeof(u->query)) | |
214 return -1; /* query too long */ | |
215 memcpy(u->query, p, i); | |
216 u->query[i] = '\0'; | |
217 p += i; | |
218 } | |
219 | |
220 /* fragment */ | |
221 if (*p == '#') { | |
222 p++; | |
223 if ((i = strlen(p)) >= sizeof(u->fragment)) | |
224 return -1; /* fragment too long */ | |
225 memcpy(u->fragment, p, i); | |
226 u->fragment[i] = '\0'; | |
227 } | |
228 | |
229 return 0; | |
230 } | |
231 | |
/* Resolve host and the numeric port and connect a TCP stream socket to
   the first address that accepts the connection (IPv4 or IPv6).
   Returns the connected socket descriptor. Does not return on failure:
   the process exits with status 1 after printing which step failed and,
   for socket/connect errors, the corresponding system error message. */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	int error, save_errno, s;
	const char *cause = NULL;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((error = getaddrinfo(host, port, &hints, &res0)))
		errx(1, "%s: %s: %s:%s", __func__, gai_strerror(error), host, port);
	s = -1;
	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			continue;
		}

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			/* close() may clobber errno, keep the connect() error */
			save_errno = errno;
			close(s);
			errno = save_errno;
			s = -1;
			continue;
		}
		break;
	}
	if (s == -1) {
		/* empty address list: no errno to report, and never pass a
		   NULL pointer to %s */
		if (!cause)
			errx(1, "%s: no address: %s:%s", __func__, host, port);
		/* err() appends strerror(errno) of the last failed call */
		err(1, "%s: %s: %s:%s", __func__, cause, host, port);
	}
	freeaddrinfo(res0);

	return s;
}
270 | |
271 void | |
272 setup_plain(void) | |
273 { | |
274 if (pledge("stdio dns inet", NULL) == -1) | |
275 err(1, "pledge"); | |
276 | |
277 sock = edial(u.host, u.port); | |
278 } | |
279 | |
280 void | |
281 setup_tls(void) | |
282 { | |
283 if (tls_init()) | |
284 errx(1, "tls_init failed"); | |
285 if (!(tls_config = tls_config_new())) | |
286 errx(1, "tls config failed"); | |
287 if (config_legacy) { | |
288 /* enable legacy cipher and negotiation. */ | |
289 if (tls_config_set_ciphers(tls_config, "legacy")) | |
290 errx(1, "tls_config_set_ciphers: %s", | |
291 tls_config_error(tls_config)); | |
292 } | |
293 if (config_ca_file) { | |
294 if (unveil(config_ca_file, "r") == -1) | |
295 err(1, "unveil: %s", config_ca_file); | |
296 if (tls_config_set_ca_file(tls_config, config_ca_file) =… | |
297 errx(1, "tls_config_set_ca_file: %s: %s", config… | |
298 tls_config_error(tls_config)); | |
299 } else { | |
300 if (unveil(TLS_CA_CERT_FILE, "r") == -1) | |
301 err(1, "unveil: %s", TLS_CA_CERT_FILE); | |
302 } | |
303 | |
304 if (pledge("stdio dns inet rpath", NULL) == -1) | |
305 err(1, "pledge"); | |
306 | |
307 if (!(t = tls_client())) | |
308 errx(1, "tls_client: %s", tls_error(t)); | |
309 if (tls_configure(t, tls_config)) | |
310 errx(1, "tls_configure: %s", tls_error(t)); | |
311 | |
312 sock = edial(u.host, u.port); | |
313 if (tls_connect_socket(t, sock, u.host) == -1) | |
314 errx(1, "tls_connect: %s", tls_error(t)); | |
315 } | |
316 | |
317 ssize_t | |
318 tls_writebuf(const char *buf, size_t buflen) | |
319 { | |
320 const char *errstr; | |
321 const char *p; | |
322 size_t len; | |
323 ssize_t r, written = 0; | |
324 | |
325 for (len = buflen, p = buf; len > 0; ) { | |
326 r = tls_write(t, p, len); | |
327 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) { | |
328 continue; | |
329 } else if (r == -1) { | |
330 errstr = tls_error(t); | |
331 fprintf(stderr, "tls_write: %s\n", errstr ? errs… | |
332 return -1; | |
333 } | |
334 p += r; | |
335 len -= r; | |
336 written += r; | |
337 } | |
338 return written; | |
339 } | |
340 | |
341 ssize_t | |
342 tls_readbuf(char *buf, size_t bufsiz) | |
343 { | |
344 const char *errstr; | |
345 ssize_t r, len; | |
346 | |
347 for (len = 0; bufsiz > 0;) { | |
348 r = tls_read(t, buf + len, bufsiz); | |
349 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) { | |
350 continue; | |
351 } else if (r == 0) { | |
352 break; | |
353 } else if (r == -1) { | |
354 errstr = tls_error(t); | |
355 fprintf(stderr, "tls_read: %s\n", errstr ? errst… | |
356 return -1; | |
357 } | |
358 len += r; | |
359 bufsiz -= r; | |
360 } | |
361 return len; | |
362 } | |
363 | |
364 ssize_t | |
365 plain_writebuf(const char *buf, size_t buflen) | |
366 { | |
367 ssize_t r; | |
368 | |
369 if ((r = write(sock, buf, buflen)) == -1) | |
370 fprintf(stderr, "write: %s\n", strerror(errno)); | |
371 return r; | |
372 } | |
373 | |
374 ssize_t | |
375 plain_readbuf(char *buf, size_t bufsiz) | |
376 { | |
377 ssize_t r, len; | |
378 | |
379 for (len = 0; bufsiz > 0;) { | |
380 r = read(sock, buf + len, bufsiz); | |
381 if (r == 0) { | |
382 break; | |
383 } else if (r == -1) { | |
384 fprintf(stderr, "read: %s\n", strerror(errno)); | |
385 return -1; | |
386 } | |
387 len += r; | |
388 bufsiz -= r; | |
389 } | |
390 return len; | |
391 } | |
392 | |
/* Fetch the parsed URI (global u) with an HTTP/1.0 GET request over the
   active connection (writebuf / readbuf).
   The request carries a Host field, "Connection: close" and the raw
   config_headers when set; the port is left out of the Host field when
   stdport is true. The request and the response header each have to fit
   in one buffer of READ_BUF_SIZ bytes.
   The body is copied to stdout only when the status line starts with
   "HTTP/1.0 200 " or "HTTP/1.1 200 "; for any other status the header is
   printed to stderr and the rest of the response is read but not written
   out. len counts every byte read including the header, bodylen only the
   bytes after the header; len is checked against config_maxresponsesiz
   (when non-zero) and bodylen against Content-Length (when present).
   Returns 0 on success, 1 on an error with a 200 status and 2 whenever
   the status was not 200. */
393 int | |
394 http_request(void) | |
395 { | |
396 char buf[READ_BUF_SIZ], *p; | |
397 size_t bodylen, expectedlen, n, len; | |
398 ssize_t r; | |
399 int cs, httpok = 0, ret = 1, stdport; | |
400 | |
401 stdport = u.port[0] == '\0' || strcmp(u.port, t ? "443" : "80") … | |
402 | |
403 /* create and send HTTP header */ | |
404 r = snprintf(buf, sizeof(buf), | |
405 "GET %s%s%s HTTP/1.0\r\n" | |
406 "Host: %s%s%s\r\n" | |
407 "Connection: close\r\n" | |
408 "%s%s" | |
409 "\r\n", | |
410 u.path[0] ? u.path : "/", | |
411 u.query[0] ? "?" : "", u.query, | |
412 u.host, | |
413 stdport ? "" : ":", | |
414 stdport ? "" : u.port, | |
415 config_headers, config_headers[0] ? "\r\n" : ""); | |
416 if (r < 0 || (size_t)r >= sizeof(buf)) { | |
417 fprintf(stderr, "not writing header because it is trunca… | |
418 goto err; | |
419 } | |
420 | |
421 if ((r = writebuf(buf, r)) == -1) | |
422 goto err; | |
423 | |
424 /* NOTE: HTTP header must fit in the buffer, buffer size is -1 t… | |
425 terminate the buffer for a string comparison. */ | |
426 if ((r = readbuf(buf, sizeof(buf) - 1)) == -1) | |
427 goto err; | |
428 len = r; | |
429 buf[len] = '\0'; /* NUL terminate buffer */ | |
430 | |
431 if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) … | |
432 !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1)) | |
433 httpok = 1; | |
434 | |
435 if (!(p = strstr(buf, "\r\n\r\n"))) { | |
436 fprintf(stderr, "no HTTP header found or header too big\… | |
437 goto err; | |
438 } | |
439 *p = '\0'; /* NUL terminate header part */ | |
440 cs = parse_content_length(buf, &expectedlen); | |
441 p += strlen("\r\n\r\n"); | |
442 bodylen = len - (p - buf); /* (partial) body after header */ | |
443 | |
444 if (httpok) { | |
445 n = len - (p - buf); | |
446 fwrite(p, 1, n, stdout); | |
447 if (ferror(stdout)) { | |
448 fprintf(stderr, "fwrite: stdout: %s\n", strerror… | |
449 goto err; | |
450 } | |
451 } else { | |
452 /* if not 200 OK print header */ | |
453 fputs(buf, stderr); | |
454 fputs("\r\n\r\n", stderr); | |
455 /* NOTE: we are nice and keep reading (not closing) unti… | |
456 } | |
457 | |
458 while ((r = readbuf(buf, sizeof(buf))) > 0) { | |
459 len += r; | |
460 bodylen += r; | |
461 | |
462 if (httpok) { | |
463 fwrite(buf, 1, r, stdout); | |
464 if (ferror(stdout)) { | |
465 fprintf(stderr, "fwrite: stdout: %s\n", … | |
466 goto err; | |
467 } | |
468 } | |
469 | |
470 if (config_maxresponsesiz && len >= config_maxresponsesi… | |
471 break; | |
472 } | |
473 if (r == -1) | |
474 goto err; | |
475 if (config_maxresponsesiz && len >= config_maxresponsesiz) { | |
476 fprintf(stderr, "response too big: %zu >= %zu\n", | |
477 len, config_maxresponsesiz); | |
478 goto err; | |
479 } | |
480 if (cs != -1 && expectedlen != bodylen) { | |
481 fprintf(stderr, "Content-Length mismatch: %zu expected !… | |
482 expectedlen, bodylen); | |
483 goto err; | |
484 } | |
485 ret = 0; | |
486 | |
487 err: | |
488 return httpok ? ret : 2; | |
489 } | |
490 | |
/* Fetch the parsed URI (global u) over the active connection
   (writebuf / readbuf) using the Gopher protocol.
   The request line is the path with its leading "/" and the following
   type character skipped, then "?" plus the query when one is present,
   terminated by CRLF; it has to fit in READ_BUF_SIZ bytes. Everything
   the server sends is copied to stdout; len counts the bytes read and is
   checked against config_maxresponsesiz when that is non-zero.
   Returns 0 on success and 1 on any error (request too long, read or
   write failure, stdout error, response too big). */
491 int | |
492 gopher_request(void) | |
493 { | |
494 char buf[READ_BUF_SIZ]; | |
495 const char *path; | |
496 size_t len = 0; | |
497 ssize_t r; | |
498 int ret = 1; | |
499 | |
500 /* create and send path, skip type part, empty path is allowed, | |
501 see RFC 4266 The gopher URI Scheme - section 2.1 */ | |
502 path = u.path; | |
503 if (*path == '/') { | |
504 path++; | |
505 if (*path) | |
506 path++; /* skip type */ | |
507 } | |
508 | |
509 r = snprintf(buf, sizeof(buf), "%s%s%s\r\n", | |
510 path, u.query[0] ? "?" : "", u.query); | |
511 if (r < 0 || (size_t)r >= sizeof(buf)) { | |
512 fprintf(stderr, "not writing header because it is trunca… | |
513 goto err; | |
514 } | |
515 | |
516 if ((r = writebuf(buf, r)) == -1) | |
517 goto err; | |
518 | |
519 while ((r = readbuf(buf, sizeof(buf))) > 0) { | |
520 len += r; | |
521 | |
522 fwrite(buf, 1, r, stdout); | |
523 if (ferror(stdout)) { | |
524 fprintf(stderr, "fwrite: stdout: %s\n", strerror… | |
525 goto err; | |
526 } | |
527 | |
528 if (config_maxresponsesiz && len >= config_maxresponsesi… | |
529 break; | |
530 } | |
531 if (r == -1) | |
532 goto err; | |
533 | |
534 if (config_maxresponsesiz && len >= config_maxresponsesiz) { | |
535 fprintf(stderr, "response too big: %zu >= %zu\n", | |
536 len, config_maxresponsesiz); | |
537 goto err; | |
538 } | |
539 ret = 0; | |
540 | |
541 err: | |
542 return ret; | |
543 } | |
544 | |
545 void | |
546 usage(void) | |
547 { | |
548 fprintf(stderr, "usage: %s [-c ca_file] [-H headers] [-l] " | |
549 "[-m maxresponse] [-t timeout] url\n", argv0); | |
550 exit(1); | |
551 } | |
552 | |
/* Program entry point.
   Options: -c CA file, -H raw header(s) to add, -l enable legacy ciphers,
   -m maximum response size (non-negative), -t time-out in seconds
   (0 - 65534). Exactly one URL argument is required; it is parsed with
   uri_parse() and rejected when it contains a userinfo part.
   With a time-out above 0, sighandler is installed for SIGALRM and
   alarm() is armed before connecting.
   The first entry of handlers[] whose proto equals the parsed scheme is
   used: its default port is filled in when the URL has none, the TLS or
   plain connection is set up, readbuf / writebuf are pointed at the
   matching I/O functions and the handler is run. Its return value becomes
   the exit status after the connection is closed.
   Exits with status 1 on invalid usage, an invalid URL, or an unsupported
   or missing scheme. */
553 int | |
554 main(int argc, char **argv) | |
555 { | |
556 char *end; | |
557 size_t i; | |
558 int statuscode; | |
559 long long l; | |
560 | |
561 ARGBEGIN { | |
562 case 'c': | |
563 config_ca_file = EARGF(usage()); | |
564 break; | |
565 case 'H': /* header(s) */ | |
566 config_headers = EARGF(usage()); | |
567 break; | |
568 case 'l': /* legacy ciphers */ | |
569 config_legacy = 1; | |
570 break; | |
571 case 'm': /* max filesize */ | |
572 errno = 0; | |
573 l = strtoll(EARGF(usage()), &end, 10); | |
574 if (errno || *end != '\0' || l < 0) | |
575 usage(); | |
576 config_maxresponsesiz = l; | |
577 break; | |
578 case 't': /* timeout */ | |
579 errno = 0; | |
580 l = strtoll(EARGF(usage()), &end, 10); | |
581 if (errno || *end != '\0' || l < 0 || l >= 65535) | |
582 usage(); | |
583 config_timeout = l; | |
584 break; | |
585 default: | |
586 usage(); | |
587 } ARGEND | |
588 | |
589 if (argc != 1) | |
590 usage(); | |
591 | |
592 url = argv[0]; | |
593 if (uri_parse(url, &u) == -1) | |
594 errx(1, "invalid URL: %s", url); | |
595 if (u.userinfo[0]) | |
596 errx(1, "userinfo field not supported in the URL: %s", u… | |
597 | |
598 if (config_timeout > 0) { | |
599 signal(SIGALRM, sighandler); | |
600 alarm(config_timeout); | |
601 } | |
602 | |
603 /* match a protocol handler */ | |
604 for (i = 0; i < sizeof(handlers) / sizeof(*handlers); i++) { | |
605 if (strcmp(u.proto, handlers[i].proto)) | |
606 continue; | |
607 if (!u.port[0]) | |
608 strcpy(u.port, handlers[i].port); /* default por… | |
609 | |
610 /* setup TLS or plain connection */ | |
611 if (handlers[i].usetls) { | |
612 setup_tls(); | |
613 readbuf = tls_readbuf; | |
614 writebuf = tls_writebuf; | |
615 } else { | |
616 setup_plain(); | |
617 readbuf = plain_readbuf; | |
618 writebuf = plain_writebuf; | |
619 } | |
620 | |
621 if (pledge("stdio", NULL) == -1) | |
622 err(1, "pledge"); | |
623 | |
624 statuscode = handlers[i].handler(); | |
625 | |
626 /* cleanup TLS and plain connection */ | |
627 if (t) { | |
628 tls_close(t); | |
629 tls_free(t); | |
630 } | |
631 if (sock != -1) | |
632 close(sock); | |
633 | |
634 return statuscode; | |
635 } | |
636 if (u.proto[0]) | |
637 errx(1, "unsupported protocol specified: %s", u.proto); | |
638 else | |
639 errx(1, "no protocol specified"); | |
640 | |
641 return 1; | |
642 } | |