Introduction
Introduction Statistics Contact Development Disclaimer Help
hurl.c - hurl - Gopher/HTTP/HTTPS file grabber
git clone git://git.codemadness.org/hurl
Log
Files
Refs
README
LICENSE
---
hurl.c (13950B)
---
1 #include <sys/socket.h>
2 #include <sys/time.h>
3
4 #include <ctype.h>
5 #include <err.h>
6 #include <errno.h>
7 #include <netdb.h>
8 #include <locale.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14 #include <unistd.h>
15
16 #include <tls.h>
17
18 #include "arg.h"
19
/* size in bytes of the I/O buffer used by the protocol handlers */
#define READ_BUF_SIZ 16384

/* not OpenBSD: replace pledge() and unveil() by the constant 0, which the
   callers treat as success (they only test for -1); the arguments are not
   evaluated */
#if !defined(__OpenBSD__)
#define pledge(promises,execpromises) 0
#define unveil(path,permissions) 0
#endif

/* CA file used for TLS unless overridden at build time */
#if !defined(TLS_CA_CERT_FILE)
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
#endif
30
/* components of a parsed URI, each stored as a NUL-terminated string;
   a component that is absent is the empty string */
struct uri {
	char proto[48];      /* scheme plus its ":" or "://" separator */
	char userinfo[256];  /* "username" or "username:password" */
	char host[256];      /* host name or bracketed IPv6 literal */
	char port[6];        /* port number in decimal */
	char path[1024];
	char query[1024];    /* without the leading "?" */
	char fragment[1024]; /* without the leading "#" */
};
41
42 char *argv0;
43
44 /* raw header(s) to add */
45 static const char *config_headers = "";
46 /* max response size in bytes, 0 is unlimited */
47 static size_t config_maxresponsesiz = 0;
48 /* time-out in seconds */
49 static long long config_timeout = 0;
50 /* legacy ciphers? */
51 static int config_legacy = 0;
52 /* TLS CA file */
53 static char *config_ca_file;
54 /* parsed URI */
55 static struct uri u;
56 /* socket fd */
57 static int sock = -1;
58 /* raw command-line argument */
59 static char *url;
60 /* TLS context */
61 static struct tls *t;
62 /* TLS config */
63 static struct tls_config *tls_config;
64
/* protocol handlers: perform the request for the parsed URL, the return
   value is used as the exit status of the program */
int gopher_request(void);
int http_request(void);

/* ties a URL scheme to its request function and connection defaults */
struct handler {
	int (*handler)(void); /* function that performs the request */
	const char *proto;    /* scheme including the separator, "gopher://" */
	const char *port;     /* port used when the URL has none */
	int usetls;           /* 1: set up TLS, 0: plain connection */
};
75
/* Table of the supported URL schemes. main() compares the parsed scheme with
   .proto using strcmp(), copies .port when the URL has no port, sets up a
   TLS or plain connection depending on .usetls and then calls .handler.
   NOTE(review): every row is cut off in this listing right after the start
   of .port; the rest of the initializers is not visible here. */
76 static const struct handler handlers[] = {
77 { .handler = gopher_request, .proto = "gopher://", .port = "70…
78 { .handler = gopher_request, .proto = "gophers://", .port = "70…
79 { .handler = http_request, .proto = "http://", .port = "80…
80 { .handler = http_request, .proto = "https://", .port = "44…
81 };
82
/* I/O functions of the active connection: main() points them at the tls_*
   or the plain_* implementations before the protocol handler runs.
   readbuf returns the number of bytes read (0 at EOF) or -1 on error,
   writebuf returns the number of bytes written or -1 on error. */
83 ssize_t (*readbuf)(char *, size_t);
84 ssize_t (*writebuf)(const char *, size_t);
85
/* Signal handler: leave the process at once with exit status 2 when the
   signal is SIGALRM, do nothing for any other signal number. */
void
sighandler(int signo)
{
	switch (signo) {
	case SIGALRM:
		_exit(2);
	default:
		break;
	}
}
92
/*
 * Find the Content-Length field in a NUL-terminated HTTP response header
 * and parse its value.
 *
 * s:      header text. The field name is matched case-insensitively and
 *         must directly follow a CRLF, so it is never found in the first
 *         line.
 * length: receives the parsed value on success, is left untouched on
 *         failure.
 *
 * The value must start with a digit and may be followed by spaces or tabs;
 * after that only the end of the string or a CR is accepted.
 *
 * Returns 0 on success and -1 when the field is missing, malformed, out of
 * range for long long or too large for a size_t.
 */
int
parse_content_length(const char *s, size_t *length)
{
	static const char field[] = "\r\nContent-Length:";
	const char *p;
	char *end;
	long long l;

	if (!(p = strcasestr(s, field)))
		return -1;

	p += sizeof(field) - 1;
	p += strspn(p, " \t");

	/* strtoll() also takes leading whitespace and a sign: insist on a digit */
	if (!isdigit((unsigned char)*p))
		return -1;

	errno = 0;
	l = strtoll(p, &end, 10);
	if (errno || p == end || l < 0)
		return -1;

	/* optional whitespace may sit between the value and the line end */
	end += strspn(end, " \t");
	if (*end != '\0' && *end != '\r')
		return -1;

	/* refuse a value that would be truncated when stored in a size_t */
	if ((unsigned long long)l > (size_t)-1)
		return -1;

	*length = (size_t)l;

	return 0;
}
118
119 int
120 uri_parse(const char *s, struct uri *u)
121 {
122 const char *p = s;
123 char *endptr;
124 size_t i;
125 long l;
126
127 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
128 u->path[0] = u->query[0] = u->fragment[0] = '\0';
129
130 /* protocol-relative */
131 if (*p == '/' && *(p + 1) == '/') {
132 p += 2; /* skip "//" */
133 goto parseauth;
134 }
135
136 /* scheme / protocol part */
137 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) …
138 *p == '+' || *p == '-' || *p == '.'; p++)
139 ;
140 /* scheme, except if empty and starts with ":" then it is a path…
141 if (*p == ':' && p != s) {
142 if (*(p + 1) == '/' && *(p + 2) == '/')
143 p += 3; /* skip "://" */
144 else
145 p++; /* skip ":" */
146
147 if ((size_t)(p - s) >= sizeof(u->proto))
148 return -1; /* protocol too long */
149 memcpy(u->proto, s, p - s);
150 u->proto[p - s] = '\0';
151
152 if (*(p - 1) != '/')
153 goto parsepath;
154 } else {
155 p = s; /* no scheme format, reset to start */
156 goto parsepath;
157 }
158
159 parseauth:
160 /* userinfo (username:password) */
161 i = strcspn(p, "@/?#");
162 if (p[i] == '@') {
163 if (i >= sizeof(u->userinfo))
164 return -1; /* userinfo too long */
165 memcpy(u->userinfo, p, i);
166 u->userinfo[i] = '\0';
167 p += i + 1;
168 }
169
170 /* IPv6 address */
171 if (*p == '[') {
172 /* bracket not found, host too short or too long */
173 i = strcspn(p, "]");
174 if (p[i] != ']' || i < 3)
175 return -1;
176 i++; /* including "]" */
177 } else {
178 /* domain / host part, skip until port, path or end. */
179 i = strcspn(p, ":/?#");
180 }
181 if (i >= sizeof(u->host))
182 return -1; /* host too long */
183 memcpy(u->host, p, i);
184 u->host[i] = '\0';
185 p += i;
186
187 /* port */
188 if (*p == ':') {
189 p++;
190 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
191 return -1; /* port too long */
192 memcpy(u->port, p, i);
193 u->port[i] = '\0';
194 /* check for valid port: range 1 - 65535, may be empty */
195 errno = 0;
196 l = strtol(u->port, &endptr, 10);
197 if (i && (errno || *endptr || l <= 0 || l > 65535))
198 return -1;
199 p += i;
200 }
201
202 parsepath:
203 /* path */
204 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
205 return -1; /* path too long */
206 memcpy(u->path, p, i);
207 u->path[i] = '\0';
208 p += i;
209
210 /* query */
211 if (*p == '?') {
212 p++;
213 if ((i = strcspn(p, "#")) >= sizeof(u->query))
214 return -1; /* query too long */
215 memcpy(u->query, p, i);
216 u->query[i] = '\0';
217 p += i;
218 }
219
220 /* fragment */
221 if (*p == '#') {
222 p++;
223 if ((i = strlen(p)) >= sizeof(u->fragment))
224 return -1; /* fragment too long */
225 memcpy(u->fragment, p, i);
226 u->fragment[i] = '\0';
227 }
228
229 return 0;
230 }
231
/*
 * Connect a TCP socket to host:port and return its file descriptor.
 *
 * host: host name or numeric address; an IPv6 literal may be enclosed in
 *       brackets the way it is written in a URL ("[::1]").
 * port: numeric port, service names are not looked up.
 *
 * Every address returned by getaddrinfo() is tried in order until one
 * connects. There is no error return: on failure a message that includes
 * the reason is printed and the process exits with status 1.
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	char literal[256];
	size_t hostlen;
	int error, save_errno = 0, s = -1;
	const char *cause = NULL;

	/* uri_parse() stores an IPv6 literal together with its brackets,
	   which getaddrinfo() does not understand: look up the bare address */
	hostlen = strlen(host);
	if (hostlen >= 2 && host[0] == '[' && host[hostlen - 1] == ']' &&
	    hostlen - 2 < sizeof(literal)) {
		memcpy(literal, host + 1, hostlen - 2);
		literal[hostlen - 2] = '\0';
		host = literal;
	}

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((error = getaddrinfo(host, port, &hints, &res0)))
		errx(1, "%s: %s: %s:%s", __func__, gai_strerror(error), host, port);

	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			save_errno = errno;
			continue;
		}

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			save_errno = errno; /* close() may overwrite errno */
			close(s);
			s = -1;
			continue;
		}
		break;
	}
	freeaddrinfo(res0);

	if (s == -1) {
		/* an empty address list leaves no failed call to report */
		if (!cause)
			errx(1, "%s: no address found: %s:%s", __func__, host, port);
		/* report why the last attempt failed */
		errno = save_errno;
		err(1, "%s: %s: %s:%s", __func__, cause, host, port);
	}

	return s;
}
270
271 void
272 setup_plain(void)
273 {
274 if (pledge("stdio dns inet", NULL) == -1)
275 err(1, "pledge");
276
277 sock = edial(u.host, u.port);
278 }
279
280 void
281 setup_tls(void)
282 {
283 if (tls_init())
284 errx(1, "tls_init failed");
285 if (!(tls_config = tls_config_new()))
286 errx(1, "tls config failed");
287 if (config_legacy) {
288 /* enable legacy cipher and negotiation. */
289 if (tls_config_set_ciphers(tls_config, "legacy"))
290 errx(1, "tls_config_set_ciphers: %s",
291 tls_config_error(tls_config));
292 }
293 if (config_ca_file) {
294 if (unveil(config_ca_file, "r") == -1)
295 err(1, "unveil: %s", config_ca_file);
296 if (tls_config_set_ca_file(tls_config, config_ca_file) =…
297 errx(1, "tls_config_set_ca_file: %s: %s", config…
298 tls_config_error(tls_config));
299 } else {
300 if (unveil(TLS_CA_CERT_FILE, "r") == -1)
301 err(1, "unveil: %s", TLS_CA_CERT_FILE);
302 }
303
304 if (pledge("stdio dns inet rpath", NULL) == -1)
305 err(1, "pledge");
306
307 if (!(t = tls_client()))
308 errx(1, "tls_client: %s", tls_error(t));
309 if (tls_configure(t, tls_config))
310 errx(1, "tls_configure: %s", tls_error(t));
311
312 sock = edial(u.host, u.port);
313 if (tls_connect_socket(t, sock, u.host) == -1)
314 errx(1, "tls_connect: %s", tls_error(t));
315 }
316
/* Write all buflen bytes of buf to the TLS context t.
   tls_write() is called again with the unwritten remainder until nothing is
   left; TLS_WANT_POLLIN and TLS_WANT_POLLOUT results are retried at once
   without waiting on the socket.
   Returns the number of bytes written (buflen) or -1 after printing the TLS
   error to stderr. */
317 ssize_t
318 tls_writebuf(const char *buf, size_t buflen)
319 {
320 const char *errstr;
321 const char *p;
322 size_t len;
323 ssize_t r, written = 0;
324
/* p and len track the part of buf that still has to be written */
325 for (len = buflen, p = buf; len > 0; ) {
326 r = tls_write(t, p, len);
327 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
328 continue;
329 } else if (r == -1) {
330 errstr = tls_error(t);
331 fprintf(stderr, "tls_write: %s\n", errstr ? errs…
332 return -1;
333 }
334 p += r;
335 len -= r;
336 written += r;
337 }
338 return written;
339 }
340
/* Read from the TLS context t into buf until bufsiz bytes were stored or
   tls_read() returns 0. TLS_WANT_POLLIN and TLS_WANT_POLLOUT results are
   retried at once without waiting on the socket.
   Returns the number of bytes stored in buf, which is less than bufsiz only
   when tls_read() returned 0, or -1 after printing the TLS error to
   stderr. */
341 ssize_t
342 tls_readbuf(char *buf, size_t bufsiz)
343 {
344 const char *errstr;
345 ssize_t r, len;
346
/* len is the fill level of buf, bufsiz the room that is left */
347 for (len = 0; bufsiz > 0;) {
348 r = tls_read(t, buf + len, bufsiz);
349 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
350 continue;
351 } else if (r == 0) {
352 break;
353 } else if (r == -1) {
354 errstr = tls_error(t);
355 fprintf(stderr, "tls_read: %s\n", errstr ? errst…
356 return -1;
357 }
358 len += r;
359 bufsiz -= r;
360 }
361 return len;
362 }
363
364 ssize_t
365 plain_writebuf(const char *buf, size_t buflen)
366 {
367 ssize_t r;
368
369 if ((r = write(sock, buf, buflen)) == -1)
370 fprintf(stderr, "write: %s\n", strerror(errno));
371 return r;
372 }
373
374 ssize_t
375 plain_readbuf(char *buf, size_t bufsiz)
376 {
377 ssize_t r, len;
378
379 for (len = 0; bufsiz > 0;) {
380 r = read(sock, buf + len, bufsiz);
381 if (r == 0) {
382 break;
383 } else if (r == -1) {
384 fprintf(stderr, "read: %s\n", strerror(errno));
385 return -1;
386 }
387 len += r;
388 bufsiz -= r;
389 }
390 return len;
391 }
392
/*
 * Request the parsed URL u over HTTP and copy the response body to stdout.
 *
 * Sends "GET <path>[?<query>] HTTP/1.0" with a Host header,
 * "Connection: close" and the raw config_headers (when not empty) through
 * writebuf(), then reads the response through readbuf(). The first read
 * has to contain the complete response header: it is searched for the
 * empty line that ends the header, checked for a 200 status line and
 * passed to parse_content_length().
 *
 * With a 200 status the body is written to stdout. Otherwise the response
 * header is printed to stderr and the rest of the response is read but not
 * written. When config_maxresponsesiz is not 0 it limits the number of
 * bytes received, header included. When a Content-Length was found, a
 * received body of a different size is an error.
 *
 * Returns 0 on success, 1 on an error after a 200 status was seen and 2
 * when no 200 status was seen, which includes failures that happen before
 * the status line is read.
 */
393 int
394 http_request(void)
395 {
396 char buf[READ_BUF_SIZ], *p;
397 size_t bodylen, expectedlen, n, len;
398 ssize_t r;
399 int cs, httpok = 0, ret = 1, stdport;
400
/* when stdport is set the port is left out of the Host header below;
   t is only assigned by setup_tls(), so it selects which default port the
   URL port is compared with */
401 stdport = u.port[0] == '\0' || strcmp(u.port, t ? "443" : "80") …
402
403 /* create and send HTTP header */
404 r = snprintf(buf, sizeof(buf),
405 "GET %s%s%s HTTP/1.0\r\n"
406 "Host: %s%s%s\r\n"
407 "Connection: close\r\n"
408 "%s%s"
409 "\r\n",
410 u.path[0] ? u.path : "/",
411 u.query[0] ? "?" : "", u.query,
412 u.host,
413 stdport ? "" : ":",
414 stdport ? "" : u.port,
415 config_headers, config_headers[0] ? "\r\n" : "");
/* snprintf() returns the untruncated length: a value of sizeof(buf) or
   more means that the request did not fit */
416 if (r < 0 || (size_t)r >= sizeof(buf)) {
417 fprintf(stderr, "not writing header because it is trunca…
418 goto err;
419 }
420
421 if ((r = writebuf(buf, r)) == -1)
422 goto err;
423
424 /* NOTE: HTTP header must fit in the buffer, buffer size is -1 t…
425 terminate the buffer for a string comparison. */
426 if ((r = readbuf(buf, sizeof(buf) - 1)) == -1)
427 goto err;
428 len = r;
429 buf[len] = '\0'; /* NUL terminate buffer */
430
431 if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) …
432 !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
433 httpok = 1;
434
435 if (!(p = strstr(buf, "\r\n\r\n"))) {
436 fprintf(stderr, "no HTTP header found or header too big\…
437 goto err;
438 }
439 *p = '\0'; /* NUL terminate header part */
440 cs = parse_content_length(buf, &expectedlen);
441 p += strlen("\r\n\r\n");
442 bodylen = len - (p - buf); /* (partial) body after header */
443
444 if (httpok) {
445 n = len - (p - buf);
446 fwrite(p, 1, n, stdout);
447 if (ferror(stdout)) {
448 fprintf(stderr, "fwrite: stdout: %s\n", strerror…
449 goto err;
450 }
451 } else {
452 /* if not 200 OK print header */
453 fputs(buf, stderr);
454 fputs("\r\n\r\n", stderr);
455 /* NOTE: we are nice and keep reading (not closing) unti…
456 }
457
458 while ((r = readbuf(buf, sizeof(buf))) > 0) {
459 len += r;
460 bodylen += r;
461
462 if (httpok) {
463 fwrite(buf, 1, r, stdout);
464 if (ferror(stdout)) {
465 fprintf(stderr, "fwrite: stdout: %s\n", …
466 goto err;
467 }
468 }
469
470 if (config_maxresponsesiz && len >= config_maxresponsesi…
471 break;
472 }
473 if (r == -1)
474 goto err;
475 if (config_maxresponsesiz && len >= config_maxresponsesiz) {
476 fprintf(stderr, "response too big: %zu >= %zu\n",
477 len, config_maxresponsesiz);
478 goto err;
479 }
480 if (cs != -1 && expectedlen != bodylen) {
481 fprintf(stderr, "Content-Length mismatch: %zu expected !…
482 expectedlen, bodylen);
483 goto err;
484 }
485 ret = 0;
486
487 err:
488 return httpok ? ret : 2;
489 }
490
/*
 * Request the parsed URL u over gopher and copy the response to stdout.
 *
 * The selector that is sent is the URL path without its leading "/" and
 * without the one-character item type that follows it, then "?" and the
 * query when there is one, terminated by CRLF. Everything that readbuf()
 * delivers is written to stdout until it returns 0. When
 * config_maxresponsesiz is not 0 it limits the number of bytes received.
 *
 * Returns 0 on success and 1 on any error.
 */
491 int
492 gopher_request(void)
493 {
494 char buf[READ_BUF_SIZ];
495 const char *path;
496 size_t len = 0;
497 ssize_t r;
498 int ret = 1;
499
500 /* create and send path, skip type part, empty path is allowed,
501 see RFC 4266 The gopher URI Scheme - section 2.1 */
502 path = u.path;
503 if (*path == '/') {
504 path++;
505 if (*path)
506 path++; /* skip type */
507 }
508
509 r = snprintf(buf, sizeof(buf), "%s%s%s\r\n",
510 path, u.query[0] ? "?" : "", u.query);
/* snprintf() returns the untruncated length: a value of sizeof(buf) or
   more means that the request did not fit */
511 if (r < 0 || (size_t)r >= sizeof(buf)) {
512 fprintf(stderr, "not writing header because it is trunca…
513 goto err;
514 }
515
516 if ((r = writebuf(buf, r)) == -1)
517 goto err;
518
519 while ((r = readbuf(buf, sizeof(buf))) > 0) {
520 len += r;
521
522 fwrite(buf, 1, r, stdout);
523 if (ferror(stdout)) {
524 fprintf(stderr, "fwrite: stdout: %s\n", strerror…
525 goto err;
526 }
527
528 if (config_maxresponsesiz && len >= config_maxresponsesi…
529 break;
530 }
531 if (r == -1)
532 goto err;
533
534 if (config_maxresponsesiz && len >= config_maxresponsesiz) {
535 fprintf(stderr, "response too big: %zu >= %zu\n",
536 len, config_maxresponsesiz);
537 goto err;
538 }
539 ret = 0;
540
541 err:
542 return ret;
543 }
544
545 void
546 usage(void)
547 {
548 fprintf(stderr, "usage: %s [-c ca_file] [-H headers] [-l] "
549 "[-m maxresponse] [-t timeout] url\n", argv0);
550 exit(1);
551 }
552
/*
 * Parse the options and the URL, connect and run the matching protocol
 * handler.
 *
 * Options: -c CA file, -H raw header(s), -l legacy ciphers, -m maximum
 * response size, -t time-out in seconds. Exactly one argument, the URL,
 * has to remain after the options; anything else prints the usage.
 *
 * A URL that does not parse or that has a userinfo part is refused. A
 * time-out greater than 0 installs sighandler() for SIGALRM and arms
 * alarm(), so the process exits with status 2 when the time is up. The
 * handlers table is then searched for the entry whose proto equals the
 * URL scheme: the default port of the entry is used when the URL has
 * none, the connection is set up with or without TLS as the entry
 * requests, the process is restricted with pledge() to "stdio" and the
 * handler is called. The return value of the handler becomes the exit
 * status after the TLS context and the socket are closed. An unsupported
 * or missing scheme is a fatal error with exit status 1.
 */
553 int
554 main(int argc, char **argv)
555 {
556 char *end;
557 size_t i;
558 int statuscode;
559 long long l;
560
561 ARGBEGIN {
/* TLS CA file */
562 case 'c':
563 config_ca_file = EARGF(usage());
564 break;
565 case 'H': /* header(s) */
566 config_headers = EARGF(usage());
567 break;
568 case 'l': /* legacy ciphers */
569 config_legacy = 1;
570 break;
/* a complete decimal number that is not negative, 0 means unlimited */
571 case 'm': /* max filesize */
572 errno = 0;
573 l = strtoll(EARGF(usage()), &end, 10);
574 if (errno || *end != '\0' || l < 0)
575 usage();
576 config_maxresponsesiz = l;
577 break;
/* a complete decimal number from 0 to 65534, 0 leaves the alarm unarmed */
578 case 't': /* timeout */
579 errno = 0;
580 l = strtoll(EARGF(usage()), &end, 10);
581 if (errno || *end != '\0' || l < 0 || l >= 65535)
582 usage();
583 config_timeout = l;
584 break;
585 default:
586 usage();
587 } ARGEND
588
/* exactly one argument, the URL, must be left */
589 if (argc != 1)
590 usage();
591
592 url = argv[0];
593 if (uri_parse(url, &u) == -1)
594 errx(1, "invalid URL: %s", url);
595 if (u.userinfo[0])
596 errx(1, "userinfo field not supported in the URL: %s", u…
597
598 if (config_timeout > 0) {
599 signal(SIGALRM, sighandler);
600 alarm(config_timeout);
601 }
602
603 /* match a protocol handler */
604 for (i = 0; i < sizeof(handlers) / sizeof(*handlers); i++) {
605 if (strcmp(u.proto, handlers[i].proto))
606 continue;
607 if (!u.port[0])
608 strcpy(u.port, handlers[i].port); /* default por…
609
610 /* setup TLS or plain connection */
611 if (handlers[i].usetls) {
612 setup_tls();
613 readbuf = tls_readbuf;
614 writebuf = tls_writebuf;
615 } else {
616 setup_plain();
617 readbuf = plain_readbuf;
618 writebuf = plain_writebuf;
619 }
620
621 if (pledge("stdio", NULL) == -1)
622 err(1, "pledge");
623
624 statuscode = handlers[i].handler();
625
626 /* cleanup TLS and plain connection */
627 if (t) {
628 tls_close(t);
629 tls_free(t);
630 }
631 if (sock != -1)
632 close(sock);
633
634 return statuscode;
635 }
636 if (u.proto[0])
637 errx(1, "unsupported protocol specified: %s", u.proto);
638 else
639 errx(1, "no protocol specified");
640
641 return 1;
642 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.