GopherProxy

	feed.c - frontends - front-ends for some sites (experiment)
	Log
	Files
	Refs
	README
	LICENSE
	---
	feed.c (29885B)
	---
	1 #include <err.h>
	2 #include <errno.h>
	3 #include <stdint.h>
	4 #include <stdio.h>
	5 #include <stdlib.h>
	6 #include <string.h>
	7 #include <strings.h>
	8 #include <time.h>
	9 #include <unistd.h>
	10
	11 #include "https.h"
	12 #include "util.h"
	13 #include "youtube.h"
	14 #include "xml.h"
	15
	/* True when ctx.iscontent is set and ctx.iscontenttag is not: the parser
	 * callbacks use this to ignore everything nested in a content field. */
	#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag))
	/* True when ctx.iscontenttag is set but ctx.iscontent is not (yet). */
	#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)

	/* string and byte-length: expands to TWO comma-separated expressions, the
	 * string literal itself and its length without the terminating NUL. */
	#define STRP(s) s,sizeof(s)-1
	21
	/* Type of the feed that is currently being parsed. FeedTypeNone means no
	 * item/entry has been opened yet. */
	enum FeedType {
		FeedTypeNone = 0,
		FeedTypeAtom = 2
	};
	26
	/* String data / memory pool */
	typedef struct string {
		char   *data;   /* data */
		size_t  len;    /* string length */
		size_t  bufsiz; /* allocated size */
	} String;
	33
	/* NOTE: the order of these fields (content, date, author) indicate the
	 *       priority to use them, from least important to high. */
	enum TagId {
		TagUnknown = 0,
		/* Atom */
		/* creation date has higher priority */
		AtomTagPublished,
		AtomTagTitle,
		AtomTagMediaDescription,
		AtomTagId,
		AtomTagLink,
		AtomTagLinkAlternate,
		AtomTagAuthor, AtomTagAuthorName,
		TagYoutubeVideoId,
		TagLast
	};
	50
	51 typedef struct feedtag {
	52 char name; / name of tag to match */
	53 size_t len; /* len of `name` */
	54 enum TagId id; /* unique ID */
	55 } FeedTag;
	56
	57 typedef struct field {
	58 String str;
	59 enum TagId tagid; /* tagid set previously, used for tag priority…
	60 } FeedField;
	61
	/* Indices into FeedContext.fields[]. */
	enum {
		/* sfeed fields */
		FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
		FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
		FeedFieldYoutubeId, /* yt:videoId */
		FeedFieldLast
	};
	69
	70 typedef struct feedcontext {
	71 String field; / current FeedItem field String …
	72 FeedField fields[FeedFieldLast]; /* data for current item…
	73 FeedTag tag; /* unique current parsed tag */
	74 int iscontent; /* in content data */
	75 int iscontenttag; /* in content tag */
	76 enum FeedType feedtype;
	77 } FeedContext;
	78
	79 static long long datetounix(long long, int, int, int, int, int);
	80 static FeedTag * gettag(enum FeedType, const char *, size_t);
	81 static long gettzoffset(const char *);
	82 static int isattr(const char , size_t, const char , size_t);
	83 static int istag(const char , size_t, const char , size_t);
	84 static int parsetime(const char , long long );
	85
	86 static void atom_header(void);
	87 static void atom_item(void);
	88 static void atom_footer(void);
	89 static void gph_header(void);
	90 static void gph_footer(void);
	91 static void html_header(void);
	92 static void html_footer(void);
	93 static void json_header(void);
	94 static void json_item(void);
	95 static void json_footer(void);
	96 static void sfeed_item(void); /* TSV / sfeed */
	97 static void twtxt_item(void);
	98
	99 static void string_append(String , const char , size_t);
	100 static void string_buffer_realloc(String *, size_t);
	101 static void string_clear(String *);
	102 static void string_print_encoded(String *);
	103 static void string_print_timestamp(String *);
	104 static void string_print(String *);
	105 static void xmlattr(XMLParser , const char , size_t, const char *, siz…
	106 const char *, size_t);
	107 static void xmlattrentity(XMLParser , const char , size_t, const char …
	108 size_t, const char *, size_t);
	109 static void xmlattrstart(XMLParser , const char , size_t, const char *,
	110 size_t);
	111 static void xmldata(XMLParser , const char , size_t);
	112 static void xmldataentity(XMLParser , const char , size_t);
	113 static void xmltagend(XMLParser , const char , size_t, int);
	114 static void xmltagstart(XMLParser , const char , size_t);
	115 static void xmltagstartparsed(XMLParser , const char , size_t, int);
	116
	117 /* Atom, must be alphabetical order */
	118 static const FeedTag atomtags[] = {
	119 { STRP("author"), AtomTagAuthor },
	120 { STRP("id"), AtomTagId },
	121 /* Atom: <link href="" />, RSS has <link></link> */
	122 { STRP("link"), AtomTagLink },
	123 { STRP("media:description"), AtomTagMediaDescription },
	124 { STRP("published"), AtomTagPublished },
	125 { STRP("title"), AtomTagTitle },
	126 { STRP("yt:videoId"), TagYoutubeVideoId }
	127 };
	128
	129 /* special case: nested <author><name> */
	130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
	131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorNa…
	132
	133 /* reference to no / unknown tag */
	134 static const FeedTag notag = { STRP(""), TagUnknown };
	135
	136 /* map TagId type to RSS/Atom field, all tags must be defined */
	137 static const int fieldmap[TagLast] = {
	138 [TagUnknown] = -1,
	139 /* Atom */
	140 [AtomTagPublished] = FeedFieldTime,
	141 [AtomTagTitle] = FeedFieldTitle,
	142 [AtomTagMediaDescription] = FeedFieldContent,
	143 [AtomTagId] = FeedFieldId,
	144 [AtomTagLink] = -1,
	145 [AtomTagLinkAlternate] = FeedFieldLink,
	146 [AtomTagAuthor] = -1,
	147 [AtomTagAuthorName] = FeedFieldAuthor,
	148 [TagYoutubeVideoId] = FeedFieldYoutubeId
	149 };
	150
	151 static const int FieldSeparator = '\t';
	152
	153 static FeedContext ctx;
	154 static XMLParser parser; /* XML parser state */
	155 static String attrrel, tmpstr;
	156
	157 static struct search_response *search_res = NULL;
	158 static void (*printfields)(void) = sfeed_item;
	159 static int cgimode = 0, godmode = 0;
	160 static const char server_name = "127.0.0.1", server_port = "70";
	161
	162 static int
	163 tagcmp(const void v1, const void v2)
	164 {
	165 return strcasecmp(((FeedTag )v1)->name, ((FeedTag )v2)->name);
	166 }
	167
	168 /* Unique tagid for parsed tag name. */
	169 static FeedTag *
	170 gettag(enum FeedType feedtype, const char *name, size_t namelen)
	171 {
	172 FeedTag f, *r = NULL;
	173
	174 f.name = (char *)name;
	175
	176 switch (feedtype) {
	177 case FeedTypeAtom:
	178 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atom…
	179 sizeof(atomtags[0]), tagcmp);
	180 break;
	181 default:
	182 break;
	183 }
	184
	185 return r;
	186 }
	187
	188 /* Clear string only; don't free, prevents unnecessary reallocation. */
	189 static void
	190 string_clear(String *s)
	191 {
	192 if (s->data)
	193 s->data[0] = '\0';
	194 s->len = 0;
	195 }
	196
	197 static void
	198 string_buffer_realloc(String *s, size_t newlen)
	199 {
	200 size_t alloclen;
	201
	202 if (newlen > SIZE_MAX / 2) {
	203 alloclen = SIZE_MAX;
	204 } else {
	205 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
	206 ;
	207 }
	208 if (!(s->data = realloc(s->data, alloclen)))
	209 err(1, "realloc");
	210 s->bufsiz = alloclen;
	211 }
	212
	213 /* Append data to String, s->data and data may not overlap. */
	214 static void
	215 string_append(String s, const char data, size_t len)
	216 {
	217 if (!len)
	218 return;
	219
	220 if (s->len >= SIZE_MAX - len) {
	221 errno = ENOMEM;
	222 err(1, "realloc");
	223 }
	224
	225 /* check if allocation is necessary, never shrink the buffer. */
	226 if (s->len + len >= s->bufsiz)
	227 string_buffer_realloc(s, s->len + len + 1);
	228 memcpy(s->data + s->len, data, len);
	229 s->len += len;
	230 s->data[s->len] = '\0';
	231 }
	232
	233 /* Print text, encode TABs, newlines and '\', remove other whitespace.
	234 * Remove leading and trailing whitespace. */
	235 static void
	236 string_print_encoded(String *s)
	237 {
	238 const char p, e;
	239
	240 if (!s->data \|\| !s->len)
	241 return;
	242
	243 p = s->data;
	244 e = p + strlen(p);
	245
	246 for (; *p && p != e; p++) {
	247 switch (*p) {
	248 case '\n': putchar('\\'); putchar('n'); break;
	249 case '\\': putchar('\\'); putchar('\\'); break;
	250 case '\t': putchar('\\'); putchar('t'); break;
	251 default:
	252 /* ignore control chars */
	253 if (!ISCNTRL((unsigned char)*p))
	254 putchar(*p);
	255 break;
	256 }
	257 }
	258 }
	259
	260 /* Print text, replace TABs, carriage return and other whitespace with '…
	261 * Other control chars are removed. Remove leading and trailing whitespa…
	262 static void
	263 string_print(String *s)
	264 {
	265 char p, e;
	266
	267 if (!s->data \|\| !s->len)
	268 return;
	269
	270 p = s->data;
	271 e = p + s->len;
	272 for (; *p && p != e; p++) {
	273 if (ISSPACE((unsigned char)*p))
	274 putchar(' '); /* any whitespace to space */
	275 else if (!ISCNTRL((unsigned char)*p))
	276 /* ignore other control chars */
	277 putchar(*p);
	278 }
	279 }
	280
	281 /* Print as UNIX timestamp, print nothing if the time is empty or invali…
	282 static void
	283 string_print_timestamp(String *s)
	284 {
	285 long long t;
	286
	287 if (!s->data \|\| !s->len)
	288 return;
	289
	290 if (parsetime(s->data, &t) != -1)
	291 printf("%lld", t);
	292 }
	293
	/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
	   Parameters should be passed as they are in a struct tm:
	   that is: year = year - 1900, month = month - 1.
	   `mon` is used as an array index and must be in the range 0-11; the
	   other fields are not range-checked here. */
	static long long
	datetounix(long long year, int mon, int day, int hour, int min, int sec)
	{
		/* seconds in a month in a regular (non-leap) year */
		static const long secs_through_month[] = {
			0, 31 * 86400, 59 * 86400, 90 * 86400,
			120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
			243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
		int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
		long long t;

		/* optimization: handle common range year 1902 up to and including 2038.
		   The unsigned subtraction makes years below 1902 wrap around to a
		   huge value so one comparison checks both bounds. */
		if (year - 2ULL <= 136) {
			/* amount of leap days relative to 1970: every 4 years */
			leaps = (year - 68) >> 2;
			if (!((year - 68) & 3)) {
				/* the leap day of this year is added below, per month */
				leaps--;
				is_leap = 1;
			} else {
				is_leap = 0;
			}
			t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
		} else {
			/* general leap year calculation:
			   leap years occur mostly every 4 years but every 100 years
			   a leap year is skipped unless the year is divisible by 400 */
			cycles = (year - 100) / 400;
			rem = (year - 100) % 400;
			if (rem < 0) {
				cycles--;
				rem += 400;
			}
			if (!rem) {
				is_leap = 1;
			} else {
				if (rem >= 300)
					centuries = 3, rem -= 300;
				else if (rem >= 200)
					centuries = 2, rem -= 200;
				else if (rem >= 100)
					centuries = 1, rem -= 100;
				if (rem) {
					leaps = rem / 4U;
					rem %= 4U;
					is_leap = !rem;
				}
			}
			leaps += (97 * cycles) + (24 * centuries) - is_leap;

			/* adjust 8 leap days from 1970 up to and including 2000:
			   ((30 * 365) + 8) * 86400 = 946771200 */
			t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
		}
		t += secs_through_month[mon];
		if (is_leap && mon >= 2)
			t += 86400;
		t += 86400LL * (day - 1);
		t += 3600LL * hour;
		t += 60LL * min;
		t += sec;

		return t;
	}
	360
	/* Get timezone from string, return time offset in seconds from UTC.
	 * Only numeric offsets of the form "+HH:MM", "+HHMM", "-HH:MM" or "-HHMM"
	 * are interpreted (fewer digits are accepted too). Anything else, such as
	 * a timezone name or "Z", yields 0. */
	static long
	gettzoffset(const char *s)
	{
		const char *p;
		long tzhour = 0, tzmin = 0;
		size_t i;

		switch (*s) {
		case '-': /* offset */
		case '+':
			/* up to two digits for the hours */
			for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
				tzhour = (tzhour * 10) + (*p - '0');
			if (*p == ':')
				p++;
			/* up to two digits for the minutes */
			for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
				tzmin = (tzmin * 10) + (*p - '0');
			return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
		default: /* timezone name */
			break;
		}
		return 0;
	}
	384
	/* Parse time string `s` into the UNIX timestamp `tp`.
	   Returns 0 on success or -1 on failure.
	   The string must start with a four-digit year. Whatever follows the
	   parsed fields is handed to gettzoffset() and its result is subtracted,
	   so the stored timestamp is in UTC. */
	static int
	parsetime(const char *s, long long *tp)
	{
		int va[6] = { 0 }, i, v, vi;

		/* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
		if (!ISDIGIT((unsigned char)s[0]) ||
		    !ISDIGIT((unsigned char)s[1]) ||
		    !ISDIGIT((unsigned char)s[2]) ||
		    !ISDIGIT((unsigned char)s[3]))
			return -1;

		/* parse time parts (and possibly remaining date parts):
		   va[] = year, month, day, hour, minute, second */
		for (vi = 0; *s && vi < 6; vi++) {
			/* the year has up to 4 digits, every other part up to 2 */
			for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
			                   ISDIGIT((unsigned char)*s); s++, i++) {
				v = (v * 10) + (*s - '0');
			}
			va[vi] = v;

			/* skip one separator: '-' in the date, 'T' or whitespace
			   between date and time, ':' in the time */
			if ((vi < 2 && *s == '-') ||
			    (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
			    (vi > 2 && *s == ':'))
				s++;
		}

		/* invalid range */
		if (va[0] < 0 || va[0] > 9999 ||
		    va[1] < 1 || va[1] > 12 ||
		    va[2] < 1 || va[2] > 31 ||
		    va[3] < 0 || va[3] > 23 ||
		    va[4] < 0 || va[4] > 59 ||
		    va[5] < 0 || va[5] > 60) /* allow leap second */
			return -1;

		*tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
		      gettzoffset(s);

		return 0;
	}
	427
	/* Write the XML declaration, the opening <feed> element and the feed
	 * title of the Atom output to stdout. */
	static void
	atom_header(void)
	{
		fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
		      "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
		      "\t<title>Newsfeed</title>\n", stdout);
	}
	435
	/* Close the <feed> element opened by atom_header(). */
	static void
	atom_footer(void)
	{
		fputs("</feed>\n", stdout);
	}
	441
	442 static void
	443 atom_item(void)
	444 {
	445 struct item v, found = NULL;
	446 size_t i;
	447
	448 /* must have a video id */
	449 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	450 return;
	451
	452 for (i = 0; i < search_res->nitems; i++) {
	453 v = &(search_res->items[i]);
	454 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	455 found = v;
	456 }
	457 /* Only print the video if it was found in the feed aswell.
	458 This way it filters away shorts too. */
	459 if (!found)
	460 return;
	461
	462 fputs("<entry>\n\t<title>", stdout);
	463 xmlencode(ctx.fields[FeedFieldTitle].str.data);
	464 if (found->duration[0]) {
	465 fputs(" [", stdout);
	466 xmlencode(found->duration);
	467 fputs("]", stdout);
	468 }
	469 fputs("</title>\n", stdout);
	470 if (ctx.fields[FeedFieldLink].str.len) {
	471 fputs("\t<link rel=\"alternate\" href=\"", stdout);
	472 xmlencode(ctx.fields[FeedFieldLink].str.data);
	473 fputs("\" />\n", stdout);
	474 }
	475 /* prefer link over id for Atom <id>. */
	476 fputs("\t<id>", stdout);
	477 if (ctx.fields[FeedFieldLink].str.len)
	478 xmlencode(ctx.fields[FeedFieldLink].str.data);
	479 else if (ctx.fields[FeedFieldId].str.len)
	480 xmlencode(ctx.fields[FeedFieldId].str.data);
	481 fputs("</id>\n", stdout);
	482
	483 /* just print the original timestamp, it should conform */
	484 fputs("\t<updated>", stdout);
	485 string_print(&ctx.fields[FeedFieldTime].str);
	486 fputs("</updated>\n", stdout);
	487
	488 if (ctx.fields[FeedFieldAuthor].str.len) {
	489 fputs("\t<author><name>", stdout);
	490 xmlencode(ctx.fields[FeedFieldAuthor].str.data);
	491 fputs("</name></author>\n", stdout);
	492 }
	493 if (ctx.fields[FeedFieldContent].str.len) {
	494 fputs("\t<content>", stdout);
	495 xmlencode(ctx.fields[FeedFieldContent].str.data);
	496 fputs("</content>\n", stdout);
	497 }
	498 fputs("</entry>\n", stdout);
	499 }
	500
	501
	/* Write the start of the HTML output to stdout: doctype, <head> with a
	 * Content-Type <meta> element, and the opening <body><pre>.
	 * NOTE(review): the <meta> string literal is cut off ("…") in this
	 * listing; restore it from the upstream file before compiling. */
	502 static void
	503 html_header(void)
	504 {
	505 fputs("<!DOCTYPE HTML>\n"
	506 "<html>\n"
	507 "<head>\n"
	508 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=…
	509 "</head>\n"
	510 "<body><pre>\n", stdout);
	511 }
	512
	/* Close the <pre>, <body> and <html> elements opened by html_header(). */
	static void
	html_footer(void)
	{
		fputs("</pre></body>\n</html>\n", stdout);
	}
	518
	519 static void
	520 html_item(void)
	521 {
	522 struct item v, found = NULL;
	523 size_t i;
	524
	525 /* must have a video id */
	526 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	527 return;
	528
	529 for (i = 0; i < search_res->nitems; i++) {
	530 v = &(search_res->items[i]);
	531 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	532 found = v;
	533 }
	534 /* Only print the video if it was found in the feed aswell.
	535 This way it filters away shorts too. */
	536 if (!found)
	537 return;
	538
	539 /* just print the original timestamp, it should conform */
	540 xmlencode(ctx.fields[FeedFieldTime].str.data);
	541 fputs(" ", stdout);
	542
	543 if (ctx.fields[FeedFieldLink].str.len) {
	544 fputs("<a href=\"", stdout);
	545 xmlencode(ctx.fields[FeedFieldLink].str.data);
	546 fputs("\">", stdout);
	547 }
	548
	549 xmlencode(ctx.fields[FeedFieldTitle].str.data);
	550
	551 if (found->duration[0]) {
	552 fputs(" [", stdout);
	553 xmlencode(found->duration);
	554 fputs("]", stdout);
	555 }
	556 if (ctx.fields[FeedFieldLink].str.len) {
	557 fputs("</a>", stdout);
	558 }
	559 fputs("\n", stdout);
	560 }
	561
	/* Write the NUL-terminated string `s` to stdout through gophertext()
	 * (declared outside this file). */
	static void
	gphencode(const char *s)
	{
		gophertext(stdout, s, strlen(s));
	}
	567
	/* The Gopher output has no header: intentionally empty, it exists so
	 * every output format has a header/footer pair. */
	static void
	gph_header(void)
	{
	}
	572
	/* End the Gopher output with a line that holds a single ".". */
	static void
	gph_footer(void)
	{
		fputs(".\r\n", stdout);
	}
	578
	579 static void
	580 gph_item(void)
	581 {
	582 struct item v, found = NULL;
	583 size_t i;
	584
	585 /* must have a video id */
	586 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	587 return;
	588
	589 for (i = 0; i < search_res->nitems; i++) {
	590 v = &(search_res->items[i]);
	591 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	592 found = v;
	593 }
	594 /* Only print the video if it was found in the feed aswell.
	595 This way it filters away shorts too. */
	596 if (!found)
	597 return;
	598
	599 fputs("h", stdout);
	600 /* just print the original timestamp, it should conform */
	601 gphencode(ctx.fields[FeedFieldTime].str.data);
	602 fputs(" ", stdout);
	603 gphencode(ctx.fields[FeedFieldTitle].str.data);
	604 if (found->duration[0]) {
	605 fputs(" [", stdout);
	606 gphencode(found->duration);
	607 fputs("]", stdout);
	608 }
	609 fputs("\t", stdout);
	610 if (ctx.fields[FeedFieldLink].str.len) {
	611 fputs("URL:", stdout);
	612 gphencode(ctx.fields[FeedFieldLink].str.data);
	613 }
	614 printf("\t%s\t%s\r\n", server_name, server_port);
	615 }
	616
	/* Write the start of the JSON Feed document to stdout: the opening
	 * object, version and title members and the start of the "items" array. */
	static void
	json_header(void)
	{
		fputs("{\n"
		      "\"version\": \"https://jsonfeed.org/version/1.1\",\n"
		      "\"title\": \"Newsfeed\",\n"
		      "\"items\": [\n", stdout);
	}
	625
	/* Close the "items" array and the object opened by json_header(). */
	static void
	json_footer(void)
	{
		fputs("]\n}\n", stdout);
	}
	631
	/* Write the NUL-terminated string `s` to stdout as the inside of a JSON
	 * string (without the surrounding quotes): '\' and '"' are escaped with a
	 * backslash, bytes for which ISCNTRL() is true are written as \u00XX and
	 * everything else is written as-is. */
	static void
	json_printfield(const char *s)
	{
		for (; *s; s++) {
			if (*s == '\\')
				fputs("\\\\", stdout);
			else if (*s == '"')
				fputs("\\\"", stdout);
			else if (ISCNTRL((unsigned char)*s))
				printf("\\u00%02x", (unsigned char)*s);
			else
				putchar(*s);
		}
	}
	646
	647 static void
	648 json_item(void)
	649 {
	650 static int json_firstitem = 1;
	651 struct item v, found = NULL;
	652 size_t i;
	653
	654 /* must have a video id */
	655 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	656 return;
	657
	658 for (i = 0; i < search_res->nitems; i++) {
	659 v = &(search_res->items[i]);
	660 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	661 found = v;
	662 }
	663 /* Only print the video if it was found in the feed aswell.
	664 This way it filters away shorts too. */
	665 if (!found)
	666 return;
	667
	668 if (!json_firstitem)
	669 fputs(",\n", stdout);
	670 json_firstitem = 0;
	671
	672 fputs("{\n\t\"id\": \"", stdout);
	673 json_printfield(ctx.fields[FeedFieldId].str.data);
	674 fputs("\"", stdout);
	675
	676 /* just print the original timestamp, it should conform */
	677 fputs(",\n\t\"date_published\": \"", stdout);
	678 string_print(&ctx.fields[FeedFieldTime].str);
	679 fputs("\"", stdout);
	680
	681 fputs(",\n\t\"title\": \"", stdout);
	682 json_printfield(ctx.fields[FeedFieldTitle].str.data);
	683 if (found->duration[0]) {
	684 fputs(" [", stdout);
	685 json_printfield(found->duration);
	686 fputs("]", stdout);
	687 }
	688 fputs("\"", stdout);
	689
	690 if (ctx.fields[FeedFieldLink].str.len) {
	691 fputs(",\n\t\"url\": \"", stdout);
	692 json_printfield(ctx.fields[FeedFieldLink].str.data);
	693 fputs("\"", stdout);
	694 }
	695
	696 if (ctx.fields[FeedFieldAuthor].str.len) {
	697 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
	698 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
	699 fputs("\"}]", stdout);
	700 }
	701
	702 fputs(",\n\t\"content_text\": \"", stdout);
	703 json_printfield(ctx.fields[FeedFieldContent].str.data);
	704 fputs("\"\n}", stdout);
	705 }
	706
	707 static void
	708 sfeed_item(void)
	709 {
	710 struct item v, found = NULL;
	711 size_t i;
	712
	713 /* must have a video id */
	714 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	715 return;
	716
	717 for (i = 0; i < search_res->nitems; i++) {
	718 v = &(search_res->items[i]);
	719 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	720 found = v;
	721 }
	722 /* Only print the video if it was found in the feed aswell.
	723 This way it filters away shorts too. */
	724 if (!found)
	725 return;
	726
	727 string_print_timestamp(&ctx.fields[FeedFieldTime].str);
	728 putchar(FieldSeparator);
	729 string_print(&ctx.fields[FeedFieldTitle].str);
	730 if (found->duration[0]) {
	731 fputs(" [", stdout);
	732 fputs(found->duration, stdout);
	733 fputs("]", stdout);
	734 }
	735 putchar(FieldSeparator);
	736 string_print(&ctx.fields[FeedFieldLink].str);
	737 putchar(FieldSeparator);
	738 string_print_encoded(&ctx.fields[FeedFieldContent].str);
	739 putchar(FieldSeparator);
	740 fputs("plain", stdout);
	741 putchar(FieldSeparator);
	742 string_print(&ctx.fields[FeedFieldId].str);
	743 putchar(FieldSeparator);
	744 string_print(&ctx.fields[FeedFieldAuthor].str);
	745 putchar(FieldSeparator);
	746 /* no/empty enclosure */
	747 putchar(FieldSeparator);
	748 /* empty category */
	749 putchar('\n');
	750 }
	751
	752 static void
	753 twtxt_item(void)
	754 {
	755 struct item v, found = NULL;
	756 size_t i;
	757
	758 /* must have a video id */
	759 if (!ctx.fields[FeedFieldYoutubeId].str.len)
	760 return;
	761
	762 for (i = 0; i < search_res->nitems; i++) {
	763 v = &(search_res->items[i]);
	764 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
	765 found = v;
	766 }
	767 /* Only print the video if it was found in the feed aswell.
	768 This way it filters away shorts too. */
	769 if (!found)
	770 return;
	771
	772 string_print(&ctx.fields[FeedFieldTime].str);
	773 putchar(FieldSeparator);
	774 string_print(&ctx.fields[FeedFieldTitle].str);
	775 if (found->duration[0]) {
	776 fputs(" [", stdout);
	777 fputs(found->duration, stdout);
	778 fputs("]", stdout);
	779 }
	780 fputs(": ", stdout);
	781 string_print(&ctx.fields[FeedFieldLink].str);
	782 putchar('\n');
	783 }
	784
	/* Return non-zero when the two tag names have the same length and are
	 * equal ignoring case. Both names must be NUL-terminated. */
	static int
	istag(const char *name, size_t len, const char *name2, size_t len2)
	{
		return (len == len2 && !strcasecmp(name, name2));
	}
	790
	/* Return non-zero when the two attribute names have the same length and
	 * are equal ignoring case. Both names must be NUL-terminated. */
	static int
	isattr(const char *name, size_t len, const char *name2, size_t len2)
	{
		return (len == len2 && !strcasecmp(name, name2));
	}
	796
	797 static void
	798 xmlattr(XMLParser p, const char t, size_t tl, const char *n, size_t nl,
	799 const char *v, size_t vl)
	800 {
	801 if (ISINCONTENT(ctx))
	802 return;
	803
	804 if (!ctx.tag.id)
	805 return;
	806
	807 if (ISCONTENTTAG(ctx))
	808 return;
	809
	810 if (ctx.tag.id == AtomTagLink) {
	811 if (isattr(n, nl, STRP("rel"))) {
	812 string_append(&attrrel, v, vl);
	813 } else if (isattr(n, nl, STRP("href"))) {
	814 string_append(&tmpstr, v, vl);
	815 }
	816 }
	817 }
	818
	819 static void
	820 xmlattrentity(XMLParser p, const char t, size_t tl, const char *n, siz…
	821 const char *data, size_t datalen)
	822 {
	823 char buf[8];
	824 int len;
	825
	826 if (ISINCONTENT(ctx))
	827 return;
	828
	829 if (!ctx.tag.id)
	830 return;
	831
	832 /* try to translate entity, else just pass as data to
	833 * xmlattr handler. */
	834 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
	835 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
	836 else
	837 xmlattr(p, t, tl, n, nl, data, datalen);
	838 }
	839
	840 static void
	841 xmlattrstart(XMLParser p, const char t, size_t tl, const char *n, size…
	842 {
	843 if (ISINCONTENT(ctx))
	844 return;
	845
	846 if (attrrel.len && isattr(n, nl, STRP("rel")))
	847 string_clear(&attrrel);
	848 else if (tmpstr.len &&
	849 (isattr(n, nl, STRP("href")) \|\|
	850 isattr(n, nl, STRP("url"))))
	851 string_clear(&tmpstr); /* use the last value for multipl…
	852 }
	853
	854 static void
	855 xmldata(XMLParser p, const char s, size_t len)
	856 {
	857 if (!ctx.field)
	858 return;
	859
	860 string_append(ctx.field, s, len);
	861 }
	862
	863 static void
	864 xmldataentity(XMLParser p, const char data, size_t datalen)
	865 {
	866 char buf[8];
	867 int len;
	868
	869 if (!ctx.field)
	870 return;
	871
	872 /* try to translate entity, else just pass as data to
	873 * xmldata handler. */
	874 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
	875 xmldata(p, buf, (size_t)len);
	876 else
	877 xmldata(p, data, datalen);
	878 }
	879
	880 static void
	881 xmltagstart(XMLParser p, const char t, size_t tl)
	882 {
	883 const FeedTag *f;
	884
	885 if (ISINCONTENT(ctx))
	886 return;
	887
	888 /* start of RSS or Atom item / entry */
	889 if (ctx.feedtype == FeedTypeNone) {
	890 if (istag(t, tl, STRP("entry")))
	891 ctx.feedtype = FeedTypeAtom;
	892 return;
	893 }
	894
	895 /* field tagid already set or nested tags. */
	896 if (ctx.tag.id) {
	897 /* nested <author><name> for Atom */
	898 if (ctx.tag.id == AtomTagAuthor &&
	899 istag(t, tl, STRP("name"))) {
	900 memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ct…
	901 } else {
	902 return; /* other nested tags are not allowed: re…
	903 }
	904 }
	905
	906 /* in item */
	907 if (ctx.tag.id == TagUnknown) {
	908 if (!(f = gettag(ctx.feedtype, t, tl)))
	909 f = &notag;
	910 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
	911 }
	912
	913 ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
	914 string_clear(&attrrel);
	915 }
	916
	917 static void
	918 xmltagstartparsed(XMLParser p, const char t, size_t tl, int isshort)
	919 {
	920 enum TagId tagid;
	921
	922 if (ISINCONTENT(ctx))
	923 return;
	924
	925 /* set tag type based on its attribute value */
	926 if (ctx.tag.id == AtomTagLink) {
	927 /* empty or "alternate": other types could be
	928 "enclosure", "related", "self" or "via" */
	929 if (!attrrel.len \|\| isattr(attrrel.data, attrrel.len, ST…
	930 ctx.tag.id = AtomTagLinkAlternate;
	931 else
	932 ctx.tag.id = AtomTagLink; /* unknown */
	933 }
	934
	935 tagid = ctx.tag.id;
	936
	937 /* map tag type to field: unknown or lesser priority is ignored,
	938 when tags of the same type are repeated only the first is use…
	939 if (fieldmap[tagid] == -1 \|\|
	940 tagid <= ctx.fields[fieldmap[tagid]].tagid) {
	941 return;
	942 }
	943
	944 if (ctx.iscontenttag) {
	945 ctx.iscontent = 1;
	946 ctx.iscontenttag = 0;
	947 }
	948
	949 ctx.field = &(ctx.fields[fieldmap[tagid]].str);
	950 ctx.fields[fieldmap[tagid]].tagid = tagid;
	951
	952 /* clear field if it is overwritten (with a priority order) for …
	953 value, if the field can have multiple values then do not clea…
	954 string_clear(ctx.field);
	955 }
	956
	957 static void
	958 xmltagend(XMLParser p, const char t, size_t tl, int isshort)
	959 {
	960 size_t i;
	961
	962 if (ctx.feedtype == FeedTypeNone)
	963 return;
	964
	965 if (ISINCONTENT(ctx)) {
	966 /* not a closed content field */
	967 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
	968 return;
	969 } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)…
	970 /* matched tag end: close it */
	971 } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
	972 istag(t, tl, STRP("entry"))))) /* Atom */
	973 {
	974 /* end of Atom entry */
	975 printfields();
	976
	977 /* clear strings */
	978 for (i = 0; i < FeedFieldLast; i++) {
	979 string_clear(&ctx.fields[i].str);
	980 ctx.fields[i].tagid = TagUnknown;
	981 }
	982 /* allow parsing of Atom and RSS concatenated in one XML…
	983 ctx.feedtype = FeedTypeNone;
	984 } else {
	985 return; /* not end of field */
	986 }
	987
	988 /* temporary string: for fields that cannot be processed
	989 directly and need more context, for example by its tag
	990 attributes, like the Atom link rel="alternate\|enclosure". */
	991 if (tmpstr.len && ctx.field) {
	992 string_clear(ctx.field);
	993 string_append(ctx.field, tmpstr.data, tmpstr.len);
	994 }
	995
	996 /* close field */
	997 string_clear(&tmpstr); /* reuse and clear temporary string */
	998
	999 if (ctx.tag.id == AtomTagAuthorName)
	1000 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* …
	1001 else
	1002 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
	1003
	1004 ctx.iscontent = 0;
	1005 ctx.field = NULL;
	1006 }
	1007
	/* Request the videos feed of the channel `channelid` from
	 * www.youtube.com through request() (declared outside this file).
	 * Returns whatever request() returns, or NULL when the request path
	 * does not fit in the local buffer. */
	static char *
	request_channel_feed(const char *channelid)
	{
		char path[2048];
		int r;

		r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
		/* check if request is too long (truncation) */
		if (r < 0 || (size_t)r >= sizeof(path))
			return NULL;

		return request("www.youtube.com", path, "");
	}
	1021
	/* Return non-zero when `s` looks like a channel id: exactly 24
	 * characters, each of them accepted by ISALPHA() or ISDIGIT() or equal
	 * to '-' or '_'. */
	int
	isvalidchannel(const char *s)
	{
		size_t len;

		for (len = 0; *s; s++, len++) {
			if (ISALPHA((unsigned char)*s) ||
			    ISDIGIT((unsigned char)*s) ||
			    *s == '-' || *s == '_')
				continue;
			return 0;
		}

		return *s == '\0' && len == 24;
	}
	1037
	/* Print a "bad request" / usage message and terminate the process.
	 * In CGI mode (cgimode set) the message goes to stdout: as two Gopher
	 * lines of type '3' when godmode is set, else as a "400 Bad Request"
	 * response, and the process exits with status 0. Outside CGI mode a
	 * command-line usage text is printed and the exit status is 1.
	 * NOTE(review): several string literals and argument lists are cut off
	 * ("…") in this listing; restore them from the upstream file before
	 * compiling. */
	1038 void
	1039 usage(void)
	1040 {
	1041 const char *line1 = "Bad Request, path should be the channel id …
	1042 const char *line2 = "Supported extensions are: [atom\|gph\|html\|js…
	1043
	1044 if (cgimode) {
	1045 if (godmode) {
	1046 printf("3%s\tErr\t%s\t%s\r\n", line1, server_nam…
	1047 printf("3%s\tErr\t%s\t%s\r\n", line2, server_nam…
	1048 } else {
	1049 fputs("Status: 400 Bad Request\r\n", stdout);
	1050 fputs("Content-Type: text/plain; charset=utf-8\r…
	1051 printf("400 %s\n", line1);
	1052 printf("\n%s", line2);
	1053 }
	1054 exit(0);
	1055 } else {
	1056 fputs("usage: feed <channelid> [atom\|gph\|html\|json\|tsv\|t…
	1057 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n"…
	1058 exit(1);
	1059 }
	1060 }
	1061
	1062 int
	1063 main(int argc, char *argv[])
	1064 {
	1065 char buf[256];
	1066 const char *channelid = NULL;
	1067 char data, format = "tsv", p, path = NULL, *tmp;
	1068 size_t i;
	1069
	1070 if (pledge("stdio dns inet rpath unveil", NULL) == -1)
	1071 err(1, "pledge");
	1072
	1073 if ((tmp = getenv("REQUEST_URI")))
	1074 path = tmp;
	1075 else if ((tmp = getenv("REQUEST")))
	1076 path = tmp;
	1077
	1078 if (path) {
	1079 cgimode = 1;
	1080
	1081 if ((tmp = getenv("SERVER_NAME")))
	1082 server_name = tmp;
	1083 if ((tmp = getenv("SERVER_PORT")))
	1084 server_port = tmp;
	1085 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "go…
	1086 godmode = 1;
	1087
	1088 strlcpy(buf, path, sizeof(buf));
	1089 path = buf;
	1090
	1091 if (!(p = strrchr(path, '/')))
	1092 usage();
	1093
	1094 channelid = p + 1;
	1095 if ((p = strrchr(channelid, '.'))) {
	1096 p = '\0'; / NULL terminate */
	1097 format = p + 1;
	1098 }
	1099 } else {
	1100 if (argc <= 1)
	1101 usage();
	1102
	1103 channelid = argv[1];
	1104 if (argc > 2)
	1105 format = argv[2];
	1106 }
	1107 if (!channelid \|\| !isvalidchannel(channelid))
	1108 usage();
	1109
	1110 if (!strcmp(format, "atom") \|\| !strcmp(format, "xml"))
	1111 printfields = atom_item;
	1112 else if (!strcmp(format, "gph"))
	1113 printfields = gph_item;
	1114 else if (!strcmp(format, "html"))
	1115 printfields = html_item;
	1116 else if (!strcmp(format, "json"))
	1117 printfields = json_item;
	1118 else if (!strcmp(format, "tsv") \|\| !strcmp(format, "sfeed"))
	1119 printfields = sfeed_item;
	1120 else if (!strcmp(format, "txt") \|\| !strcmp(format, "twtxt"))
	1121 printfields = twtxt_item;
	1122 else
	1123 usage();
	1124
	1125 search_res = youtube_channel_videos(channelid);
	1126 if (!search_res \|\| search_res->nitems == 0) {
	1127 /* error or no videos found */
	1128 return 0;
	1129 }
	1130
	1131 if (!(data = request_channel_feed(channelid)))
	1132 return 1; /* error, no data at all */
	1133
	1134 if (pledge("stdio", NULL) == -1)
	1135 err(1, "pledge");
	1136
	1137 setxmldata(data, strlen(data));
	1138
	1139 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
	1140
	1141 parser.xmlattr = xmlattr;
	1142 parser.xmlattrentity = xmlattrentity;
	1143 parser.xmlattrstart = xmlattrstart;
	1144 parser.xmlcdata = xmldata;
	1145 parser.xmldata = xmldata;
	1146 parser.xmldataentity = xmldataentity;
	1147 parser.xmltagend = xmltagend;
	1148 parser.xmltagstart = xmltagstart;
	1149 parser.xmltagstartparsed = xmltagstartparsed;
	1150
	1151 /* init all fields, make sure it has a value */
	1152 for (i = 0; i < FeedFieldLast; i++) {
	1153 string_append(&(ctx.fields[i].str), " ", 1);
	1154 string_clear(&(ctx.fields[i].str));
	1155 }
	1156
	1157 if (cgimode && !godmode) {
	1158 fputs("Status: 200 OK\r\n", stdout);
	1159 if (!strcmp(format, "atom") \|\| !strcmp(format, "xml"))
	1160 fputs("Content-Type: text/xml; charset=utf-8\r\n…
	1161 else if (!strcmp(format, "html"))
	1162 fputs("Content-Type: text/html; charset=utf-8\r\…
	1163 else if (!strcmp(format, "json"))
	1164 fputs("Content-Type: application/json; charset=u…
	1165 else
	1166 fputs("Content-Type: text/plain; charset=utf-8\r…
	1167 }
	1168
	1169 if (!strcmp(format, "atom") \|\| !strcmp(format, "xml"))
	1170 atom_header();
	1171 else if (!strcmp(format, "gph"))
	1172 gph_header();
	1173 else if (!strcmp(format, "html"))
	1174 html_header();
	1175 else if (!strcmp(format, "json"))
	1176 json_header();
	1177
	1178 /* NOTE: getnext is defined in xml.h for inline optimization */
	1179 xml_parse(&parser);
	1180
	1181 if (!strcmp(format, "atom") \|\| !strcmp(format, "xml"))
	1182 atom_footer();
	1183 else if (!strcmp(format, "gph"))
	1184 gph_footer();
	1185 else if (!strcmp(format, "html"))
	1186 html_footer();
	1187 else if (!strcmp(format, "json"))
	1188 json_footer();
	1189
	1190 return 0;
	1191 }