GopherProxy

	initial repo, quick hack - sub - subscene.com subtitle search
	git clone git://git.codemadness.org/sub
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 0f97d1bd0a8f55ffad37d17e5d7080576e6db684
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sun, 19 Oct 2014 12:49:06 +0000

	initial repo, quick hack

	Diffstat:
	A Makefile | 5 +++++
	A sub-dl | 11 +++++++++++
	A sub-search | 18 ++++++++++++++++++
	A sub.c | 170 +++++++++++++++++++++++++++++…
	A util.c | 35 +++++++++++++++++++++++++++++…
	A util.h | 2 ++
	A xml.c | 325 +++++++++++++++++++++++++++++…
	A xml.h | 49 +++++++++++++++++++++++++++++…

	8 files changed, 615 insertions(+), 0 deletions(-)
	---
	diff --git a/Makefile b/Makefile
	@@ -0,0 +1,5 @@
	+build: clean # depends on clean, so every build starts from scratch
	+ cc xml.c util.c sub.c -o sub
	+
	+clean: # removes the sub binary and any object files
	+ rm -f sub *.o
	diff --git a/sub-dl b/sub-dl
	@@ -0,0 +1,11 @@
	+#!/bin/sh
	+# usage: sub-dl <subtitle page url>; downloads the zip and unpacks its .srt files
	+url=$(curl "$1" | grep -oE '(/subtitle/download\?mac=[^"]*)')
	+if test x"$url" = x""; then
	+ exit 1 # no download link found on the page
	+else
	+ url="http://subscene.com${url}"
	+ file="/tmp/sub.$$.zip"
	+ curl "${url}" > "$file"
	+ unzip "$file" "*.srt"
	+fi
	diff --git a/sub-search b/sub-search
	@@ -0,0 +1,18 @@
	+#!/bin/sh
	+
	+usage() {
	+ printf 'usage: sub-search [term]\n' >&2
	+ exit 1
	+}
	+
	+getep() {
	+ printf '%s' "$1" \| grep -oE '([0-9]{2}[Ee][0-9]{2})'
	+}
	+
	+test x"$1" = x"" && usage
	+
	+query="$1"
	+url="http://subscene.com/subtitles/release"
	+ep=$(getep "${url}")
	+
	+curl --get --data-urlencode "q=${query}" --data-urlencode "r=true" "${url}" \| …
	diff --git a/sub.c b/sub.c
	@@ -0,0 +1,170 @@
	+#include <ctype.h>
	+#include <stdio.h>
	+#include <stdlib.h>
	+#include <string.h>
	+#include <strings.h>
	+
	+#include "util.h"
	+#include "xml.h"
	+
	+struct sub { /* one subtitle row (<tr>) of the search results */
	+ int issub; /* set to 1 once a td with class "a1" is seen in the row */
	+ char title[256]; /* text of the "a1" cell while no span class is active */
	+ char lang[256]; /* text of the "a1" cell while a span class is active */
	+ int hi; /* set to 1 when the "a41" cell has text (hearing impaired) */
	+ int files; /* atoi() of the "a3" cell text */
	+ char author[256]; /* text of the "a5" cell */
	+ char authorurl[256]; /* href of a link in the "a5" cell */
	+ char description[256]; /* text of the "a6" cell */
	+ char url[256]; /* href of a link in the "a1" cell */
	+};
	+
	+static XMLParser parser; /* XML parser state */
	+static struct sub sub; /* record for the row being parsed; zeroed at each <tr> start */
	+static char curclass[64]; /* class attribute of the current <td>; emptied when the td ends */
	+static char spanclass[64]; /* class of a <span> inside the "a1" td; emptied when the span ends */
	+
	+static int /* non-zero when tag names s1 and s2 are equal, ignoring case */
	+istag(const char *s1, const char *s2) {
	+ return !strcasecmp(s1, s2);
	+}
	+
	+static int /* non-zero when attribute names s1 and s2 are equal, ignoring case */
	+isattr(const char *s1, const char *s2) {
	+ return !strcasecmp(s1, s2);
	+}
	+
	+static void /* data handler: stores the trimmed text of the current <td> in the matching field of sub */
	+xml_handler_data(XMLParser *p, const char *data, size_t datalen) {
	+ char *s = "";
	+ char buf[1024];
	+ size_t len;
	+
	+ if(!curclass[0])
	+ return;
	+
	+ /* skip leading space */
	+ for(s = (char *)data; *s && isspace((unsigned char)*s); s++);
	+ strlcpy(buf, s, sizeof(buf));
	+ for(s = buf; *s; s++) {
	+ if(*s == '\r' || *s == '\n')
	+ *s = ' ';
	+ }
	+ /* trim remaining space */
	+ len = strlen(buf);
	+ for(; len > 0; len--) {
	+ if(!isspace((unsigned char)buf[len - 1]))
	+ break;
	+ buf[len - 1] = '\0';
	+ }
	+
	+ s = buf;
	+ if(!strlen(s))
	+ return;
	+ /* link */
	+ if(strcmp(curclass, "a1") == 0) {
	+ if(strcmp(spanclass, "") == 0) {
	+ strlcpy(sub.title, s, sizeof(sub.title));
	+ } else {
	+ strlcpy(sub.lang, s, sizeof(sub.lang));
	+ }
	+ }
	+ /* files */
	+ if(strcmp(curclass, "a3") == 0) {
	+ sub.files = atoi(s);
	+ }
	+
	+ /* hearing impaired? */
	+ if(strcmp(curclass, "a41") == 0) {
	+ sub.hi = 1;
	+ }
	+ /*if(strcmp(curclass, "a40") == 0) {
	+ sub.hi = 0;
	+ }*/
	+ /* author / user profile */
	+ if(strcmp(curclass, "a5") == 0) {
	+ strlcpy(sub.author, s, sizeof(sub.author));
	+ }
	+ /* description */
	+ if(strcmp(curclass, "a6") == 0) {
	+ strlcpy(sub.description, s, sizeof(sub.description));
	+ }
	+}
	+
	+static void /* tag-start handler: a new <tr> resets the subtitle record being collected */
	+xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) {
	+ (void)p;
	+ (void)taglen;
	+
	+ if(istag(tag, "tr")) {
	+ memset(&sub, 0, sizeof(sub));
	+ }
	+}
	+
	+static void
	+xml_handler_end_element(XMLParser p, const char tag, size_t taglen,
	+ int isshort)
	+{
	+ (void)p;
	+ (void)taglen;
	+ (void)isshort;
	+
	+ if(istag(tag, "tr") && sub.issub == 1) {
	+ printf("LANG:%s\tTITLE:%s\tURL:http://subscene.com%s\tHI:%d\tF…
	+ sub.lang, sub.title, sub.url, sub.hi, sub.files, sub.au…
	+ } else if(istag(tag, "td")) {
	+ curclass[0] = '\0';
	+ } else if(istag(tag, "span")) {
	+ spanclass[0] = '\0';
	+ }
	+}
	+
	+static void
	+xml_handler_attr(XMLParser p, const char tag, size_t taglen,
	+ const char name, size_t namelen, const char value, size_t valuelen)
	+{
	+ (void)p;
	+ (void)taglen;
	+ (void)namelen;
	+ (void)valuelen;
	+
	+ if(istag(tag, "td")) {
	+ if(isattr(name, "class")) {
	+ strlcpy(curclass, value, sizeof(curclass));
	+ /* link */
	+ if(strcmp(value, "a1") == 0) {
	+ sub.issub = 1;
	+ }
	+ }
	+ } else if(istag(tag, "span")) {
	+ if(strcmp(curclass, "a1") == 0) {
	+ if(isattr(name, "class")) {
	+ strlcpy(spanclass, value, sizeof(spanclass));
	+ }
	+ }
	+ } else if(istag(tag, "a")) {
	+ /* subtitle / author profile url */
	+ if(strcmp(name, "href") == 0) {
	+ if((strcmp(curclass, "a1") == 0)) {
	+ strlcpy(sub.url, value, sizeof(sub.url));
	+ }
	+ if((strcmp(curclass, "a5") == 0)) {
	+ strlcpy(sub.authorurl, value, sizeof(sub.autho…
	+ }
	+ }
	+ }
	+}
	+
	+int /* parses HTML from stdin; the handlers print one line per subtitle row */
	+main(void) {
	+ xmlparser_init(&parser, stdin);
	+
	+ parser.xmldata = xml_handler_data;
	+ parser.xmlattr = xml_handler_attr;
	+ parser.xmltagend = xml_handler_end_element;
	+ parser.xmltagstart = xml_handler_start_element;
	+
	+ xmlparser_parse(&parser);
	+
	+ return EXIT_SUCCESS;
	+}
	diff --git a/util.c b/util.c
	@@ -0,0 +1,35 @@
	+#include <stdio.h>
	+#include <string.h>
	+#include <stdlib.h>
	+#include <sys/types.h>
	+
	+#include "util.h"
	+
	+/*
	+ * Taken from OpenBSD.
	+ * Copy src to string dst of size siz. At most siz-1 characters
	+ * will be copied. Always NUL terminates (unless siz == 0).
	+ * Returns strlen(src); if retval >= siz, truncation occurred.
	+ */
	+size_t
	+strlcpy(char *dst, const char *src, size_t siz) {
	+ char *d = dst;
	+ const char *s = src;
	+ size_t n = siz;
	+
	+ /* copy as many bytes as will fit */
	+ if (n != 0) {
	+ while (--n != 0) {
	+ if ((*d++ = *s++) == '\0')
	+ break;
	+ }
	+ }
	+ /* not enough room in dst, add NUL and traverse rest of src */
	+ if (n == 0) {
	+ if (siz != 0)
	+ *d = '\0'; /* NUL-terminate dst */
	+ while (*s++)
	+ ;
	+ }
	+ return(s - src - 1); /* count does not include NUL */
	+}
	diff --git a/util.h b/util.h
	@@ -0,0 +1,2 @@
	+#undef strlcpy
	+size_t strlcpy(char *dst, const char *src, size_t siz); /* bounded copy, NUL-terminates when siz > 0; returns strlen(src) */
	diff --git a/xml.c b/xml.c
	@@ -0,0 +1,325 @@
	+#include <stdio.h>
	+#include <string.h>
	+#include <stdlib.h>
	+#include <ctype.h>
	+
	+#include "xml.h"
	+
	+static __inline__ int /* like getc(), but do some smart buffering */
	+xmlparser_getnext(XMLParser *x) {
	+ return fgetc(x->fp);
	+#if 0
	+ if(x->readoffset >= x->readlastbytes) {
	+ x->readoffset = 0;
	+ if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf…
	+ return EOF; /* 0 bytes read, assume EOF */
	+ }
	+ return (int)x->readbuf[x->readoffset++];
	+#endif
	+}
	+
	+static __inline__ void
	+xmlparser_parseattrs(XMLParser *x) {
	+ size_t namelen = 0, valuelen;
	+ int c, endsep, endname = 0;
	+
	+ while((c = xmlparser_getnext(x)) != EOF) {
	+ if(isspace(c)) { /* TODO: simplify endname ? */
	+ if(namelen)
	+ endname = 1;
	+ continue;
	+ }
	+ if(c == '?')
	+ ; /* ignore */
	+ else if(c == '=') {
	+ x->name[namelen] = '\0';
	+ } else if(namelen && ((endname && isalpha(c)) \|\| (c == '>' \|\| …
	+ /* attribute without value */
	+ x->name[namelen] = '\0';
	+ if(x->xmlattrstart)
	+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
	+ if(x->xmlattr)
	+ x->xmlattr(x, x->tag, x->taglen, x->name, name…
	+ if(x->xmlattrend)
	+ x->xmlattrend(x, x->tag, x->taglen, x->name, n…
	+ endname = 0;
	+ x->name[0] = c;
	+ namelen = 1;
	+ } else if(namelen && (c == '\'' \|\| c == '"')) {
	+ /* attribute with value */
	+ endsep = c; /* c is end separator */
	+ if(x->xmlattrstart)
	+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
	+ for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
	+ if(c == '&' && x->xmlattrentity) { /* entities…
	+ x->data[valuelen] = '\0';
	+ /* call data function with data before…
	+ if(valuelen && x->xmlattr)
	+ x->xmlattr(x, x->tag, x->tagle…
	+ x->data[0] = c;
	+ valuelen = 1;
	+ while((c = xmlparser_getnext(x)) != EO…
	+ if(c == endsep)
	+ break;
	+ if(valuelen < sizeof(x->data) …
	+ x->data[valuelen++] = …
	+ else {
	+ /* TODO: entity too lo…
	+ x->data[valuelen] = '\…
	+ if(x->xmlattr)
	+ x->xmlattr(x, …
	+ valuelen = 0;
	+ break;
	+ }
	+ if(c == ';') {
	+ x->data[valuelen] = '\…
	+ x->xmlattrentity(x, x-…
	+ valuelen = 0;
	+ break;
	+ }
	+ }
	+ } else if(c != endsep) {
	+ if(valuelen < sizeof(x->data) - 1) {
	+ x->data[valuelen++] = c;
	+ } else {
	+ x->data[valuelen] = '\0';
	+ if(x->xmlattr)
	+ x->xmlattr(x, x->tag, …
	+ x->data[0] = c;
	+ valuelen = 1;
	+ }
	+ }
	+ if(c == endsep) {
	+ x->data[valuelen] = '\0';
	+ if(x->xmlattr)
	+ x->xmlattr(x, x->tag, x->tagle…
	+ if(x->xmlattrend)
	+ x->xmlattrend(x, x->tag, x->ta…
	+ break;
	+ }
	+ }
	+ namelen = 0;
	+ endname = 0;
	+ } else if(namelen < sizeof(x->name) - 1)
	+ x->name[namelen++] = c;
	+ if(c == '>') {
	+ break;
	+ } else if(c == '/') {
	+ x->isshorttag = 1;
	+ namelen = 0;
	+ x->name[0] = '\0';
	+ }
	+ }
	+}
	+
	+static __inline__ void
	+xmlparser_parsecomment(XMLParser *x) {
	+ size_t datalen = 0, i = 0;
	+ int c;
	+
	+ if(x->xmlcommentstart)
	+ x->xmlcommentstart(x);
	+ while((c = xmlparser_getnext(x)) != EOF) {
	+ if(c == '-' && i < 2)
	+ i++;
	+ else if(c == '>') {
	+ if(i == 2) { /* -- */
	+ if(datalen >= 2) {
	+ datalen -= 2;
	+ x->data[datalen] = '\0';
	+ if(x->xmlcomment)
	+ x->xmlcomment(x, x->data, data…
	+ }
	+ if(x->xmlcommentend)
	+ x->xmlcommentend(x);
	+ break;
	+ }
	+ i = 0;
	+ }
	+ /* \|\| (c == '-' && d >= sizeof(x->data) - 4)) { */
	+ /* TODO: what if the end has --, and it's cut on the boundary,…
	+ if(datalen < sizeof(x->data) - 1)
	+ x->data[datalen++] = c;
	+ else {
	+ x->data[datalen] = '\0';
	+ if(x->xmlcomment)
	+ x->xmlcomment(x, x->data, datalen);
	+ x->data[0] = c;
	+ datalen = 1;
	+ }
	+ }
	+}
	+
	+/* TODO:
	+ * <test><![CDATA[1234567dddd8]]]>
	+ *
	+ * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
	+ * test comment function too for similar bug?
	+ *
	+ */
	+static __inline__ void
	+xmlparser_parsecdata(XMLParser *x) {
	+ size_t datalen = 0, i = 0;
	+ int c;
	+
	+ if(x->xmlcdatastart)
	+ x->xmlcdatastart(x);
	+ while((c = xmlparser_getnext(x)) != EOF) {
	+ if(c == ']' && i < 2) {
	+ i++;
	+ } else if(c == '>') {
	+ if(i == 2) { /* ]] */
	+ if(datalen >= 2) {
	+ datalen -= 2;
	+ x->data[datalen] = '\0';
	+ if(x->xmlcdata && datalen)
	+ x->xmlcdata(x, x->data, datale…
	+ }
	+ if(x->xmlcdataend)
	+ x->xmlcdataend(x);
	+ break;
	+ }
	+ i = 0;
	+ }
	+ /* TODO: what if the end has ]>, and it's cut on the boundary …
	+ if(datalen < sizeof(x->data) - 1) {
	+ x->data[datalen++] = c;
	+ } else {
	+ x->data[datalen] = '\0';
	+ if(x->xmlcdata)
	+ x->xmlcdata(x, x->data, datalen);
	+ x->data[0] = c;
	+ datalen = 1;
	+ }
	+ }
	+}
	+
	+void /* zeroes the whole parser state (handlers included) and attaches the input stream fp */
	+xmlparser_init(XMLParser *x, FILE *fp) {
	+ memset(x, 0, sizeof(XMLParser));
	+ x->fp = fp;
	+}
	+
	+void
	+xmlparser_parse(XMLParser *x) {
	+ int c, ispi;
	+ size_t datalen, tagdatalen, taglen;
	+
	+ while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < …
	+
	+ while(c != EOF) {
	+ if(c == '<') { /* parse tag */
	+ if((c = xmlparser_getnext(x)) == EOF)
	+ return;
	+ x->tag[0] = '\0';
	+ x->taglen = 0;
	+ if(c == '!') { /* cdata and comments */
	+ for(tagdatalen = 0; (c = xmlparser_getnext(x))…
	+ if(tagdatalen <= strlen("[CDATA[")) /*…
	+ x->data[tagdatalen++] = c; /* …
	+ if(c == '>')
	+ break;
	+ else if(c == '-' && tagdatalen == strl…
	+ (x->data[0] == '-')) {…
	+ xmlparser_parsecomment(x);
	+ break;
	+ } else if(c == '[') {
	+ if(tagdatalen == strlen("[CDAT…
	+ x->data[1] == 'C' && x…
	+ x->data[3] == 'A' && x…
	+ x->data[5] == 'A' && x…
	+ xmlparser_parsecdata(x…
	+ break;
	+ #if 0
	+ } else {
	+ /* TODO ? */
	+ /* markup declaration …
	+ while((c = xmlparser_g…
	+ #endif
	+ }
	+ }
	+ }
	+ } else { /* normal tag (open, short open, close), proc…
	+ if(isspace(c))
	+ while((c = xmlparser_getnext(x)) != EO…
	+ if(c == EOF)
	+ return;
	+ x->tag[0] = c;
	+ ispi = (c == '?') ? 1 : 0;
	+ x->isshorttag = ispi;
	+ taglen = 1;
	+ while((c = xmlparser_getnext(x)) != EOF) {
	+ if(c == '/') /* TODO: simplify short t…
	+ x->isshorttag = 1; /* short ta…
	+ else if(c == '>' \|\| isspace(c)) {
	+ x->tag[taglen] = '\0';
	+ if(x->tag[0] == '/') { /* end …
	+ x->taglen = --taglen; …
	+ if(taglen && x->xmltag…
	+ x->xmltagend(x…
	+ } else {
	+ x->taglen = taglen;
	+ if(x->xmltagstart)
	+ x->xmltagstart…
	+ if(isspace(c))
	+ xmlparser_pars…
	+ if(x->xmltagstartparse…
	+ x->xmltagstart…
	+ }
	+ if((x->isshorttag \|\| ispi) && …
	+ x->xmltagend(x, x->tag…
	+ break;
	+ } else if(taglen < sizeof(x->tag) - 1)
	+ x->tag[taglen++] = c;
	+ }
	+ }
	+ } else {
	+ /* parse data */
	+ datalen = 0;
	+ if(x->xmldatastart)
	+ x->xmldatastart(x);
	+ while((c = xmlparser_getnext(x)) != EOF) {
	+ if(c == '&' && x->xmldataentity) {
	+ if(datalen) {
	+ x->data[datalen] = '\0';
	+ x->xmldata(x, x->data, datalen…
	+ }
	+ x->data[0] = c;
	+ datalen = 1;
	+ while((c = xmlparser_getnext(x)) != EO…
	+ if(c == '<')
	+ break;
	+ if(datalen < sizeof(x->data) -…
	+ x->data[datalen++] = c;
	+ if(isspace(c))
	+ break;
	+ else if(c == ';') {
	+ x->data[datalen] = '\0…
	+ x->xmldataentity(x, x-…
	+ datalen = 0;
	+ break;
	+ }
	+ }
	+ } else if(c != '<') {
	+ if(datalen < sizeof(x->data) - 1) {
	+ x->data[datalen++] = c;
	+ } else {
	+ x->data[datalen] = '\0';
	+ if(x->xmldata)
	+ x->xmldata(x, x->data,…
	+ x->data[0] = c;
	+ datalen = 1;
	+ }
	+ }
	+ if(c == '<') {
	+ x->data[datalen] = '\0';
	+ if(x->xmldata && datalen)
	+ x->xmldata(x, x->data, datalen…
	+ if(x->xmldataend)
	+ x->xmldataend(x);
	+ break;
	+ }
	+ }
	+ }
	+ }
	+}
	diff --git a/xml.h b/xml.h
	@@ -0,0 +1,49 @@
	+#include <stdio.h>
	+#include <string.h>
	+#include <stdlib.h>
	+
	+typedef struct xmlparser {
	+ /* handlers */
	+ void (xmltagstart)(struct xmlparser p, const char *tag, size_t tagle…
	+ void (xmltagstartparsed)(struct xmlparser p, const char *tag,
	+ size_t taglen, int isshort);
	+ void (xmltagend)(struct xmlparser p, const char *tag, size_t taglen,
	+ int isshort);
	+ void (xmldatastart)(struct xmlparser p);
	+ void (xmldata)(struct xmlparser p, const char *data, size_t datalen);
	+ void (xmldataend)(struct xmlparser p);
	+ void (xmldataentity)(struct xmlparser p, const char *data,
	+ size_t datalen);
	+ void (xmlattrstart)(struct xmlparser p, const char *tag, size_t tagl…
	+ const char *name, size_t namelen);
	+ void (xmlattr)(struct xmlparser p, const char *tag, size_t taglen,
	+ const char name, size_t namelen, const char value,
	+ size_t valuelen);
	+ void (xmlattrend)(struct xmlparser p, const char *tag, size_t taglen,
	+ const char *name, size_t namelen);
	+ void (xmlattrentity)(struct xmlparser p, const char *tag, size_t tag…
	+ const char name, size_t namelen, const char value,
	+ size_t valuelen);
	+ void (xmlcdatastart)(struct xmlparser p);
	+ void (xmlcdata)(struct xmlparser p, const char *data, size_t datalen…
	+ void (xmlcdataend)(struct xmlparser p);
	+ void (xmlcommentstart)(struct xmlparser p);
	+ void (xmlcomment)(struct xmlparser p, const char *comment,
	+ size_t commentlen);
	+ void (xmlcommentend)(struct xmlparser p);
	+
	+ FILE fp; / file stream to read from */
	+
	+ /* private; internal state */
	+ char tag[1024]; /* current tag */
	+ int isshorttag; /* current tag is in short form ? */
	+ size_t taglen;
	+ char name[256]; /* current attribute name */
	+ char data[BUFSIZ]; /* data buffer used for tag and attribute data */
	+ size_t readoffset;
	+ size_t readlastbytes;
	+ unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnex…
	+} XMLParser;
	+
	+void xmlparser_init(XMLParser *x, FILE *fp); /* clear the parser state and read from fp */
	+void xmlparser_parse(XMLParser *x); /* parse the stream, calling the handlers set on x */