initial repo, quick hack - sub - subscene.com subtitle search | |
git clone git://git.codemadness.org/sub | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 0f97d1bd0a8f55ffad37d17e5d7080576e6db684 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sun, 19 Oct 2014 12:49:06 +0000 | |
initial repo, quick hack | |
Diffstat: | |
A Makefile | 5 +++++ | |
A sub-dl | 11 +++++++++++ | |
A sub-search | 18 ++++++++++++++++++ | |
A sub.c | 170 +++++++++++++++++++++++++++++… | |
A util.c | 35 +++++++++++++++++++++++++++++… | |
A util.h | 2 ++ | |
A xml.c | 325 +++++++++++++++++++++++++++++… | |
A xml.h | 49 +++++++++++++++++++++++++++++… | |
8 files changed, 615 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -0,0 +1,5 @@ | |
# Default target: always start from a clean tree, then compile and link the
# three sources into the `sub` binary.
build: clean
	cc xml.c util.c sub.c -o sub

# Remove the binary and any stray object files.
clean:
	rm -f sub *.o

# Neither target produces a file of its own name; without this declaration a
# file called "build" or "clean" in the directory would stop the rule running.
.PHONY: build clean
diff --git a/sub-dl b/sub-dl | |
@@ -0,0 +1,11 @@ | |
#!/bin/sh
# Fetch the subtitle page given as $1, find its download link and extract
# the .srt files of the downloaded archive into the current directory.
#
# Exits 1 when no download link is found or the archive cannot be fetched;
# otherwise the exit status is that of unzip.

# Only the first matching link is used; several matches would otherwise be
# joined into one multi-line, unusable URL.
url=$(curl -f "$1" | grep -oE '(/subtitle/download\?mac=[^"]*)' | head -n 1)
if test x"$url" = x""; then
	exit 1
fi

url="http://subscene.com${url}"

# A mktemp name is unpredictable, unlike /tmp/sub.$$.zip, and the trap
# removes the archive again when the script ends.
file=$(mktemp "${TMPDIR:-/tmp}/sub.XXXXXX") || exit 1
trap 'rm -f "$file"' EXIT

# -f makes curl fail on an HTTP error instead of saving the error page.
curl -f "${url}" > "$file" || exit 1
unzip "$file" "*.srt"
diff --git a/sub-search b/sub-search | |
@@ -0,0 +1,18 @@ | |
+#!/bin/sh | |
+ | |
# Write the usage line to stderr and terminate the script with status 1.
usage() {
	echo 'usage: sub-search [term]' >&2
	exit 1
}
+ | |
# Print every token of the form "two digits, E or e, two digits"
# (e.g. 01E02) found in $1, one per line. The status is grep's: non-zero
# when nothing matched.
getep() {
	grep -oE '([0-9]{2}[Ee][0-9]{2})' <<EOF
$1
EOF
}
+ | |
# A search term is mandatory.
test x"$1" = x"" && usage

query="$1"
url="http://subscene.com/subtitles/release"
# Take the episode tag (e.g. 01E02) from the search term. The fixed search
# URL can never contain one, so matching against it always left ep empty.
ep=$(getep "${query}")
+ | |
+curl --get --data-urlencode "q=${query}" --data-urlencode "r=true" "${url}" | … | |
diff --git a/sub.c b/sub.c | |
@@ -0,0 +1,170 @@ | |
+#include <ctype.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <strings.h> | |
+ | |
+#include "util.h" | |
+#include "xml.h" | |
+ | |
+struct sub { /* one result table row; cleared on every <tr> and filled in by the XML callbacks */ | |
+ int issub; /* set to 1 once a <td class="a1"> is seen; the row is only printed when set */ | |
+ char title[256]; /* text of the "a1" cell read while no span class is active */ | |
+ char lang[256]; /* text of the "a1" cell read while a span class is active */ | |
+ int hi; /* set to 1 when an "a41" cell contains text (hearing impaired) */ | |
+ int files; /* atoi() of the "a3" cell text */ | |
+ char author[256]; /* text of the "a5" cell */ | |
+ char authorurl[256]; /* href of an <a> seen inside the "a5" cell */ | |
+ char description[256]; /* text of the "a6" cell */ | |
+ char url[256]; /* href of an <a> seen inside the "a1" cell */ | |
+}; | |
+ | |
+static XMLParser parser; /* XML parser state */ | |
+static struct sub sub; | |
+static char curclass[64]; | |
+static char spanclass[64]; | |
+ | |
/* Case-insensitive tag name comparison: 1 when s1 and s2 match, else 0. */
static int
istag(const char *s1, const char *s2) {
	return strcasecmp(s1, s2) == 0;
}
+ | |
/* Case-insensitive attribute name comparison: 1 when s1 and s2 match, else 0. */
static int
isattr(const char *s1, const char *s2) {
	return strcasecmp(s1, s2) == 0;
}
+ | |
+static void | |
+xml_handler_data(XMLParser *p, const char *data, size_t datalen) { | |
+ char *s = ""; | |
+ char buf[1024]; | |
+ size_t len; | |
+ | |
+ if(!curclass[0]) | |
+ return; | |
+ | |
+ /* skip leading space */ | |
+ for(s = (char *)data; *s && isspace(*s); s++); | |
+ strlcpy(buf, s, sizeof(buf)); | |
+ for(s = buf; *s; s++) { | |
+ if(*s == '\r' || *s == '\n') | |
+ *s = ' '; | |
+ } | |
+ /* trim remaining space */ | |
+ len = strlen(buf); | |
+ for(; len > 0; len--) { | |
+ if(!isspace(buf[len - 1])) | |
+ break; | |
+ buf[len - 1] = '\0'; | |
+ } | |
+ | |
+ s = buf; | |
+ if(!strlen(s)) | |
+ return; | |
+ /* link */ | |
+ if(strcmp(curclass, "a1") == 0) { | |
+ if(strcmp(spanclass, "") == 0) { | |
+ strlcpy(sub.title, s, sizeof(sub.title)); | |
+ } else { | |
+ strlcpy(sub.lang, s, sizeof(sub.lang)); | |
+ } | |
+ } | |
+ /* files */ | |
+ if(strcmp(curclass, "a3") == 0) { | |
+ sub.files = atoi(s); | |
+ } | |
+ | |
+ /* hearing impaired? */ | |
+ if(strcmp(curclass, "a41") == 0) { | |
+ sub.hi = 1; | |
+ } | |
+ /*if(strcmp(curclass, "a40") == 0) { | |
+ sub.hi = 0; | |
+ }*/ | |
+ /* author / user profile */ | |
+ if(strcmp(curclass, "a5") == 0) { | |
+ strlcpy(sub.author, s, sizeof(sub.author)); | |
+ } | |
+ /* description */ | |
+ if(strcmp(curclass, "a6") == 0) { | |
+ strlcpy(sub.description, s, sizeof(sub.description)); | |
+ } | |
+} | |
+ | |
+static void | |
+xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) { | |
+ (void)p; | |
+ (void)taglen; | |
+ | |
+ if(istag(tag, "tr")) { | |
+ memset(&sub, 0, sizeof(sub)); | |
+ } | |
+} | |
+ | |
+static void | |
+xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen, | |
+ int isshort) | |
+{ | |
+ (void)p; | |
+ (void)taglen; | |
+ (void)isshort; | |
+ | |
+ if(istag(tag, "tr") && sub.issub == 1) { | |
+ printf("LANG:%s\tTITLE:%s\tURL:http://subscene.com%s\tHI:%d\tF… | |
+ sub.lang, sub.title, sub.url, sub.hi, sub.files, sub.au… | |
+ } else if(istag(tag, "td")) { | |
+ curclass[0] = '\0'; | |
+ } else if(istag(tag, "span")) { | |
+ spanclass[0] = '\0'; | |
+ } | |
+} | |
+ | |
+static void | |
+xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, | |
+ const char *name, size_t namelen, const char *value, size_t valuelen) | |
+{ | |
+ (void)p; | |
+ (void)taglen; | |
+ (void)namelen; | |
+ (void)valuelen; | |
+ | |
+ if(istag(tag, "td")) { | |
+ if(isattr(name, "class")) { | |
+ strlcpy(curclass, value, sizeof(curclass)); | |
+ /* link */ | |
+ if(strcmp(value, "a1") == 0) { | |
+ sub.issub = 1; | |
+ } | |
+ } | |
+ } else if(istag(tag, "span")) { | |
+ if(strcmp(curclass, "a1") == 0) { | |
+ if(isattr(name, "class")) { | |
+ strlcpy(spanclass, value, sizeof(spanclass)); | |
+ } | |
+ } | |
+ } else if(istag(tag, "a")) { | |
+ /* subtitle / author profile url */ | |
+ if(strcmp(name, "href") == 0) { | |
+ if((strcmp(curclass, "a1") == 0)) { | |
+ strlcpy(sub.url, value, sizeof(sub.url)); | |
+ } | |
+ if((strcmp(curclass, "a5") == 0)) { | |
+ strlcpy(sub.authorurl, value, sizeof(sub.autho… | |
+ } | |
+ } | |
+ } | |
+} | |
+ | |
+int | |
+main(void) { | |
+ xmlparser_init(&parser, stdin); | |
+ | |
+ parser.xmltagstart = xml_handler_start_element; | |
+ parser.xmltagend = xml_handler_end_element; | |
+ parser.xmlattr = xml_handler_attr; | |
+ parser.xmldata = xml_handler_data; | |
+ | |
+ xmlparser_parse(&parser); | |
+ | |
+ return EXIT_SUCCESS; | |
+} | |
diff --git a/util.c b/util.c | |
@@ -0,0 +1,35 @@ | |
+#include <stdio.h> | |
+#include <string.h> | |
+#include <stdlib.h> | |
+#include <sys/types.h> | |
+ | |
+#include "util.h" | |
+ | |
/*
 * Same contract as OpenBSD's strlcpy().
 * Copy src into dst, a buffer of siz bytes. At most siz-1 characters are
 * copied and dst is always NUL-terminated, unless siz == 0, in which case
 * dst is not touched.
 * Returns strlen(src); a return value >= siz means the copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t siz) {
	size_t i = 0;

	if (siz > 0) {
		/* copy as many bytes as will fit, leaving room for the NUL */
		for (; i < siz - 1 && src[i] != '\0'; i++)
			dst[i] = src[i];
		dst[i] = '\0';
	}
	/* walk over whatever did not fit so the full source length is returned */
	while (src[i] != '\0')
		i++;

	return i;
}
diff --git a/util.h b/util.h | |
@@ -0,0 +1,2 @@ | |
+#undef strlcpy | |
+size_t strlcpy(char *, const char *, size_t); | |
diff --git a/xml.c b/xml.c | |
@@ -0,0 +1,325 @@ | |
+#include <stdio.h> | |
+#include <string.h> | |
+#include <stdlib.h> | |
+#include <ctype.h> | |
+ | |
+#include "xml.h" | |
+ | |
+static __inline__ int /* like getc(), but do some smart buffering */ | |
+xmlparser_getnext(XMLParser *x) { | |
+ return fgetc(x->fp); | |
+#if 0 | |
+ if(x->readoffset >= x->readlastbytes) { | |
+ x->readoffset = 0; | |
+ if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf… | |
+ return EOF; /* 0 bytes read, assume EOF */ | |
+ } | |
+ return (int)x->readbuf[x->readoffset++]; | |
+#endif | |
+} | |
+ | |
+static __inline__ void | |
+xmlparser_parseattrs(XMLParser *x) { | |
+ size_t namelen = 0, valuelen; | |
+ int c, endsep, endname = 0; | |
+ | |
+ while((c = xmlparser_getnext(x)) != EOF) { | |
+ if(isspace(c)) { /* TODO: simplify endname ? */ | |
+ if(namelen) | |
+ endname = 1; | |
+ continue; | |
+ } | |
+ if(c == '?') | |
+ ; /* ignore */ | |
+ else if(c == '=') { | |
+ x->name[namelen] = '\0'; | |
+ } else if(namelen && ((endname && isalpha(c)) || (c == '>' || … | |
+ /* attribute without value */ | |
+ x->name[namelen] = '\0'; | |
+ if(x->xmlattrstart) | |
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
+ if(x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->taglen, x->name, name… | |
+ if(x->xmlattrend) | |
+ x->xmlattrend(x, x->tag, x->taglen, x->name, n… | |
+ endname = 0; | |
+ x->name[0] = c; | |
+ namelen = 1; | |
+ } else if(namelen && (c == '\'' || c == '"')) { | |
+ /* attribute with value */ | |
+ endsep = c; /* c is end separator */ | |
+ if(x->xmlattrstart) | |
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
+ for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { | |
+ if(c == '&' && x->xmlattrentity) { /* entities… | |
+ x->data[valuelen] = '\0'; | |
+ /* call data function with data before… | |
+ if(valuelen && x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->tagle… | |
+ x->data[0] = c; | |
+ valuelen = 1; | |
+ while((c = xmlparser_getnext(x)) != EO… | |
+ if(c == endsep) | |
+ break; | |
+ if(valuelen < sizeof(x->data) … | |
+ x->data[valuelen++] = … | |
+ else { | |
+ /* TODO: entity too lo… | |
+ x->data[valuelen] = '\… | |
+ if(x->xmlattr) | |
+ x->xmlattr(x, … | |
+ valuelen = 0; | |
+ break; | |
+ } | |
+ if(c == ';') { | |
+ x->data[valuelen] = '\… | |
+ x->xmlattrentity(x, x-… | |
+ valuelen = 0; | |
+ break; | |
+ } | |
+ } | |
+ } else if(c != endsep) { | |
+ if(valuelen < sizeof(x->data) - 1) { | |
+ x->data[valuelen++] = c; | |
+ } else { | |
+ x->data[valuelen] = '\0'; | |
+ if(x->xmlattr) | |
+ x->xmlattr(x, x->tag, … | |
+ x->data[0] = c; | |
+ valuelen = 1; | |
+ } | |
+ } | |
+ if(c == endsep) { | |
+ x->data[valuelen] = '\0'; | |
+ if(x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->tagle… | |
+ if(x->xmlattrend) | |
+ x->xmlattrend(x, x->tag, x->ta… | |
+ break; | |
+ } | |
+ } | |
+ namelen = 0; | |
+ endname = 0; | |
+ } else if(namelen < sizeof(x->name) - 1) | |
+ x->name[namelen++] = c; | |
+ if(c == '>') { | |
+ break; | |
+ } else if(c == '/') { | |
+ x->isshorttag = 1; | |
+ namelen = 0; | |
+ x->name[0] = '\0'; | |
+ } | |
+ } | |
+} | |
+ | |
+static __inline__ void | |
+xmlparser_parsecomment(XMLParser *x) { | |
+ size_t datalen = 0, i = 0; | |
+ int c; | |
+ | |
+ if(x->xmlcommentstart) | |
+ x->xmlcommentstart(x); | |
+ while((c = xmlparser_getnext(x)) != EOF) { | |
+ if(c == '-' && i < 2) | |
+ i++; | |
+ else if(c == '>') { | |
+ if(i == 2) { /* -- */ | |
+ if(datalen >= 2) { | |
+ datalen -= 2; | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmlcomment) | |
+ x->xmlcomment(x, x->data, data… | |
+ } | |
+ if(x->xmlcommentend) | |
+ x->xmlcommentend(x); | |
+ break; | |
+ } | |
+ i = 0; | |
+ } | |
+ /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ | |
+ /* TODO: what if the end has --, and it's cut on the boundary,… | |
+ if(datalen < sizeof(x->data) - 1) | |
+ x->data[datalen++] = c; | |
+ else { | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmlcomment) | |
+ x->xmlcomment(x, x->data, datalen); | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ } | |
+ } | |
+} | |
+ | |
+/* TODO: | |
+ * <test><![CDATA[1234567dddd8]]]> | |
+ * | |
+ * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1 | |
+ * test comment function too for similar bug? | |
+ * | |
+ */ | |
+static __inline__ void | |
+xmlparser_parsecdata(XMLParser *x) { | |
+ size_t datalen = 0, i = 0; | |
+ int c; | |
+ | |
+ if(x->xmlcdatastart) | |
+ x->xmlcdatastart(x); | |
+ while((c = xmlparser_getnext(x)) != EOF) { | |
+ if(c == ']' && i < 2) { | |
+ i++; | |
+ } else if(c == '>') { | |
+ if(i == 2) { /* ]] */ | |
+ if(datalen >= 2) { | |
+ datalen -= 2; | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmlcdata && datalen) | |
+ x->xmlcdata(x, x->data, datale… | |
+ } | |
+ if(x->xmlcdataend) | |
+ x->xmlcdataend(x); | |
+ break; | |
+ } | |
+ i = 0; | |
+ } | |
+ /* TODO: what if the end has ]>, and it's cut on the boundary … | |
+ if(datalen < sizeof(x->data) - 1) { | |
+ x->data[datalen++] = c; | |
+ } else { | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmlcdata) | |
+ x->xmlcdata(x, x->data, datalen); | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ } | |
+ } | |
+} | |
+ | |
+void | |
+xmlparser_init(XMLParser *x, FILE *fp) { | |
+ memset(x, 0, sizeof(XMLParser)); | |
+ x->fp = fp; | |
+} | |
+ | |
+void | |
+xmlparser_parse(XMLParser *x) { | |
+ int c, ispi; | |
+ size_t datalen, tagdatalen, taglen; | |
+ | |
+ while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < … | |
+ | |
+ while(c != EOF) { | |
+ if(c == '<') { /* parse tag */ | |
+ if((c = xmlparser_getnext(x)) == EOF) | |
+ return; | |
+ x->tag[0] = '\0'; | |
+ x->taglen = 0; | |
+ if(c == '!') { /* cdata and comments */ | |
+ for(tagdatalen = 0; (c = xmlparser_getnext(x))… | |
+ if(tagdatalen <= strlen("[CDATA[")) /*… | |
+ x->data[tagdatalen++] = c; /* … | |
+ if(c == '>') | |
+ break; | |
+ else if(c == '-' && tagdatalen == strl… | |
+ (x->data[0] == '-')) {… | |
+ xmlparser_parsecomment(x); | |
+ break; | |
+ } else if(c == '[') { | |
+ if(tagdatalen == strlen("[CDAT… | |
+ x->data[1] == 'C' && x… | |
+ x->data[3] == 'A' && x… | |
+ x->data[5] == 'A' && x… | |
+ xmlparser_parsecdata(x… | |
+ break; | |
+ #if 0 | |
+ } else { | |
+ /* TODO ? */ | |
+ /* markup declaration … | |
+ while((c = xmlparser_g… | |
+ #endif | |
+ } | |
+ } | |
+ } | |
+ } else { /* normal tag (open, short open, close), proc… | |
+ if(isspace(c)) | |
+ while((c = xmlparser_getnext(x)) != EO… | |
+ if(c == EOF) | |
+ return; | |
+ x->tag[0] = c; | |
+ ispi = (c == '?') ? 1 : 0; | |
+ x->isshorttag = ispi; | |
+ taglen = 1; | |
+ while((c = xmlparser_getnext(x)) != EOF) { | |
+ if(c == '/') /* TODO: simplify short t… | |
+ x->isshorttag = 1; /* short ta… | |
+ else if(c == '>' || isspace(c)) { | |
+ x->tag[taglen] = '\0'; | |
+ if(x->tag[0] == '/') { /* end … | |
+ x->taglen = --taglen; … | |
+ if(taglen && x->xmltag… | |
+ x->xmltagend(x… | |
+ } else { | |
+ x->taglen = taglen; | |
+ if(x->xmltagstart) | |
+ x->xmltagstart… | |
+ if(isspace(c)) | |
+ xmlparser_pars… | |
+ if(x->xmltagstartparse… | |
+ x->xmltagstart… | |
+ } | |
+ if((x->isshorttag || ispi) && … | |
+ x->xmltagend(x, x->tag… | |
+ break; | |
+ } else if(taglen < sizeof(x->tag) - 1) | |
+ x->tag[taglen++] = c; | |
+ } | |
+ } | |
+ } else { | |
+ /* parse data */ | |
+ datalen = 0; | |
+ if(x->xmldatastart) | |
+ x->xmldatastart(x); | |
+ while((c = xmlparser_getnext(x)) != EOF) { | |
+ if(c == '&' && x->xmldataentity) { | |
+ if(datalen) { | |
+ x->data[datalen] = '\0'; | |
+ x->xmldata(x, x->data, datalen… | |
+ } | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ while((c = xmlparser_getnext(x)) != EO… | |
+ if(c == '<') | |
+ break; | |
+ if(datalen < sizeof(x->data) -… | |
+ x->data[datalen++] = c; | |
+ if(isspace(c)) | |
+ break; | |
+ else if(c == ';') { | |
+ x->data[datalen] = '\0… | |
+ x->xmldataentity(x, x-… | |
+ datalen = 0; | |
+ break; | |
+ } | |
+ } | |
+ } else if(c != '<') { | |
+ if(datalen < sizeof(x->data) - 1) { | |
+ x->data[datalen++] = c; | |
+ } else { | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmldata) | |
+ x->xmldata(x, x->data,… | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ } | |
+ } | |
+ if(c == '<') { | |
+ x->data[datalen] = '\0'; | |
+ if(x->xmldata && datalen) | |
+ x->xmldata(x, x->data, datalen… | |
+ if(x->xmldataend) | |
+ x->xmldataend(x); | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ } | |
+} | |
diff --git a/xml.h b/xml.h | |
@@ -0,0 +1,49 @@ | |
+#include <stdio.h> | |
+#include <string.h> | |
+#include <stdlib.h> | |
+ | |
+typedef struct xmlparser { | |
+ /* handlers */ | |
+ void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t tagle… | |
+ void (*xmltagstartparsed)(struct xmlparser *p, const char *tag, | |
+ size_t taglen, int isshort); | |
+ void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen, | |
+ int isshort); | |
+ void (*xmldatastart)(struct xmlparser *p); | |
+ void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen); | |
+ void (*xmldataend)(struct xmlparser *p); | |
+ void (*xmldataentity)(struct xmlparser *p, const char *data, | |
+ size_t datalen); | |
+ void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t tagl… | |
+ const char *name, size_t namelen); | |
+ void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen, | |
+ const char *name, size_t namelen, const char *value, | |
+ size_t valuelen); | |
+ void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen, | |
+ const char *name, size_t namelen); | |
+ void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t tag… | |
+ const char *name, size_t namelen, const char *value, | |
+ size_t valuelen); | |
+ void (*xmlcdatastart)(struct xmlparser *p); | |
+ void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen… | |
+ void (*xmlcdataend)(struct xmlparser *p); | |
+ void (*xmlcommentstart)(struct xmlparser *p); | |
+ void (*xmlcomment)(struct xmlparser *p, const char *comment, | |
+ size_t commentlen); | |
+ void (*xmlcommentend)(struct xmlparser *p); | |
+ | |
+ FILE *fp; /* file stream to read from */ | |
+ | |
+ /* private; internal state */ | |
+ char tag[1024]; /* current tag */ | |
+ int isshorttag; /* current tag is in short form ? */ | |
+ size_t taglen; | |
+ char name[256]; /* current attribute name */ | |
+ char data[BUFSIZ]; /* data buffer used for tag and attribute data */ | |
+ size_t readoffset; | |
+ size_t readlastbytes; | |
+ unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnex… | |
+} XMLParser; | |
+ | |
+void xmlparser_init(XMLParser *x, FILE *fp); | |
+void xmlparser_parse(XMLParser *x); |