Introduction
Introduction Statistics Contact Development Disclaimer Help
initial repo, quick hack - sub - subscene.com subtitle search
git clone git://git.codemadness.org/sub
Log
Files
Refs
README
LICENSE
---
commit 0f97d1bd0a8f55ffad37d17e5d7080576e6db684
Author: Hiltjo Posthuma <[email protected]>
Date: Sun, 19 Oct 2014 12:49:06 +0000
initial repo, quick hack
Diffstat:
A Makefile | 5 +++++
A sub-dl | 11 +++++++++++
A sub-search | 18 ++++++++++++++++++
A sub.c | 170 +++++++++++++++++++++++++++++…
A util.c | 35 +++++++++++++++++++++++++++++…
A util.h | 2 ++
A xml.c | 325 +++++++++++++++++++++++++++++…
A xml.h | 49 +++++++++++++++++++++++++++++…
8 files changed, 615 insertions(+), 0 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+# Neither target creates a file of its own name; declaring them phony keeps
+# them working even if a file called "build" or "clean" exists.
+.PHONY: build clean
+
+# Compile all sources into the "sub" binary; always starts from a clean tree.
+build: clean
+	cc xml.c util.c sub.c -o sub
+
+# Remove the binary and any stray object files.
+clean:
+	rm -f sub *.o
diff --git a/sub-dl b/sub-dl
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+url=$(curl "$1" | grep -oE '(/subtitle/download\?mac=[^"]*)')
+if test x"$url" = x""; then
+ exit 1
+else
+ url="http://subscene.com${url}"
+ file="/tmp/sub.$$.zip"
+ curl "${url}" > "$file"
+ unzip "$file" "*.srt"
+fi
diff --git a/sub-search b/sub-search
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+usage() {
+ printf 'usage: sub-search [term]\n' >&2
+ exit 1
+}
+
+getep() {
+ printf '%s' "$1" | grep -oE '([0-9]{2}[Ee][0-9]{2})'
+}
+
+test x"$1" = x"" && usage
+
+query="$1"
+url="http://subscene.com/subtitles/release"
+ep=$(getep "${url}")
+
+curl --get --data-urlencode "q=${query}" --data-urlencode "r=true" "${url}" | …
diff --git a/sub.c b/sub.c
@@ -0,0 +1,170 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "util.h"
+#include "xml.h"
+
+/*
+ * One search result, collected by the handlers below while a table row
+ * (<tr>) is parsed. The struct is zeroed at every <tr> start tag.
+ */
+struct sub {
+ int issub; /* set to 1 when the row has a <td class="a1"> */
+ char title[256]; /* text in the "a1" cell while no span class is active */
+ char lang[256]; /* text in the "a1" cell inside a <span> with a class */
+ int hi; /* set to 1 when the "a41" cell has text (hearing impaired) */
+ int files; /* atoi() of the text in the "a3" cell */
+ char author[256]; /* text in the "a5" cell */
+ char authorurl[256]; /* href of an <a> in the "a5" cell */
+ char description[256]; /* text in the "a6" cell */
+ char url[256]; /* href of an <a> in the "a1" cell */
+};
+
+/* File-scope state shared by the handlers. */
+static XMLParser parser; /* XML parser state */
+static struct sub sub; /* result for the row being parsed */
+static char curclass[64]; /* class of the current <td>; emptied at </td> */
+static char spanclass[64]; /* class of a <span> in an "a1" cell; emptied at </span> */
+
+/* Return 1 when the tag names s1 and s2 are equal ignoring case, else 0. */
+static int
+istag(const char *s1, const char *s2) {
+	return strcasecmp(s1, s2) == 0;
+}
+
+/* Return 1 when the attribute names s1 and s2 are equal ignoring case, else 0. */
+static int
+isattr(const char *s1, const char *s2) {
+	return strcasecmp(s1, s2) == 0;
+}
+
+/*
+ * Character data handler: store the text of the current table cell in the
+ * field of the global `sub` selected by the cell's class (curclass).
+ *
+ * The text is copied to a local buffer (truncated to fit), leading and
+ * trailing whitespace is removed and CR/LF are replaced by spaces. Text that
+ * is empty after trimming is ignored.
+ *
+ * p and datalen are unused; data must be NUL-terminated.
+ */
+static void
+xml_handler_data(XMLParser *p, const char *data, size_t datalen) {
+	char buf[1024];
+	char *s;
+	size_t len;
+
+	(void)p;
+	(void)datalen;
+
+	/* curclass is only set while inside a <td> that has a class */
+	if(!curclass[0])
+		return;
+
+	/* skip leading space; the cast is required because isspace() is
+	 * undefined for negative values and the input may hold bytes >= 0x80 */
+	while(*data && isspace((unsigned char)*data))
+		data++;
+	strlcpy(buf, data, sizeof(buf));
+	for(s = buf; *s; s++) {
+		if(*s == '\r' || *s == '\n')
+			*s = ' ';
+	}
+	/* trim remaining space */
+	for(len = strlen(buf); len > 0; len--) {
+		if(!isspace((unsigned char)buf[len - 1]))
+			break;
+		buf[len - 1] = '\0';
+	}
+	if(!buf[0])
+		return;
+
+	/* link: plain text is the title, text inside a classed span the language */
+	if(strcmp(curclass, "a1") == 0) {
+		if(strcmp(spanclass, "") == 0) {
+			strlcpy(sub.title, buf, sizeof(sub.title));
+		} else {
+			strlcpy(sub.lang, buf, sizeof(sub.lang));
+		}
+	}
+	/* files */
+	if(strcmp(curclass, "a3") == 0) {
+		sub.files = atoi(buf);
+	}
+	/* hearing impaired? (hi stays 0 otherwise: sub is zeroed at each <tr>) */
+	if(strcmp(curclass, "a41") == 0) {
+		sub.hi = 1;
+	}
+	/* author / user profile */
+	if(strcmp(curclass, "a5") == 0) {
+		strlcpy(sub.author, buf, sizeof(sub.author));
+	}
+	/* description */
+	if(strcmp(curclass, "a6") == 0) {
+		strlcpy(sub.description, buf, sizeof(sub.description));
+	}
+}
+
+/*
+ * Start-of-tag handler: a <tr> begins a new result, so the global `sub` is
+ * cleared. Every other tag is ignored. p and taglen are unused.
+ */
+static void
+xml_handler_start_element(XMLParser *p, const char *tag, size_t taglen) {
+	(void)taglen;
+	(void)p;
+
+	if(!istag(tag, "tr"))
+		return;
+
+	/* forget everything collected for the previous row */
+	memset(&sub, 0, sizeof(sub));
+}
+
+/*
+ * End-of-tag handler.
+ * At </tr> the collected result is printed as one line of tab-separated
+ * fields, but only when the row was flagged as a subtitle (sub.issub == 1).
+ * </td> empties curclass and </span> empties spanclass.
+ * p, taglen and isshort are unused.
+ */
+static void
+xml_handler_end_element(XMLParser *p, const char *tag, size_t taglen,
+ int isshort)
+{
+ (void)p;
+ (void)taglen;
+ (void)isshort;
+
+ if(istag(tag, "tr") && sub.issub == 1) {
+ printf("LANG:%s\tTITLE:%s\tURL:http://subscene.com%s\tHI:%d\tF…
+ sub.lang, sub.title, sub.url, sub.hi, sub.files, sub.au…
+ } else if(istag(tag, "td")) {
+ curclass[0] = '\0';
+ } else if(istag(tag, "span")) {
+ spanclass[0] = '\0';
+ }
+}
+
+/*
+ * Attribute handler.
+ *  - class on <td>: remembered in curclass; the value "a1" flags the row
+ *    as a subtitle (sub.issub).
+ *  - class on <span> while curclass is "a1": remembered in spanclass.
+ *  - href on <a>: stored as sub.url in an "a1" cell and as sub.authorurl
+ *    in an "a5" cell.
+ * Values longer than the destination are truncated by strlcpy().
+ * NOTE(review): "href" is compared with strcmp() while "class" goes through
+ * the case-insensitive isattr() - confirm whether the difference is intended.
+ */
+static void
+xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen, const char *value, size_t valuelen)
+{
+ (void)p;
+ (void)taglen;
+ (void)namelen;
+ (void)valuelen;
+
+ if(istag(tag, "td")) {
+ if(isattr(name, "class")) {
+ strlcpy(curclass, value, sizeof(curclass));
+ /* link */
+ if(strcmp(value, "a1") == 0) {
+ sub.issub = 1;
+ }
+ }
+ } else if(istag(tag, "span")) {
+ if(strcmp(curclass, "a1") == 0) {
+ if(isattr(name, "class")) {
+ strlcpy(spanclass, value, sizeof(spanclass));
+ }
+ }
+ } else if(istag(tag, "a")) {
+ /* subtitle / author profile url */
+ if(strcmp(name, "href") == 0) {
+ if((strcmp(curclass, "a1") == 0)) {
+ strlcpy(sub.url, value, sizeof(sub.url));
+ }
+ if((strcmp(curclass, "a5") == 0)) {
+ strlcpy(sub.authorurl, value, sizeof(sub.autho…
+ }
+ }
+ }
+}
+
+/*
+ * Parse the document on stdin with the handlers above installed; each
+ * subtitle row found is printed by the end-of-tag handler.
+ * Always returns EXIT_SUCCESS.
+ */
+int
+main(void) {
+	xmlparser_init(&parser, stdin);
+
+	/* install the handlers after the parser has been initialised */
+	parser.xmldata = xml_handler_data;
+	parser.xmlattr = xml_handler_attr;
+	parser.xmltagend = xml_handler_end_element;
+	parser.xmltagstart = xml_handler_start_element;
+
+	xmlparser_parse(&parser);
+
+	return EXIT_SUCCESS;
+}
diff --git a/util.c b/util.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "util.h"
+
+/*
+ * Bounded string copy with the semantics of OpenBSD's strlcpy().
+ * Copies at most siz-1 characters from src to dst and always
+ * NUL-terminates dst (unless siz == 0, in which case dst is untouched).
+ * Returns strlen(src); if the return value is >= siz, truncation occurred.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz) {
+	size_t i = 0;
+
+	if (siz != 0) {
+		/* copy what fits, leaving room for the terminator */
+		for (; i < siz - 1 && src[i] != '\0'; i++)
+			dst[i] = src[i];
+		dst[i] = '\0';
+	}
+	/* walk the rest of src so the full source length is returned */
+	while (src[i] != '\0')
+		i++;
+	return i;
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,2 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stddef.h> /* size_t, so the header can be included on its own */
+
+/* remove any macro of this name so the prototype declares a real function */
+#undef strlcpy
+/*
+ * Copy src to dst of size siz: at most siz-1 characters are copied and dst is
+ * always NUL-terminated (unless siz == 0).
+ * Returns strlen(src); a return value >= siz means the copy was truncated.
+ */
+size_t strlcpy(char *dst, const char *src, size_t siz);
+
+#endif
diff --git a/xml.c b/xml.c
@@ -0,0 +1,325 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "xml.h"
+
+/*
+ * Read the next character from the parser's input stream.
+ * Returns what fgetc() returns: the character, or EOF.
+ * The block-buffered variant after the return is compiled out with #if 0.
+ */
+static __inline__ int /* like getc(); the buffered variant is disabled */
+xmlparser_getnext(XMLParser *x) {
+ return fgetc(x->fp);
+#if 0
+ if(x->readoffset >= x->readlastbytes) {
+ x->readoffset = 0;
+ if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf…
+ return EOF; /* 0 bytes read, assume EOF */
+ }
+ return (int)x->readbuf[x->readoffset++];
+#endif
+}
+
+/*
+ * Parse the attributes of the current tag, reading until '>' or EOF.
+ * The attribute name is collected in x->name (excess characters are
+ * dropped) and the value in x->data. For each attribute the xmlattrstart,
+ * xmlattr and xmlattrend handlers are called when set. A value that does
+ * not fit in x->data is delivered through several xmlattr calls.
+ * A '/' marks the tag as short (x->isshorttag) and a '?' is ignored.
+ */
+static __inline__ void
+xmlparser_parseattrs(XMLParser *x) {
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0;
+
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(isspace(c)) { /* TODO: simplify endname ? */
+ /* whitespace after at least one name character ends the name */
+ if(namelen)
+ endname = 1;
+ continue;
+ }
+ if(c == '?')
+ ; /* ignore */
+ else if(c == '=') {
+ x->name[namelen] = '\0';
+ } else if(namelen && ((endname && isalpha(c)) || (c == '>' || …
+ /* attribute without value */
+ x->name[namelen] = '\0';
+ if(x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, name…
+ if(x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, n…
+ endname = 0;
+ /* c becomes the first character of the next name */
+ x->name[0] = c;
+ namelen = 1;
+ } else if(namelen && (c == '\'' || c == '"')) {
+ /* attribute with value */
+ endsep = c; /* c is end separator */
+ if(x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
+ for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
+ /* entities get separate treatment only when xmlattrentity is set */
+ if(c == '&' && x->xmlattrentity) { /* entities…
+ x->data[valuelen] = '\0';
+ /* call data function with data before…
+ if(valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->tagle…
+ x->data[0] = c;
+ valuelen = 1;
+ while((c = xmlparser_getnext(x)) != EO…
+ if(c == endsep)
+ break;
+ if(valuelen < sizeof(x->data) …
+ x->data[valuelen++] = …
+ else {
+ /* TODO: entity too lo…
+ x->data[valuelen] = '\…
+ if(x->xmlattr)
+ x->xmlattr(x, …
+ valuelen = 0;
+ break;
+ }
+ if(c == ';') {
+ x->data[valuelen] = '\…
+ x->xmlattrentity(x, x-…
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if(c != endsep) {
+ if(valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ /* buffer full: hand over this chunk and start a new one with c */
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, …
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if(c == endsep) {
+ /* closing quote: deliver the (rest of the) value */
+ x->data[valuelen] = '\0';
+ if(x->xmlattr)
+ x->xmlattr(x, x->tag, x->tagle…
+ if(x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->ta…
+ break;
+ }
+ }
+ namelen = 0;
+ endname = 0;
+ } else if(namelen < sizeof(x->name) - 1)
+ x->name[namelen++] = c;
+ if(c == '>') {
+ break;
+ } else if(c == '/') {
+ x->isshorttag = 1;
+ namelen = 0;
+ x->name[0] = '\0';
+ }
+ }
+}
+
+/*
+ * Parse the body of a comment until the closing "-->" or EOF.
+ * Calls xmlcommentstart first, xmlcomment with the buffered text and
+ * xmlcommentend last, each only when set. Text that does not fit in
+ * x->data is delivered in several xmlcomment calls.
+ * NOTE(review): the dash counter i is reset only when a '>' is read, so two
+ * non-adjacent dashes followed by '>' (for example "a-b->") also end the
+ * comment - confirm whether that is intended.
+ */
+static __inline__ void
+xmlparser_parsecomment(XMLParser *x) {
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if(x->xmlcommentstart)
+ x->xmlcommentstart(x);
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '-' && i < 2)
+ i++;
+ else if(c == '>') {
+ if(i == 2) { /* -- */
+ /* drop the two buffered dashes before reporting the text */
+ if(datalen >= 2) {
+ datalen -= 2;
+ x->data[datalen] = '\0';
+ if(x->xmlcomment)
+ x->xmlcomment(x, x->data, data…
+ }
+ if(x->xmlcommentend)
+ x->xmlcommentend(x);
+ break;
+ }
+ i = 0;
+ }
+ /* || (c == '-' && d >= sizeof(x->data) - 4)) { */
+ /* TODO: what if the end has --, and it's cut on the boundary,…
+ if(datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ else {
+ /* buffer full: hand over this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if(x->xmlcomment)
+ x->xmlcomment(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+/* TODO:
+ * <test><![CDATA[1234567dddd8]]]>
+ *
+ * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
+ * test comment function too for similar bug?
+ *
+ */
+/*
+ * Parse the body of a CDATA section until the closing "]]>" or EOF.
+ * Calls xmlcdatastart first, xmlcdata with the buffered text and
+ * xmlcdataend last, each only when set. Text that does not fit in x->data
+ * is delivered in several xmlcdata calls.
+ * NOTE(review): the bracket counter i is reset only when a '>' is read, so
+ * two non-adjacent ']' followed by '>' also end the section - confirm
+ * whether that is intended.
+ */
+static __inline__ void
+xmlparser_parsecdata(XMLParser *x) {
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if(x->xmlcdatastart)
+ x->xmlcdatastart(x);
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == ']' && i < 2) {
+ i++;
+ } else if(c == '>') {
+ if(i == 2) { /* ]] */
+ /* drop the two buffered brackets before reporting the text */
+ if(datalen >= 2) {
+ datalen -= 2;
+ x->data[datalen] = '\0';
+ if(x->xmlcdata && datalen)
+ x->xmlcdata(x, x->data, datale…
+ }
+ if(x->xmlcdataend)
+ x->xmlcdataend(x);
+ break;
+ }
+ i = 0;
+ }
+ /* TODO: what if the end has ]>, and it's cut on the boundary …
+ if(datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: hand over this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if(x->xmlcdata)
+ x->xmlcdata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+/*
+ * Reset the parser x to all zeroes (which clears every handler and all
+ * internal state) and make it read from fp.
+ */
+void
+xmlparser_init(XMLParser *x, FILE *fp) {
+	memset(x, 0, sizeof(*x));
+	x->fp = fp;
+}
+
+/*
+ * Main parse loop: read from x->fp until EOF and call the handlers.
+ * Input before the first '<' is skipped. After that each '<' starts either
+ * a "<!" construct (comment or CDATA section) or a normal tag; everything
+ * else is collected in x->data and reported as character data, in several
+ * chunks when it does not fit in the buffer.
+ */
+void
+xmlparser_parse(XMLParser *x) {
+ int c, ispi;
+ size_t datalen, tagdatalen, taglen;
+
+ while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < …
+
+ while(c != EOF) {
+ if(c == '<') { /* parse tag */
+ if((c = xmlparser_getnext(x)) == EOF)
+ return;
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ if(c == '!') { /* cdata and comments */
+ /* the first characters after "<!" are kept in x->data to tell the constructs apart */
+ for(tagdatalen = 0; (c = xmlparser_getnext(x))…
+ if(tagdatalen <= strlen("[CDATA[")) /*…
+ x->data[tagdatalen++] = c; /* …
+ if(c == '>')
+ break;
+ else if(c == '-' && tagdatalen == strl…
+ (x->data[0] == '-')) {…
+ xmlparser_parsecomment(x);
+ break;
+ } else if(c == '[') {
+ if(tagdatalen == strlen("[CDAT…
+ x->data[1] == 'C' && x…
+ x->data[3] == 'A' && x…
+ x->data[5] == 'A' && x…
+ xmlparser_parsecdata(x…
+ break;
+ #if 0
+ } else {
+ /* TODO ? */
+ /* markup declaration …
+ while((c = xmlparser_g…
+ #endif
+ }
+ }
+ }
+ } else { /* normal tag (open, short open, close), proc…
+ if(isspace(c))
+ while((c = xmlparser_getnext(x)) != EO…
+ if(c == EOF)
+ return;
+ x->tag[0] = c;
+ /* a leading '?' marks a processing instruction, handled like a short tag */
+ ispi = (c == '?') ? 1 : 0;
+ x->isshorttag = ispi;
+ taglen = 1;
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '/') /* TODO: simplify short t…
+ x->isshorttag = 1; /* short ta…
+ else if(c == '>' || isspace(c)) {
+ x->tag[taglen] = '\0';
+ if(x->tag[0] == '/') { /* end …
+ x->taglen = --taglen; …
+ if(taglen && x->xmltag…
+ x->xmltagend(x…
+ } else {
+ x->taglen = taglen;
+ if(x->xmltagstart)
+ x->xmltagstart…
+ if(isspace(c))
+ xmlparser_pars…
+ if(x->xmltagstartparse…
+ x->xmltagstart…
+ }
+ if((x->isshorttag || ispi) && …
+ x->xmltagend(x, x->tag…
+ break;
+ } else if(taglen < sizeof(x->tag) - 1)
+ x->tag[taglen++] = c;
+ }
+ }
+ } else {
+ /* parse data */
+ datalen = 0;
+ if(x->xmldatastart)
+ x->xmldatastart(x);
+ while((c = xmlparser_getnext(x)) != EOF) {
+ if(c == '&' && x->xmldataentity) {
+ /* NOTE(review): xmldata appears to be called here without the NULL check used elsewhere in this loop - confirm */
+ if(datalen) {
+ x->data[datalen] = '\0';
+ x->xmldata(x, x->data, datalen…
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while((c = xmlparser_getnext(x)) != EO…
+ if(c == '<')
+ break;
+ if(datalen < sizeof(x->data) -…
+ x->data[datalen++] = c;
+ if(isspace(c))
+ break;
+ else if(c == ';') {
+ x->data[datalen] = '\0…
+ x->xmldataentity(x, x-…
+ datalen = 0;
+ break;
+ }
+ }
+ } else if(c != '<') {
+ if(datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: hand over this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if(x->xmldata)
+ x->xmldata(x, x->data,…
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ if(c == '<') {
+ /* the next tag starts: deliver what is buffered and return to the tag branch */
+ x->data[datalen] = '\0';
+ if(x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen…
+ if(x->xmldataend)
+ x->xmldataend(x);
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/xml.h b/xml.h
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/*
+ * Parser context: the callbacks invoked while parsing, the input stream and
+ * the parser's internal buffers. xmlparser_init() zeroes the whole struct;
+ * callers assign the handlers they need afterwards.
+ */
+typedef struct xmlparser {
+ /* handlers */
+ /* tags: start, start after the attributes were parsed, end */
+ void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t tagle…
+ void (*xmltagstartparsed)(struct xmlparser *p, const char *tag,
+ size_t taglen, int isshort);
+ void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen,
+ int isshort);
+ /* character data between tags */
+ void (*xmldatastart)(struct xmlparser *p);
+ void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen);
+ void (*xmldataend)(struct xmlparser *p);
+ void (*xmldataentity)(struct xmlparser *p, const char *data,
+ size_t datalen);
+ /* attributes of the current tag */
+ void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t tagl…
+ const char *name, size_t namelen);
+ void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen, const char *value,
+ size_t valuelen);
+ void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen,
+ const char *name, size_t namelen);
+ void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t tag…
+ const char *name, size_t namelen, const char *value,
+ size_t valuelen);
+ /* CDATA sections */
+ void (*xmlcdatastart)(struct xmlparser *p);
+ void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen…
+ void (*xmlcdataend)(struct xmlparser *p);
+ /* comments */
+ void (*xmlcommentstart)(struct xmlparser *p);
+ void (*xmlcomment)(struct xmlparser *p, const char *comment,
+ size_t commentlen);
+ void (*xmlcommentend)(struct xmlparser *p);
+
+ FILE *fp; /* file stream to read from */
+
+ /* private; internal state */
+ char tag[1024]; /* current tag */
+ int isshorttag; /* current tag is in short form ? */
+ size_t taglen;
+ char name[256]; /* current attribute name */
+ char data[BUFSIZ]; /* data buffer used for tag and attribute data */
+ size_t readoffset;
+ size_t readlastbytes;
+ unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnex…
+} XMLParser;
+
+/* Zero the parser x and set fp as its input stream. */
+void xmlparser_init(XMLParser *x, FILE *fp);
+/* Parse the input stream of x until EOF, calling the handlers that are set. */
+void xmlparser_parse(XMLParser *x);
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.