Introduction
Introduction Statistics Contact Development Disclaimer Help
parse own username and fullname from data, add item username and fullname - tsc…
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
commit 4640420521e94158d80f94202ed40f7dc4a66169
parent f712b91a8db0fb66f7facf349ea859da07717dc7
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 12 Aug 2017 17:15:41 +0200
parse own username and fullname from data, add item username and fullname
Diffstat:
M tscrape.c | 60 +++++++++++++++++++----------…
M util.h | 3 ++-
2 files changed, 38 insertions(+), 25 deletions(-)
---
diff --git a/tscrape.c b/tscrape.c
@@ -22,17 +22,17 @@ enum {
Stream = 2,
Header = 4,
Timestamp = 8,
- Text = 16,
- Fullname = 32,
- Username = 64
+ Text = 16
};
/* data */
static char fullname[1024];
+static int ispinned;
+static char itemusername[1024];
+static char itemfullname[1024];
static char timestamp[16];
static char text[4096];
static char username[1024];
-static int ispinned;
static char classname[256];
static char datatime[16];
@@ -50,13 +50,17 @@ printtweet(void)
if (parsetime(timestamp, &t, buf, sizeof(buf)) != -1)
printf("%lld", (long long)t);
putchar('\t');
+ printescape(username);
+ putchar('\t');
+ printescape(fullname);
+ putchar('\t');
printescape(text);
putchar('\t');
printescape(itemid);
putchar('\t');
- printescape(username);
+ printescape(itemusername);
putchar('\t');
- printescape(fullname);
+ printescape(itemfullname);
putchar('\t');
printescape(retweetid);
putchar('\t');
@@ -93,9 +97,7 @@ xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
if (!strcmp(t, "p"))
state &= ~Text;
else if (!strcmp(t, "span"))
- state &= ~(Timestamp|Username);
- else if (!strcmp(t, "strong"))
- state &= ~Fullname;
+ state &= ~(Timestamp);
}
static void
@@ -118,8 +120,8 @@ xmltagstartparsed(XMLParser *x, const char *t, size_t tl, i…
state = 0;
} else if (!strcmp(t, "li") && isclassmatch(v, STRP("js-stream-item"))…
state |= Item;
- datatime[0] = text[0] = timestamp[0] = fullname[0] = '\0';
- itemid[0] = username[0] = retweetid[0] = '\0';
+ datatime[0] = text[0] = timestamp[0] = itemfullname[0] = '\0';
+ itemid[0] = itemusername[0] = retweetid[0] = '\0';
ispinned = 0;
if (isclassmatch(v, STRP("js-pinned")))
ispinned = 1;
@@ -129,14 +131,10 @@ xmltagstartparsed(XMLParser *x, const char *t, size_t tl,…
state |= Stream;
} else if (!strcmp(t, "a") && isclassmatch(v, STRP("js-action-…
state |= Header;
- } else if (!strcmp(t, "strong") && isclassmatch(v, STRP("fulln…
- state |= Fullname;
} else if (!strcmp(t, "span") && isclassmatch(v, STRP("js-shor…
state |= Timestamp;
strlcpy(timestamp, datatime, sizeof(timestamp));
datatime[0] = '\0';
- } else if (!strcmp(t, "span") && isclassmatch(v, STRP("usernam…
- state |= Username;
}
}
if ((state & Text) && !strcmp(t, "a") && !isspace(text[0]))
@@ -147,6 +145,17 @@ static void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
const char *v, size_t vl)
{
+ /* NOTE: assumes classname attribute is set before data-* in current t…
+ if (!state && !strcmp(t, "div") && isclassmatch(classname, STRP("user-…
+ if (!strcmp(a, "data-screen-name")) {
+ strlcat(username, " ", sizeof(username));
+ strlcat(username, v, sizeof(username));
+ } else if (!strcmp(a, "data-name")) {
+ strlcat(fullname, " ", sizeof(fullname));
+ strlcat(fullname, v, sizeof(fullname));
+ }
+ }
+
if (!strcmp(a, "class")) {
strlcat(classname, v, sizeof(classname));
} else if (state & Item) {
@@ -155,6 +164,16 @@ xmlattr(XMLParser *x, const char *t, size_t tl, const char…
strlcpy(itemid, v, sizeof(itemid));
else if (!strcmp(a, "data-retweet-id"))
strlcpy(retweetid, v, sizeof(retweetid));
+
+ if (isclassmatch(classname, STRP("js-stream-tweet"))) {
+ if (!strcmp(a, "data-screen-name")) {
+ strlcat(itemusername, " ", sizeof(item…
+ strlcat(itemusername, v, sizeof(itemus…
+ } else if (!strcmp(a, "data-name")) {
+ strlcat(itemfullname, " ", sizeof(item…
+ strlcat(itemfullname, v, sizeof(itemfu…
+ }
+ }
} else if (!strcmp(t, "span") && !strcmp(a, "data-time")) {
/* UNIX timestamp */
strlcpy(datatime, v, sizeof(datatime));
@@ -183,14 +202,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, cons…
static void
xmldata(XMLParser *x, const char *d, size_t dl)
{
- if (state & Username) {
- if (d[0] == '@')
- strlcat(username, " ", sizeof(username));
- strlcat(username, d, sizeof(username));
- } else if (state & Fullname) {
- strlcat(fullname, " ", sizeof(fullname));
- strlcat(fullname, d, sizeof(fullname));
- } else if (state & Text) {
+ if (state & Text) {
if (!isclassmatch(classname, STRP("u-hidden")))
strlcat(text, d, sizeof(text));
}
@@ -202,7 +214,7 @@ xmldataentity(XMLParser *x, const char *d, size_t dl)
char buf[16];
ssize_t len;
- if (!(state & (Text|Username|Fullname)))
+ if (!(state & Text))
return;
if ((len = html_entitytostr(d, buf, sizeof(buf))) > 0)
xmldata(x, buf, (size_t)len);
diff --git a/util.h b/util.h
@@ -24,8 +24,9 @@ struct feed {
enum {
FieldUnixTimestamp = 0,
- FieldText, FieldItemid,
FieldUsername, FieldFullname,
+ FieldText, FieldItemid,
+ FieldItemUsername, FieldItemFullname,
FieldRetweetid, FieldIspinned,
FieldLast
};
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.