Introduction
Introduction Statistics Contact Development Disclaimer Help
Further expand URLs in retweets - tscrape - twitter scraper
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
commit e2c4c24378d937edd6f9d717267d9f08b268df78
parent d204e2373cc9f7e3f3afa3d4f2afb7976f67b4ae
Author: Leonardo Taccari <[email protected]>
Date: Sat, 6 Jun 2020 01:11:57 +0200
Further expand URLs in retweets
Diffstat:
M tscrape.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
---
diff --git a/tscrape.c b/tscrape.c
@@ -250,6 +250,12 @@ addurl(const char *url, const char *expanded_url)
{
struct url *u;
+ for (u = urls; u; u = u->next) {
+ if (!strncmp(url, u->url, u->url_len)) {
+ return;
+ }
+ }
+
if (!(u = calloc(1, sizeof(*u))))
err(1, "calloc");
strlcpy(u->url, url, sizeof(u->url));
@@ -389,25 +395,26 @@ processnodes(struct json_node *nodes, size_t depth, const…
}
}
-// TODO: retweeted.status.entities.urls[]
-#if 0
- if (depth == 6 &&
+ if (depth == 7 &&
nodes[0].type == JSON_TYPE_ARRAY &&
nodes[1].type == JSON_TYPE_OBJECT &&
nodes[2].type == JSON_TYPE_OBJECT &&
nodes[3].type == JSON_TYPE_OBJECT &&
nodes[4].type == JSON_TYPE_ARRAY &&
- nodes[5].type == JSON_TYPE_STRING &&
+ nodes[5].type == JSON_TYPE_OBJECT &&
+ nodes[6].type == JSON_TYPE_STRING &&
!strcmp(nodes[2].name, "retweeted_status") &&
!strcmp(nodes[3].name, "entities") &&
!strcmp(nodes[4].name, "urls")) {
- if (!strcmp(nodes[5].name, "url")) {
- printf("DEBUG: url: %s\n", str);
- } else if (!strcmp(nodes[5].name, "expanded_url")) {
- printf("DEBUG: expanded_url: %s\n", str);
+ if (!strcmp(nodes[6].name, "url")) {
+// printf("DEBUG: url: %s\n", str);
+ strlcpy(url, str, sizeof(url));
+ } else if (!strcmp(nodes[6].name, "expanded_url")) {
+// printf("DEBUG: expanded_url: %s\n", str);
+ addurl(url, str);
+ url[0] = '\0';
}
}
-#endif
}
int
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.