Further expand URLs in retweets - tscrape - twitter scraper | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit e2c4c24378d937edd6f9d717267d9f08b268df78 | |
parent d204e2373cc9f7e3f3afa3d4f2afb7976f67b4ae | |
Author: Leonardo Taccari <[email protected]> | |
Date: Sat, 6 Jun 2020 01:11:57 +0200 | |
Further expand URLs in retweets | |
Diffstat: | |
M tscrape.c | 25 ++++++++++++++++--------- | |
1 file changed, 16 insertions(+), 9 deletions(-) | |
--- | |
diff --git a/tscrape.c b/tscrape.c | |
@@ -250,6 +250,12 @@ addurl(const char *url, const char *expanded_url) | |
{ | |
struct url *u; | |
+ for (u = urls; u; u = u->next) { | |
+ if (!strncmp(url, u->url, u->url_len)) { | |
+ return; | |
+ } | |
+ } | |
+ | |
if (!(u = calloc(1, sizeof(*u)))) | |
err(1, "calloc"); | |
strlcpy(u->url, url, sizeof(u->url)); | |
@@ -389,25 +395,26 @@ processnodes(struct json_node *nodes, size_t depth, const… | |
} | |
} | |
-// TODO: retweeted.status.entities.urls[] | |
-#if 0 | |
- if (depth == 6 && | |
+ if (depth == 7 && | |
nodes[0].type == JSON_TYPE_ARRAY && | |
nodes[1].type == JSON_TYPE_OBJECT && | |
nodes[2].type == JSON_TYPE_OBJECT && | |
nodes[3].type == JSON_TYPE_OBJECT && | |
nodes[4].type == JSON_TYPE_ARRAY && | |
- nodes[5].type == JSON_TYPE_STRING && | |
+ nodes[5].type == JSON_TYPE_OBJECT && | |
+ nodes[6].type == JSON_TYPE_STRING && | |
!strcmp(nodes[2].name, "retweeted_status") && | |
!strcmp(nodes[3].name, "entities") && | |
!strcmp(nodes[4].name, "urls")) { | |
- if (!strcmp(nodes[5].name, "url")) { | |
- printf("DEBUG: url: %s\n", str); | |
- } else if (!strcmp(nodes[5].name, "expanded_url")) { | |
- printf("DEBUG: expanded_url: %s\n", str); | |
+ if (!strcmp(nodes[6].name, "url")) { | |
+// printf("DEBUG: url: %s\n", str); | |
+ strlcpy(url, str, sizeof(url)); | |
+ } else if (!strcmp(nodes[6].name, "expanded_url")) { | |
+// printf("DEBUG: expanded_url: %s\n", str); | |
+ addurl(url, str); | |
+ url[0] = '\0'; | |
} | |
} | |
-#endif | |
} | |
int |