improve link references, add option to show full URL inline - webdump - HTML to… | |
git clone git://git.codemadness.org/webdump | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 6365a78f6c050106e64b281d29d8ef550f131bf1 | |
parent 56ec7ea6c49d79cc3aaf301d2e6040e15d17785a | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Fri, 8 Sep 2023 11:25:13 +0200 | |
improve link references, add option to show full URL inline | |
- fix URL references not being visible when only the -l option is specified | |
(without -i). Now each option can be specified separately. | |
- add -I option to show the full URL inline. | |
Diffstat: | |
M webdump.1 | 5 ++++- | |
M webdump.c | 90 +++++++++++++++++------------… | |
2 files changed, 53 insertions(+), 42 deletions(-) | |
--- | |
diff --git a/webdump.1 b/webdump.1 | |
@@ -6,7 +6,7 @@ | |
.Nd convert HTML to plain-text | |
.Sh SYNOPSIS | |
.Nm | |
-.Op Fl 8ailrx | |
+.Op Fl 8aiIlrx | |
.Op Fl b Ar baseurl | |
.Op Fl s Ar selector | |
.Op Fl u Ar selector | |
@@ -30,6 +30,9 @@ This is used to make links absolute. | |
.It Fl i | |
Toggle if link reference numbers are displayed inline or not, by default it is | |
not enabled. | |
+.It Fl I | |
+Toggle if URLs for link references are displayed inline or not, by default it is | |
+not enabled. | |
.It Fl l | |
Toggle if link references are displayed at the bottom or not, by default it is | |
not enabled. | |
diff --git a/webdump.c b/webdump.c | |
@@ -47,6 +47,7 @@ struct uri { | |
static int allowansi = 0; /* allow ANSI escape codes */ | |
static int showrefbottom = 0; /* show link references at the bottom */ | |
static int showrefinline = 0; /* show link reference number inline */ | |
+static int showurlinline = 0; /* show full link reference inline */ | |
static int linewrap = 0; /* line-wrapping */ | |
static int termwidth = 77; /* terminal width */ | |
static int resources = 0; /* write resources line-by-line to fd 3? */ | |
@@ -1319,46 +1320,49 @@ handleinlinelink(void) | |
char buf[4096], *url; | |
int b, r; | |
- /* show links as reference at the bottom */ | |
- if ((showrefbottom || resources) && (attr_src.len || attr_href.len)) { | |
- /* by default use the original URL */ | |
- if (attr_src.len) | |
- url = attr_src.data; | |
- else | |
- url = attr_href.data; | |
- | |
- b = -1; | |
- if (uri_hasscheme(url)) | |
- ; /* already absolute: nothing to do */ | |
- else if (basehref[0]) /* prefer -b option over <base> */ | |
- b = uri_parse(basehref, &base); | |
- else if (basehrefdoc[0]) | |
- b = uri_parse(basehrefdoc, &base); | |
- | |
- if (b != -1 && | |
- uri_parse(url, &olduri) != -1 && | |
- uri_makeabs(&newuri, &olduri, &base) != -1 && | |
- newuri.proto[0]) { | |
- r = uri_format(buf, sizeof(buf), &newuri); | |
- if (r >= 0 && (size_t)r < sizeof(buf)) | |
- url = buf; | |
- } | |
+ if (!showrefbottom && !showrefinline && !showurlinline && !resources) | |
+ return; /* there is no need to collect the reference */ | |
- if (!url[0]) | |
- return; | |
+ if (!attr_src.len && !attr_href.len) | |
+ return; /* there is no reference */ | |
- cur = &nodes[curnode]; | |
+ /* by default use the original URL */ | |
+ if (attr_src.len) | |
+ url = attr_src.data; | |
+ else | |
+ url = attr_href.data; | |
+ | |
+ b = -1; | |
+ if (uri_hasscheme(url)) | |
+ ; /* already absolute: nothing to do */ | |
+ else if (basehref[0]) /* prefer -b option over <base> */ | |
+ b = uri_parse(basehref, &base); | |
+ else if (basehrefdoc[0]) | |
+ b = uri_parse(basehrefdoc, &base); | |
+ | |
+ if (b != -1 && | |
+ uri_parse(url, &olduri) != -1 && | |
+ uri_makeabs(&newuri, &olduri, &base) != -1 && | |
+ newuri.proto[0]) { | |
+ r = uri_format(buf, sizeof(buf), &newuri); | |
+ if (r >= 0 && (size_t)r < sizeof(buf)) | |
+ url = buf; | |
+ } | |
- if (showrefinline && !(cur->tag.displaytype & DisplayNone)) { | |
- string_clear(&nodes_links[curnode]); | |
- string_append(&nodes_links[curnode], url, strlen(url)); | |
- } | |
+ if (!url[0]) | |
+ return; | |
+ | |
+ cur = &nodes[curnode]; | |
- /* add hidden links directly to the reference, | |
- the order doesn't matter */ | |
- if (cur->tag.displaytype & DisplayNone) | |
- addlinkref(url, cur->tag.name, 1); | |
+ if (!(cur->tag.displaytype & DisplayNone)) { | |
+ string_clear(&nodes_links[curnode]); | |
+ string_append(&nodes_links[curnode], url, strlen(url)); | |
} | |
+ | |
+ /* add hidden links directly to the reference, | |
+ the order doesn't matter */ | |
+ if (cur->tag.displaytype & DisplayNone) | |
+ addlinkref(url, cur->tag.name, 1); | |
} | |
void | |
@@ -1574,11 +1578,12 @@ endnode(struct node *cur) | |
/* add link and show the link number in the visible order */ | |
if (!ishidden && nodes_links[curnode].len > 0) { | |
addlinkref(nodes_links[curnode].data, cur->tag.name, ishidden); | |
-#if 1 | |
- hprintf("[%zu]", ++linkcount); | |
-#else | |
- hprintf("[%s: %s]", cur->tag.name, nodes_links[curnode].data); | |
-#endif | |
+ if (showrefinline) | |
+ hprintf("[%zu]", ++linkcount); | |
+ if (showurlinline) | |
+ hprintf(" [%s: %s]", | |
+ !tagcmp(cur->tag.name, "a") ? "link" : cur->ta… | |
+ nodes_links[curnode].data); | |
} | |
handleendtag(&(cur->tag)); | |
@@ -2014,7 +2019,7 @@ xmlattrstart(XMLParser *p, const char *t, size_t tl, cons… | |
void | |
usage(void) | |
{ | |
- fprintf(stderr, "%s [-8ailrx] [-b basehref] [-s selector] [-u selector… | |
+ fprintf(stderr, "%s [-8aiIlrx] [-b basehref] [-s selector] [-u selecto… | |
exit(1); | |
} | |
@@ -2038,6 +2043,9 @@ main(int argc, char **argv) | |
case 'i': | |
showrefinline = !showrefinline; | |
break; | |
+ case 'I': | |
+ showurlinline = !showurlinline; | |
+ break; | |
case 'l': | |
showrefbottom = !showrefbottom; | |
break; |