selector syntax: document it and add feature to filter on a specific nth node -… | |
git clone git://git.codemadness.org/webdump | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 56ec7ea6c49d79cc3aaf301d2e6040e15d17785a | |
parent 94f0ad42fcfbe17b01d9e573a786435d1acc0232 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Fri, 8 Sep 2023 11:07:57 +0200 | |
selector syntax: document it and add feature to filter on a specific nth node | |
Diffstat: | |
M webdump.1 | 38 +++++++++++++++++++++++++++--… | |
M webdump.c | 33 ++++++++++++++++++++++++++++-… | |
2 files changed, 63 insertions(+), 8 deletions(-) | |
--- | |
diff --git a/webdump.1 b/webdump.1 | |
@@ -1,4 +1,4 @@ | |
-.Dd September 7, 2023 | |
+.Dd September 8, 2023 | |
.Dt WEBDUMP 1 | |
.Os | |
.Sh NAME | |
@@ -36,12 +36,16 @@ not enabled. | |
.It Fl r | |
Toggle if line-wrapping mode is enabled, by default it is not enabled. | |
.It Fl s | |
-CSS-like selectors, this sets a reader mode to hide content | |
-matching the selector, for example: "main" or "main#id" or "main.class". | |
+CSS-like selectors, this sets a reader mode to show only content matching the | |
+selector, see the section | |
+.Sx SELECTOR SYNTAX | |
+for the syntax. | |
Multiple selectors can be specified by separating them with a comma. | |
.It Fl u | |
-CSS-like selectors, this sets a reader mode to hide content | |
-matching the selector, for example: "main" or "main#id" or "main.class". | |
+CSS-like selectors, this sets a reader mode to hide content matching the | |
+selector, see the section | |
+.Sx SELECTOR SYNTAX | |
+for the syntax. | |
Multiple selectors can be specified by separating them with a comma. | |
.It Fl w Ar termwidth | |
The terminal width. | |
@@ -49,6 +53,30 @@ The default is 77 characters. | |
.It Fl x | |
Write resources as TAB-separated lines to file descriptor 3. | |
.El | |
+.Sh SELECTOR SYNTAX | |
+The syntax has some inspiration from CSS, but it is more limited. | |
+Some examples: | |
+.Bl -item | |
+.It | |
+"main" would match on the "main" tags. | |
+.It | |
+"#someid" would match on any tag which has the id attribute set to "someid". | |
+.It | |
+".someclass" would match on any tag which has the class attribute set to | |
+"someclass". | |
+.It | |
+"main#someid" would match on the "main" tag which has the id attribute set to | |
+"someid". | |
+.It | |
+"main.someclass" would match on the "main" tags which have the class | |
+attribute set to "someclass". | |
+.It | |
+"ul li" would match on any "li" tag which also has a parent "ul" tag. | |
+.It | |
+"li@0" would match on any "li" tag which is also the first child element of its | |
+parent container. | |
+Note that this differs from filtering on a collection of "li" elements. | |
+.El | |
.Sh EXIT STATUS | |
.Ex -std | |
.Sh EXAMPLES | |
diff --git a/webdump.c b/webdump.c | |
@@ -123,6 +123,7 @@ struct node { | |
struct selectornode { | |
char tagname[256]; | |
+ long index; /* index of node to match on: -1 if not matching on index … | |
/* attributes */ | |
char id[256]; | |
char classnames[1024]; | |
@@ -1073,11 +1074,13 @@ int | |
compileselector(const char *sel, struct selectornode *nodes, size_t maxnodes) | |
{ | |
int depth = 0, len; | |
+ long l; | |
const char *s, *start; | |
char tmp[256]; | |
int nameset = 0; | |
memset(&nodes[0], 0, sizeof(nodes[0])); | |
+ nodes[0].index = -1; | |
s = sel; | |
for (; *s && ISSPACE((unsigned char)*s); s++) | |
@@ -1087,7 +1090,7 @@ compileselector(const char *sel, struct selectornode *nod… | |
for (; ; s++) { | |
/* end of tag */ | |
if (!nameset && | |
- (*s == '#' || *s == '.' || *s == '[' || | |
+ (*s == '#' || *s == '.' || *s == '@' || | |
*s == '\0' || ISSPACE((unsigned char)*s))) { | |
nameset = 1; | |
len = s - start; /* tag name */ | |
@@ -1111,15 +1114,32 @@ compileselector(const char *sel, struct selectornode *n… | |
nameset = 0; | |
memset(&nodes[depth], 0, sizeof(nodes[depth])); | |
+ nodes[depth].index = -1; | |
/* end of selector */ | |
if (*s == '\0') | |
break; | |
} | |
+ /* index */ | |
+ if (*s == '@') { | |
+ len = strcspn(s + 1, ".#@ \t\n"); | |
+ if (len >= sizeof(tmp)) | |
+ return 0; | |
+ memcpy(tmp, s + 1, len); | |
+ tmp[len] = '\0'; | |
+ | |
+ l = strtol(tmp, NULL, 10); | |
+ if (l >= 0) | |
+ nodes[depth].index = l; | |
+ s += len; | |
+ start = s + 1; | |
+ continue; | |
+ } | |
+ | |
/* id */ | |
if (*s == '#') { | |
- len = strcspn(s + 1, ".#[ \t\n"); | |
+ len = strcspn(s + 1, ".#@ \t\n"); | |
if (len >= sizeof(tmp)) | |
return 0; | |
memcpy(tmp, s + 1, len); | |
@@ -1132,7 +1152,7 @@ compileselector(const char *sel, struct selectornode *nod… | |
/* class */ | |
if (*s == '.') { | |
- len = strcspn(s + 1, ".#[ \t\n"); | |
+ len = strcspn(s + 1, ".#@ \t\n"); | |
if (len >= sizeof(tmp)) | |
return 0; | |
memcpy(tmp, s + 1, len); | |
@@ -1225,6 +1245,13 @@ iscssmatch(struct selector *sel, struct node *root, int … | |
!isclassmatch(root[d].classnames, sel->nodes[md].classname… | |
continue; /* no */ | |
+ /* index matched */ | |
+ if (sel->nodes[md].index != -1 && | |
+ (d == 0 || | |
+ root[d - 1].nchildren == 0 || | |
+ sel->nodes[md].index != root[d - 1].nchildren - 1)) | |
+ continue; | |
+ | |
md++; | |
/* all matched of one selector */ | |
if (md == sel->depth) |