Introduction
Introduction Statistics Contact Development Disclaimer Help
improve forms a bit - webdump - HTML to plain-text converter for webpages
git clone git://git.codemadness.org/webdump
Log
Files
Refs
README
LICENSE
---
commit 630f76162a192327a3eecd4fc0adcb9b31cd4504
parent 0705fb754f00c7866b2cc8cee0739a88a584a2e1
Author: Hiltjo Posthuma <[email protected]>
Date: Fri, 8 Sep 2023 15:05:38 +0200
improve forms a bit
- Treat fieldset and legend as block elements.
- Support more input types; a missing or unsupported type is rendered as "text".
- Show the default selected value for radio and checkboxes.
- Don't show hidden input types.
- Add a DisplayInput value to enum DisplayType so input tags can be checked faster.
Diffstat:
M webdump.c | 64 +++++++++++++++++++++--------…
1 file changed, 44 insertions(+), 20 deletions(-)
---
diff --git a/webdump.c b/webdump.c
@@ -68,16 +68,17 @@ enum DisplayType {
DisplayUnknown = 0,
DisplayInline = 1 << 0,
DisplayInlineBlock = 1 << 1, /* unused for now */
- DisplayBlock = 1 << 2,
- DisplayNone = 1 << 3,
- DisplayPre = 1 << 4,
- DisplayList = 1 << 5,
- DisplayListOrdered = 1 << 6,
- DisplayListItem = 1 << 7,
- DisplayTable = 1 << 8,
- DisplayTableRow = 1 << 9,
- DisplayTableCell = 1 << 10,
- DisplayHeader = 1 << 11
+ DisplayInput = 1 << 2,
+ DisplayBlock = 1 << 3,
+ DisplayNone = 1 << 4,
+ DisplayPre = 1 << 5,
+ DisplayList = 1 << 6,
+ DisplayListOrdered = 1 << 7,
+ DisplayListItem = 1 << 8,
+ DisplayTable = 1 << 9,
+ DisplayTableRow = 1 << 10,
+ DisplayTableCell = 1 << 11,
+ DisplayHeader = 1 << 12
};
/* ANSI markup */
@@ -143,7 +144,9 @@ struct selectors {
};
static const char *str_bullet_item = "* ";
+static const char *str_checkbox_checked = "x";
static const char *str_ruler = "-";
+static const char *str_radio_checked = "*";
/* base href, to make URLs absolute */
static char *basehref = "";
@@ -153,6 +156,7 @@ static struct uri base;
/* buffers for some attributes of the current tag */
String attr_alt; /* alt attribute */
+String attr_checked; /* checked attribute */
String attr_class; /* class attribute */
String attr_href; /* href attribute */
String attr_id; /* id attribute */
@@ -221,6 +225,7 @@ static struct tag tags[] = {
{ "dt", DisplayBlock, MarkupBold, 0, …
{ "em", DisplayInline, MarkupItalic, 0, …
{ "embed", DisplayInline, 0, 0, …
+{ "fieldset", DisplayBlock, 0, 0, …
{ "figcaption", DisplayBlock, 0, 0, …
{ "figure", DisplayBlock, 0, 0, …
{ "footer", DisplayBlock, 0, 0, …
@@ -236,8 +241,9 @@ static struct tag tags[] = {
{ "html", DisplayBlock, 0, 0, …
{ "i", DisplayInline, MarkupItalic, 0, …
{ "img", DisplayInline, MarkupUnderline, 0, …
-{ "input", DisplayInline, 0, 0, …
+{ "input", DisplayInput, 0, 0, …
{ "label", DisplayInline, MarkupBold, 0, …
+{ "legend", DisplayBlock, 0, 0, …
{ "li", DisplayListItem, 0, DisplayList…
{ "link", DisplayInline, 0, 0, …
{ "main", DisplayBlock, 0, 0, …
@@ -1684,6 +1690,7 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
cur = &nodes[curnode];
string_clear(&attr_alt);
+ string_clear(&attr_checked);
string_clear(&attr_class);
string_clear(&attr_href);
string_clear(&attr_id);
@@ -1891,18 +1898,23 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t t…
if (!tagcmp(cur->tag.name, "input")) {
if (!attr_type.len) {
hprintf("[%-15s]", attr_value.len ? attr_value.data : …
- } else if (!strcasecmp(attr_type.data, "text")) {
- hprintf("[%-15s]", attr_value.len ? attr_value.data : …
- } else if (!strcasecmp(attr_type.data, "search")) {
- hprintf("[%-15s]", attr_value.len ? attr_value.data : …
- } else if (!strcasecmp(attr_type.data, "button")) {
- hprintf("[%s]", attr_value.len ? attr_value.data : "");
- } else if (!strcasecmp(attr_type.data, "submit")) {
+ } else if (!strcasecmp(attr_type.data, "button") ||
+ !strcasecmp(attr_type.data, "submit") ||
+ !strcasecmp(attr_type.data, "reset")) {
hprintf("[%s]", attr_value.len ? attr_value.data : "");
} else if (!strcasecmp(attr_type.data, "checkbox")) {
- hprint("[ ]"); /* TODO: show x or unicode checkmark wh…
+ hprintf("[%s]",
+ attr_checked.len &&
+ !strcasecmp(attr_checked.data, "checked") ? st…
} else if (!strcasecmp(attr_type.data, "radio")) {
- hprint("( )"); /* TODO: show x or unicode checkmark wh…
+ hprintf("[%s]",
+ attr_checked.len &&
+ !strcasecmp(attr_checked.data, "checked") ? st…
+ } else if (!strcasecmp(attr_type.data, "hidden")) {
+ cur->tag.displaytype |= DisplayNone;
+ } else {
+ /* unrecognized / default case is text */
+ hprintf("[%-15s]", attr_value.len ? attr_value.data : …
}
}
@@ -1963,6 +1975,8 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, con…
if (!tagcmp(tag, "img") && !attrcmp(name, "alt"))
string_append(&attr_alt, value, valuelen);
+ if (!attrcmp(name, "checked"))
+ string_append(&attr_checked, value, valuelen);
if (!attrcmp(name, "type"))
string_append(&attr_type, value, valuelen);
if (!attrcmp(name, "value"))
@@ -1987,10 +2001,18 @@ static void
xmlattrend(XMLParser *p, const char *t, size_t tl, const char *n,
size_t nl)
{
+ struct node *cur;
+
+ cur = &nodes[curnode];
+
/* set base URL, if it is set it cannot be overwritten again */
if (!basehrefset && basehrefdoc[0] &&
!attrcmp(n, "href") && !tagcmp(t, "base"))
basehrefset = uri_parse(basehrefdoc, &base) != -1 ? 1 : 0;
+
+ /* if attribute checked is set but it has no value then set it to "che…
+ if (cur->tag.displaytype & DisplayInput && !attrcmp(n, "checked") && !…
+ string_append(&attr_checked, "checked", sizeof("checked") - 1);
}
static void
@@ -1999,6 +2021,8 @@ xmlattrstart(XMLParser *p, const char *t, size_t tl, cons…
{
if (!attrcmp(n, "alt"))
string_clear(&attr_alt);
+ else if (!attrcmp(n, "checked"))
+ string_clear(&attr_checked);
else if (!attrcmp(n, "class"))
string_clear(&attr_class);
else if (!attrcmp(n, "href"))
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.