Introduction
Introduction Statistics Contact Development Disclaimer Help
various improvements - webdump - HTML to plain-text converter for webpages
git clone git://git.codemadness.org/webdump
Log
Files
Refs
README
LICENSE
---
commit 011b4885a533382d98f1aee6cb9619e280c99947
parent 89c9108dc27fe27e0f028f67508a1156ed242d2a
Author: Hiltjo Posthuma <[email protected]>
Date: Mon, 18 Sep 2023 19:06:03 +0200
various improvements
Improve link references:
- Add RB tree to lookup link references: this uses a stripped-down version of
OpenBSD tree.h
- Add 2 separate linked-lists for the order of visible and hidden links.
- Hidden links and now also deduplicated.
Improve nested nodes and max depth:
- Rework and increase the allowed depth of nodes. Allocate them on the heap.
Diffstat:
M Makefile | 2 +-
A tree.h | 483 +++++++++++++++++++++++++++++…
M webdump.c | 177 ++++++++++++++++++++---------…
3 files changed, 597 insertions(+), 65 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -19,7 +19,7 @@ BIN = ${NAME}
SCRIPTS =
SRC = ${BIN:=.c}
-HDR = arg.h namedentities.h xml.h
+HDR = arg.h namedentities.h tree.h xml.h
LIBXML = libxml.a
LIBXMLSRC = \
diff --git a/tree.h b/tree.h
@@ -0,0 +1,483 @@
+/* $OpenBSD: tree.h,v 1.31 2023/03/08 04:43:09 guenther Exp $ */
+/*
+ * Copyright 2002 Niels Provos <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TREE_H
+#define TREE_H
+
+/*
+ * This file defines a red-black tree structure.
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+ struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+ { NULL }
+
+#define RB_INIT(root) do { \
+ (root)->rbh_root = NULL; \
+} while (0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+ struct type *rbe_left; /* left element */ …
+ struct type *rbe_right; /* right element */ …
+ struct type *rbe_parent; /* parent element */ \
+ int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do { …
+ RB_PARENT(elm, field) = parent; …
+ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+ RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do { …
+ RB_COLOR(black, field) = RB_BLACK; \
+ RB_COLOR(red, field) = RB_RED; \
+} while (0)
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+ (tmp) = RB_RIGHT(elm, field); \
+ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+ } \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); …
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_LEFT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+ (tmp) = RB_LEFT(elm, field); \
+ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); …
+ } \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); …
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_RIGHT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) …
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) …
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __attribute__((__unused_…
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
+attr void name##_RB_INSERT_COLOR(struct name *, struct type *); …
+attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+attr struct type *name##_RB_REMOVE(struct name *, struct type *); \
+attr struct type *name##_RB_INSERT(struct name *, struct type *); \
+attr struct type *name##_RB_FIND(struct name *, struct type *); …
+attr struct type *name##_RB_NFIND(struct name *, struct type *); \
+attr struct type *name##_RB_NEXT(struct type *); \
+attr struct type *name##_RB_PREV(struct type *); \
+attr struct type *name##_RB_MINMAX(struct name *, int); …
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) …
+ RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define RB_GENERATE_STATIC(name, type, field, cmp) …
+ RB_GENERATE_INTERNAL(name, type, field, cmp, __attribute__((__unused__…
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \
+attr void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+ struct type *parent, *gparent, *tmp; \
+ while ((parent = RB_PARENT(elm, field)) && \
+ RB_COLOR(parent, field) == RB_RED) { \
+ gparent = RB_PARENT(parent, field); \
+ if (parent == RB_LEFT(gparent, field)) { \
+ tmp = RB_RIGHT(gparent, field); …
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_RIGHT(parent, field) == elm) { \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+ } else { \
+ tmp = RB_LEFT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_LEFT(parent, field) == elm) { \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_LEFT(head, gparent, tmp, field); \
+ } \
+ } \
+ RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+attr void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *el…
+{ \
+ struct type *tmp; \
+ while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+ elm != RB_ROOT(head)) { \
+ if (RB_LEFT(parent, field) == elm) { \
+ tmp = RB_RIGHT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) …
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)…
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); …
+ } else { \
+ if (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == R…
+ struct type *oleft; \
+ if ((oleft = RB_LEFT(tmp, field)))\
+ RB_COLOR(oleft, field) = RB_BL…
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_RIGHT(head, tmp, oleft, fiel…
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field)…
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_RIGHT(tmp, field)) \
+ RB_COLOR(RB_RIGHT(tmp, field), field) …
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } else { \
+ tmp = RB_LEFT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) …
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)…
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); …
+ } else { \
+ if (RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB…
+ struct type *oright; \
+ if ((oright = RB_RIGHT(tmp, field)))\
+ RB_COLOR(oright, field) = RB_B…
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_LEFT(head, tmp, oright, fiel…
+ tmp = RB_LEFT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field)…
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_LEFT(tmp, field)) \
+ RB_COLOR(RB_LEFT(tmp, field), field) =…
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } \
+ } \
+ if (elm) \
+ RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+attr struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *child, *parent, *old = elm; \
+ int color; \
+ if (RB_LEFT(elm, field) == NULL) \
+ child = RB_RIGHT(elm, field); \
+ else if (RB_RIGHT(elm, field) == NULL) \
+ child = RB_LEFT(elm, field); \
+ else { \
+ struct type *left; \
+ elm = RB_RIGHT(elm, field); \
+ while ((left = RB_LEFT(elm, field))) \
+ elm = left; \
+ child = RB_RIGHT(elm, field); \
+ parent = RB_PARENT(elm, field); …
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; …
+ else \
+ RB_RIGHT(parent, field) = child; \
+ } else \
+ RB_ROOT(head) = child; \
+ if (RB_PARENT(elm, field) == old) \
+ parent = elm; \
+ (elm)->field = (old)->field; \
+ if (RB_PARENT(old, field)) { \
+ if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+ RB_LEFT(RB_PARENT(old, field), field) = elm;\
+ else \
+ RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+ } else \
+ RB_ROOT(head) = elm; \
+ RB_PARENT(RB_LEFT(old, field), field) = elm; \
+ if (RB_RIGHT(old, field)) \
+ RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+ if (parent) { \
+ left = parent; \
+ do { \
+ } while ((left = RB_PARENT(left, field))); \
+ } \
+ goto color; \
+ } \
+ parent = RB_PARENT(elm, field); …
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; …
+ else \
+ RB_RIGHT(parent, field) = child; \
+ } else \
+ RB_ROOT(head) = child; \
+color: \
+ if (color == RB_BLACK) \
+ name##_RB_REMOVE_COLOR(head, parent, child); \
+ return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+attr struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp; \
+ struct type *parent = NULL; \
+ int comp = 0; \
+ tmp = RB_ROOT(head); \
+ while (tmp) { \
+ parent = tmp; \
+ comp = (cmp)(elm, parent); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ RB_SET(elm, parent, field); \
+ if (parent != NULL) { \
+ if (comp < 0) \
+ RB_LEFT(parent, field) = elm; \
+ else \
+ RB_RIGHT(parent, field) = elm; \
+ } else \
+ RB_ROOT(head) = elm; \
+ name##_RB_INSERT_COLOR(head, elm); \
+ return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+attr struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (NULL); \
+} \
+ \
+/* Finds the first node greater than or equal to the search key */ \
+attr struct type * \
+name##_RB_NFIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *res = NULL; \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) { …
+ res = tmp; \
+ tmp = RB_LEFT(tmp, field); \
+ } \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (res); \
+} \
+ \
+attr struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+ if (RB_RIGHT(elm, field)) { \
+ elm = RB_RIGHT(elm, field); \
+ while (RB_LEFT(elm, field)) \
+ elm = RB_LEFT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && …
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+attr struct type * \
+name##_RB_PREV(struct type *elm) \
+{ \
+ if (RB_LEFT(elm, field)) { \
+ elm = RB_LEFT(elm, field); \
+ while (RB_RIGHT(elm, field)) \
+ elm = RB_RIGHT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && …
+ (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+attr struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *parent = NULL; \
+ while (tmp) { \
+ parent = tmp; \
+ if (val < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else \
+ tmp = RB_RIGHT(tmp, field); \
+ } \
+ return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_PREV(name, x, y) name##_RB_PREV(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head) \
+ for ((x) = RB_MIN(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_SAFE(x, name, head, y) \
+ for ((x) = RB_MIN(name, head); \
+ ((x) != NULL) && ((y) = name##_RB_NEXT(x), 1); \
+ (x) = (y))
+
+#define RB_FOREACH_REVERSE(x, name, head) \
+ for ((x) = RB_MAX(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_PREV(x))
+
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
+ for ((x) = RB_MAX(name, head); \
+ ((x) != NULL) && ((y) = name##_RB_PREV(x), 1); \
+ (x) = (y))
+
+#endif /* TREE_H */
diff --git a/webdump.c b/webdump.c
@@ -10,6 +10,7 @@
#include "arg.h"
char *argv0;
+#include "tree.h"
#include "xml.h"
static XMLParser parser;
@@ -151,19 +152,31 @@ struct selectors {
size_t count;
};
-/* linked-list of link references */
+/* RB tree of link references */
struct linkref {
char *type;
enum TagId tagid;
char *url;
int ishidden;
size_t linknr;
- struct linkref *next;
+ RB_ENTRY(linkref) entry;
};
-static struct linkref *links_head;
-static struct linkref *links_cur;
-static int linkcount; /* visible link count */
+/* link references and hidden link references */
+struct linkref **visrefs;
+static size_t nvisrefs, ncapvisrefs; /* visible link count / capacity */
+struct linkref **hiddenrefs;
+static size_t nhiddenrefs, ncaphiddenrefs; /* hidden link count / capacity */
+
+int
+linkrefcmp(struct linkref *r1, struct linkref *r2)
+{
+ return strcmp(r1->url, r2->url);
+}
+
+RB_HEAD(linkreftree, linkref) linkrefhead = RB_INITIALIZER(&linkrefhead);
+RB_PROTOTYPE(linkreftree, linkref, entry, linkrefcmp)
+RB_GENERATE(linkreftree, linkref, entry, linkrefcmp)
static const char *str_bullet_item = "* ";
static const char *str_checkbox_checked = "x";
@@ -213,10 +226,11 @@ static char rbuf[1024];
static int rbuflen;
static int rnbufcells = 0; /* pending cell count to add */
-#define MAX_DEPTH 256
-static struct node nodes[MAX_DEPTH];
-static String nodes_links[MAX_DEPTH]; /* keep track of links per node */
-static int curnode;
+#define MAX_NODE_DEPTH 65535 /* absolute maximum node depth */
+static struct node *nodes;
+static String *nodes_links; /* keep track of links per node */
+static size_t ncapnodes;
+static int curnode; /* current node depth */
/* reader / selector mode */
static int reader_mode = 0;
@@ -1378,39 +1392,59 @@ handleinlinealt(void)
}
}
-/* slow linear lookup of link references
- TODO: optimize it, maybe using tree.h RB_TREE? */
+/* lookup a link reference by url in the red-black tree */
static struct linkref *
findlinkref(const char *url)
{
- struct linkref *cur;
+ struct linkref find;
- for (cur = links_head; cur; cur = cur->next) {
- if (!strcmp(url, cur->url))
- return cur;
- }
- return NULL;
+ find.url = (char *)url;
+
+ return RB_FIND(linkreftree, &linkrefhead, &find);
}
+/* add a link reference. Returns the added link reference, or the existing link
+ reference if links are deduplicated */
static struct linkref *
-addlinkref(const char *url, const char *_type, enum TagId tagid, int ishidden,
- int linknr)
+addlinkref(const char *url, const char *_type, enum TagId tagid, int ishidden)
{
+ struct linkref *link;
+ size_t linknr;
+
+ /* if links are deduplicates return the existing link */
+ if (uniqrefs && (link = findlinkref(url)))
+ return link;
+
if (tagid == TagA)
_type = "link";
- /* add to linked list */
- if (!links_head)
- links_cur = links_head = ecalloc(1, sizeof(*links_head));
- else
- links_cur = links_cur->next = ecalloc(1, sizeof(*links_head));
- links_cur->url = estrdup(url);
- links_cur->type = estrdup(_type);
- links_cur->tagid = tagid;
- links_cur->ishidden = ishidden;
- links_cur->linknr = linknr;
+ link = ecalloc(1, sizeof(*link));
+
+ if (!ishidden) {
+ linknr = ++nvisrefs;
+ if (nvisrefs >= ncapvisrefs)
+ ncapvisrefs += 256; /* greedy alloc */
+ visrefs = erealloc(visrefs, sizeof(*visrefs) * ncapvisrefs);
+ visrefs[linknr - 1] = link; /* add pointer to list */
+ } else {
+ linknr = ++nhiddenrefs;
+ if (nhiddenrefs >= ncaphiddenrefs)
+ ncaphiddenrefs += 256; /* greedy alloc */
+ hiddenrefs = erealloc(hiddenrefs, sizeof(*hiddenrefs) * ncaphi…
+ hiddenrefs[linknr - 1] = link; /* add pointer to list */
+ }
- return links_cur;
+ link->url = estrdup(url);
+ link->type = estrdup(_type);
+ link->tagid = tagid;
+ link->ishidden = ishidden;
+ link->linknr = linknr;
+
+ /* add to tree: the tree is only used for checking unique link referen…
+ if (uniqrefs)
+ RB_INSERT(linkreftree, &linkrefhead, link);
+
+ return link;
}
static void
@@ -1462,43 +1496,65 @@ handleinlinelink(void)
/* add hidden links directly to the reference,
the order doesn't matter */
if (cur->tag.displaytype & DisplayNone)
- addlinkref(url, cur->tag.name, cur->tag.id, 1, 0);
+ addlinkref(url, cur->tag.name, cur->tag.id, 1);
}
void
printlinkrefs(void)
{
+ struct linkref *ref;
size_t i;
- int hashiddenrefs = 0;
- if (!links_head)
+ if (!nvisrefs && !nhiddenrefs)
return;
if (resources) {
- for (i = 1, links_cur = links_head; links_cur; links_cur = lin…
- dprintf(3, "%s\t%s\n", links_cur->type, links_cur->url…
+ for (i = 0; i < nvisrefs; i++) {
+ ref = visrefs[i];
+ dprintf(3, "%s\t%s\n", ref->type, ref->url);
+ }
+ for (i = 0; i < nhiddenrefs; i++) {
+ ref = hiddenrefs[i];
+ dprintf(3, "%s\t%s\n", ref->type, ref->url);
+ }
}
printf("\nReferences\n\n");
- i = 1;
- for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
- if (links_cur->ishidden) {
- hashiddenrefs = 1;
- continue;
- }
- printf(" %zu. %s (%s)\n", links_cur->linknr, links_cur->url, l…
- i++;
+ for (i = 0; i < nvisrefs; i++) {
+ ref = visrefs[i];
+ printf(" %zu. %s (%s)\n", ref->linknr, ref->url, ref->type);
}
- if (hashiddenrefs)
+ if (nhiddenrefs > 0)
printf("\n\nHidden references\n\n");
/* hidden links don't have a link number, just count them */
- for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
- if (!links_cur->ishidden)
- continue;
- printf(" %zu. %s (%s)\n", i, links_cur->url, links_cur->type);
- i++;
+ for (i = 0; i < nhiddenrefs; i++) {
+ ref = hiddenrefs[i];
+ printf(" %zu. %s (%s)\n", ref->linknr, ref->url, ref->type);
+ }
+}
+
+#define NODE_CAP_INC 256
+
+/* increase node depth, allocate space for nodes if needed */
+static void
+incnode(void)
+{
+ curnode++;
+
+ if (curnode >= MAX_NODE_DEPTH)
+ errx(1, "max node depth reached: %d", curnode);
+
+ if (curnode >= ncapnodes) {
+ nodes = erealloc(nodes, sizeof(*nodes) * (ncapnodes + NODE_CAP…
+ nodes_links = erealloc(nodes_links, sizeof(*nodes_links) * (nc…
+
+ /* clear new region */
+ memset(&nodes[ncapnodes], 0, sizeof(*nodes) * NODE_CAP_INC);
+ memset(&nodes_links[ncapnodes], 0, sizeof(*nodes_links) * NODE…
+
+ ncapnodes += NODE_CAP_INC; /* greedy alloc */
}
}
@@ -1670,17 +1726,8 @@ endnode(struct node *cur)
/* add link and show the link number in the visible order */
if (!ishidden && nodes_links[curnode].len > 0) {
- if (uniqrefs)
- ref = findlinkref(nodes_links[curnode].data);
- else
- ref = NULL;
-
- /* new link: add it */
- if (!ref) {
- linkcount++;
- ref = addlinkref(nodes_links[curnode].data,
- cur->tag.name, cur->tag.id, ishidden, linkcoun…
- }
+ ref = addlinkref(nodes_links[curnode].data,
+ cur->tag.name, cur->tag.id, ishidden);
if (showrefinline || showurlinline) {
hflush();
@@ -1825,9 +1872,6 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
char *s;
int i, j, k, nchildfound, parenttype;
- if (curnode >= MAX_DEPTH - 2)
- errx(1, "max tag depth reached: %d\n", curnode);
-
cur = &nodes[curnode];
string_clear(&attr_alt);
@@ -1920,7 +1964,7 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
}
}
- curnode++;
+ incnode();
string_clear(&nodes_links[curnode]); /* clear possible link reference …
cur = &nodes[curnode];
memset(cur, 0, sizeof(*cur)); /* clear / reset node */
@@ -2333,6 +2377,11 @@ main(int argc, char **argv)
usage();
} ARGEND
+ /* initial nodes */
+ ncapnodes = NODE_CAP_INC;
+ nodes = ecalloc(ncapnodes, sizeof(*nodes));
+ nodes_links = ecalloc(ncapnodes, sizeof(*nodes_links));
+
/* top-most document root needs initialization */
nodes[0].tag.name = "";
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.