Introduction
Introduction Statistics Contact Development Disclaimer Help
several improvements and more efficient xml parser - xml2tsv - a simple xml-to-…
Log
Files
Refs
Tags
README
LICENSE
---
commit 60c249ec24ab865c4a55759c7ffde2da99530b1d
parent b416c171bb34297d7f8bc4c027de7136a113d144
Author: Hiltjo Posthuma <[email protected]>
Date: Wed, 30 Sep 2020 11:42:07 +0100
several improvements and more efficient xml parser
Diffstat:
M xml.c | 36 +++--------------------------…
M xml.h | 12 +++++-------
M xml2tsv.c | 93 ++++++++++-------------------…
3 files changed, 38 insertions(+), 103 deletions(-)
---
diff --git a/xml.c b/xml.c
@@ -116,49 +116,19 @@ startvalue:
static void
xml_parsecomment(XMLParser *x)
{
- size_t datalen = 0, i = 0;
+ size_t i = 0;
int c;
- if (x->xmlcommentstart)
- x->xmlcommentstart(x);
while ((c = GETNEXT()) != EOF) {
- if (c == '-' || c == '>') {
- if (x->xmlcomment && datalen) {
- x->data[datalen] = '\0';
- x->xmlcomment(x, x->data, datalen);
- datalen = 0;
- }
- }
-
if (c == '-') {
- if (++i > 2) {
- if (x->xmlcomment)
- for (; i > 2; i--)
- x->xmlcomment(x, "-", 1);
+ if (++i > 2)
i = 2;
- }
continue;
} else if (c == '>' && i == 2) {
- if (x->xmlcommentend)
- x->xmlcommentend(x);
return;
} else if (i) {
- if (x->xmlcomment) {
- for (; i > 0; i--)
- x->xmlcomment(x, "-", 1);
- }
i = 0;
}
-
- if (datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if (x->xmlcomment)
- x->xmlcomment(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
}
}
@@ -286,7 +256,7 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
l = strtol(++e, &end, 16);
else
l = strtol(e, &end, 10);
- /* invalid value or not a well-formed entity or invalid codepoint */
+ /* invalid value or not a well-formed entity or invalid code point */
if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
return -1;
len = codepointtoutf8(l, buf);
diff --git a/xml.h b/xml.h
@@ -1,5 +1,5 @@
-#ifndef _XML_H
-#define _XML_H
+#ifndef _XML_H_
+#define _XML_H_
#include <stdio.h>
@@ -16,9 +16,6 @@ typedef struct xmlparser {
void (*xmlcdatastart)(struct xmlparser *);
void (*xmlcdata)(struct xmlparser *, const char *, size_t);
void (*xmlcdataend)(struct xmlparser *);
- void (*xmlcommentstart)(struct xmlparser *);
- void (*xmlcomment)(struct xmlparser *, const char *, size_t);
- void (*xmlcommentend)(struct xmlparser *);
void (*xmldata)(struct xmlparser *, const char *, size_t);
void (*xmldataend)(struct xmlparser *);
void (*xmldataentity)(struct xmlparser *, const char *, size_t);
@@ -29,8 +26,9 @@ typedef struct xmlparser {
size_t, int);
#ifndef GETNEXT
- #define GETNEXT (x)->getnext
- int (*getnext)(void);
+ /* GETNEXT overridden to reduce function call overhead and
+ further context optimizations. */
+ #define GETNEXT getchar
#endif
/* current tag */
diff --git a/xml2tsv.c b/xml2tsv.c
@@ -64,7 +64,7 @@ void stack_init(tstack_t *t){
/* utility functions */
/* quote_print: quote \\, \n, \t, and strip other ctrl chars */
-void quote_print(FILE *f, const char *s){
+void quote_print(const char *s){
const char *tmp = s;
size_t len;
int i;
@@ -72,36 +72,45 @@ void quote_print(FILE *f, const char *s){
len = strcspn(tmp, "\\\n\t");
for(i=0; i<len; i++, tmp++){
if (!iscntrl((unsigned char)*tmp)){
- fwrite(tmp, 1, 1, f);
+ putchar(*tmp);
}
}
switch (*tmp){
case '\n':
if (len > 0){
- fprintf(f, "\\n");
+ fputs("\\n", stdout);
}
tmp ++;
break;
case '\t':
- fprintf(f, "\\t");
+ fputs("\\t", stdout);
tmp ++;
break;
case '\r':
- fprintf(f, "\\r");
+ fputs("\\r", stdout);
tmp ++;
break;
case '\\':
- fprintf(f, "\\\\");
+ fputs("\\\\", stdout);
tmp ++;
break;
}
}
}
-void print_cur_str(FILE *f, tstack_t *t){
+void print_cur_str(tstack_t *t){
int i;
for (i=0; i<=t->top; i++){
- fprintf(f, "/%s", t->st[i]);
+ putchar('/');
+ fputs(t->st[i], stdout);
+ }
+}
+
+void print_cur_str_fp(FILE *f, tstack_t *t){
+ int i;
+ for (i=0; i<=t->top; i++){
+ fputc('/', f);
+ fputs(t->st[i], f);
}
}
@@ -110,13 +119,13 @@ void print_cur_str(FILE *f, tstack_t *t){
tstack_t st;
char emitsep;
-/* xml callbacks */
+/* XML callbacks */
void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
const char *v, size_t vl)
{
- printf("%s", v);
+ fputs(v, stdout);
}
void
@@ -133,56 +142,33 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, con…
}
void
-xmlattrend(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
-{
-}
-
-void
xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
{
- printf("%c%s%c", SEP, a, SATTR);
+ putchar(SEP);
+ fputs(a, stdout);
+ putchar(SATTR);
}
void
xmlcdatastart(XMLParser *x)
{
- printf("%c", SEP);
+ putchar(SEP);
}
void
xmlcdata(XMLParser *x, const char *d, size_t dl)
{
- quote_print(stdout, d);
-}
-
-void
-xmlcdataend(XMLParser *x)
-{
-}
-
-void
-xmlcommentstart(XMLParser *x)
-{
-}
-
-void
-xmlcomment(XMLParser *x, const char *c, size_t cl)
-{
-}
-
-void
-xmlcommentend(XMLParser *x)
-{
+ quote_print(d);
}
void
xmldata(XMLParser *x, const char *d, size_t dl)
{
if (strcspn(d, " \t\n") && emitsep){
- printf("%c", SEP);
+ putchar(SEP);
emitsep = FALSE;
}
- quote_print(stdout, d);
+ quote_print(d);
}
void
@@ -220,12 +206,6 @@ xmltagend(XMLParser *x, const char *t, size_t tl, int issh…
if (strcmp(t, tag)){
fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, ta…
}
-
-/* if (isshort) {
- printf("\n");
- print_cur_str(stdout, &st);
- }
-*/
}
void
@@ -235,13 +215,8 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent …
return;
}
- printf("\n");
- print_cur_str(stdout, &st);
-}
-
-void
-xmltagstartparsed(XMLParser *x, const char *t, size_t tl, int isshort)
-{
+ putchar('\n');
+ print_cur_str(&st);
}
int
@@ -252,30 +227,22 @@ main(void)
XMLParser x = { 0 };
x.xmlattr = xmlattr;
- x.xmlattrend = xmlattrend;
x.xmlattrstart = xmlattrstart;
x.xmlattrentity = xmlattrentity;
x.xmlcdatastart = xmlcdatastart;
x.xmlcdata = xmlcdata;
- x.xmlcdataend = xmlcdataend;
- x.xmlcommentstart = xmlcommentstart;
- x.xmlcomment = xmlcomment;
- x.xmlcommentend = xmlcommentend;
x.xmldata = xmldata;
x.xmldataend = xmldataend;
x.xmldataentity = xmldataentity;
x.xmldatastart = xmldatastart;
x.xmltagend = xmltagend;
x.xmltagstart = xmltagstart;
- x.xmltagstartparsed = xmltagstartparsed;
-
- x.getnext = getchar;
xml_parse(&x);
- printf("\n");
+ putchar('\n');
if (! stack_empty(&st)) {
fprintf(stderr, "Error: tags still open at EOF: ");
- print_cur_str(stderr, &st);
+ print_cur_str_fp(stderr, &st);
fprintf(stderr, "\n");
}
return 0;
You are viewing proxied material from bitreich.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.