#!/usr/bin/awk -f
# cdata.awk version 2 by Ben Collver <[email protected]>
#
# reads XML from stdin and prints to stdout, replacing CDATA
# with hexBinary data when the CDATA contains angle brackets.
#
# OLD: <![CDATA[<ABC>]]>
# NEW: <awk:cdata type="awk:hexBinary">3C4142433E</awk:cdata>
# hex_print(str): write every character of str to stdout as two uppercase
# hexadecimal digits, with no separators and no trailing newline.
#
# Character codes are looked up in the global table hex_encode_ord, which
# hex_init() fills in; call hex_init() before the first hex_print().
# A character that is not a key of that table is printed as "00".
#
# count, i and tokens are locals (awk's extra-parameter convention).
# tokens is no longer used; it is kept so the signature is unchanged.
function hex_print(str, count, i, tokens) {
    # Walk the string with substr() instead of split(str, tokens, ""):
    # POSIX leaves the effect of an empty field separator in split()
    # unspecified, so per-character splitting is not portable.
    count = length(str)
    for (i = 1; i <= count; i++) {
        printf("%02X", hex_encode_ord[substr(str, i, 1)])
    }
    return
}
# hex_init(): populate the global table hex_encode_ord.
#
# For each code from 0 through 255, the one-character string produced by
# sprintf("%c", code) becomes a key whose value is that code.  hex_print()
# reads this table to turn characters back into numbers.
#
# i and c are locals (awk's extra-parameter convention).
function hex_init( i, c) {
    i = 0
    while (i <= 255) {
        c = sprintf("%c", i)
        hex_encode_ord[c] = i
        i++
    }
    return
}
# inject_ns(str): return str with an opening <awk:ok> element, which
# declares the "awk" namespace prefix, inserted on a new line directly
# after a leading XML declaration (<?xml ... ?>).
#
# If str does not start with an XML declaration it is returned unchanged.
#
# Side effect: calls match(), so RSTART and RLENGTH are overwritten.
# retval is a local (awk's extra-parameter convention), so the function
# no longer leaks a global variable.
#
# NOTE(review): no matching </awk:ok> is emitted by the code visible in
# this file -- confirm whether the consumer expects one.
function inject_ns(str,    retval) {
    if (match(str, /^<\?xml[^?>]*\?>/)) {
        # RLENGTH is the length of the XML declaration: keep it, add the
        # wrapper element, then append the remainder of str.
        retval = substr(str, 1, RLENGTH) "\n" \
            "<awk:ok xmlns:awk=\"" \
            "gopher://gopherpedia.com/0/AWK" \
            "\">" \
            substr(str, RLENGTH+1)
    } else {
        retval = str
    }
    return retval
}
BEGIN {
    # Build the character-to-code table that hex_print() reads.
    hex_init()
    # Use the CDATA terminator as the record separator, so every record
    # ends where a "]]>" stood in the input (or at end of input).
    RS = "]]>"
}
# Records containing a CDATA opener: print the text before it (passed
# through inject_ns()), then re-emit the CDATA payload -- hex-encoded when
# it contains "<" or ">", as an ordinary CDATA section otherwise.
match($0, /<!\[CDATA\[/) {
    # Capture both offsets immediately: inject_ns() calls match() itself
    # and overwrites RSTART/RLENGTH.
    open_at = RSTART
    body_at = RSTART + RLENGTH
    printf "%s", inject_ns(substr($0, 1, open_at - 1))
    cdata = substr($0, body_at)
    if (cdata !~ /[<>]/) {
        # No angle brackets: restore the section, including the "]]>"
        # that was consumed as the record separator.
        printf "<![CDATA[%s]]>", cdata
        next
    }
    printf "<awk:cdata type=\"awk:hexBinary\">"
    hex_print(cdata)
    printf "</awk:cdata>"
    next
}
# Any record not handled by an earlier rule is copied to stdout as-is;
# nothing is appended after it.
{
    printf("%s", $0)
}