#!/usr/bin/awk -f
# cdata.awk version 2 by Ben Collver <[email protected]>
#
# reads XML from stdin and prints to stdout, replacing CDATA
# with hexBinary data when the CDATA contains angle brackets.
#
# OLD: <![CDATA[<ABC>]]>
# NEW: <awk:cdata type="awk:hexBinary">3C4142433E</awk:cdata>

# hex_print(str)
#
# Writes str to stdout as upper-case hexadecimal, two digits per
# character, with no separators and no trailing newline.
#
# The numeric value of each character is looked up in the global
# table hex_encode_ord, so hex_init() must have been called first.
#
# The string is walked with substr() rather than split(str, arr, ""),
# because splitting on an empty separator is not defined by POSIX awk.
#
# NOTE(review): a character that is missing from hex_encode_ord is
# printed as "00" -- presumably only single-byte input is expected;
# confirm for multibyte locales.
function hex_print(str,     count, i, ch) {
    count = length(str)
    for (i = 1; i <= count; i++) {
        ch = substr(str, i, 1)
        printf("%02X", hex_encode_ord[ch])
    }
}

# hex_init()
#
# Fills the global table hex_encode_ord so that it maps the character
# produced by sprintf("%c", n) to n, for every n from 0 through 255.
# hex_print() reads this table.
function hex_init(     code, ch) {
    code = 0
    while (code <= 255) {
        ch = sprintf("%c", code)
        hex_encode_ord[ch] = code
        code++
    }
}

# inject_ns(str)
#
# If str begins with an XML declaration (<?xml ... ?>), returns str
# with a newline and an opening <awk:ok> tag, which declares the
# "awk" namespace prefix, inserted immediately after the declaration.
# Otherwise returns str unchanged.
#
# Side effect: calls match(), so RSTART and RLENGTH are overwritten.
# Callers must save those values beforehand if they still need them.
#
# decl_len is a local; nothing is written to the global namespace.
function inject_ns(str,     decl_len) {
    if (!match(str, /^<\?xml[^?>]*\?>/)) {
        return str
    }
    # The pattern is anchored at the start, so RLENGTH is the length
    # of the declaration itself.
    decl_len = RLENGTH
    return substr(str, 1, decl_len) "\n"  \
        "<awk:ok xmlns:awk=\""            \
        "gopher://gopherpedia.com/0/AWK"  \
        "\">"                             \
        substr(str, decl_len + 1)
}

BEGIN {
    # Build the character-to-code table that hex_print() reads.
    hex_init()

    # Split the input at the CDATA terminator, so that each record
    # holds at most one complete CDATA section at its end.
    # This relies on multi-character RS support, which POSIX awk
    # does not guarantee.
    RS = "]]>"
}

# Record that contains a CDATA opener: print the text before it
# (passed through inject_ns), then print the CDATA payload either
# hex-encoded or as an unchanged CDATA section.
match($0, /<!\[CDATA\[/) {
    # Save RSTART now: inject_ns() calls match() and overwrites it.
    pos = RSTART
    printf "%s", inject_ns(substr($0, 1, pos - 1))

    # Everything after the "<![CDATA[" opener is the payload; the
    # closing "]]>" was already consumed as the record separator.
    str = substr($0, pos + length("<![CDATA["))

    if (str !~ /[<>]/) {
        # No angle brackets: re-emit the section as it was.
        printf "<![CDATA[%s]]>", str
        next
    }

    # The payload contains angle brackets: emit it hex-encoded.
    printf "<awk:cdata type=\"awk:hexBinary\">"
    hex_print(str)
    printf "</awk:cdata>"
    next
}

# Any record without a CDATA opener is copied through unchanged.
# The record separator is not re-emitted, so a "]]>" that ended such
# a record does not appear in the output.
{
    printf("%s", $0)
}