# ita2.awk version 1 by Ben Collver
#
# Convert between ITA-2 and ASCII.
# Input and output are always padded to 8-bit bytes.
#
# ITA-2
# <https://commons.wikimedia.org/wiki/File:
# International_Telegraph_Alphabet_2_brightened.jpg>
#
# ITA-5 AKA ASCII
# <https://en.wikipedia.org/wiki/T.50_(standard)>
#
# Extensions:
# * Handle characters that don't exist in ITA-2.
#   Setting: escape_unknown_characters = [0|1]
#       0 = Error on non-existing characters
#       1 = Escape non-existing characters (default)
#   escape format = FIGURES ALLSPACE_NOTINUSE digit1 digit2 digit3
#   digit = ITA-2 encoded octal digit

# decode_line(line)
#
# Decode a string of ITA-2 codes (one code per byte) and print the
# resulting text on standard output.
#
# Parameters:
#   line - the encoded input; every character is treated as one code.
#   c, c2, ddd, i, len - local variables, not to be passed by callers.
#
# Globals:
#   mode is read and updated, so the current letters/figures shift
#   carries over from one call to the next.
#
# Errors:
#   Prints a message and exits with status 1 on a stray
#   ALLSPACE_NOTINUSE, on a code found in neither decode table, or on
#   an escape sequence that runs past the end of the input.
function decode_line(line,     c, c2, ddd, i, len) {
   # len is declared local so this function does not clobber a global.
   len = length(line)
   for (i = 1; i <= len; i++) {
       c = substr(line, i, 1)
       if (c == LETTERS) {
           mode = mode_letters
       } else if (c == FIGURES) {
           mode = mode_figures
           # FIGURES followed by ALLSPACE_NOTINUSE introduces an escaped
           # character: three further codes, each an octal digit.
           # The i < len guard keeps a trailing FIGURES shift from being
           # taken for an escape when c2 is the empty string.
           c2 = substr(line, i + 1, 1)
           if (i < len && c2 == ALLSPACE_NOTINUSE) {
               ddd = substr(line, i + 2, 3)
               if (length(ddd) < 3) {
                   print "Error: truncated escape sequence"
                   exit 1
               }
               printf "%s", unescape_char(ddd)
               # Skip ALLSPACE_NOTINUSE and the three digits; the loop
               # increment then steps past the last digit.
               i += 4
           }
       } else if (c == CARRIAGERETURN) {
           printf "\r"
       } else if (c == LINEFEED) {
           printf "\n"
       } else if (c == SPACE) {
           printf " "
       } else if (c == ALLSPACE_NOTINUSE) {
           print "Error: unexpected ALLSPACE_NOTINUSE"
           exit 1
       } else {
           if (mode == mode_letters) {
               if (c in letters_decode) {
                   printf "%s", letters_decode[c]
               } else {
                   printf "Error: unrecognized letter %c(%d)\n", c, ord[c]
                   exit 1
               }
           } else {
               if (c in figures_decode) {
                   printf "%s", figures_decode[c]
               } else {
                   printf "Error: unrecognized figure %c(%d)\n", c, ord[c]
                   exit 1
               }
           }
       }
   }
   return
}

# encode_char(c)
#
# Return the ITA-2 code string for the single character c.
#
# Carriage return, line feed and space map to their dedicated codes
# regardless of the current shift.  A character found in the letters
# or figures table is returned as its code, preceded by a LETTERS or
# FIGURES shift code when the global mode has to change; mode is
# updated accordingly.  Any other character is passed to escape_char()
# when escape_unknown_characters is set; otherwise an error is printed
# and the program exits with status 1.
function encode_char(c,     retval) {
   # Codes that do not depend on the shift state.
   if (c == "\r") {
       return CARRIAGERETURN
   }
   if (c == "\n") {
       return LINEFEED
   }
   if (c == " ") {
       return SPACE
   }

   if (c in letters) {
       retval = letters[c]
       if (mode == mode_figures) {
           # Switch back to the letters shift first.
           mode = mode_letters
           retval = LETTERS retval
       }
       return retval
   }

   if (c in figures) {
       retval = figures[c]
       if (mode == mode_letters) {
           # Switch to the figures shift first.
           mode = mode_figures
           retval = FIGURES retval
       }
       return retval
   }

   # The character is in neither table.
   if (!escape_unknown_characters) {
       printf "Error: Unknown character: %s\n", c
       exit 1
   }
   return escape_char(c)
}

# encode_line(line)
#
# Encode every character of line with encode_char() and print the
# resulting codes on standard output, without any separator.
# The parameters after line are local variables.
function encode_line(line,     c, i, len, x) {
   len = length(line)
   i = 0
   while (++i <= len) {
       printf "%s", encode_char(substr(line, i, 1))
   }
}

# escape_char(c)
#
# Return the escape sequence for a character that has no ITA-2 code:
#   FIGURES ALLSPACE_NOTINUSE digit1 digit2 digit3
# where the digits are the three-digit octal value of ord[c], each
# encoded through the figures table.
#
# Side effect: sets the global mode to mode_figures, because the
# sequence starts with a FIGURES shift.
#
# Errors: prints a message and exits with status 1 when c has no entry
# in ord.  Without this check ord[c] would be empty, "%03o" would yield
# "000", and the character would silently be encoded as NUL.
function escape_char(c,     d1, d2, d3, retval, str) {
   if (!(c in ord)) {
       printf "Error: Cannot escape character: %s\n", c
       exit 1
   }
   str = sprintf("%03o", ord[c])
   d1 = substr(str, 1, 1)
   d2 = substr(str, 2, 1)
   d3 = substr(str, 3, 1)
   retval = FIGURES ALLSPACE_NOTINUSE figures[d1] figures[d2] figures[d3]
   mode = mode_figures
   return retval
}

# unescape_char(ddd)
#
# Decode the three digit codes of an escape sequence.
#
# Parameters:
#   ddd - three ITA-2 codes, each the figures code of one octal digit.
#
# Returns:
#   With escape_unknown_characters set, the character whose value is
#   the three-digit octal number; otherwise a backslash followed by
#   the three decoded digits.
#
# Errors: prints a message and exits with status 1 when a code is not
# in figures_decode or the digits are not a key of oct.  Membership is
# tested with "in" before indexing, because merely referencing a
# missing element would create an empty entry in the table.
function unescape_char(ddd,     d1, d2, d3, o, retval) {
   d1 = substr(ddd, 1, 1)
   d2 = substr(ddd, 2, 1)
   d3 = substr(ddd, 3, 1)
   if (!(d1 in figures_decode) || !(d2 in figures_decode) ||
       !(d3 in figures_decode)) {
       print "Error: invalid escape sequence"
       exit 1
   }
   o = figures_decode[d1] figures_decode[d2] figures_decode[d3]
   if (!(o in oct)) {
       printf "Error: invalid escape value %s\n", o
       exit 1
   }
   if (escape_unknown_characters) {
       retval = sprintf("%c", oct[o])
   } else {
       retval = sprintf("\\%s", o)
   }
   return retval
}

# Initialisation: select the command, define the ITA-2 control codes
# and build the lookup tables used by the encode and decode functions.
#
#   letters[ch], figures[ch]   - character to ITA-2 code, per shift
#   letters_decode[code], figures_decode[code] - the reverse mappings
#   ord[ch]                    - character to numeric value (0..255)
#   oct["ddd"]                 - three-digit octal string to value
BEGIN {
   # Default to escaping, but keep a value supplied on the command
   # line (for example -v escape_unknown_characters=0); such
   # assignments are made before BEGIN runs.
   if (escape_unknown_characters == "") {
       escape_unknown_characters = 1
   }
   command_decode = 2
   command_encode = 1
   command = command_encode
   if (ARGV[1] == "decode") {
       # Remove the word so awk does not treat it as an input file.
       delete ARGV[1]
       command = command_decode
       # hack to read entire input into $0
       RS = "\377"
   }
   mode_figures = 2
   mode_letters = 1
   # Both encoder and decoder start in the letters shift.
   mode = mode_letters
   ALLSPACE_NOTINUSE = "\0"
   CARRIAGERETURN = "\10"
   LINEFEED = "\2"
   SPACE = "\4"
   LETTERS = "\37"
   FIGURES = "\33"

   letters["A"] = "\3"
   letters["B"] = "\31"
   letters["C"] = "\16"
   letters["D"] = "\11"
   letters["E"] = "\1"
   letters["F"] = "\15"
   letters["G"] = "\32"
   letters["H"] = "\24"
   letters["I"] = "\6"
   letters["J"] = "\13"
   letters["K"] = "\17"
   letters["L"] = "\22"
   letters["M"] = "\34"
   letters["N"] = "\14"
   letters["O"] = "\30"
   letters["P"] = "\26"
   letters["Q"] = "\27"
   letters["R"] = "\12"
   letters["S"] = "\5"
   letters["T"] = "\20"
   letters["U"] = "\7"
   letters["V"] = "\36"
   letters["W"] = "\23"
   letters["X"] = "\35"
   letters["Y"] = "\25"
   letters["Z"] = "\21"
   # Each figure shares its code with the letter named on the right.
   figures["-"] = letters["A"]
   figures["?"] = letters["B"]
   figures[":"] = letters["C"]
   figures["\5"] = letters["D"] # ENQ
   figures["3"] = letters["E"]
   figures["%"] = letters["F"]
   figures["@"] = letters["G"]
   figures["£"] = letters["H"]
   figures["8"] = letters["I"]
   figures["\7"] = letters["J"] # BEL
   figures["("] = letters["K"]
   figures[")"] = letters["L"]
   figures["."] = letters["M"]
   figures[","] = letters["N"]
   figures["9"] = letters["O"]
   figures["0"] = letters["P"]
   figures["1"] = letters["Q"]
   figures["4"] = letters["R"]
   figures["'"] = letters["S"]
   figures["5"] = letters["T"]
   figures["7"] = letters["U"]
   figures["="] = letters["V"]
   figures["2"] = letters["W"]
   figures["/"] = letters["X"]
   figures["6"] = letters["Y"]
   figures["+"] = letters["Z"]
   # ord maps a character to its value; oct maps a three-digit octal
   # string to the same value.
   for (i = 0; i < 256; i++) {
       c = sprintf("%c", i)
       ord[c] = i
       c = sprintf("%03o", i)
       oct[c] = i
   }
   # Reverse tables for decoding.
   for (i in figures) {
       figures_decode[figures[i]] = i
   }
   for (i in letters) {
       letters_decode[letters[i]] = i
   }
}

# Main rule: run the selected command on each input record.
{
   if (command == command_encode) {
       # With the default RS the newline that ended the record is not
       # part of $0.  Put it back so that encode_char() emits LINEFEED
       # and line breaks survive an encode/decode round trip.
       encode_line($0 "\n")
   } else {
       decode_line($0)
   }
}