# ita2.awk version 1 by Ben Collver
#
# Convert between ITA-2 and ASCII.
# Input & output is always padded to 8-bit bytes.
#
# ITA-2
# <https://commons.wikimedia.org/wiki/File:
# International_Telegraph_Alphabet_2_brightened.jpg>
#
# ITA-5 AKA ASCII
# <https://en.wikipedia.org/wiki/T.50_(standard)>
#
# Extensions:
# * Handle characters that don't exist in ITA-2.
# Setting: escape_unknown_characters = [0|1]
# 0 = Error on non-existing characters
# 1 = Escape non-existing characters (default)
# escape format = FIGURES ALLSPACE_NOTINUSE digit1 digit2 digit3
# digit = ITA-2 encoded octal digit
# decode_line(line)
#
# Decode a string of ITA-2 codes (one code per character) and write the
# resulting text to standard output.
#
#   line  string of ITA-2 codes to decode
#
# The remaining parameters (c, c2, ddd, i, len) are locals.
#
# Reads the global code constants and the letters_decode / figures_decode
# tables, and updates the global shift state `mode`, which therefore
# carries over from one call to the next.
#
# On an unrecognized code, or a stray ALLSPACE_NOTINUSE outside an escape
# sequence, an error message is printed and the program exits with
# status 1.
function decode_line(line, c, c2, ddd, i, len) {
    # len is declared as a local so it does not leak into the globals.
    len = length(line)
    for (i = 1; i <= len; i++) {
        c = substr(line, i, 1)
        if (c == LETTERS) {
            mode = mode_letters
        } else if (c == FIGURES) {
            mode = mode_figures
            # FIGURES followed by ALLSPACE_NOTINUSE introduces an escape:
            # the next three codes are the octal digits of the character.
            c2 = substr(line, i + 1, 1)
            if (c2 == ALLSPACE_NOTINUSE) {
                ddd = substr(line, i + 2, 3)
                printf "%s", unescape_char(ddd)
                # Skip the marker and the three digits; the loop's own
                # i++ then steps past the last digit.
                i += 4
            }
        } else if (c == CARRIAGERETURN) {
            printf "\r"
        } else if (c == LINEFEED) {
            printf "\n"
        } else if (c == SPACE) {
            printf " "
        } else if (c == ALLSPACE_NOTINUSE) {
            print "Error: unexpected ALLSPACE_NOTINUSE"
            exit 1
        } else {
            # Printable code: its meaning depends on the current shift.
            if (mode == mode_letters) {
                if (c in letters_decode) {
                    printf "%s", letters_decode[c]
                } else {
                    printf "Error: unrecognized letter %c(%d)\n", c, ord[c]
                    exit 1
                }
            } else {
                if (c in figures_decode) {
                    printf "%s", figures_decode[c]
                } else {
                    printf "Error: unrecognized figure %c(%d)\n", c, ord[c]
                    exit 1
                }
            }
        }
    }
    return
}
# encode_char(c)
#
# Translate one input character into its ITA-2 code string.
#
#   c  the character to encode
#
# Returns the code for c, preceded by a LETTERS or FIGURES shift code
# when c belongs to the table that is not currently selected.  Carriage
# return, line feed and space are emitted without touching the shift
# state.  Characters found in neither table are handed to escape_char()
# when escape_unknown_characters is set; otherwise an error is printed
# and the program exits with status 1.
#
# Side effect: may change the global shift state `mode`.
function encode_char(c, retval) {
    # Codes that are the same in both shift states.
    if (c == "\r") {
        return CARRIAGERETURN
    }
    if (c == "\n") {
        return LINEFEED
    }
    if (c == " ") {
        return SPACE
    }

    if (c in letters) {
        retval = letters[c]
        if (mode == mode_figures) {
            # Switch back to the letters table first.
            retval = LETTERS retval
            mode = mode_letters
        }
        return retval
    }

    if (c in figures) {
        retval = figures[c]
        if (mode == mode_letters) {
            # Switch to the figures table first.
            retval = FIGURES retval
            mode = mode_figures
        }
        return retval
    }

    # No ITA-2 representation for this character.
    if (!escape_unknown_characters) {
        printf "Error: Unknown character: %s\n", c
        exit 1
    }
    return escape_char(c)
}
# encode_line(line)
#
# Encode every character of `line` with encode_char() and write each
# result to standard output as soon as it is produced.
#
#   line  text to encode
#
# The remaining parameters (c, i, len, x) are locals; x is kept in the
# signature but is not used.
function encode_line(line, c, i, len, x) {
    len = length(line)
    i = 0
    while (++i <= len) {
        c = substr(line, i, 1)
        printf "%s", encode_char(c)
    }
}
# escape_char(c)
#
# Build the escape sequence for a character that has no ITA-2 code:
#
#     FIGURES ALLSPACE_NOTINUSE digit1 digit2 digit3
#
# where the three digits are the zero-padded octal value of c taken from
# ord[], each encoded through the figures table.
#
#   c  the character to escape; must be a key of ord[]
#
# The remaining parameters (d1, d2, d3, retval, str) are locals.
#
# Returns the escape string.  Side effect: sets the global shift state
# `mode` to mode_figures, since the sequence begins with FIGURES.
#
# If c has no entry in ord[], an error is printed and the program exits
# with status 1.  Without this check the missing value would format as
# "000" and the character would be silently encoded as NUL.
function escape_char(c, d1, d2, d3, retval, str) {
    # Test with `in` so the lookup does not create an empty ord[] entry.
    if (!(c in ord)) {
        printf "Error: Cannot escape character: %s\n", c
        exit 1
    }
    str = sprintf("%03o", ord[c])
    d1 = substr(str, 1, 1)
    d2 = substr(str, 2, 1)
    d3 = substr(str, 3, 1)
    retval = FIGURES ALLSPACE_NOTINUSE figures[d1] figures[d2] figures[d3]
    mode = mode_figures
    return retval
}
# unescape_char(ddd)
#
# Decode the three-digit payload of an escape sequence.
#
#   ddd  three ITA-2 figure codes, each encoding one octal digit
#
# The remaining parameters (d1, d2, d3, o, retval) are locals.
#
# Returns the character whose value is the decoded octal number when
# escape_unknown_characters is set; otherwise returns a backslash
# followed by the three octal digits.
#
# If ddd is shorter than three codes, contains a code that is not a
# figure, or does not decode to a key of oct[], an error is printed and
# the program exits with status 1.
function unescape_char(ddd, d1, d2, d3, o, retval) {
    d1 = substr(ddd, 1, 1)
    d2 = substr(ddd, 2, 1)
    d3 = substr(ddd, 3, 1)
    # Test membership with `in` before indexing: a plain reference to a
    # missing key would create an empty figures_decode[] entry, and later
    # `in figures_decode` tests would then accept that invalid code.
    if (!((d1 in figures_decode) && (d2 in figures_decode) && (d3 in figures_decode))) {
        print "Error: malformed escape sequence"
        exit 1
    }
    o = figures_decode[d1] figures_decode[d2] figures_decode[d3]
    # The figures table also holds non-octal characters, so confirm that
    # the decoded text really is one of the known octal strings.
    if (!(o in oct)) {
        printf "Error: invalid escape value: %s\n", o
        exit 1
    }
    if (escape_unknown_characters) {
        retval = sprintf("%c", oct[o])
    } else {
        retval = sprintf("\\%s", o)
    }
    return retval
}
# Initialisation: select the command, set the starting shift state and
# build the encode/decode lookup tables used by the functions above.
BEGIN {
# 1 = escape characters that have no ITA-2 code, 0 = treat them as errors.
escape_unknown_characters = 1
# Command identifiers; encoding is the default.
command_decode = 2
command_encode = 1
command = command_encode
# A first argument of "decode" selects decoding.  It is removed from ARGV
# so that awk does not try to open it as an input file.
if (ARGV[1] == "decode") {
delete ARGV[1]
command = command_decode
# hack to read entire input into $0
# (the record separator becomes the single byte \377, so input is only
# split where that byte occurs)
RS = "\377"
}
# Shift states; processing starts in letters mode.
mode_figures = 2
mode_letters = 1
mode = mode_letters
# Codes that are not looked up through the letters/figures tables,
# written as octal escapes.
ALLSPACE_NOTINUSE = "\0"
CARRIAGERETURN = "\10"
LINEFEED = "\2"
SPACE = "\4"
LETTERS = "\37"
FIGURES = "\33"
# Letters table: character -> code (octal escapes).
letters["A"] = "\3"
letters["B"] = "\31"
letters["C"] = "\16"
letters["D"] = "\11"
letters["E"] = "\1"
letters["F"] = "\15"
letters["G"] = "\32"
letters["H"] = "\24"
letters["I"] = "\6"
letters["J"] = "\13"
letters["K"] = "\17"
letters["L"] = "\22"
letters["M"] = "\34"
letters["N"] = "\14"
letters["O"] = "\30"
letters["P"] = "\26"
letters["Q"] = "\27"
letters["R"] = "\12"
letters["S"] = "\5"
letters["T"] = "\20"
letters["U"] = "\7"
letters["V"] = "\36"
letters["W"] = "\23"
letters["X"] = "\35"
letters["Y"] = "\25"
letters["Z"] = "\21"
# Figures table: each figure reuses the code of the letter named on the
# right-hand side; the current shift state decides which one is meant.
figures["-"] = letters["A"]
figures["?"] = letters["B"]
figures[":"] = letters["C"]
figures["\5"] = letters["D"] # ENQ
figures["3"] = letters["E"]
figures["%"] = letters["F"]
figures["@"] = letters["G"]
figures["£"] = letters["H"]
figures["8"] = letters["I"]
figures["\7"] = letters["J"] # BEL
figures["("] = letters["K"]
figures[")"] = letters["L"]
figures["."] = letters["M"]
figures[","] = letters["N"]
figures["9"] = letters["O"]
figures["0"] = letters["P"]
figures["1"] = letters["Q"]
figures["4"] = letters["R"]
figures["'"] = letters["S"]
figures["5"] = letters["T"]
figures["7"] = letters["U"]
figures["="] = letters["V"]
figures["2"] = letters["W"]
figures["/"] = letters["X"]
figures["6"] = letters["Y"]
figures["+"] = letters["Z"]
# ord[]: character produced by sprintf("%c", i) -> i, for i = 0..255.
# oct[]: three-digit zero-padded octal string -> i, for i = 0..255.
for (i = 0; i < 256; i++) {
c = sprintf("%c", i)
ord[c] = i
c = sprintf("%03o", i)
oct[c] = i
}
# Reverse tables for decoding: code -> character.
for (i in figures) {
figures_decode[figures[i]] = i
}
for (i in letters) {
letters_decode[letters[i]] = i
}
}
# Main rule: run the selected command on every input record.
# BEGIN changes RS only for decoding, so when encoding each record is one
# input line, without its terminating newline.
{
    if (command != command_encode) {
        decode_line($0)
    } else {
        encode_line($0)
    }
}