#!/usr/bin/awk -f
# gopher-validate.awk Version 5
#
# Validate gopher directory or text
#
# Relevant RFCs:
#
#   gopher://gopher.fnord.one/0/Mirrors/RFC/rfc1436.txt
#
#   gopher://gopher.fnord.one/0/Mirrors/RFC/rfc8962.txt
#
# See also:
#
#   https://tildegit.org/sloum/mapcheck/src/branch/master/mapcheck
#
#   https://codemadness.org/git/gopher-validator/file/gopher-validator.c.html
#
# This script requires:
# - /bin/sh
# - curl
# - iconv
# - /usr/bin/awk
# - /usr/bin/mktemp
# mktemp: run /usr/bin/mktemp and return the path it prints.
#
# Callers pass no arguments; the names after the gap in the parameter
# list are awk's idiom for local variables, so nothing leaks into (or is
# inherited from) the global namespace between calls.
#
# Returns: the last line printed by /usr/bin/mktemp.
# On failure: prints an error message and terminates the script with
# exit status 1.
function mktemp(    cmd, line, path, status) {
    cmd = "/usr/bin/mktemp"
    path = ""
    # Read into a variable so the caller's $0/NF are left untouched.
    while ((cmd | getline line) > 0) {
        path = line
    }
    status = close(cmd)
    if (status != 0) {
        print "Error: mktemp failed exit status: " status
        exit 1
    }
    if (length(path) == 0) {
        print "Error: mktemp failed, no tmpfile"
        exit 1
    }
    return path
}
# unlink: remove the file `name` by invoking rm through the shell.
#
# The path is wrapped in single quotes (with embedded single quotes
# escaped as '\'') so that whitespace or shell metacharacters in the
# path, e.g. from an unusual TMPDIR, cannot split or alter the command.
# "--" stops rm from treating a name starting with "-" as an option.
#
# Returns nothing; rm's exit status is ignored, as before.
function unlink(name,    quoted) {
    quoted = name
    gsub(/'/, "'\\\\''", quoted)
    system("rm -- '" quoted "'")
    return
}
# validate_encoding: check that `file` is valid text in `encoding`.
#
# Runs `iconv -f encoding -t encoding` with the file on stdin and all
# output discarded; only the exit status is used.
#
# Parameters:
#   iconv    - name or path of the iconv command
#   encoding - encoding name passed to both -f and -t
#   file     - path of the file to check (shell-quoted here, so paths
#              containing whitespace or metacharacters are safe)
#
# Returns: 1 if iconv exits with status 0, otherwise 0.
#
# `quoted` and `cmd` are locals, so the caller's global variables are
# not overwritten.
function validate_encoding(iconv, encoding, file,    quoted, cmd) {
    quoted = file
    gsub(/'/, "'\\\\''", quoted)
    cmd = sprintf("%s -f %s -t %s <'%s' >/dev/null 2>/dev/null",
                  iconv, encoding, encoding, quoted)
    if (system(cmd) == 0) {
        return 1
    }
    return 0
}
# validate_gophermap: check that `file` holds a well-formed gopher menu.
#
# Each line is checked, in order, for:
#   - a trailing CR (lines are read with the default LF record separator)
#   - no content after the terminating "." line
#   - 4 tab-separated fields (or 5 with "+" as the fifth, for gopher+)
#   - a first field of at most 71 characters, i.e. item type plus up to
#     70 display characters (downgraded to a warning when the global
#     warn_longlines is set)
#   - a known item type (warning only)
#   - a display string made of printable characters only
# A single warning is printed once more than 20 consecutive lines start
# with "i".
#
# Parameters:
#   file - path of the file to read
#
# Returns: 1 if no error was found, 0 otherwise. Validation stops at the
# first error. An unreadable file is reported and yields 0.
#
# Side effects: sets the global FS to a tab and overwrites $0/NF while
# reading. Reads the global warn_longlines. All other names after the
# gap in the parameter list are locals.
function validate_gophermap(file,    types, known, count, k, status,
                            lineno, ilines, iline_warning_seen, seen_end,
                            retval, msg, Item_Type, User_Name) {
    # valid gopher item types
    count = split("0 1 2 3 4 5 6 7 8 9 I M P T c e g h i s v + ; ! .",
                  known, " ")
    for (k = 1; k <= count; k++) {
        types[known[k]] = 0
    }

    FS = "\t"
    ilines = 0
    iline_warning_seen = 0
    lineno = 0
    seen_end = 0
    retval = 1

    # getline returns -1 on a read error; comparing against 0 keeps an
    # unreadable file from turning this into an endless loop.
    while ((status = (getline < file)) > 0) {
        lineno++

        if (/\r$/) {
            # Drop the CR; assigning $0 re-splits the fields.
            $0 = substr($0, 1, length($0) - 1)
        } else {
            printf "Error: Missing CR character on line %d\n\n", lineno
            print "RFC1436 Section 2:"
            print " A CR LF denotes the end of the item."
            retval = 0
            break
        }

        if (seen_end) {
            print "Error: Unexpected trailing text after end of gophermap.\n"
            print "RFC1436 Introduction:"
            print " The server responds with a block of text terminated"
            print " by a period on a line by itself and closes the"
            print " connection."
            retval = 0
            break
        }

        if (NF == 1 && $0 == ".") {
            seen_end = 1
            continue
        }

        if (NF == 4 || (NF == 5 && $5 == "+")) {
            # items with 4 fields are normal
            # gopher+ items are ignored
        } else {
            printf "Error: %d fields on line %d, expected 4\n\n", NF, lineno
            print "RFC1436 Introduction:"
            print " Each item in a directory is identified by a"
            print " [1] type... [and] user-visible name..."
            print " [2] an opaque selector string..."
            print " [3] a host name..."
            print " [4] and an IP portnumber..."
            retval = 0
            break
        }

        # $1 is the item-type character followed by the display string.
        if (length($1) > 71) {
            msg = "Error"
            if (warn_longlines) {
                msg = "Warning"
            }
            printf "%s: Long user display string on line %d\n\n", msg, lineno
            print "RFC1436 Section 3.9:"
            print " ... user display string should be kept"
            print " under 70 characters in length.\n"
            if (!warn_longlines) {
                retval = 0
                break
            }
        }

        Item_Type = substr($1, 1, 1)
        if (!(Item_Type in types)) {
            printf "Warning: Non-standard gophertype \"%s\" on line %d\n\n",
                   Item_Type, lineno
            print "RFC1436 Section 3.8:"
            print "A list of defined item-type characters follows:"
            print "0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +, T, g, I"
            print "UMN gopher object/GSgopherobj.h:"
            print "M, P, c, e, h, i, s, v, ;, !, .\n"
        }

        User_Name = substr($1, 2)
        if (length(User_Name) > 0 && User_Name !~ /^[[:print:]]+$/) {
            printf "Error: Non-printable characters on line %d\n\n", lineno
            print "RFC1436 Appendix:"
            print " It is *highly* recommended that the User_Name field"
            print " contain only printable characters, since many"
            print " different clients will be using it."
            retval = 0
            break
        }

        # Count the current run of consecutive info-lines.
        if (/^i/) {
            ilines++
        } else {
            ilines = 0
        }
        if (ilines > 20 && !iline_warning_seen) {
            print "Warning: Over 20 consecutive info-lines"
            print "Gophermap may contain content rather than navigation."
            print "See:"
            print " gopher://gopher.icu/phlog/Computing/The-state-of-gopher.md"
            print ""
            iline_warning_seen = 1
        }
    }

    if (status < 0) {
        print "Error: Couldn't read file: " file
        retval = 0
    }
    close(file)
    return retval
}
# curlcfg_quote: wrap `value` in double quotes for a curl config file,
# backslash-escaping the characters that are special inside such quotes
# (backslash, double quote, newline, carriage return).
function curlcfg_quote(value,    quoted) {
    quoted = value
    gsub(/\\/, "\\\\\\\\", quoted)
    gsub(/"/, "\\\\\"", quoted)
    gsub(/\n/, "\\\\n", quoted)
    gsub(/\r/, "\\\\r", quoted)
    return "\"" quoted "\""
}

# validate_text: fetch `uri` with curl and validate the response.
#
# Steps:
#   1. Write a curl config file (size limit, no redirects, gopher-only
#      protocols, output path, URL) and run curl with it.
#   2. Check the downloaded data is valid ASCII or UTF-8 using
#      validate_encoding.
#   3. If the global `type` is "1", check the data as a gopher menu
#      with validate_gophermap.
#
# Parameters:
#   curl  - name or path of the curl command
#   iconv - name or path of the iconv command
#   uri   - URI to fetch; written to the config file quoted, so that
#           whitespace or newlines in it cannot truncate the URL or
#           add further curl options
#
# This function never returns: it prints "Valid" and exits with status
# 0 on success, or prints an error and exits with status 1. Temporary
# files are removed on every path that gets past their creation.
# NOTE: mktemp() itself exits on failure, so a failure of the second
# call leaves the first temporary file behind.
#
# Names after the gap in the parameter list are locals.
function validate_text(curl, iconv, uri,    curlcfg, curlout, quoted_cfg,
                       result, valid_encodings, count, is_valid, i) {
    # use curl to fetch gopher directory or text
    curlcfg = mktemp()
    curlout = mktemp()
    print "--max-filesize 256K" > curlcfg
    print "--max-redirs 0" >> curlcfg
    print "--output " curlcfg_quote(curlout) >> curlcfg
    print "--proto =gopher,gophers" >> curlcfg
    print "--silent" >> curlcfg
    print "--url " curlcfg_quote(uri) >> curlcfg
    # close() flushes the config to disk and releases the descriptor
    # before curl reads the file.
    close(curlcfg)

    quoted_cfg = curlcfg
    gsub(/'/, "'\\\\''", quoted_cfg)
    result = system(curl " -K '" quoted_cfg "'")
    unlink(curlcfg)
    if (result != 0) {
        unlink(curlout)
        print "Error: Curl couldn't fetch URI"
        exit 1
    }

    # use iconv to validate the result character encoding
    # All 8-bit characters represent valid ISO-8859-1 (Latin)
    # encoding so checking for invalid encoding is meaningless;
    # ISO-8859-1 is therefore deliberately not in the list.
    count = split("ASCII UTF-8", valid_encodings, " ")
    is_valid = 0
    for (i = 1; i <= count; i++) {
        if (validate_encoding(iconv, valid_encodings[i], curlout)) {
            is_valid = 1
            break
        }
    }
    if (!is_valid) {
        print "Error: Invalid character encoding."
        print "Expected ASCII or UTF-8.\n"
        print "RFC1436 Section 4(b):"
        print " The well-tempered server ought to send \"text\"..."
        unlink(curlout)
        exit 1
    }

    # validate gophermap if it is a directory
    if (type == "1") {
        result = validate_gophermap(curlout)
        if (result == 0) {
            unlink(curlout)
            exit 1
        }
    }

    print "Valid"
    unlink(curlout)
    exit 0
}
# validate_uri: check that `uri` is a gopher/gophers URI naming a text
# file (item type 0) or a directory (item type 1).
#
# The item type is the first character of the path, i.e. the character
# right after the slash that follows the host[:port] part. It does not
# have to be followed by another slash, so "gopher://host/1" and
# "gopher://host/0file.txt" are accepted as well as "gopher://host/1/".
#
# Parameters:
#   uri - the URI to check
#
# Returns: the item type, "0" or "1". The global `type` is also set to
# that value.
# On failure: prints an error message and exits with status 1.
function validate_uri(uri,    found) {
    if (uri !~ /^gophers?:\/\//) {
        print "Error: expected gopher: protocol URI"
        exit 1
    }

    # The match starts at position 1, so RLENGTH is the index of the
    # last matched character: the item type.
    found = match(uri, /^gophers?:\/\/[^\/]*\/./)
    if (found == 0) {
        type = ""
    } else {
        type = substr(uri, RLENGTH, 1)
    }

    if (length(type) != 1) {
        print "Error: couldn't find item type in URI"
        exit 1
    }
    if (type != "0" && type != "1") {
        print "Error: Expected item type 0 or 1 in URI"
        exit 1
    }
    return type
}
# Entry point. Parses the command line (one URI plus the optional
# --warn-longlines flag), checks the URI, makes sure curl and iconv can
# be found, then hands over to validate_text, which ends the script.
BEGIN {
    if (ARGC < 2) {
        print "Usage: gopher-validate.awk -- [options] URI"
        print ""
        print "Options:"
        print "--warn-longlines Warn instead of error on long lines"
        print ""
        exit 1
    }

    uri = ""
    warn_longlines = 0
    for (argno = 1; argno < ARGC; argno++) {
        word = ARGV[argno]
        if (word == "--warn-longlines") {
            warn_longlines = 1
            continue
        }
        # Only a single non-option argument (the URI) is accepted.
        if (length(uri) > 0) {
            print "Error: Unrecognized argument: " word
            exit 1
        }
        uri = word
    }

    type = validate_uri(uri)

    # Both external tools must be available before any work is done.
    curl = "curl"
    iconv = "iconv"
    tools[1] = curl
    tools[2] = iconv
    for (toolno = 1; toolno <= 2; toolno++) {
        if (system("command -v " tools[toolno] " >/dev/null") != 0) {
            print "Error: Couldn't find command: " tools[toolno]
            exit 1
        }
    }

    validate_text(curl, iconv, uri)
}