Check-in by ben on 2025-07-06 01:31:02

Remove feature to block by IP address.

 INSERTED    DELETED
        0          1 config.m4
        0          1 readme.txt
        0          6 src/account/index.dcgi.m4
        0         11 src/cgi.awk
        0          1 src/config.awk
        0          6 src/details/index.dcgi.m4
        0          6 src/download/index.dcgi.m4
        0          6 src/list/index.dcgi.m4
        0          6 src/lists/index.dcgi.m4
        0          6 src/listsort/index.dcgi.m4
        0          6 src/raw/index.cgi.m4
        0          6 src/search/index.dcgi.m4
        0          6 src/sort/index.dcgi.m4
        0          6 src/wizard/step1/index.dcgi.m4
        0          6 src/wizard/step2/index.dcgi.m4
        0          6 src/wizard/step3/index.dcgi.m4
        0         86 TOTAL over 16 changed files

Index: config.m4
==================================================================
--- config.m4
+++ config.m4
@@ -4,11 +4,10 @@
dnl
define(__AGENT__, Lynx/2.9.0dev.10 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.1.1w)dnl
define(__API_ENDPOINT__, http://archive.org)dnl
define(__API_SSL_ENDPOINT__, https://archive.org)dnl
define(__AWK_EXT__, 0)dnl
-define(__BLOCKLIST__, /home/user/pharos/block.txt)dnl
define(__CACHE_DB__, /home/user/pharos/db/cache.dat)dnl
define(__CACHE_ENABLED__, 0)dnl
define(__CGIPATH__, /~user/pharos)dnl
define(__CMD_AWK__, /usr/bin/awk)dnl
define(__CMD_CURL__, /usr/bin/curl)dnl

Index: readme.txt
==================================================================
--- readme.txt
+++ readme.txt
@@ -33,11 +33,10 @@
Configuration
=============
To set configuration variables, edit config.m4

AWK_EXT allows use of non-standard features in gawk and mawk
-BLOCKLIST is a file to block by IP address, one regex per line
CACHE_ENABLED caches content in sqlite to reduce API calls

Installation
============
Installation depends on m4.

Index: src/account/index.dcgi.m4
==================================================================
--- src/account/index.dcgi.m4
+++ src/account/index.dcgi.m4
@@ -18,16 +18,10 @@
    type, url)
{
    acct = parts[3]
    email = search

-    if (blocked) {
-        printf "[h|Account|URL:%s/details/%s|%s|%s]\n",
-            api_ssl_endpoint, acct, server, port
-        exit 0
-    }
-
    signature = sprintf("account/%s/%s", acct, email)
    str = cache_init(signature)
    if (length(str) > 0) {
        print str
        return

Index: src/cgi.awk
==================================================================
--- src/cgi.awk
+++ src/cgi.awk
@@ -1,17 +1,6 @@
function cgi_init(    ip) {
-    blocked = 0
-    ip = ENVIRON["REMOTE_ADDR"]
-    while ((getline < blocklist) > 0) {
-        # bad bot ignoring robots.txt, block by IP address
-        if (match(ip, $0)) {
-            blocked = 1
-            break
-        }
-    }
-    close(blocklist)
-
    search = ARGV[1]
    arguments = ARGV[2]
    traversal = ARGV[5]
    selector = ARGV[6]


Index: src/config.awk
==================================================================
--- src/config.awk
+++ src/config.awk
@@ -1,11 +1,10 @@
function config_init() {
    agent = "__AGENT__"
    api_endpoint = "__API_ENDPOINT__"
    api_ssl_endpoint = "__API_SSL_ENDPOINT__"
    awk_ext = __AWK_EXT__
-    blocklist = "__BLOCKLIST__"
    cache_db = "__CACHE_DB__"
    cache_enabled = __CACHE_ENABLED__
    cgipath = "__CGIPATH__"
    cmd_curl = "__CMD_CURL__"
    cmd_enc = "__CMD_ENV__"

Index: src/details/index.dcgi.m4
==================================================================
--- src/details/index.dcgi.m4
+++ src/details/index.dcgi.m4
@@ -18,16 +18,10 @@
    license, output, pub_date, scanner, signature, str, thumb, title,
    topic, topics, type, uploader_account, uploader_email, url)
{
    item_id = parts[3]

-    if (blocked) {
-        printf "[h|Details|URL:%s/details/%s|%s|%s]\n",
-            api_ssl_endpoint, item_id, server, port
-        exit 0
-    }
-
    signature = "details/" item_id
    str = cache_init(signature)
    if (length(str) > 0) {
        print str
        return

Index: src/download/index.dcgi.m4
==================================================================
--- src/download/index.dcgi.m4
+++ src/download/index.dcgi.m4
@@ -14,16 +14,10 @@
    is_proxy, item_server, label, mtime, name, source, url)
{
    dir = parts[2]
    item_id = parts[3]

-    if (blocked) {
-        printf "[h|Download|URL:%s/download/%s|%s|%s]\n",
-            api_ssl_endpoint, item_id, server, port
-        exit 0
-    }
-
    if (dir == "download") {
        is_proxy = 1
    } else {
        # dir == "direct"
        is_proxy = 0

Index: src/list/index.dcgi.m4
==================================================================
--- src/list/index.dcgi.m4
+++ src/list/index.dcgi.m4
@@ -43,16 +43,10 @@

    split(search, parts, "/")
    acct = parts[1]
    list_id = parts[2]

-    if (blocked) {
-        printf "[h|List|URL:%s/details/%s/lists/%d|%s|%s]\n",
-            api_ssl_endpoint, acct, list_id, server, port
-        exit 0
-    }
-
    signature = sprintf("list/%s/%s", input, search)
    str = cache_init(signature)
    if (length(str) > 0) {
        print str
        return

Index: src/lists/index.dcgi.m4
==================================================================
--- src/lists/index.dcgi.m4
+++ src/lists/index.dcgi.m4
@@ -14,16 +14,10 @@

function main(     cmd, count, fields, iaout, i, id, is_private, item,
    item_count, item_id, label, name, output, record, records,
    signature, str, url)
{
-    if (blocked) {
-        printf "[h|Lists|URL:%s/details/%s/lists|%s|%s]\n",
-            api_ssl_endpoint, search, server, port
-        exit 0
-    }
-
    signature = sprintf("%s/lists", search)
    str = cache_init(signature)
    if (length(str) > 0) {
        print str
        return

Index: src/listsort/index.dcgi.m4
==================================================================
--- src/listsort/index.dcgi.m4
+++ src/listsort/index.dcgi.m4
@@ -7,16 +7,10 @@

include(src/config.awk)
incl(src/cgi.awk)

function main(     acct, i, lbl, list_id, opt) {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    lbl[1] = "Relevance"
    opt[1] = ""
    lbl[2] = "Weekly views [^]"
    opt[2] = "week asc"
    lbl[3] = "Weekly views [v]"

Index: src/raw/index.cgi.m4
==================================================================
--- src/raw/index.cgi.m4
+++ src/raw/index.cgi.m4
@@ -22,16 +22,10 @@
incl(src/cgi.awk)
incl(src/util.awk)
incl(src/web.awk)

function main() {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    if (path == "/debug/") {
        dump(search, TYPE_HEADERS)
    } else if (path == "/raw/") {
        dump(search, TYPE_RAW)
    } else if (path == "/text/") {

Index: src/search/index.dcgi.m4
==================================================================
--- src/search/index.dcgi.m4
+++ src/search/index.dcgi.m4
@@ -13,16 +13,10 @@
function main(search,     cmd, count, creator, descr, field, fields, i,
    iaout, id, item, items, jsout, label, numfound, order,
    order_names, page, rows, searchstr, sort_param, str, title, type,
    url)
{
-    if (blocked) {
-        printf "[h|Search|URL:%s/|%s|%s]\n", api_ssl_endpoint,
-            server, port
-        exit 0
-    }
-
    order_names["addeddate"] = "addeddate"
    order_names["collection_size"] = "collection_size"
    order_names["createddate"] = "createddate"
    order_names["creator"] = "creatorSorter"
    order_names["date"] = "date"

Index: src/sort/index.dcgi.m4
==================================================================
--- src/sort/index.dcgi.m4
+++ src/sort/index.dcgi.m4
@@ -7,16 +7,10 @@

include(src/config.awk)
incl(src/cgi.awk)

function main(     i, lbl, opt) {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    lbl[1]  = "Default [^]"
    opt[1]  = "nav_order asc"
    lbl[2]  = "Default [v]"
    opt[2]  = "nav_order desc"
    lbl[3]  = "Weekly views [^]"

Index: src/wizard/step1/index.dcgi.m4
==================================================================
--- src/wizard/step1/index.dcgi.m4
+++ src/wizard/step1/index.dcgi.m4
@@ -7,16 +7,10 @@

include(src/config.awk)
incl(src/cgi.awk)

function main(     i, lbl, opt, searchstr) {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    searchstr = parts[4]

    lbl[1]  = "Any field contains"
    opt[1]  = "anyfield"
    lbl[2]  = "Any field does not contain"

Index: src/wizard/step2/index.dcgi.m4
==================================================================
--- src/wizard/step2/index.dcgi.m4
+++ src/wizard/step2/index.dcgi.m4
@@ -7,16 +7,10 @@

include(src/config.awk)
incl(src/cgi.awk)

function main(     field, newsearch, searchstr) {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    field = parts[4]
    searchstr = parts[5]

    if (field == "mediatype") {
        print "# Mediatype is:"

Index: src/wizard/step3/index.dcgi.m4
==================================================================
--- src/wizard/step3/index.dcgi.m4
+++ src/wizard/step3/index.dcgi.m4
@@ -7,16 +7,10 @@

include(src/config.awk)
incl(src/cgi.awk)

function main(     field, label, newsearch, op, searchstr, value) {
-    if (blocked) {
-        printf "[h|Internet Archive|URL:%s/|%s|%s]\n",
-            api_ssl_endpoint, server, port
-        exit 0
-    }
-
    field = parts[4]
    searchstr = parts[5]
    value = search

    if (field ~ /^-/) {