Add archive.org URI retrieval. Add first support for URI shortening. - annna - … | |
git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws6… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
--- | |
commit b9b579a607ee73661922628578156f44e2fc23da | |
parent a8df83437f63bfe2a68eafedcdb86502927aafe8 | |
Author: Annna Robert-Houdin <[email protected]> | |
Date: Sun, 12 Jan 2025 20:56:28 +0100 | |
Add archive.org URI retrieval. Add first support for URI shortening. | |
Diffstat: | |
M annna-message-common | 25 +++++++++++++++---------- | |
A archiveorg-uri | 32 +++++++++++++++++++++++++++++… | |
M fetch-uri | 14 -------------- | |
M modules/tor/whitelist-from-tor | 1 + | |
4 files changed, 48 insertions(+), 24 deletions(-) | |
--- | |
diff --git a/annna-message-common b/annna-message-common | |
@@ -83,6 +83,8 @@ case "${text}" in | |
esac | |
# Set below and annna will concatenate at the end. | |
+ # HTML title. | |
+ urititle="" | |
# Subtitle URI. | |
sturi="" | |
# Replacement URI. | |
@@ -113,7 +115,17 @@ case "${text}" in | |
tmpf=$(mktemp) | |
fetch-uri "${uri}" > "${tmpf}" | |
- urititle="$(grabtitle < "${tmpf}")" | |
+ if [ ! -s "${tmpf}" ]; | |
+ then | |
+ archiveorguri="$(archiveorg-uri "${uri}")" | |
+ if [ -n "${archiveorguri}" ]; | |
+ then | |
+ sarchiveorguri="$(bitreich-uri-shortener "${archiveorg… | |
+ nuris="archive: ${sarchiveorguri}" | |
+ fetch-uri "${archiveorguri}" > "${tmpf}" | |
+ fi | |
+ fi | |
+ [ -s "${tmpf}" ] && urititle="$(grabtitle < "${tmpf}")" | |
case "${urititle}" in | |
"") | |
@@ -181,10 +193,6 @@ case "${text}" in | |
nuris="$nuris metadata: gophers://codemadness.org/1/id… | |
fi | |
;; | |
- *www.wsj.com/*|*www.ft.com/*|*www.nytimes.com/*) | |
- archvuri=$(fetch-uri -h "https://archive.is/newest/$uri" | awk… | |
- [ -n "$archvuri" ] && nuris="archive: $archvuri" | |
- ;; | |
*www.reddit.com*) | |
nuri="$(printf '%s\n' "${uri}" | sed "s;www.reddit.com;old.red… | |
nuris="old.reddit: ${nuri}" | |
@@ -267,7 +275,7 @@ case "${text}" in | |
*) | |
mimetype="$(file -b --mime-type "${tmpf}")" | |
case "${mimetype}" in | |
- text/*) | |
+ text/*|application/javascript) | |
nocuri=0 | |
;; | |
esac | |
@@ -278,10 +286,7 @@ case "${text}" in | |
then | |
if [ $nocuri -eq 0 ]; | |
then | |
- if [ -z "${curi}" ]; | |
- then | |
- curi="$(html2text < "${tmpf}" | /br/bin/bitrei… | |
- fi | |
+ [ -z "${curi}" ] && curi="$(html2text < "${tmpf}" | /b… | |
outputstr="${outputstr} content: ${curi} ;" | |
fi | |
diff --git a/archiveorg-uri b/archiveorg-uri | |
@@ -0,0 +1,32 @@ | |
+#!/bin/sh | |
+ | |
+export PATH="$HOME/bin:$PATH" | |
+ | |
+usage() { | |
+ printf "usage: %s [-h] URI\n" "$(basename "$0")" >&2 | |
+ exit 1 | |
+} | |
+ | |
+if [ $# -lt 1 ] || [ $# -gt 2 ] | |
+then | |
+ usage | |
+fi | |
+ | |
+if [ $# -eq 2 ] | |
+then | |
+ [ $1 = -h ] || usage | |
+ opth=-I | |
+ shift | |
+fi | |
+ | |
+uri="$1" | |
+aiapiuri="http://archive.org/wayback/available?url=${uri}" | |
+usetor=0 | |
+apiai="$(fetch-uri "$aiapiuri")" | |
+available="$(echo "$apiai" | grep 'available')" | |
+if [ -n "${available}" ]; | |
+then | |
+ aiuri="$(echo "$apiai" | awk -F 'url": "' '{print $3}' 2>/dev/null | a… | |
+ printf "%s\n" "${aiuri}" | |
+fi | |
+ | |
diff --git a/fetch-uri b/fetch-uri | |
@@ -31,17 +31,3 @@ esac | |
grep -qx "$host" "/home/annna/bin/modules/tor/whitelist-from-tor" || usetor=1 | |
curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy sock… | |
-# Taken from: https://github.com/uriel1998/muna/blob/master/muna.sh | |
-if [ $? -eq 22 ]; | |
-then | |
- aiapiuri="http://archive.org/wayback/available?url=${uri}" | |
- usetor=0 | |
- apiai="$(curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor… | |
- available="$(echo "$apiai" | grep 'available')" | |
- if [ -n "${available}" ]; | |
- then | |
- aiuri="$(echo "$apiai" | awk -F 'url": "' '{print $3}' 2>/dev/… | |
- curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:… | |
- fi | |
-fi | |
- | |
diff --git a/modules/tor/whitelist-from-tor b/modules/tor/whitelist-from-tor | |
@@ -1,3 +1,4 @@ | |
www.forgottenweapons.com | |
forgottenweapons.com | |
archive.is | |
+archive.org |