#!/usr/bin/env perl
# txt2pre --- convert my site's txt files to `pre'-based atom/rss/html
# Copyright (C) 2014-2021 all contributors <[email protected]>
# Copyright (c) 2021 Amin Bandali <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# This simple script borrows from a script of the same name from the
# wonderful public-inbox project, under AGPLv3+, with additions of
# my own.
# Update (2021-11-01): this script isn't currently used for generating
# my site's pages anymore; but kept for future reference.
use strict;
use warnings 'all';
use Getopt::Long;
# Command-line options and their defaults:
#   --format=STR  output format; the rest of the script checks for
#                 'html', 'atom' and 'rss'
#   --lang=STR    language code; 'en' and 'fa' are recognized
#   --index, --header, --footer   boolean switches, off by default
my ($format, $lang) = ('html', 'en');
my ($index, $header, $footer) = ('', '', '');

my %option_spec = (
    'format=s' => \$format,
    'lang=s'   => \$lang,
    'index'    => \$index,
    'header'   => \$header,
    'footer'   => \$footer,
);
GetOptions(%option_spec)
    or die "bad command line arguments\n";
# Per-language site metadata.  Every value falls back to the empty string
# when $lang is neither 'en' nor 'fa'.

# Author name as shown in titles and feed entries.
my $author =
      $lang eq 'en' ? 'bandali'
    : $lang eq 'fa' ? 'بندعلی'
    :                 '';

# Feed/channel title.
my $site_title =
      $lang eq 'en' ? "${author}'s personal site"
    : $lang eq 'fa' ? "سایت شخصی $author"
    :                 '';

# Feed/channel subtitle or description.
my $site_desc =
      $lang eq 'en' ? "notes and blog posts by $author"
    : $lang eq 'fa' ? "نوشتهها و بلاگ پستهای $author"
    :                 '';

# Base URL of the site for this language.  The literals must not contain
# any whitespace: the value is emitted verbatim into <link> and xml:base.
my $site_url =
      $lang eq 'en' ? 'https://bndl.org'
    : $lang eq 'fa' ? 'https://bndl.org/fa/'
    :                 '';

# Tag URI identifying the feed; it also prefixes each note's id.
my $feed_id =
      $lang eq 'en' ? "tag:bndl.org,2020:notes.$format"
    : $lang eq 'fa' ? "tag:bndl.org,2020:fa/notes.$format"
    :                 '';
# Pattern used by linkify() to find URLs in text.
#
# Capture 1 (optional): a single '(' , "'" or '!' immediately before the
#   URL; linkify() uses it to decide which closing character to strip
#   from the end of the match.
# Capture 2: the URL itself -- one of the listed schemes (ftp, ftps, http,
#   https, nntp, nntps, imap, imaps, news, snews, gopher) followed by
#   '://', a run of host characters, and optionally a '/'-introduced path
#   with an optional '?query' and an optional '#fragment'.
#
# Compiled with /x (whitespace in the pattern is ignored, hence the
# escaped '\#') and /i (case-insensitive).
my $link_re =
qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
[\@:\w\.-]+(?:/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
(?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
)?
)}xi;
# Maps the character found right before a URL to a pattern that strips the
# matching closing character (plus at most one of . , ; +) from the end of
# the URL.  The stripped text is left in capture group 1.
my %pairs = (
    # Markdown "(url)"; Ruby "+" and "," (the latter for arrays)
    '(' => qr/(\)[\.,;\+]?)\z/,
    # Perl / Ruby
    q{'} => qr/('[\.,;\+]?)\z/,
    # Perl / Ruby
    '!' => qr/(![\.,;\+]?)\z/,
);
# Characters that are special in HTML/XML text, mapped to the entity that
# replaces each of them.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # Quote escaping is disabled; entries kept for reference.
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s) -> string
#
# Returns a copy of $s with '&', '<' and '>' replaced by their entities so
# the text can be embedded in HTML/XML.  Quotes are left untouched.
# An undefined argument yields the empty string.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
# _link_markup($beg, $url) -> string
#
# Builds the replacement text for one $link_re match: the leading
# character $beg (possibly ''), an <a> element for $url, and whatever
# trailing punctuation was judged not to belong to the URL.
sub _link_markup {
    my ($beg, $url) = @_;
    my $end = '';

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    if (defined(my $re = $pairs{$beg})) {
        # URL was opened by '(', "'" or '!': strip the matching closer
        if ($url =~ s/$re//) {
            $end = $1;
        }
    } elsif ($url =~ s/(\))?([\.,;])\z//) {
        $end = $2;
        # require ')' to be paired with '('
        if (defined $1) { # ')'
            if (index($url, '(') < 0) {
                $end = ")$end";
            } else {
                $url .= ')';
            }
        }
    } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
        $end = ')';
    }

    return $beg . "<a href=\"$url\">$url</a>" . $end;
}

# linkify($s) -> string
#
# Returns a copy of $s in which every URL matched by $link_re is wrapped
# in an <a href="..."> element.
#
# This script calls linkify() on text that has already been through
# html_esc(), so a URL written as <https://example.org/x> arrives as
# "&lt;https://example.org/x&gt;".  The URL character class accepts
# '&', ';' and letters, so without precautions the "&gt;" would be
# swallowed into the link target.  To prevent that, the text is split on
# the "&lt;" / "&gt;" entities and only the pieces in between are scanned.
sub linkify {
    my ($s) = @_;

    # the capturing group keeps the entities themselves in the list
    my @pieces = split /(&[lg]t;)/, $s, -1;
    for my $piece (@pieces) {
        next if $piece =~ /\A&[lg]t;\z/;
        # copy the captures into plain values before the helper runs
        # its own regexes
        $piece =~ s{$link_re}{ _link_markup($1 || '', "$2") }ge;
    }
    return join '', @pieces;
}
# Accumulates everything that is printed at the end of the script.
my $out = '';
# atom/rss feed header and footer
#
# With --index and an atom or rss format, the script emits only the
# opening (--header) or closing (--footer) part of the feed document and
# then jumps straight to the final print; no note is read from stdin.
if ($index and ($format eq 'atom' or $format eq 'rss')) {
    if ($header) {
        # current UTC time in the two formats the feeds need
        chomp(my $now_iso8601 = `date -Iseconds -u` // '');
        chomp(my $now_rfc5322 = `date -uR` // '');
        # the feed's own format is linked as rel="self", the other one
        # as rel="alternate"
        my $atom_rel = $format eq 'atom' ? 'self' : 'alternate';
        my $rss_rel = $format eq 'rss' ? 'self' : 'alternate';
        # inside an RSS channel the link elements with attributes must be
        # atom:link; a bare <link> there is RSS's own plain-URL element
        my $link = $format eq 'atom' ? 'link' : 'atom:link';
        my $links = '';
        if ($lang eq 'en') {
            $links = qq(
<$link hreflang="fa" href="https://bndl.org/fa/notes.atom" rel="alternate" type="application/atom+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/notes.rss" rel="alternate" type="application/rss+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/bandali.fa.txt" rel="alternate" type="text/plain" />
<$link hreflang="fa" href="https://bndl.org/fa/" rel="alternate" type="text/html" />
<$link href="https://bndl.org/notes.atom" rel="$atom_rel" type="application/atom+xml" />
<$link href="https://bndl.org/notes.rss" rel="$rss_rel" type="application/rss+xml" />
<$link href="https://bndl.org/bandali.txt" rel="alternate" type="text/plain" />
<$link href="https://bndl.org" rel="alternate" type="text/html" />);
        } elsif ($lang eq 'fa') {
            $links = qq(
<$link hreflang="en" href="https://bndl.org/notes.atom" rel="alternate" type="application/atom+xml" />
<$link hreflang="en" href="https://bndl.org/notes.rss" rel="alternate" type="application/rss+xml" />
<$link hreflang="en" href="https://bndl.org/bandali.txt" rel="alternate" type="text/plain" />
<$link hreflang="en" href="https://bndl.org" rel="alternate" type="text/html" />
<$link href="https://bndl.org/fa/notes.atom" rel="$atom_rel" type="application/atom+xml" />
<$link href="https://bndl.org/fa/notes.rss" rel="$rss_rel" type="application/rss+xml" />
<$link href="https://bndl.org/fa/bandali.fa.txt" rel="alternate" type="text/plain" />
<$link href="https://bndl.org/fa/" rel="alternate" type="text/html" />);
        }
        # the qq() blocks above start with a newline for readability only
        $links =~ s/^\n//;
        $out .= '<?xml version="1.0" encoding="UTF-8" ?>';
        $out .= ($format eq 'atom') ? qq(
<feed xml:lang="$lang" xmlns="http://www.w3.org/2005/Atom">
<title>$site_title</title>
<subtitle>$site_desc</subtitle>
<id>$feed_id</id>
$links
<updated>$now_iso8601</updated>)
            : ($format eq 'rss') ? qq(
<rss version="2.0"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>$site_title</title>
<description>$site_desc</description>
<link>$site_url</link>
<language>$lang</language>
<lastBuildDate>$now_rfc5322</lastBuildDate>
<pubDate>$now_rfc5322</pubDate>
<ttl>1800</ttl>
$links)
            : '';
    } elsif ($footer) {
        $out .= ($format eq 'atom') ? '</feed>'
            : ($format eq 'rss') ? '</channel></rss>'
            : '';
    }
    # we're done
    goto PRINT;
}
# _date_cmd(@args) -> string
#
# Runs date(1) with @args and returns its first line of output without
# the trailing newline, or '' if the command produced nothing.  The
# arguments are passed as a list, so no shell is involved and values
# taken from the input text cannot be interpreted as shell syntax.
sub _date_cmd {
    my (@args) = @_;
    open(my $fh, '-|', 'date', @args) or do {
        warn "cannot run date: $!\n";
        return '';
    };
    local $/ = "\n";
    my $line = <$fh>;
    close($fh);    # a non-zero exit from date is deliberately not fatal
    $line = '' unless defined $line;
    chomp $line;
    return $line;
}

# Read the whole note from standard input.
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The title is the first line of the note, escaped and trimmed, with the
# author's name appended unless it already appears in it.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;
# plain substring test: $author is not a pattern (and may be empty)
$title .= " — $author" if index($title, $author) < 0;

# The last three lines of the note are expected to carry the "updated",
# "published" and "plain text" fields, in that order.
my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
$upd = $pub if (!$upd);
($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

# Escape first, then turn URLs into links.
$txt = linkify(html_esc($txt));

# Timestamps in the formats used by the feeds; undef when the note does
# not provide the corresponding date.  Each variable is declared
# unconditionally: "my $x = ... if COND" has undefined behaviour.
my $upd_iso8601 = $upd ? _date_cmd('-Iseconds', '-ud', $upd) : undef;
my $pub_iso8601 = $pub ? _date_cmd('-Iseconds', '-ud', $pub) : undef;
my $pub_rfc5322 = $pub ? _date_cmd('-uRd', $pub) : undef;

# The HTML page URL is derived from the plain-text URL: the ".txt" (or
# ".$lang.txt") suffix becomes ".html" and a "bandali-" file name prefix
# is dropped.
my $url_html = $url ? ($url =~ s/(?:[.]$lang)?[.]txt$/.html/r) : undef;
$url_html =~ s|/bandali-(.*)|/$1| if $url_html;
# The slug is the page's file name without ".html"; it completes the
# note's feed id.
my $slug = $url_html ? ($url_html =~ s|.*/(.*)[.]html$|$1|r) : undef;
my $note_id = $url_html ? "$feed_id:$slug" : undef;
# note header
#
# Emits everything that precedes the note text: the HTML document head
# for --format=html, or the opening of an <entry>/<item> (up to and
# including the CDATA-wrapped <pre>) for atom/rss.
if ($format eq 'html') {
    # link to the other-language home page, on index pages only
    my $alt_lang_link =
          ($index and $lang eq 'en')
        ? qq(<link rel="alternate" href="https://bndl.org/fa/"
hreflang="fa" title="persian" />\n)
        : ($index and $lang eq 'fa')
        ? qq(<link rel="alternate" href="https://bndl.org/"
hreflang="en" title="english" />\n)
        : '';
    $out .=
        '<!doctype html>'
        . qq(<html lang="$lang")
        . ($lang eq 'fa' ? ' dir="rtl"' : '')
        # the closing '>' must be appended for every language, so it
        # lives outside the ternary above
        . '>'
        . qq(<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />\n)
        . "<title>$title</title>\n"
        . qq(<link rel="icon" href="data:,">\n)
        . ($url
            ? qq(<link rel="alternate" href="$url"
title="plain text" type="text/plain" />\n)
            : '')
        . $alt_lang_link
        . qq(<style>\@media(prefers-color-scheme:dark){
body{background:#1c1c1c;color:white;}a:link{color:#acdeff;}
a:visited{color:#f8f;}a:active{color:#e00;}})
        . ($lang eq 'fa'
            ? qq(\n\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel})
            : '')
        . "</style>\n"
        . '</head><body><pre>';
} elsif ($format eq 'atom' or $format eq 'rss') {
    # RSS has no native "updated" element, so the Atom one is borrowed
    # through the atom: namespace prefix
    my $atom_updated =
          ($format eq 'atom') ? 'updated'
        : ($format eq 'rss')  ? 'atom:updated'
        :                       '';
    # empty when the note carries no usable date
    my $updated = $upd
        ? "<$atom_updated>$upd_iso8601</$atom_updated>\n"
        : '';
    $out .= ($format eq 'atom') ? qq(
<entry xml:base="$site_url">
<author><name>$author</name></author>
<id>$note_id</id>
<published>$pub_iso8601</published>\n)
            . $updated
            . qq(<link href="$url" rel="alternate" type="text/plain" />
<link href="$url_html" rel="alternate" type="text/html" />
<title>$title</title>
<content type="html"><![CDATA[<pre>)
        : ($format eq 'rss') ? qq(
<item>
<title>$title</title>
<link>$url_html</link>
<guid isPermaLink="false">$note_id</guid>
<pubDate>$pub_rfc5322</pubDate>\n)
            # only mention an update when it differs from the publication
            . (($updated and $pub ne $upd) ? $updated : '')
            . qq(<content:encoded><![CDATA[<pre>)
        : '';
}
# note body: the escaped, linkified text goes in as-is
$out .= $txt;
# note footer: close whatever the note header opened for this format;
# any other format gets no closing markup
$out .=
      $format eq 'html' ? '</pre></body></html>'
    : $format eq 'atom' ? '</pre>]]></content></entry>'
    : $format eq 'rss'  ? '</pre>]]></content:encoded></item>'
    :                     '';
# Jump target for the feed header/footer mode, which skips the note
# processing above and only needs the final output step.
PRINT:
print($out . "\n");
STDOUT->flush;