#! /usr/bin/env python
# Copyright (C) 2008, Stefan Schwarzer <[email protected]>
"""
This is a command line utility to reorder footnotes, denoted by
numbers in brackets, as in the example below. The file to process is
given as the single command line argument whereas the output is
written to standard output. Errors and warnings are written to standard
error.
In the output file, footnotes in the main text are numbered
1, 2, 3, .... The numbers in the footnote list, i. e. after the
marker "@footnote:", are sorted numerically to fit the order in the
main text. There can be multiple references in the main text pointing
to the same number in the footnote list.
The program warns about common mistakes like footnotes in the main
text which don't occur in the footnote list, or footnotes in the
list which have no correspondence in the main text.
Here's an input file example:
A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with "main
(int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [9999]. A repeated index [4].
@footnote:
[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
This would become:
A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [4]. They
debated other the question whether to start a C-program with "main
(int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [5]. A repeated index [2].
@footnote:
[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
"""
import re
import sys
# marker line that splits the input into main text and footnote list
FOOTNOTE_MARKER = "@footnote:"

# matches a single bracketed index number such as "[42]"
index_regex = re.compile(r"\[\d+\]")

# old index number -> newly assigned index number
old_to_new = dict()

# index number to hand out to the next not-yet-seen index in the main text
next_number = 1
def warn(text):
    """Write a warning message to standard error.

    The output consists of the prefix "Warning:", a space, `text`
    and a newline. `sys.stderr.write` is used instead of the `print`
    statement so the function behaves the same on Python 2 and
    Python 3.
    """
    # the one-element tuple keeps the formatting correct even if
    # `text` itself happens to be a tuple
    sys.stderr.write("Warning: %s\n" % (text,))
def reassign_in_main_text(match):
    """Return the replacement text for one bracketed index in the main
    text.

    `match` is a match object whose whole match is a bracketed number,
    e. g. "[13]". An old index seen for the first time gets the next
    free new number; an old index seen before gets the number that
    was assigned to it then.
    """
    global next_number
    old_index = int(match.group(0).strip("[]"))
    try:
        new_index = old_to_new[old_index]
    except KeyError:
        # first occurrence: hand out the next free number and remember it
        new_index = next_number
        old_to_new[old_index] = new_index
        next_number += 1
    return "[%d]" % new_index
def process_main_text(fobj):
    """Write the main text with the reassigned numbers to standard
    output, reading the input lines from file object `fobj`.

    Reading stops at the first line that equals the footnote marker
    once trailing whitespace is removed, or at the end of the input.
    The marker line is consumed but not written; the remaining lines
    of `fobj` are left for the caller.
    """
    write = sys.stdout.write
    for line in fobj:
        # rstrip handles whitespace (including the newline) at end of line
        if line.rstrip() == FOOTNOTE_MARKER:
            return
        new_line = index_regex.sub(reassign_in_main_text, line)
        # a last line without a newline (input without a marker) would
        # otherwise run into whatever is written to stdout next
        if not new_line.endswith("\n"):
            new_line += "\n"
        write(new_line)
def check_for_missing_footnotes(footnotes):
    """Print a warning about indices that are in the main text but
    not in the footnote list.

    `footnotes` is the list of index/line pairs generated in
    `process_footnote_list`; the indices are the new (reassigned)
    numbers, so the numbers named in the warning are new numbers too.
    Nothing is printed if every index has a footnote.
    """
    # New numbers are handed out consecutively starting at 1 and
    # `next_number` is one past the last number assigned, so this
    # range is exactly the set of indices used in the main text.
    # `range` (not `xrange`) keeps this working on Python 2 and 3.
    in_main_text = set(range(1, next_number))
    in_footnotes = set(num for num, _line in footnotes)
    missing = sorted(in_main_text - in_footnotes)
    if missing:
        warn("indices of missing footnotes: %s" %
             ", ".join(str(num) for num in missing))
def process_footnote_list(fobj):
    """Write a new footnote list with the updated numbers to standard
    output, reading the input lines from file object `fobj`.

    For every line the first bracketed index is looked up in
    `old_to_new`. Lines without an index, and lines whose index was
    never used in the main text, are skipped with a warning. The
    remaining lines are written sorted by their new index. Finally,
    indices used in the main text that have no footnote are reported
    via `check_for_missing_footnotes`.
    """
    # collect (yet unsorted) footnotes as (new index, line) pairs
    footnotes = []
    for line in fobj:
        match = index_regex.search(line)
        if match is None:
            warn("ignored line: %s" % line.rstrip())
            continue
        match_str = match.group(0)
        old_number = int(match_str[1:-1])
        if old_number not in old_to_new:
            warn("index number %d not in original main text" % old_number)
            continue
        new_number = old_to_new[old_number]
        new_line = line.replace(match_str, "[%d]" % new_number)
        # The last input line may lack a newline; after sorting it is
        # not necessarily written last, so terminate it here to keep
        # it from being merged with the following footnote.
        if not new_line.endswith("\n"):
            new_line += "\n"
        footnotes.append((new_number, new_line))
    # implicit numerical sorting by the new index numbers
    footnotes.sort()
    for _number, line in footnotes:
        sys.stdout.write(line)
    check_for_missing_footnotes(footnotes)
def main(filename):
    """Renumber the footnotes of the file `filename`.

    The processed main text, the footnote marker line and the
    reordered footnote list are written to standard output. If the
    file can't be opened, an error message is written to standard
    error and the function returns without producing other output.
    """
    try:
        fobj = open(filename)
    except IOError:
        # note: any IOError raised by `open` is reported as "not found"
        sys.stderr.write("Error: file '%s' not found\n" % filename)
        return
    try:
        process_main_text(fobj)
        # the marker is always written, even if the input had none
        sys.stdout.write(FOOTNOTE_MARKER + "\n")
        process_footnote_list(fobj)
    finally:
        fobj.close()
if __name__ == '__main__':
    # Script entry point: the first command line argument is the file
    # to process.
    try:
        filename = sys.argv[1]
    except IndexError:
        sys.stderr.write("Usage: %s input_file\n" % sys.argv[0])
        # stop here; without an argument `filename` is undefined and
        # calling `main` would fail with a NameError
        sys.exit(2)
    main(filename)