#! /usr/bin/env python

# Copyright (C) 2008, Stefan Schwarzer <[email protected]>

"""
This is a command line utility to reorder footnotes, denoted by
numbers in brackets, as in the example below. The file to process is
given as the single command line argument whereas the output is
written to standard output. Errors and warnings are written to standard
error.

In the output file, footnotes in the main text are numbered
1, 2, 3, .... The numbers in the footnote list, i. e. after the
marker "@footnote:", are sorted numerically to fit the order in the
main text. There can be multiple references in the main text pointing
to the same number in the footnote list.

The program warns about common mistakes like footnotes in the main
text which don't occur in the footnote list, or footnotes in the
list which have no correspondence in the main text.

Here's an input file example:

A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with "main
(int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [9999]. A repeated index [4].
@footnote:
[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML

This would become:

A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [4]. They
debated other the question whether to start a C-program with "main
(int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [5]. A repeated index [2].
@footnote:
[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
"""

import re
import sys


# Line that separates the main text from the footnote list; input
# lines are compared against it after stripping trailing whitespace.
FOOTNOTE_MARKER = "@footnote:"

# Matches a bracketed index number such as "[13]".
index_regex = re.compile(r"\[\d+\]")

# Mapping of former to reassigned index numbers; filled while the main
# text is processed and looked up while the footnote list is processed.
old_to_new = {}

# Next index number to use for replacement in the main text; incremented
# each time a previously unseen index is encountered there.
next_number = 1

def warn(text):
    """Write a warning message to standard error.

    `text` is the message. It is prefixed with "Warning: " and followed
    by a newline.
    """
    # A plain stream write produces the same output as the former print
    # statement and works on both Python 2 and Python 3.
    sys.stderr.write("Warning: %s\n" % (text,))

def reassign_in_main_text(match):
    """Substitution callback for bracketed indices in the main text.

    `match` is a match object whose matched text is a bracketed number.
    The first time an old number is seen it is given the next free new
    number; later occurrences of the same old number get that same new
    number again. Returns the new number in brackets.
    """
    global next_number
    token = match.group()
    old_index = int(token[1:-1])
    try:
        new_index = old_to_new[old_index]
    except KeyError:
        # first occurrence: hand out the next free number
        new_index = next_number
        old_to_new[old_index] = new_index
        next_number = new_index + 1
    return "[%d]" % new_index

def process_main_text(fobj):
    """Print the main text with the reassigned numbers, reading the
    input lines from file object `fobj`.

    Reading stops after the footnote marker line has been consumed (the
    marker itself is not printed) or when the input is exhausted. Every
    printed line ends with a newline, even if the input line did not.
    """
    for line in fobj:
        # rstrip handles whitespace at end of line
        if line.rstrip() == FOOTNOTE_MARKER:
            return
        renumbered = index_regex.sub(reassign_in_main_text, line)
        # A final line without a newline would otherwise run into the
        # output that follows it.
        if not renumbered.endswith("\n"):
            renumbered += "\n"
        # A stream write works on both Python 2 and Python 3.
        sys.stdout.write(renumbered)

def check_for_missing_footnotes(footnotes):
    """Print a warning about indices that are in the main text but
    not in the footnote list. `footnotes` is the list of
    index/line pairs generated in `process_footnote_list`.

    Nothing is printed if every index has a footnote.
    """
    # `next_number` actually is the number of indices in the main
    # text plus one, so it fits perfectly here. `range` is used instead
    # of `xrange` because it exists on both Python 2 and Python 3.
    in_main_text = set(range(1, next_number))
    in_footnotes = set(num for num, line in footnotes)
    missing = sorted(in_main_text - in_footnotes)
    if missing:
        warn("indices of missing footnotes: %s" %
             ", ".join(str(num) for num in missing))

def process_footnote_list(fobj):
    """Print a new footnote list with the updated numbers, reading
    the input lines from file object `fobj`.

    The first bracketed number found in a line is taken as that
    footnote's index. Lines without any bracketed number, and lines whose
    index was never used in the main text, are left out of the output and
    reported with a warning. The remaining lines are printed in the order
    of their new index numbers. Finally, a warning is issued for indices
    of the main text which have no footnote.
    """
    # collect (yet unsorted) footnotes as (new number, line) pairs
    footnotes = []
    for line in fobj:
        match = index_regex.search(line)
        if match is None:
            warn("ignored line: %s" % line.rstrip())
            continue
        old_number = int(match.group(0)[1:-1])
        if old_number not in old_to_new:
            warn("index number %d not in original main text" % old_number)
            continue
        new_number = old_to_new[old_number]
        # Substitute only the matched index itself. A plain string
        # replace would also rewrite identical bracketed numbers that
        # occur later in the footnote's own text.
        new_line = "%s[%d]%s" % (line[:match.start()], new_number,
                                 line[match.end():])
        # The last input line may lack a newline; after sorting it could
        # end up before other footnotes and run into the next one.
        if not new_line.endswith("\n"):
            new_line += "\n"
        footnotes.append((new_number, new_line))
    # implicit numerical sorting by the new index numbers
    footnotes.sort()
    for number, line in footnotes:
        # A stream write works on both Python 2 and Python 3.
        sys.stdout.write(line)
    check_for_missing_footnotes(footnotes)

def main(filename):
    """Renumber the footnotes of the file named `filename`.

    The processed main text, the footnote marker line and the reordered
    footnote list are written to standard output. If the file cannot be
    opened, an error message is written to standard error and nothing
    is processed.
    """
    try:
        fobj = open(filename)
    except IOError:
        sys.stderr.write("Error: file '%s' not found\n" % filename)
        return
    try:
        process_main_text(fobj)
        # The marker line is always emitted, whether or not the input
        # contained one. Stream writes work on Python 2 and Python 3.
        sys.stdout.write(FOOTNOTE_MARKER + "\n")
        process_footnote_list(fobj)
    finally:
        fobj.close()


if __name__ == '__main__':
    # Without an input file there is nothing to process: print the usage
    # message and stop with a non-zero exit status instead of going on
    # with an undefined file name.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s input_file\n" % sys.argv[0])
        sys.exit(2)
    main(sys.argv[1])