#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
===================================
Linux Magazin 10/08
Babylon zu fünft / Sprach-Vergleich
===================================

by Grégoire Weber <[email protected]> and
  Robert Hunger  <[email protected]>

- based on the solution from David Mertz and Ka-Ping Yee (Listing 4)
- with ideas from the nice PHP OO solution from Zeev Suraski (Listing 1)

USAGE: renumber-footnotes.py [--test] [-v] file

 --test run unit tests
 -v     run unit tests in verbose mode
"""

import sys
import re

# Doctest fixture: body text whose bracketed references are out of order
# (and partly repeated), then the '@footnote:' marker line, then the
# footnote definitions in arbitrary order.
SAMPLE_INPUT = '''
A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with "main (int argc,
char **argv)" or with "main (int argc, char *argv[])". Square brackets annoyed
them [9999]. Multiple references may exist to same targets [4].
@footnote:
[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

# Doctest fixture: the output expected for SAMPLE_INPUT. References are
# numbered from 1 in order of first appearance in the body text, repeated
# references keep their number, and the footnote definitions are sorted by
# their new number.
SAMPLE_EXPECTED = '''
A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [4]. They
debated other the question whether to start a C-program with "main (int argc,
char **argv)" or with "main (int argc, char *argv[])". Square brackets annoyed
them [5]. Multiple references may exist to same targets [2].
@footnote:
[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
'''

class FootnoteRenumberer(object):
    r"""Callable that renumbers footnote references consecutively.

    Meant to be used as the replacement callback of ``re.sub``; the
    pattern has to capture the complete reference, brackets included, in
    group 1. Numbering begins with '[1]' and is incremented for each
    reference not seen before. A reference that is already known gets the
    number assigned at its first occurrence.

    The docstring is a raw string so the regex backslashes in the example
    below are not interpreted as (invalid) string escapes.

    >>> footnoteRenumberer = FootnoteRenumberer()
    >>> re.sub(r'(\[\d+\])', footnoteRenumberer, 'foo [99] bar [3] su [99]')
    'foo [1] bar [2] su [1]'
    >>> footnoteRenumberer._counter
    3
    >>> sorted(footnoteRenumberer._map.items())
    [('[3]', '[2]'), ('[99]', '[1]')]
    """

    def __init__(self):
        # Number handed out to the next reference not seen before.
        self._counter = 1
        # Original reference text (e.g. '[99]') -> new reference text.
        self._map = {}

    def __call__(self, match):
        """Return the replacement text for one matched reference.

        ``match`` is a regex match object whose group 1 holds the
        original reference including its brackets.
        """
        footnoteNumber = match.group(1)
        if footnoteNumber not in self._map:
            # First occurrence: assign the next free number.
            self._map[footnoteNumber] = "[%s]" % self._counter
            self._counter += 1
        return self._map[footnoteNumber]

def extractNumber(line):
    """Return the number of a footnote line, or None for any other line.

    A footnote line starts with an opening bracket, one or more digits
    and a closing bracket, i.e. the same shape as a footnote reference.
    Lines that merely begin with digits, or with a stray character
    followed by digits and a closing bracket, are not footnote lines.

    Unnumbered lines appear at the top when sorted.

    >>> extractNumber('[13] Al Fabetus')
    13
    >>> extractNumber('42') is None
    True
    >>> extractNumber('') is None
    True
    """
    try:
        match = re.match(r'\[(\d+)\]', line)
    except TypeError:
        # Not a string at all: report it like any other unnumbered line
        # instead of failing.
        return None
    if match is None:
        return None
    return int(match.group(1))

def renumberFootnotes(lineIter, write=sys.stdout.write):
    """Renumber footnote references and sort the footnote list.

    ``lineIter`` yields the lines of the document, line endings included.
    Everything up to and including the first line starting with
    '@footnote:' is passed to ``write`` immediately, with the references
    renumbered in order of first appearance. The lines after the marker
    are renumbered with the same mapping, collected, sorted by their new
    number (unnumbered lines first, in their original order) and written
    in one final ``write`` call.

    >>> lines = []
    >>> renumberFootnotes(SAMPLE_INPUT.splitlines(True), lines.append)
    >>> "".join(lines) == SAMPLE_EXPECTED
    True
    """
    def sortKey(line):
        # (False, 0) sorts before (True, n): unnumbered lines stay on top
        # without ever comparing None with an int, which is an error on
        # Python 3.
        number = extractNumber(line)
        return (number is not None, number or 0)

    footnoteRenumberer = FootnoteRenumberer()
    footnoteRegex = re.compile(r'(\[\d+\])')
    putLine = write
    footnotes = []
    for line in lineIter:
        if line.startswith("@footnote:"):
            # Emit the marker unchanged, then start collecting instead of
            # writing so the footnotes can be sorted at the end.
            putLine(line)
            putLine = footnotes.append
            continue
        putLine(footnoteRegex.sub(footnoteRenumberer, line))

    # A last line without a line ending would be glued to its successor
    # if sorting moves it away from the end. Terminate it for sorting and
    # drop the terminator from whatever line ends up last, so the output
    # ends the same way the input did.
    missingFinalNewline = bool(footnotes) and not footnotes[-1].endswith("\n")
    if missingFinalNewline:
        footnotes[-1] += "\n"
    footnotes.sort(key=sortKey)
    if missingFinalNewline and footnotes[-1].endswith("\n"):
        footnotes[-1] = footnotes[-1][:-1]
    write("".join(footnotes))

# Script entry point: run the doctests with --test, renumber the given
# file to stdout with exactly one argument, print the usage text otherwise.
if __name__ == '__main__':
    if '--test' in sys.argv[1:]:
        import doctest
        # testmod() looks for '-v' in sys.argv on its own. Its first
        # result field is the number of failed examples; report failures
        # through the exit status.
        failures = doctest.testmod()[0]
        if failures:
            sys.exit(1)
    elif len(sys.argv) == 2:
        # open() replaces the Python-2-only file() builtin. try/finally
        # (rather than 'with') closes the file deterministically without
        # requiring a newer interpreter than the rest of the script.
        source = open(sys.argv[1])
        try:
            renumberFootnotes(source)
        finally:
            source.close()
    else:
        # Parenthesised form is valid as a print statement and as a
        # print() call.
        print(__doc__)
        sys.exit(1)