#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Linux Magazin 10/08
"""
===================================
Linux Magazin 10/08
Babylon zu fünft / Sprach-Vergleich
===================================

by Grégoire Weber <[email protected]> and
Robert Hunger <[email protected]>

- based on the solution from David Mertz and Ka-Ping Yee (Listing 4)
- with ideas from the nice PHP OO solution from Zeev Suraski (Listing 1)

USAGE: renumber-footnotes.py [--test] [-v] file

--test run unit tests
-v run unit tests in verbose mode
"""

import sys
import re

# Fixture fed to renumberFootnotes() by its doctest: body text whose bracketed
# references are out of order (and partly repeated), then the "@footnote:"
# marker line, then the footnote definitions in their original numbering.
# The text (including its spelling) is test data and must not be edited
# without updating SAMPLE_EXPECTED accordingly.
SAMPLE_INPUT = '''
A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with "main (int argc,
char **argv)" or with "main (int argc, char *argv[])". Square brackets annoyed
them [9999]. Multiple references may exist to same targets [4].
@footnote:
[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

# Output the renumberFootnotes() doctest expects for SAMPLE_INPUT: references
# numbered 1..5 in order of first appearance in the text (the repeated
# reference keeps its first number), and the footnote definitions after the
# "@footnote:" marker renumbered and sorted ascending by their new number.
SAMPLE_EXPECTED = '''
A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [4]. They
debated other the question whether to start a C-program with "main (int argc,
char **argv)" or with "main (int argc, char *argv[])". Square brackets annoyed
them [5]. Multiple references may exist to same targets [2].
@footnote:
[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
'''

class FootnoteRenumberer(object):
    r"""Callable that renumbers footnote references in order of appearance.

    Designed to be passed as the replacement callback to ``re.sub``: it is
    called with a match object whose group 1 is a bracketed reference such
    as ``'[13]'`` and returns the replacement text.  The first distinct
    reference seen becomes ``'[1]'``, the next one ``'[2]'`` and so on; a
    reference that was already seen gets the number assigned at its first
    occurrence.

    The docstring is a raw string so that the regex backslashes in the
    example below are not interpreted as string escapes.

    >>> footnoteRenumberer = FootnoteRenumberer()
    >>> re.sub(r'(\[\d+\])', footnoteRenumberer, 'foo [99] bar [3] su [99]')
    'foo [1] bar [2] su [1]'
    >>> footnoteRenumberer._counter
    3
    >>> sorted(footnoteRenumberer._map.items())
    [('[3]', '[2]'), ('[99]', '[1]')]
    """

    def __init__(self):
        # Number that the next previously unseen reference will receive.
        self._counter = 1
        # Maps original reference text (e.g. '[99]') to its new text ('[1]').
        self._map = {}

    def __call__(self, match):
        """Return the renumbered reference for ``match.group(1)``.

        ``match`` is a regex match object whose first group holds the
        complete bracketed reference, brackets included.
        """
        footnoteNumber = match.group(1)
        if footnoteNumber not in self._map:
            # First occurrence: hand out the next free number.
            self._map[footnoteNumber] = "[%s]" % self._counter
            self._counter += 1
        return self._map[footnoteNumber]

def extractNumber(line):
    """Return the footnote number of a footnote line, or None.

    A footnote line starts with ``[``, followed by an integer and a closing
    ``]`` (for example ``'[13] Some source'`` gives ``13``).  Any other line
    yields ``None``: text lines, lines without a closing bracket, or lines
    whose bracket content is not an integer.

    Used as the basis of the sort key for the footnote section; unnumbered
    lines are meant to appear at the top when sorted.
    """
    # Require the opening bracket explicitly; otherwise a line such as
    # '512' or 'x7] ...' would be misread as a footnote by the slicing below.
    if not line.startswith('['):
        return None
    number, closingBracket, _rest = line[1:].partition(']')
    if not closingBracket:
        # No ']' at all: not a footnote line.
        return None
    try:
        return int(number)
    except ValueError:
        # Bracket content is not an integer (e.g. '[abc]' or '[]').
        return None

def renumberFootnotes(lineIter, write=None):
    """Renumber footnote references in a text and sort its footnote section.

    ``lineIter`` is an iterable of lines (line endings included).  Lines up
    to and including the first line starting with ``@footnote:`` are passed
    to ``write`` immediately, with every ``[n]`` reference renumbered in
    order of first appearance.  The lines after the marker are renumbered
    with the same mapping, collected, sorted by their new footnote number
    (lines without a number first, in their original order) and written in
    one piece at the end.

    ``write`` is a callable taking one string; it defaults to
    ``sys.stdout.write``, looked up at call time so that a redirected
    ``sys.stdout`` is honoured.

    >>> lines = []
    >>> renumberFootnotes(SAMPLE_INPUT.splitlines(True), lines.append)
    >>> "".join(lines) == SAMPLE_EXPECTED
    True
    """
    if write is None:
        # Resolved here rather than in the signature: a default of
        # sys.stdout.write would be bound once at definition time.
        write = sys.stdout.write

    footnoteRenumberer = FootnoteRenumberer()
    footnoteRegex = re.compile(r'(\[\d+\])')
    putLine = write
    footnotes = []
    for line in lineIter:
        if line.startswith("@footnote:"):
            putLine(line)
            # From here on lines are buffered so they can be sorted.
            putLine = footnotes.append
            continue
        putLine(footnoteRegex.sub(footnoteRenumberer, line))

    def sortKey(line):
        # Tuples keep None and int keys from being compared directly
        # (a TypeError on Python 3) while still sorting unnumbered lines
        # ahead of numbered ones.
        number = extractNumber(line)
        if number is None:
            return (0, 0)
        return (1, number)

    footnotes.sort(key=sortKey)
    write("".join(footnotes))

# Script entry point: run the doctests, renumber one file to stdout, or
# print the usage text and exit with status 1.
if __name__ == '__main__':
    if '--test' in sys.argv[1:]:
        import doctest
        # testmod() inspects sys.argv for '-v' itself to enable verbose mode.
        doctest.testmod()
    elif len(sys.argv) == 2:
        # open() replaces the Python-2-only file() builtin; the with block
        # closes the input file once processing is finished.
        with open(sys.argv[1]) as inputFile:
            renumberFootnotes(inputFile)
    else:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(__doc__)
        sys.exit(1)