#!/usr/bin/env python
"""Usage: FootnoteSorter.py [-a] [--test] [< input.txt]
   -a     Number footnotes in order of appearance in text
   --test Self-testing using the SAMPLE in the script source.
"""

# Sample document used as input when the script is run with --test.
# It consists of body text containing bracketed numeric references,
# a line starting with '@footnote:', and then one footnote per line,
# each beginning with its bracketed number.
SAMPLE = '''A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He
met with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with "main (int argc,
char **argv)" or with "main (int argc, char *argv[])". Square brackets annoyed
them [9999]. Multiple references may exist to same targets [4].

@footnote:

[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Web Link</a>
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

import sys, re, fileinput, tempfile, os

# Matches a footnote reference: one or more digits enclosed in square
# brackets, e.g. "[13]". Brackets without digits, such as "argv[]", do
# not match.
FOOTNOTE_PATTERN = re.compile(r'\[\d+\]')

def checkOption(args, option):
   '''Report whether option occurs in the list args.

   When it does, its first occurrence is removed from args in place and
   True is returned; otherwise args is left untouched and False is
   returned.'''
   if option not in args:
       return False
   # list.remove drops only the first matching element.
   args.remove(option)
   return True

def fileFilter(input, old2New):
   '''Renumber the footnote references in input and write it to stdout.

   input is an iterable of text lines. old2New maps an original
   reference such as "[13]" to its replacement such as "[1]". A
   reference without an entry is given the next free number, and that
   new entry is stored in old2New, so the dictionary is modified in
   place.'''

   def renumber(match):
       '''Return the replacement for one FOOTNOTE_PATTERN match.'''
       label = match.group(0)
       replacement = old2New.get(label)
       if replacement is not None:
           # Known reference: reuse the number assigned earlier.
           return replacement
       # Unknown reference: hand out the next number and remember it.
       replacement = '[%d]' % (len(old2New) + 1)
       old2New[label] = replacement
       return replacement

   # Substitute every reference line by line and emit the result.
   for text in input:
       sys.stdout.write(FOOTNOTE_PATTERN.sub(renumber, text))

def collectFootnoteOrder(lines, copy, pattern):
   '''Copy lines to copy and derive the numbering from the footnote block.

   lines is an iterable of text lines; every line is written unchanged
   to the file-like object copy so the text can be read a second time.
   pattern is a compiled regular expression matching one footnote label.

   Everything up to and including the first line starting with
   '@footnote:' is treated as body text. After that, each line that
   begins with a footnote label defines a footnote, and footnotes are
   numbered 1, 2, 3, ... in the order in which they are defined.

   Returns a dict mapping each original label (e.g. "[13]") to its new
   label (e.g. "[1]").'''
   old2New = {}
   inFootnotes = False
   for line in lines:
       copy.write(line)
       if not inFootnotes:
           inFootnotes = line.startswith('@footnote:')
           continue
       # Only a label at the very start of the line defines a footnote.
       # Lines that merely contain a ']' somewhere must not consume a
       # number, or the resulting numbering would have gaps.
       found = pattern.match(line)
       if found is None:
           continue
       label = found.group(0)
       # Keep the first number for a label that is defined twice;
       # reassigning it would hand the same number to the next footnote.
       if label not in old2New:
           old2New[label] = '[%d]' % (len(old2New) + 1)
   return old2New


if __name__ == '__main__':
   isTest = checkOption(sys.argv, '--test')
   byFirstOccurrence = checkOption(sys.argv, '-a')

   if isTest:
       # Use the built-in sample. splitlines(True) keeps the line
       # endings and does not produce a spurious empty last line.
       lines = SAMPLE.splitlines(True)
   else:
       # Read the files named on the command line, or stdin if none.
       lines = fileinput.FileInput(openhook=fileinput.hook_compressed)

   if byFirstOccurrence:
       # Simple case: a single pass that assigns numbers as references
       # are encountered in the text.
       fileFilter(lines, {})
   else:
       # Two passes are needed, so the input is copied to a temporary
       # file. The first pass reads the footnote block to determine
       # the numbering; the second pass filters the copy. Since the
       # filter finds the existing mappings it does not create its own
       # for those references.
       tmpFD, tmpFile = tempfile.mkstemp(text=True)
       try:
           # The with block closes the handle before the file is
           # removed, even when an error occurs in between.
           with os.fdopen(tmpFD, 'w+') as fh:
               old2New = collectFootnoteOrder(lines, fh, FOOTNOTE_PATTERN)
               # Read the temporary file again from the start.
               fh.seek(0)
               fileFilter(fh, old2New)
       finally:
           # Always delete the temporary file.
           os.remove(tmpFile)