import xml.sax
import xml.sax.handler

"""ifarchivexml:

This module parses the Master-Index.xml file that is available at
<http://www.ifarchive.org/indexes/Master-Index.xml>.

You can use this module like this:
 import ifarchivexml
 (root, dirs, files) = ifarchivexml.parse('Master-Index.xml')

root is an IFDir object representing the root directory ('if-archive').
dirs is a dictionary mapping directory names ('if-archive/games', for
example) to IFDir objects. files is a dictionary mapping file pathnames
('if-archive/games/playgame.FAQ', for example) to IFFile objects.

You can display the contents of either an IFDir or IFFile object with
the obj.dump() method.

"""

# Parser states: which kind of element the SAX handler is currently
# inside. The numeric values are 0 through 4, in the order listed.
(
    CONTEXT_NONE,      # not inside a directory or file entry
    CONTEXT_DIR,       # inside a top-level <directory> entry
    CONTEXT_FILE,      # inside a <file> entry
    CONTEXT_DIRLINK,   # inside a <symlink> of type 'dir' within a file
    CONTEXT_FILELINK,  # inside any other <symlink> within a file
) = range(5)

class IFDir:
    """One directory entry read from the archive index.

    Only ``description`` (None), ``subdirs`` and ``files`` (empty lists)
    have defaults. The other attributes are assigned from outside, by
    IFAParser as it reads the index:

      name, xdir, parent      -- text of the matching XML elements
      subdircount, filecount  -- integers from the matching XML elements
      parentobj               -- the parent IFDir, or None when parent is ''

    Calling __repr__() or dump() before those attributes are set raises
    AttributeError.
    """

    description = None

    def __init__(self):
        self.subdirs = []
        self.files = []

    def __repr__(self):
        return "<IFDir '" + self.name + "'>"

    def dump(self):
        """Print every field of this directory to standard output."""
        # Each print() call takes exactly one pre-formatted string, so the
        # output is the same whether print is a statement (Python 2) or a
        # function (Python 3).
        print('name:    %s' % (self.name,))
        print('xdir:    %s' % (self.xdir,))
        print('parent:  %s (%s)' % (self.parent, str(self.parentobj)))
        print('subdircount: %s' % (self.subdircount,))
        print('filecount:   %s' % (self.filecount,))
        if self.description is not None:
            print('description:')
            print(self.description)
        print('subdirs:')
        for subdir in self.subdirs:
            print('  %s' % (str(subdir),))
        print('files:')
        for entry in self.files:
            print('  %s' % (str(entry),))

class IFFile:
    """One file entry read from the archive index.

    ``size``, ``date``, ``md5``, ``rawdate``, ``symlink`` and
    ``description`` default to None. The other attributes are assigned
    from outside, by IFAParser as it reads the index:

      path, name, directory  -- text of the matching XML elements
      directoryobj           -- the IFDir looked up from ``directory``
      orderindex             -- position of the entry in the index
      symlinkname, symlinkxdir  -- set when ``symlink`` is 'dir'
      symlinkpath               -- set when ``symlink`` is 'file'

    Calling __repr__() or dump() before the attributes they read are set
    raises AttributeError.
    """

    size = None
    date = None
    md5 = None
    rawdate = None
    symlink = None
    description = None

    def __repr__(self):
        return "<IFFile '" + self.path + "'>"

    def dump(self):
        """Print every field of this file entry to standard output."""
        # Each print() call takes exactly one pre-formatted string, so the
        # output is the same whether print is a statement (Python 2) or a
        # function (Python 3).
        print('path:    %s' % (self.path,))
        print('name:    %s' % (self.name,))
        print('directory:  %s (%s)' % (self.directory, str(self.directoryobj)))
        if self.symlink == 'dir':
            print('symlink to dir:')
            print('  name:  %s' % (self.symlinkname,))
            print('  xdir:  %s' % (self.symlinkxdir,))
        if self.symlink == 'file':
            print('symlink to file:')
            print('  path:  %s' % (self.symlinkpath,))
        print('size:    %s' % (self.size,))
        print('date:    %s' % (self.date,))
        print('rawdate: %s' % (self.rawdate,))
        print('md5:     %s' % (self.md5,))
        print('orderindex: %s' % (self.orderindex,))
        if self.description is not None:
            print('description:')
            print(self.description)

class IFAParser(xml.sax.handler.ContentHandler):
    """SAX content handler that builds IFDir and IFFile objects.

    ``elements`` maps each recognised element name to a
    ``(start_handler, end_handler)`` pair; elements that are not in the
    table are ignored. ``context`` records which kind of entry is being
    read, so that elements such as <name> or <path> are stored on the
    right object.

    After the closing </ifarchive> tag:
      directories -- dict mapping directory name to IFDir
      files       -- dict mapping file path to IFFile
    and every object has been linked to its parent / containing
    directory object.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.grabbeddata = ''
        self.curdir = None
        # Initialised alongside curdir so that handlers can always test it.
        self.curfile = None
        self.directories = {}
        self.files = {}
        self.orderindex = 0
        self.context = CONTEXT_NONE
        self.elements = {
            'ifarchive': (self.ignore_start, self.ifarchive_end),
            'directory': (self.directory_start, self.directory_end),
            'file': (self.file_start, self.file_end),
            'name': (self.grabdata_start, self.name_end),
            'xdir': (self.grabdata_start, self.xdir_end),
            'filecount': (self.grabdata_start, self.filecount_end),
            'subdircount': (self.grabdata_start, self.subdircount_end),
            'parent': (self.grabdata_start, self.parent_end),
            'path': (self.grabdata_start, self.path_end),
            'size': (self.grabdata_start, self.size_end),
            'date': (self.grabdata_start, self.date_end),
            'rawdate': (self.grabdata_start, self.rawdate_end),
            'md5': (self.grabdata_start, self.md5_end),
            'description': (self.grabdata_start, self.description_end),
            'symlink': (self.symlink_start, self.symlink_end),
        }

    # --- SAX callbacks ---------------------------------------------------

    def characters(self, data):
        """Append a chunk of character data to the current text buffer."""
        self.grabbeddata = self.grabbeddata + data

    def startElement(self, name, attrs):
        """Call the start handler registered for *name*, if there is one."""
        handlers = self.elements.get(name)
        if handlers is None:
            return
        handlers[0](attrs)

    def endElement(self, name):
        """Call the end handler registered for *name*, if there is one."""
        handlers = self.elements.get(name)
        if handlers is None:
            return
        handlers[1]()

    # --- generic handlers ------------------------------------------------

    def ignore_start(self, dict):
        pass

    def ignore_end(self):
        pass

    def grabdata_start(self, dict):
        """Start collecting text for an element by emptying the buffer."""
        self.grabbeddata = ''

    def grabdata(self):
        """Return the text collected so far and empty the buffer."""
        dat = self.grabbeddata
        self.grabbeddata = ''
        return dat

    def _assign(self, target, attrname, convert=None):
        """Consume the collected text and store it as target.attrname.

        The text is always consumed. It is stored (after passing through
        *convert*, when given) only if *target* is not None.
        """
        data = self.grabdata()
        if target is not None:
            if convert is not None:
                data = convert(data)
            setattr(target, attrname, data)

    # --- <directory>, <file>, <symlink> ----------------------------------

    def directory_start(self, dict):
        if self.context == CONTEXT_NONE:
            self.curdir = IFDir()
            self.context = CONTEXT_DIR
        elif self.context == CONTEXT_FILE:
            # Inside a file entry, <directory> is a text field holding the
            # name of the containing directory.
            self.grabdata_start(None)

    def directory_end(self):
        if self.context == CONTEXT_DIR:
            self.directories[self.curdir.name] = self.curdir
            self.curdir = None
            self.context = CONTEXT_NONE
        elif self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'directory')

    def file_start(self, dict):
        if self.context == CONTEXT_NONE:
            self.curfile = IFFile()
            self.context = CONTEXT_FILE

    def file_end(self):
        if self.context == CONTEXT_FILE:
            path = self.curfile.path
            # orderindex records the position of the file in the index.
            self.curfile.orderindex = self.orderindex
            self.orderindex = self.orderindex + 1
            self.files[path] = self.curfile
            self.curfile = None
            self.context = CONTEXT_NONE

    def symlink_start(self, dict):
        if self.context == CONTEXT_FILE:
            # Any type other than 'dir' is treated as a link to a file.
            if dict['type'] == 'dir':
                self.context = CONTEXT_DIRLINK
                self.curfile.symlink = 'dir'
            else:
                self.context = CONTEXT_FILELINK
                self.curfile.symlink = 'file'

    def symlink_end(self):
        # Leaving either kind of symlink returns to the enclosing file.
        if self.context in (CONTEXT_DIRLINK, CONTEXT_FILELINK):
            self.context = CONTEXT_FILE

    # --- text fields -----------------------------------------------------

    def name_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'name')
        elif self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'name')
        elif self.context == CONTEXT_DIRLINK:
            self._assign(self.curfile, 'symlinkname')

    def parent_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'parent')

    def xdir_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'xdir')
        elif self.context == CONTEXT_DIRLINK:
            self._assign(self.curfile, 'symlinkxdir')

    def subdircount_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'subdircount', int)

    def filecount_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'filecount', int)

    def path_end(self):
        if self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'path')
        elif self.context == CONTEXT_FILELINK:
            self._assign(self.curfile, 'symlinkpath')

    def size_end(self):
        if self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'size', int)

    def date_end(self):
        if self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'date')

    def rawdate_end(self):
        if self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'rawdate', int)

    def md5_end(self):
        if self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'md5')

    def description_end(self):
        if self.context == CONTEXT_DIR:
            self._assign(self.curdir, 'description')
        elif self.context == CONTEXT_FILE:
            self._assign(self.curfile, 'description')

    # --- final linking ---------------------------------------------------

    def ifarchive_end(self):
        """Link every directory and file to its parent directory object.

        Raises KeyError if an entry names a directory that is not in
        ``self.directories``.
        """
        for dirobj in self.directories.values():
            parent = dirobj.parent
            if parent == '':
                # An empty parent name marks a directory with no parent.
                dirobj.parentobj = None
            else:
                dirobj.parentobj = self.directories[parent]
                dirobj.parentobj.subdirs.append(dirobj)
        for fileobj in self.files.values():
            fileobj.directoryobj = self.directories[fileobj.directory]
            fileobj.directoryobj.files.append(fileobj)

def parse(filename):
    """Parse the index file at *filename*.

    Returns a tuple ``(root, dirs, files)``: *root* is the IFDir stored
    under the name 'if-archive', *dirs* is the dictionary of all
    directories keyed by name, and *files* is the dictionary of all files
    keyed by path.

    Raises KeyError if the index contains no 'if-archive' directory;
    errors from opening the file or from the XML parser propagate
    unchanged.
    """
    parser = IFAParser()

    # Open in binary mode so the XML parser decodes the bytes itself using
    # the encoding the document declares, rather than the platform default.
    fl = open(filename, 'rb')
    try:
        xml.sax.parse(fl, parser)
    finally:
        # Close the file even when parsing fails.
        fl.close()

    rootdir = parser.directories['if-archive']
    return (rootdir, parser.directories, parser.files)