# (c) 2013-2018 Sebastian Humenda
# This code is licenced under the terms of the LGPL-3+, see the file COPYING for
# more details.
"""This module contains the ImageCache, caching formulas which have already been
converted. This allows re-using images for formulas which occur multiple times
within a document. Furthermore, it can significantly speed up incremental
document creation, because the cache is remembered across GladTeX runs.

Cache format:

   { # dict of formulas
       'some formula': # formula as key into dictionary
           { # dict of display math / inline math variants
               True: # displaymath = True
                   { # dictionary of values describing formula
                       'path': 'some/path',
                       'pos': { # positioning within the HTML document
                           'height': ..., 'width': ..., 'depth': ...
                       }
                   }
           }
   }

Formulas are `normalized`, so spacing is unified to detect possibly equal
formulas more easily.
"""

import json
import os

# Version of the on-disk cache format. ImageCache stores it inside the cache
# and refuses to load a cache file which carries a different version.
CACHE_VERSION = '2.0'

def normalize_formula(formula):
   """Normalize a formula so that recurring formulas are recognised even if
   they were written with different spacing.

   The following steps are applied:

   -   empty braces ({}) are replaced by a space (not dropped, so the
       neighbouring tokens stay separated)
   -   tabs are replaced by spaces
   -   runs of spaces of any length are squeezed into a single space
   -   leading and trailing white space is stripped

   Line breaks within the formula are left untouched.
   :param formula  the formula source as string
   :return the normalized formula"""
   normalized = formula.replace('{}', ' ').replace('\t', ' ')
   # a single replace('  ', ' ') only halves a run of spaces, hence repeat
   # until no double space is left
   while '  ' in normalized:
       normalized = normalized.replace('  ', ' ')
   return normalized.strip()

def recover_bools(object):
   """Turn the string keys 'false' and 'true' back into booleans, in place.

   JSON has no boolean keys, so False and True end up as 'false' and 'true'
   when a dictionary is serialized, and the JSON parser does not convert
   them back. This function walks through nested dictionaries and lists and
   replaces these string keys by their boolean counterparts. The passed
   object is modified directly, nothing is returned."""
   if isinstance(object, dict):
       for text, flag in (('false', False), ('true', True)):
           if text in object:
               # re-file the value under the boolean key
               object[flag] = object.pop(text)
       children = object.values()
   elif isinstance(object, list):
       children = object
   else:
       # scalars carry no keys and have nothing to descend into
       return
   for child in children:
       recover_bools(child)

class JsonParserException(Exception):
   """Raised when the JSON cache on disk cannot be read or decoded or when its
   content does not have the expected format."""

class ImageCache:
   """
   This cache stores formulas which have been converted already and don't need
   to be converted again. This is both a disk usage and performance
   improvement. The cache can be written to and read from disk, where it is
   kept as a JSON file.

   Arguments:
   path            file to read the cache from (if it exists) and to write
                   it to
   keep_old_cache  if True, a JsonParserException is raised if an existing
                   cache file could not be read (i.e. incompatible GladTeX
                   version); if False, the unreadable cache file is removed
                   along with all eqn* files next to it and the instance
                   starts with a clean cache

   Example:

   cache = ImageCache()
   cache.add_formula('\\tau', # the formula
       {'height': 1, 'depth': 2, 'width': 3}, # the positioning information for the output document
       'eqn042.png', displaymath=True)
   assert len(cache) == 1 # one entry
   cache.write()
   assert os.path.exists('gladtex.cache')
   """
   # key under which the cache format version is stored next to the formulas
   VERSION_STR = 'GladTeX__cache__version'

   def __init__(self, path='gladtex.cache', keep_old_cache=True):
       self.__cache = {}
       self.__set_version(CACHE_VERSION)
       self.__path = path
       if os.path.exists(path):
           try:
               self._read()
           except JsonParserException:
               if keep_old_cache:
                   raise
               # _read() leaves the fresh, empty cache untouched on failure,
               # so only the stale files need to be removed
               self._remove_old_cache_and_files()

   def __len__(self):
       """Return number of formulas in the cache."""
       # the version entry is no formula, so it is not counted
       return sum(1 for key in self.__cache
               if key != ImageCache.VERSION_STR)

   def __set_version(self, version):
       """Set version of cache (data structure format)."""
       self.__cache[ImageCache.VERSION_STR] = version

   def __discard_variant(self, formula, displaymath):
       """Remove one variant (display or inline math) of an already normalized
       formula; the formula entry itself is dropped once no variant is left."""
       variants = self.__cache[formula]
       del variants[displaymath]
       if not variants:
           del self.__cache[formula]

   def write(self):
       """Write cache to disk. The file name will be the one configured during
       initialisation of the cache."""
       if not self.__cache:
           return
       # serialize first: a failure must not truncate an existing cache file
       serialized = json.dumps(self.__cache)
       with open(self.__path, 'w', encoding='UTF-8') as file:
           file.write(serialized)

   def _read(self):
       """Read Json from disk into cache, if file exists. The cache in memory
       is only replaced if the file was read and validated successfully.
       :raises JsonParserException if json could not be read or parsed, if it
           is not a dictionary or if it has an unexpected version"""
       def raise_error(msg, cause=None):
           raise JsonParserException(msg + "\nPlease delete the cache (and" + \
                       " the images) and rerun the program.") from cause
       if not os.path.exists(self.__path):
           return
       #pylint: disable=broad-except
       try:
           with open(self.__path, 'r', encoding='utf-8') as file:
               data = json.load(file)
       except Exception as e:
           # any failure is reported as JsonParserException, this is what
           # callers are expected to handle
           msg = "error while reading cache from %s: " % os.path.abspath(self.__path)
           # UnicodeDecodeError is a subclass of ValueError and hence needs to
           # be checked before falling back to the generic message
           if isinstance(e, UnicodeDecodeError):
               msg += 'expected UTF-8 encoding, erroneous byte ' + \
                       '{0!r} at {1}:{2} ({3})'.format(
                               e.object[e.start:e.end], e.start, e.end,
                               e.reason)
           else:
               msg += str(e)
           raise_error(msg, e)
       if not isinstance(data, dict):
           raise_error("Decoded Json is not a dictionary.")
       # a cache without version information is treated as a current one
       cur_version = data.get(ImageCache.VERSION_STR) or CACHE_VERSION
       if cur_version != CACHE_VERSION:
           raise_error("Cache in %s has version %s, expected %s." % \
                   (self.__path, cur_version, CACHE_VERSION))
       data[ImageCache.VERSION_STR] = cur_version
       # only the displaymath keys below each formula are booleans; the
       # top-level keys are formulas and must stay strings, even if a
       # formula is literally spelled 'true' or 'false'
       for entry in data.values():
           recover_bools(entry)
       self.__cache = data

   def _remove_old_cache_and_files(self):
       """Delete the cache file and all regular files starting with `eqn` from
       the directory of the cache file."""
       os.remove(self.__path)
       # a bare file name has no directory part: use the current directory
       directory = os.path.dirname(self.__path) or '.'
       for name in os.listdir(directory):
           if not name.startswith('eqn'):
               continue
           candidate = os.path.join(directory, name)
           if os.path.isfile(candidate):
               os.remove(candidate)

   def add_formula(self, formula, pos, file_path, displaymath=False):
       """Add formula to cache. The pos argument contains the positioning
       info for the output document and is a dict with 'height', 'width' and
       'depth'.
       Keep in mind that formulas set with displaymath are not the same as
       those set with inlinemath. An already cached variant of a formula is
       not overwritten.
       This method raises ValueError if formula, pos or file_path are empty or
       if displaymath is not a boolean. It raises OSError if specified image
       doesn't exist or if it got an absolute file_path."""
       if not pos or not formula or not file_path:
           raise ValueError("the supplied arguments may not be empty/none")
       if not isinstance(displaymath, bool):
           raise ValueError("displaymath must be a boolean")
       if os.path.isabs(file_path):
           raise OSError(("The file path to the image may NOT be an absolute "
                   "path: ") + file_path)
       # paths are stored with forward slashes
       file_path = file_path.replace('\\', '/')
       if not os.path.exists(file_path):
           raise OSError("cannot add %s to the cache: doesn't exist" %
                   file_path)
       formula = normalize_formula(formula)
       variants = self.__cache.setdefault(formula, {})
       if displaymath not in variants:
           variants[displaymath] = {'pos' : pos, 'path' : file_path}

   def remove_formula(self, formula, displaymath):
       """This method removes the given variant (display or inline math) of a
       formula from the cache; another cached variant of the same formula is
       kept. A KeyError is raised, if the formula or the requested variant
       did not exist. Internally, formulas are normalized to detect
       similarities."""
       formula = normalize_formula(formula)
       if formula not in self.__cache:
           raise KeyError("key %s not in cache" % formula)
       if displaymath not in self.__cache[formula]:
           raise KeyError("key %s (%s) not in cache" % (formula, displaymath))
       self.__discard_variant(formula, displaymath)

   def contains(self, formula, displaymath):
       """Check whether a formula was already cached and return True if
       found."""
       try:
           return bool(self.get_data_for(formula, displaymath))
       except KeyError:
           return False

   def get_data_for(self, formula, displaymath):
       """
       Retrieve meta data about a formula from the cache.

       The meta information is used to embed the formula in the HTML document.
       It is a dictionary with the keys 'pos' and 'path'. The positioning info
       is described in the documentation of this class.
       If the image of the requested variant does not exist on disk anymore,
       the variant is removed from the cache and treated as not found.
       This method raises a KeyError if the formula wasn't found."""
       formula = normalize_formula(formula)
       if formula not in self.__cache \
               or displaymath not in self.__cache[formula]:
           raise KeyError((formula, displaymath))
       data = self.__cache[formula][displaymath]
       # check whether file still exists
       if not os.path.exists(data['path']):
           self.__discard_variant(formula, displaymath)
           raise KeyError((formula, displaymath))
       return data