# (c) 2013-2018 Sebastian Humenda
# This code is licenced under the terms of the LGPL-3+, see the file COPYING for
# more details.
"""In order to convert images only if they are not already cached, the cached
converter sacrifices customizability for convenience and provides a class
converting a formula directly to a png file."""
import concurrent.futures
import multiprocessing
import os
import subprocess
from . import caching, image, typesetting
from .caching import normalize_formula
from .image import Format
class ConversionException(Exception):
    """Signal that the conversion of a formula failed.

    The exception carries a ready-made error message and, for upper-level
    error handling, the details of the failure as attributes.

    Example:
        c = ConversionException("cause", "\\tau", 5, 10, 38)
        assert c.cause == "cause"
        assert c.formula == '\\tau'
        assert c.formula_count == 5 # fifth formula in document (starting from 1)
        assert c.src_line_number == 10 # line number in source document (counting from 1)
        assert c.src_pos_on_line == 38 # position of formula in source line, counting from 1

    :param cause description of what went wrong
    :param formula the formula which could not be converted
    :param formula_count number of the formula within the document
    :param src_line_number line of the formula in the source document
        (optional)
    :param src_pos_on_line position of the formula on that line (optional)
    """
    # mind your own business mr. pylint:
    #pylint: disable=too-many-arguments
    def __init__(self, cause, formula, formula_count, src_line_number=None,
                 src_pos_on_line=None):
        # the source position is only mentioned if both parts are known
        position_known = bool(src_line_number and src_pos_on_line)
        if position_known:
            message = "LaTeX failed at formula line {}, {}, no. {}: {}".format(
                src_line_number, src_pos_on_line, formula_count, cause)
        else:
            message = "LaTeX failed at formula no. {}: {}".format(
                formula_count, cause)
        super().__init__(message)
        # expose the details for upper level error handling
        self.cause = cause
        self.formula = formula
        self.formula_count = formula_count
        self.src_line_number = src_line_number
        self.src_pos_on_line = src_pos_on_line
class CachedConverter:
    """Convert formulas to images, skipping formulas found in the cache.

        c = CachedConverter(base_path)
        c.convert_all(formulas) # formulas: [(pos, displaymath, formula), ...]
        for pos, displaymath, formula in formulas:
            data = c.get_data_for(formula, displaymath)
            ...

    The formula is either converted or retrieved from a cache in the same
    directory as the images.

    :param base_path directory to place files in (relative to the file
        being converted). So if the file being converted is foo/bar.htex and
        the base_path is set to img, the image will be placed in foo/img/.
    :param keep_old_cache If an existing cache cannot be read (incompatible
        GladTeX version, ...) and the flag is set, the program will simply
        crash and tell the user to remove the cache (default). If set to
        False, the program will instead remove the cache and all eqn* files
        and recreate the cache.
    :param encoding The encoding for the LaTeX document, default None
    """
    GLADTEX_CACHE_FILE_NAME = 'gladtex.cache'

    def set_option(self, option, value):
        """Set one of the options accepted for gleetex.image.Tex2img. It is a
        proxy function.

        :param option must be one of the known option names, e.g. dpi,
            fontsize, transparency, background_color, foreground_color,
            preamble, latex_maths_env, keep_latex_source, png
        :param value the new value of the option
        :raises ValueError if `option` is not a known option"""
        if option not in self.__options:
            raise ValueError("Option must be one of " +
                             ', '.join(self.__options.keys()))
        self.__options[option] = value

    def set_replace_nonascii(self, flag):
        """If set, GladTeX will convert all non-ascii character to LaTeX
        commands. This setting is passed through to typesetting.LaTeXDocument."""
        self.__replace_nonascii = flag

    def convert_all(self, formulas):
        """convert_all(formulas)
        Convert all formulas concurrently. Formulas already contained in the
        cache are not converted.

        :param formulas iterable of tuples (pos, displaymath, formula); pos
            is the position of the formula in the source document and may be
            empty (pandocfilter case)
        :raises ConversionException if the conversion of a formula failed"""
        formulas_to_convert = self._get_formulas_to_convert(formulas)
        if not formulas_to_convert:
            return
        self.__converter = image.Tex2img(
            Format.Png if self.__options['png'] else Format.Svg)
        # apply configured image output options; only those for which the
        # converter offers a setter are passed on
        for option, value in self.__options.items():
            if not value or not hasattr(self.__converter, 'set_' + option):
                continue
            if isinstance(value, str): # only try string -> number
                try: # some values are numbers
                    value = float(value)
                except ValueError:
                    pass
            getattr(self.__converter, 'set_' + option)(value)
        self._convert_concurrently(formulas_to_convert)

    def _get_formulas_to_convert(self, formulas):
        """Return a list of formulas to convert, along with their count in the
        global list of formulas of the document being converted and the file
        name. Function was decomposed for better testability.

        :param formulas iterable of tuples (pos, displaymath, formula)
        :return list of tuples (formula, pos, output path, displaymath,
            formula count), the formula count starting at 1"""
        file_ext = (Format.Png.value if self.__options['png']
                    else Format.Svg.value)

        def eqn_path(number):
            return os.path.join(self.__base_path,
                                'eqn%03d.%s' % (number, file_ext))

        formulas_to_convert = []
        # (normalized formula, displaymath) pairs already scheduled;
        # displaymath is important since formulas look different in inline
        # maths. NOTE(review): assumes normalize_formula returns a hashable
        # value (e.g. a string) -- confirm against caching.normalize_formula.
        scheduled = set()
        used_file_names = set() # file names assigned during this run
        file_name_count = 0
        for formula_count, (pos, dsp, formula) in enumerate(formulas, start=1):
            if self.__cache.contains(formula, dsp):
                continue
            key = (normalize_formula(formula), dsp)
            if key in scheduled:
                continue
            scheduled.add(key)
            # find the next file name neither present on disk nor assigned
            path = eqn_path(file_name_count)
            while os.path.exists(path) or path in used_file_names:
                file_name_count += 1
                path = eqn_path(file_name_count)
            used_file_names.add(path)
            formulas_to_convert.append((formula, pos, path, dsp,
                                        formula_count))
        return formulas_to_convert

    def _convert_concurrently(self, formulas_to_convert):
        """The actual concurrent conversion process. Method is intended to be
        called from convert_all().

        Successfully converted formulas are added to the cache. After the
        first failure, jobs which have not started yet are cancelled and the
        first error is raised once all running jobs have finished.

        :param formulas_to_convert list of tuples as returned by
            _get_formulas_to_convert()
        :raises ConversionException if the conversion of a formula failed"""
        if self.__base_path and not os.path.exists(self.__base_path):
            # create directory *before* it is required in the concurrent
            # formula creation step
            os.makedirs(self.__base_path)
        thread_count = int(multiprocessing.cpu_count() * 2.5)
        error_occurred = None
        with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
            # start conversion and mark each job with its formula, position
            # in the source file and formula_count (index into a global list
            # of formulas)
            jobs = {executor.submit(self.__convert, eqn, path, dsp): (eqn, pos, count)
                    for (eqn, pos, path, dsp, count) in formulas_to_convert}
            for future in concurrent.futures.as_completed(jobs):
                if future.cancelled():
                    # cancelled after an earlier failure; asking for the
                    # result would raise CancelledError
                    continue
                formula, pos_in_src, formula_count = jobs[future]
                try:
                    data = future.result()
                except subprocess.SubprocessError as e:
                    self.__cache.write() # write back cache with valid entries
                    if error_occurred is not None:
                        continue # only the first failure is reported
                    cause = str(e.args[0]) if e.args else str(e)
                    if pos_in_src:
                        # user expects line and position to count from 1
                        pos_in_src = [p + 1 for p in pos_in_src]
                        error_occurred = ConversionException(cause, formula,
                                formula_count, pos_in_src[0], pos_in_src[1])
                    else: # pandocfilter case: no position available
                        error_occurred = ConversionException(cause, formula,
                                formula_count)
                    # as_completed() only yields finished futures, so jobs
                    # still waiting in the queue are cancelled here; cancel()
                    # has no effect on running or finished jobs
                    for pending in jobs:
                        pending.cancel()
                else:
                    self.__cache.add_formula(formula, data['pos'], data['path'],
                                             data['displaymath'])
                    self.__cache.write()
        #pylint: disable=raising-bad-type
        if error_occurred:
            raise error_occurred

    def __convert(self, formula, output_path, displaymath=False):
        """convert(formula, output_path, displaymath=False)
        Convert given formula with displaymath/inlinemath.
        This method wraps the formula in a tex document, executes all the steps
        to produce an image and returns the positioning information for the
        HTML output. It does not check the cache.

        :param formula formula to convert
        :param output_path image output path
        :param displaymath whether or not to use displaymath during the conversion
        :return dictionary with position (pos), image path (path) and formula
            style (displaymath, boolean) as a dictionary with the keys in
            parenthesis"""
        latex = typesetting.LaTeXDocument(formula)
        latex.set_displaymath(displaymath)
        # option name -> suffix of the matching LaTeXDocument setter
        document_options = (('preamble', 'preamble_string'),
                            ('latex_maths_env', 'latex_environment'),
                            ('background_color', 'background_color'),
                            ('foreground_color', 'foreground_color'))
        for option, setter in document_options:
            if self.__options[option]:
                getattr(latex, 'set_' + setter)(self.__options[option])
        if self.__encoding:
            latex.set_encoding(self.__encoding)
        if self.__replace_nonascii:
            latex.set_replace_nonascii(True)
        # dvipng needs the additional indication of transparency (enabled by
        # default) when setting a background colour
        if self.__options['background_color']:
            self.__converter.set_transparency(False)
        pos = self.__converter.convert(latex,
                                       os.path.splitext(output_path)[0])
        return {'pos': pos, 'path': output_path, 'displaymath': displaymath}

    def get_data_for(self, formula, display_math):
        """Simple wrapper around the image cache, enriching the returned data
        with the information provided as arguments to this function. This
        helps when using a formula without its context.

        :param formula the formula to look up
        :param display_math whether the formula is set in displaymath
        :return copy of the cached data with the additional keys `formula`
            and `displaymath`"""
        data = self.__cache.get_data_for(formula, display_math).copy()
        data['formula'] = formula
        data['displaymath'] = display_math
        return data