#!/usr/bin/env python3
# (c) 2013-2018 Sebastian Humenda
# This code is licenced under the terms of the LGPL-3+, see the file COPYING for
# more details.
import argparse
import multiprocessing
import os
import posixpath
import sys
import gleetex
from gleetex import parser
class HelpfulCmdParser(argparse.ArgumentParser):
"""This variant of arg parser always prints the full help whenever an error
occurs."""
def error(self, message):
sys.stderr.write('error: %s\n' % message)
self.print_help()
sys.exit(2)
def format_ordinal(number):
endings = ['th', 'st', 'nd', 'rd'] + ['th'] * 6
return '%d%s' % (number, endings[number%10])
class Main:
"""This class parses command line arguments and deals with the
conversion. Only the run method needs to be called."""
def __init__(self):
self.__encoding = "utf-8"
def _parse_args(self, args):
"""Parse command line arguments and return option instance."""
epilog = "GladTeX %s,
http://humenda.github.io/GladTeX" % gleetex.VERSION
description = ("GladTeX is a preprocessor that enables the use of LaTeX"
" maths within HTML files. The maths, embedded in <EQ>...</EQ> "
"tags, as if within \\(..\\) in LaTeX (or $...$ in TeX), is fed "
"through latex and replaced by images.\n\nPlease also see the "
"documentation on the web or from the manual page for more "
"information, especially on environment variables.")
cmd = HelpfulCmdParser(epilog=epilog, description=description)
cmd.add_argument("-a", action="store_true", dest="exclusionfile", help="save text alternatives " +
"for images which are too long for the alt attribute into a " +
"single separate file and link images to it")
cmd.add_argument('-b', dest='background_color',
help=("Set background color for resulting images "
"(default transparent, use hex)"))
cmd.add_argument('-c', dest='foreground_color',
help=("Set foreground color for resulting images (default "
"000000, hex)"))
cmd.add_argument('-d', dest='directory', help="Directory in which to" +
" store generated images in (relative to the output file)")
cmd.add_argument('-e', dest='latex_maths_env',
help="Set custom maths environment to surround the formula" + \
" (e.g. flalign)")
cmd.add_argument('-f', metavar='SIZE', dest='fontsize', default=12,
help="Set font size in pt (default 12)")
cmd.add_argument('-E', dest='encoding', default=None,
help="Overwrite encoding to use (default UTF-8)")
cmd.add_argument('-i', metavar='CLASS', dest='inlinemath',
help="CSS class to assign to inline math (default: 'inlinemath')")
cmd.add_argument('-l', metavar='CLASS', dest='displaymath',
help="CSS class to assign to block-level math (default: 'displaymath')")
cmd.add_argument('-K', dest='keep_latex_source', action="store_true",
default=False, help="keep LaTeX file(s) when converting formulas (useful for debugging)")
cmd.add_argument('-m', dest='machinereadable', action="store_true",
default=False,
help="Print output in machine-readable format (less concise, better parseable)")
cmd.add_argument("-n", action="store_true", dest="notkeepoldcache",
help=("Purge unreadable caches along with all eqn*.png files. "
"Caches can be unreadable if the used GladTeX version is "
"incompatible. If this option is unset, GladTeX will "
"simply fail when the cache is unreadable."))
cmd.add_argument('-o', metavar='FILENAME', dest='output',
help=("Set output file name; '-' will print text to stdout (by"
"default input file name is used and .htex extension changed "
"to .html)"))
cmd.add_argument('-p', metavar='LATEX_STATEMENT', dest="preamble",
help="Add given LaTeX code to preamble of document; that'll " +\
"affect the conversion of every image")
cmd.add_argument('-P', dest="pandocfilter", action='store_true',
help="Use GladTeX as a Pandoc filter: read a Pandoc JSON AST "
"from stdin, convert the images, change math blocks to "
"images and write JSON to stdout")
cmd.add_argument('--png', action='store_true', dest='png',
help="Use PNG instead of SVG for images")
cmd.add_argument('-r', '--resolution', metavar='DPI', dest='dpi',
default=None,
help=("Set resolution in DPI, only available if PNG output "
"selected; also see `-f`"))
cmd.add_argument('-R', action="store_true", dest='replace_nonascii',
default=False, help="Replace non-ascii characters in formulas "
"through their LaTeX commands")
cmd.add_argument("-u", metavar="URL", dest='url',
help="URL to image files (relative links are default)")
cmd.add_argument('input', help="Input .htex file with LaTeX " +
"formulas (if omitted or -, stdin will be read)")
return cmd.parse_args(args)
def exit(self, text, status):
"""Exit function. Could be used to register any clean up action."""
sys.stderr.write(text)
if not text.endswith('\n'):
sys.stderr.write('\n')
sys.exit(status)
def validate_options(self, opts):
"""Validate certain arguments suppliedon the command line. The user will
get a (hopefully) helpful error message if he/she gave an invalid
parameter."""
if opts.fontsize and opts.dpi:
print("Options -f and -d can't be used at the same time.")
sys.exit(14)
if opts.dpi and not opts.png:
print(("Impossible to set resolution when using SVG as output, "
"try -f"))
sys.exit(14)
def get_input_output(self, options):
"""Determine whether GladTeX is reading from stdin/file, writing to
stdout/file and determine base_directory if files are in another
directory.
If no output file name is given and there is a input file to read
from, output is written to a file ending on .html instead of .htex.
The returned document is either string or byte, the latter if encoding
is unknown."""
data = None
output = '-'
if options.input == '-':
data = sys.stdin.read()
else:
try:
# if encoding was specified or if a pandoc filter is supplied,
# read document with default encoding
if options.encoding or options.pandocfilter:
encoding = ('UTF-8' if options.pandoc else options.encoding)
with open(options.input, encoding=encoding) as f:
data = f.read()
else: # read as binary and guess from HTML meta charset
with open(options.input, 'rb') as file:
data = file.read()
except UnicodeDecodeError as e:
self.exit(('Error while reading from %s: %s\nProbably this file'
' has a different encoding, try specifying -E.') % \
(options.input, str(e)), 88)
except IsADirectoryError:
self.exit("Error: cannot open %s for reading: is a directory." \
% options.input, 19)
except FileNotFoundError:
self.exit("Error: file %s not found." % options.input, 20)
# check which output file name to use
if options.output:
output = options.output
elif options.input != '-':
output = os.path.splitext(options.input)[0] + '.html'
# else case: output = '-' (see above)
base_path = ''
if options.output and os.path.dirname(options.output):
base_path = os.path.dirname(output)
elif options.input != '-' and os.path.dirname(options.input):
base_path = os.path.dirname(options.input)
if base_path: # if finally a basepath found:, strip \\ if on Windows
base_path = posixpath.join(*(base_path.split('\\')))
# the basepath needs to be relative to the output file
return (data, base_path, output)
def run(self, args):
options = self._parse_args(args[1:])
self.validate_options(options)
self.__encoding = options.encoding
fmt = ('pandocfilter' if options.pandocfilter else 'html')
doc, base_path, output = self.get_input_output(options)
old_cwd = os.getcwd()
if base_path:
os.chdir(base_path)
try:
# doc is either a list of raw HTML chunks and formulas or a tuple of
# (document AST, list of formulas) if options.pandocfilter
self.__encoding, doc = parser.parse_document(doc, fmt)
except gleetex.parser.ParseException as e:
input_fn = ('stdin' if options.input == '-' else options.input)
self.exit('Error while parsing {}: {}'.format(input_fn,
str(e)), 5)
link_path = (options.directory if options.directory else '')
processed = self.convert_images(doc, link_path, options)
with gleetex.htmlhandling.HtmlImageFormatter(base_path=link_path,
link_path=options.url) as img_fmt:
img_fmt.set_exclude_long_formulas(True)
if options.replace_nonascii:
img_fmt.set_replace_nonascii(True)
if options.url:
img_fmt.set_url(options.url)
if options.inlinemath:
img_fmt.set_inline_math_css_class(options.inlinemath)
if options.displaymath:
img_fmt.set_display_math_css_class(options.displaymath)
os.chdir(old_cwd)
with (sys.stdout if output == '-'
else open(output, 'w', encoding=self.__encoding)) as file:
if options.pandocfilter:
gleetex.pandoc.write_pandoc_ast(file, processed, img_fmt)
else:
gleetex.htmlhandling.write_html(file, processed, img_fmt)
def convert_images(self, parsed_document, base_path, options):
"""Convert all formulas to images and store file path and equation in a
list to be processed later on."""
base_path = ('' if not base_path or base_path == '.' else base_path)
result = []
try:
conv = gleetex.cachedconverter.CachedConverter(base_path,
not options.notkeepoldcache, encoding=self.__encoding)
except gleetex.caching.JsonParserException as e:
self.exit(e.args[0], 78)
self.set_options(conv, options)
if options.pandocfilter:
formulas = parsed_document[1]
else: # HTML chunks from EqnParser
formulas = [c for c in parsed_document if isinstance(c, (tuple,
list))]
try:
conv.convert_all(formulas)
except gleetex.cachedconverter.ConversionException as e:
self.emit_latex_error(e, options.machinereadable,
options.replace_nonascii)
if options.pandocfilter:
# return (ast, formulas), just with formulas being replaced with the
# conversion data
return (parsed_document[0], [conv.get_data_for(eqn, style)
for _p, style, eqn in formulas])
for chunk in parsed_document:
# output of EqnParser: list-alike is formula, str is raw HTML
if isinstance(chunk, (tuple, list)):
_p, displaymath, formula = chunk
try:
result.append(conv.get_data_for(formula, displaymath))
except KeyError as e:
raise KeyError(("formula '%s' not found; that means it was "
"not converted which should usually not happen.") % e.args[0])
else:
result.append(chunk)
return result
def set_options(self, conv, options):
"""Apply options from command line parser to the converter."""
# set options
options_to_query = ['preamble', 'latex_maths_env',
'png', 'keep_latex_source', 'foreground_color',
'background_color']
for option_str in options_to_query:
option = getattr(options, option_str)
if option:
if option in ('True', 'False', 'false', 'true'):
option = bool(option)
conv.set_option(option_str, option)
if options.dpi:
conv.set_option("dpi", float(options.dpi))
elif options.fontsize:
conv.set_option("fontsize", options.fontsize)
if options.replace_nonascii:
conv.set_replace_nonascii(True)
def emit_latex_error(self, err, machine_readable, escape):
"""Format a LaTeX error in a meaningful way. The argument escape
specifies, whether the -R switch had been passed. If the pandocfilter
mode is active, formula positions will be omitted; this makes the code
more complex."""
if 'DEBUG' in os.environ and os.environ['DEBUG'] == '1':
raise err
escaped = err.formula
if escape:
escaped = gleetex.typesetting.escape_unicode_maths(err.formula)
msg = None
additional = ''
if 'Package inputenc' in err.args[0]:
additional += ('Add the switch `-R` to automatically replace unicode '
'characters with LaTeX command sequences.')
if machine_readable:
msg = 'Number: {}\nFormula: {}{}\nMessage: {}'.format(err.formula_count,
err.formula,
('' if escaped == err.formula
else '\nLaTeXified formula: %s' % escaped),
err.cause)
if err.src_line_number and err.src_pos_on_line:
msg = ('Line: {}, {}\n' + msg).format(err.src_line_number,
err.src_pos_on_line)
if additional:
msg += '; ' + additional
else:
formula = ' ' + err.formula.replace('\n', '\n ')
escaped = (' ' + escaped.replace('\n', '\n ') if escaped !=
err.formula else '')
msg = "Error while converting formula %d\n" % err.formula_count
if err.src_line_number and err.src_pos_on_line:
msg += " at line %d, %d:\n" % (err.src_line_number,
err.src_pos_on_line,)
msg += '%s%s\n%s' % (formula, (''
if not escaped or escaped == err.formula
else '\nFormula without unicode symbols:\n%s' % escaped),
err.cause)
if additional:
import textwrap
msg += ' undefined.\n' + '\n'.join(textwrap.wrap(additional, 80))
self.exit(msg, 91)
def main():
"""Entry point for setuptools."""
# enable multiprocessing on Windows, see python docs
multiprocessing.freeze_support()
m = Main()
# run as pandoc filter?
args = sys.argv # fallback if no environment variable set
if 'GLADTEX_ARGS' in os.environ:
args = [sys.argv[0]] + os.environ['GLADTEX_ARGS'].split(' ')
if '-P' not in args:
args = [args[0]] + ['-P'] + args[1:] + ['-']
m.run(args)
if __name__ == '__main__':
main()