# Copyright (C) 2002 Dekel Tsur <dekelts@tau.ac.il>

#!/usr/bin/env python
# Copyright (C) 2002 Dekel Tsur <dekelts@tau.ac.il>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# WARNING:
# Ldiff might have unwanted effects on the current directory.
# It might be wise to back up your files before running it.

# Version string printed by the -v/--version option.
version = "0.5"

import getopt,os,sys,re,string,difflib

# LaTeX macros that wdiff() writes around changed text in the generated file:
# add_begin/add_end bracket inserted text, del_begin/del_end bracket deleted
# text.  When dvipost is disabled the script defines them itself with
# \newcommand; otherwise they are presumably supplied by the dvipost package
# (TODO confirm against the dvipost documentation).
add_begin = "\\changestart"
add_end = "\\changeend"
del_begin = "\\overstrikeon"
del_end = "\\overstrikeoff"

###########################################################################

def half(L):
    """Return a new list holding the elements of L at even indices.

    wdiff() uses this on the output of re.split() with a capturing group,
    where even positions hold words and odd positions hold separators.

    An extended slice is used instead of indexing with (len(L)+1)/2, which
    is a float on Python 3 and relied on the Python 2 only xrange().
    """
    return list(L[::2])

def wdiff(text1, text2, show_deleted):
    """Return text2 with word-level differences from text1 marked up.

    text1, text2 -- old and new text as single strings
    show_deleted -- if true, words that only occur in text1 are copied into
                    the result between del_begin{} and del_end{}

    Inserted or replaced words of text2 are enclosed between add_begin{} and
    add_end{}.  Words are separated by runs of white space and '~'.
    """
    # Splitting with a capturing group keeps the separators: even indices
    # hold words, odd indices hold the separators between them.
    tokens1 = re.split(r"([\s~]+)", text1)
    tokens2 = re.split(r"([\s~]+)", text2)

    # Compare words only (even positions), ignoring the separators
    words1 = tokens1[::2]
    words2 = tokens2[::2]
    opcodes = difflib.SequenceMatcher(None, words1, words2).get_opcodes()
    # A real list is needed because it is indexed and edited in place below
    changes = [op for op in opcodes if op[0] != 'equal']

    # Merge two adjacent changed blocks if the common block between them is small
    i = 0
    while i < len(changes)-1:
        x = changes[i]
        y = changes[i+1]
        if (x[0] == 'replace' or y[0] == 'replace') and y[1] <= x[2]+2 and \
           (x[2]-x[1]+y[2]-y[1] > y[1]-x[2] or x[4]-x[3]+y[4]-y[3] > y[3]-x[4]):
            changes[i] = ('replace', x[1], y[2], x[3], y[4])
            del changes[i+1]
        else:
            i += 1

    for tag, i1, i2, j1, j2 in changes:
        # Word indices -> indices into the token lists
        a1, a2, b1, b2 = 2*i1, 2*i2, 2*j1, 2*j2

        deleted_text = ""
        if tag != 'insert' and show_deleted:
            # the -1 removes the space at the end of the deleted text.
            # Tokens are joined with a single space, which was the default
            # separator of the string.join() call used previously.
            deleted_text = " ".join(tokens1[a1:a2-1])
            if deleted_text != '':
                deleted_text = '%\n'+del_begin+'{}'+deleted_text+'%\n'+del_end+'{} '

        if tag != 'delete':
            tokens2[b1] = deleted_text+'%\n'+add_begin+'{}'+tokens2[b1]
            if b2-1 < len(tokens2):
                tokens2[b2-1] = '%\n'+add_end+'{}'+tokens2[b2-1]
            else:
                # The change reaches the last word of text2, so there is no
                # separator after it to attach the end marker to.
                tokens2.append('%\n'+add_end+'{}')
        elif b1 < len(tokens2):
            tokens2[b1] = deleted_text+tokens2[b1]
        elif deleted_text:
            # Words were deleted after the last word of text2
            tokens2.append(" "+deleted_text)

    return "".join(tokens2)

###########################################################################
# Regular expression matching the commands that enter or leave math mode:
# $, \( and \), \[ and \], and \begin/\end of the equation, eqnarray and
# align environments (with an optional '*').  It contains no capturing group,
# so callers can wrap it in one for re.split().
math_rexp = r"\$|\\\(|\\\)|\\\[|\\\]|\\(?:begin|end)\{(?:equation|eqnarray|align)\*?\}"

def system(command):
    """Announce a shell command on stdout, run it, and return its status.

    command -- command line handed to os.system()

    Returns whatever os.system() returns.
    """
    print("Running " + command)
    # Flush so the announcement is not printed after the child's own output
    # when stdout is buffered (e.g. redirected to a file or a pipe).
    sys.stdout.flush()
    return os.system(command)

def read_file(file, revision):
    """Return the lines of a file, optionally as of an older CVS revision.

    file     -- name of the latex/lyx file
    revision -- "" to read the file as it is on disk, "-1" to undo the
                output of a plain "cvs diff", or a revision name that is
                passed to "cvs diff -r"

    For a non-empty revision the old contents are rebuilt by reverse-applying
    the output of "cvs diff" with patch into a temporary file named
    "ldiff_tmp_<file>", which is removed afterwards.
    """
    if revision == "":
        return read_file2(file)

    tmpname = "ldiff_tmp_" + file
    if revision == "-1":
        revision_flag = ""
    else:
        revision_flag = "-r"+revision
    # NOTE(review): file and revision are interpolated into a shell command
    # without quoting, so names containing spaces or shell metacharacters
    # will not work as intended.
    system("cvs diff %s -u %s | patch -R -o%s" % (revision_flag, file, tmpname))
    try:
        return read_file2(tmpname)
    finally:
        # Remove the temporary file even if reading or converting it failed
        if os.path.exists(tmpname):
            os.remove(tmpname)

def read_file2(file):
    """Return the lines of a latex file, or of a lyx file exported to latex.

    file -- file name; a name ending in "lyx" is first exported with
            "<lyx> -e latex <file>", where <lyx> is the value of the LYX
            environment variable, or "lyx" if it is unset or empty.

    The exported .tex file is removed after it has been read.
    """
    if file[-3:] != "lyx":
        return read_file3(file)

    # If the file is a lyx file, convert it to latex.
    # os.getenv() returns None (not "") for an unset variable.
    lyx = os.getenv("LYX") or "lyx"
    system(lyx+" -e latex " + file)
    file2 = file[:-3]+"tex"
    try:
        return read_file3(file2)
    finally:
        # Remove the exported file even if reading it failed
        if os.path.exists(file2):
            os.remove(file2)

def read_file3(file):
    """Return the contents of the named file as a list of lines.

    Each line keeps its trailing newline, as returned by readlines().
    """
    fh = open(file)
    try:
        return fh.readlines()
    finally:
        # Close the handle even if reading raised
        fh.close()

def get_documentclass(lines):
    """Return the document class named by the first \\documentclass or
    \\documentstyle command found in lines, or "article" if there is none.

    lines -- list of lines of a latex file
    """
    # An optional [options] argument is skipped, and the class name stops at
    # the first '}' so that later braces on the same line are not swallowed.
    rexp = r"\\document(?:class|style)\s*(?:\[[^\]]*\])?\s*\{([^}]*)\}"
    for line in lines:
        mo = re.search(rexp, line)
        if mo:
            return mo.group(1)
    return "article"

def find_rexp(rexp, lines):
    """Return the index of the first line matched by rexp, or -1 if none is.

    rexp  -- regular expression, searched with re.search()
    lines -- list of strings
    """
    for index, line in enumerate(lines):
        if re.search(rexp, line):
            return index
    return -1

def preprocess(lines, separate_title):
    """Clean the lines of a latex file and split them into three strings.

    lines          -- lines of the input file
    separate_title -- if true, the lines from \\begin{document} up to (not
                      including) \\maketitle are returned in the second
                      string instead of being part of the body

    Returns (preamble, title, body): the preamble holds the lines before
    \\begin{document}, the title is "" unless separate_title is true and
    \\maketitle is found after \\begin{document}, and the body is the
    cleaned remainder of the file.  If \\begin{document} is not found, the
    preamble is empty and the whole input is treated as the body.
    """
    preamble_end = find_rexp(r"\\begin{document}", lines)
    if preamble_end < 0:
        # Without this, the -1 would be used as a slice/range bound and
        # drop or duplicate the last line.
        preamble_end = 0
    preamble_text = "".join(lines[:preamble_end])
    text_begin = preamble_end

    title_text = ""
    if separate_title:
        title_end = find_rexp(r"\\maketitle", lines)
        if title_end > preamble_end:
            title_text = "".join(lines[preamble_end:title_end])
            text_begin = title_end

    cleaned = []
    for line in lines[text_begin:]:
        # Remove comments
        line = re.sub(r"(?<!\\)%.*", r"%", line)
        # Put %\n before commands
        line = re.sub(r"(\\(\w+){)", "%\n\\1", line)
        # Put %\n after \emph{
        line = re.sub(r"(\\(emph|textbf){)", "\\1%\n", line)
        cleaned.append(line)
    text = "".join(cleaned)

    # Odd entries of x are the math start/end commands, even entries are
    # the text between them.
    x = re.split("("+math_rexp+")", text)
    math_mode = 0
    for i in range(len(x)):
        if i % 2:
            math_mode = not math_mode
            # Separate the math command from the formula with a space
            if math_mode:
                x[i] = x[i]+" "
            else:
                x[i] = " "+x[i]
        elif math_mode:
            # Replace x^a by x^{a}.
            # This should give smaller diffs if x^a is replaced by x^b
            x[i] = re.sub(r"(?<!\\)([_^])(\\\w+|[^{}\\])", r"\1{\2}", x[i])
            # Add some space in order to reduce the diff
            # We try to add spaces in "safe" positions, but some spaces are
            # added in unwanted places and then removed later
            # NOTE(review): inside these character classes "+-," is a range
            # from '+' to ',', so a literal '-' is not matched.  Left as is.
            x[i] = re.sub(r"(?<!\\)([=<>+-,()}\\])", r" \1", x[i])
            x[i] = re.sub(r"(?<!\\)([=<>+-,(){}])", r"\1 ", x[i])
            # Remove some of the spaces that were added above
            x[i] = re.sub(r"\\(begin|end|label|ref|cite|text|textrm|mbox){ (\w+) }", r"\\\1{\2}", x[i])
            x[i] = re.sub(r"} {", r"}{", x[i])
        else:
            # put space between \begin{<env>} and the optional argument
            x[i] = re.sub(r"(\\begin{\w+})\[", r"\1 [", x[i])

    return preamble_text, title_text, "".join(x)

# Math-mode delimiters and the delimiter that closes/opens the same block
invert_array = {"\\(": "\\)", "\\)": "\\(", "\\[": "\\]", "\\]": "\\["}
def invert_command(command):
    """Return the command that pairs with a math start/end command.

    \\( and \\) map to each other, as do \\[ and \\], and \\begin... maps to
    \\end... (and back) with the rest of the command kept.  Any other
    command, such as "$", is its own counterpart and is returned unchanged.
    """
    if command in invert_array:
        return invert_array[command]
    if command[:6] == "\\begin":
        return "\\end"+command[6:]
    if command[:4] == "\\end":
        return "\\begin"+command[4:]
    return command

def postprocess(text):
    """Repair the marked-up text so that deleted blocks do not break latex.

    text -- the marked-up text, as one string

    Inside every del_begin{} ... del_end{} block the function balances
    braces, removes \\label commands, and makes sure the math mode at the
    end of the block is the same as at its beginning.  Deleted blocks that
    contain nothing are removed together with their markers.  Returns the
    corrected text.
    """
    # Try to change the code to prevent latex errors

    # Odd entries of x are math start/end commands or delete block markers,
    # even entries are the text between them.
    x = re.split("("+math_rexp+"|\\"+del_begin+"\\{\\}|\\"+del_end+"\\{\\})", text)
    math_mode = 0
    math_mode_save = 0
    math_mode_diff = 0
    delete_mode = 0
    delete_mode_start = 0
    last_math_command = ""
    for i in range(len(x)):
        if i % 2:
            # x[i] is either empty, mathmode start/end command,
            # or delete block start/end command
            if x[i] == del_begin+"{}":
                delete_mode = 1
                delete_mode_start = i
                math_mode_save = math_mode
            elif x[i] == del_end+"{}":
                delete_mode = 0
                if math_mode_save != math_mode:
                    # We need to make sure that the mode at the end of the deleted
                    # block is the same as the beginning
                    x[i] = "{}"+invert_command(last_math_command)+x[i]
                    math_mode = math_mode_save
                elif x[delete_mode_start+1:i] == [""]*(i-delete_mode_start-1):
                    # There is nothing in the deleted block, so remove the
                    # delete block start & end commands
                    x[delete_mode_start] = x[i] = ""
            elif x[i] != "": # math start/end
                if math_mode and delete_mode and ( \
                   math_mode_diff != 0 or \
                   invert_command(x[i]) != last_math_command ):
                    # If we exit from math mode inside a deleted block, and it is
                    # not "safe", (namely the math block doesn't have balanced
                    # brackets, or the command used to exit math mode does
                    # not match the command in which the math block begins)
                    # then use mbox to go into text mode
                    x[i] = ""
                    x[i+1] = "\\mbox{"+x[i+1]+"}"
                    if i+2 < len(x) and x[i+2] != del_end+"{}":
                        x[i+2] = ""
                elif x[i+1:i+3] == ["", del_end+"{}"] and \
                     math_mode_save == math_mode:
                    # The deleted block is about to end, and
                    # the current math command will cause a mismatch of modes
                    x[i] = ""
                else:
                    math_mode = not math_mode
                    last_math_command = x[i]
                    math_mode_diff = 0
        else:
            # check balance of brackets
            diff = len(re.findall(r"(?<!\\){", x[i])) - \
                   len(re.findall(r"(?<!\\)}", x[i]))
            if delete_mode:
                # Remove labels in deleted blocks as they may appear
                # in changed block
                x[i] = re.sub(r"\\(label)\{.*?\}", "", x[i])
                if diff > 0:
                    # If the number of '{' is greater than the number of '}'
                    # add diff closing brackets at the end
                    x[i] += "}"*diff
                elif diff < 0:
                    # If the number of '}' is greater than the number of '{'
                    # remove the first -diff brackets
                    x[i] = re.sub(r"(?<!\\)}", "", x[i], count=-diff)
            elif math_mode:
                math_mode_diff += diff

    return "".join(x)

def usage():
    """Print the command line help text to standard output."""
    # A single parenthesised argument prints the same text on Python 2
    # and Python 3.
    print("""Usage: ldiff [options] [<file1>] <file2>
Show the differences between two latex/lyx files.
ldiff <file1> <file2> to compare two files.
ldiff <file> to compare <file> with the most recent version checked into CVS.
ldiff -r<rev> <file> to compare <file> with revision <rev> of <file>.
ldiff -r<rev1> -r<rev2> <file> to compare revision <rev1> with revision <rev2>.

Options:
-h, --help This information
-v, --version Output version information
-b, --nocolor Do not colorize the changed text
-d, --nodeleted Don't show deleted text
-t, --notitle Don't show differences in the title
-l, --latex Produce only the latex file
-p, --nodvipost Don't use dvipost
-s, --separation Separation between change bars and text
(default value = -50)
""")

# Parse the command line.  An unknown option or a missing option argument
# prints the usage text and exits with status 1.
_options = ["help", "version", "nocolor", "nodeleted", "notitle", "latex",
            "nodvipost", "separation="]
try:
    opts, args = getopt.getopt(sys.argv[1:], "hvbdtlps:r:", _options)
except getopt.error:
    usage()
    sys.exit(1)

# Settings controlled by the options, with their default values
rev_list = []   # revisions given with -r, in command line order
deleted = 1     # show deleted text
colorize = 1    # colorize the changed text
onlylatex = 0   # stop after writing the latex file
notitle = 0     # do not show differences in the title
dvipost = 1     # use dvipost
sep = "-50"     # separation between change bars and text
for o, a in opts:
    if o in ("-h", "--help"):
        usage()
        sys.exit()
    elif o in ("-v", "--version"):
        print("ldiff, version "+version)
        sys.exit()
    elif o in ("-d", "--nodeleted"):
        deleted = 0
    elif o in ("-b", "--nocolor"):
        colorize = 0
    elif o in ("-t", "--notitle"):
        notitle = 1
    elif o in ("-l", "--latex"):
        onlylatex = 1
    elif o in ("-p", "--nodvipost"):
        dvipost = 0
    elif o in ("-s", "--separation"):
        sep = a
    elif o == "-r":
        rev_list.append(a)

# Read the two versions to compare and choose the base name of the output.
# The output name is the name of the newer file without its extension,
# followed by "-diff".
if len(args) == 2:
    # Two files: revisions make no sense here
    if rev_list != []:
        usage()
        sys.exit(1)
    text1 = read_file(args[0], "")
    text2 = read_file(args[1], "")
    # splitext() also copes with extensions that are not 3 characters long
    filebase = os.path.splitext(args[1])[0]+"-diff"
elif len(args) == 1:
    # One file: compare two of its revisions.  "-1" stands for the version
    # recovered from a plain "cvs diff", "" for the file as it is on disk.
    if len(rev_list) == 0:
        rev_list = ["-1", ""]
    elif len(rev_list) == 1:
        rev_list += [""]
    text1 = read_file(args[0], rev_list[0])
    text2 = read_file(args[0], rev_list[1])
    filebase = os.path.splitext(args[0])[0]+"-diff"
else:
    # Wrong number of file arguments is an error, like the other misuse
    # cases above.
    usage()
    sys.exit(1)

# When the two files use different document classes, the title part is
# always separated from the body: it is then copied verbatim from the
# second file instead of being compared.
if get_documentclass(text1) != get_documentclass(text2):
    notitle = 1
preamble1, title1, text1 = preprocess(text1, notitle)
preamble2, title2, text2 = preprocess(text2, notitle)

lines = wdiff(text1, text2, deleted).split("\n")
filetex = filebase+".tex"

# Build the body before the output file is opened, so that an error in
# here does not leave a partially written file behind.
fixed_lines = []
for line in lines:
    # Drop an unescaped '%' that precedes the end-of-deletion marker on the
    # same line.  A function is used as replacement because the marker
    # starts with a backslash, and unknown backslash escapes in a
    # replacement string are an error on Python 3.7 and later.
    line = re.sub(r"(?<!\\)%(.*)"+'\\'+del_end,
                  lambda mo: mo.group(1)+del_end, line)
    # Move the first remaining unescaped '%' to the end of the line
    line = re.sub(r"(?<!\\)%(.*)", r"\1%", line)
    fixed_lines.append(line+"\n")
text = "".join(fixed_lines)

if deleted:
    text = postprocess(text)

fh = open(filetex, 'w')
try:
    fh.write(preamble2)

    if dvipost:
        fh.write(r"""
\usepackage{dvipost}
\dvipost{cbexp=0pt}
\dvipost{cbsep=%spt}
""" % sep)
        if colorize:
            fh.write(r"""
\dvipost{cbstart color push Blue}
\dvipost{cbend color pop}
\dvipost{osstart color push Red}
\dvipost{osend color pop}
""")
    else:
        # Without dvipost the four marker commands are defined here
        fh.write(r"""
\newcommand{%s}{\special{color push Blue}}
\newcommand{%s}{\special{color push Black}}
\newcommand{%s}{\special{color push Red}}
\newcommand{%s}{\special{color push Black}}
""" % (add_begin,add_end,del_begin,del_end))

    # Redefine \maketitle, \thanks, \footnote, \endfigure and \endtable so
    # that each of them also emits the end markers twice; all ten %s slots
    # receive the same string.
    fh.write(r"""
\makeatletter
\let\ldiff@old@maketitle=\maketitle
\let\ldiff@old@thanks=\thanks
\let\ldiff@old@footnote=\footnote
\let\ldiff@old@endfigure=\endfigure
\let\ldiff@old@endtable=\endtable
\def\maketitle{\ldiff@old@maketitle%s%s}
\def\thanks#1{\ldiff@old@thanks{#1%s}%s}
\long\def\footnote#1{\ldiff@old@footnote{#1%s}%s}
\def\endfigure{%s\ldiff@old@endfigure%s}
\def\endtable{%s\ldiff@old@endtable%s}
\makeatother
""" % (( (del_end+add_end)*2+"{}",)*10) )

    fh.write(title2)
    fh.write(text)
finally:
    fh.close()

# With -l/--latex only the latex file is wanted
if onlylatex:
    sys.exit()

filedvi = filebase+".dvi"
fileps = filebase+".ps"

# Run latex, bibtex, then latex twice more, and convert the result to
# PostScript.
latex_command = "latex --interaction=batchmode "
os.system(latex_command+filetex)
os.system("bibtex "+filebase)
os.system(latex_command+filetex)
os.system(latex_command+filetex)
if dvipost:
    os.system("dvipost %s %s" % (filedvi, filedvi))
os.system("dvips %s -o %s" % (filedvi, fileps))

# Show the warnings and errors found in the latex log.  stdout is flushed
# before each grep so that the heading is printed before grep's output.
print("\nLatex Warnings:")
sys.stdout.flush()
os.system("grep Warning "+filebase+".log")
print("\nLatex Errors:")
sys.stdout.flush()
os.system("grep ^! "+filebase+".log")

# Remove the intermediate files.  This is done in Python because the
# previous "rm x.{aux,...}" needed brace expansion, which POSIX sh lacks.
for ext in ("aux", "bbl", "blg", "dvi", "log"):
    try:
        os.remove(filebase+"."+ext)
    except OSError:
        # Best-effort cleanup: e.g. no .bbl file exists without a bibliography
        pass