/*
Simple program to check LaTeX code for matching parentheses, etc.
For installation & usage, see README and run 'check-parens --help'.
TODO(?):
- handling of empty '\right.' delimiter is hacky.
- delimiter size detection is a bit a hack: '\big{(}' doesn't work.
- false positives for special constructs like:
'\newenvironment{\foo}{\begin{bar}}{\end{bar}}'
Copyright (C) 2012--2015 Jaap Eldering (
[email protected]).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <cstdio>
#include <cstring>
#include <cctype>
#include <cstdlib>
#include <cstdarg>
#include <getopt.h>
#include <errno.h>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <sstream>
#include <fstream>
#include <iostream>
#include "check-parens-defconfig.hxx"
using namespace std;
#define PROGRAM "check-parens"
#define VERSION "2015.07.24"
#define AUTHORS "Jaap Eldering"
const int EXIT_NONFATAL = 1;
const int EXIT_FATAL = 2;
const string defconfigfilename = ".check-parens.conf";
enum paren_type { paren_hard, paren_soft, paren_env };
char *progname;
char *filename;
FILE *texfile;
int ignore_soft;
int ignore_size;
int file_line_error;
int debug_level;
int show_help;
int show_version;
struct option const long_opts[] = {
{"ignore-soft", no_argument, &ignore_soft, 1 },
{"ignore-size", no_argument, &ignore_size, 1 },
{"file-line-error", no_argument, &file_line_error, 1 },
{"no-file-line-error", no_argument, &file_line_error, 0 },
{"config-file", required_argument, NULL, 'c'},
{"debug", no_argument, NULL, 'd'},
{"help", no_argument, &show_help, 1 },
{"version", no_argument, &show_version, 1 },
{ NULL, 0, NULL, 0 }
};
void version()
{
printf("\
%s -- version %s\n\
Written by %s\n\n\
%s comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\
you are welcome to redistribute it under the GNU General Public Licence\n\
as published by the Free Software Foundation; either version 3, or (at your\n\
option) any later version.\n",PROGRAM,VERSION,AUTHORS,PROGRAM);
exit(0);
}
void usage()
{
printf("\
Usage: %s [option]... <latex-file>...\n\
Check LaTeX source <latex-file> for non-matching parentheses.\n\
The exitcode is the number of files with fatal errors.\n\
\n\
-i --ignore-soft ignore mismatch of \"soft\" parentheses that\n\
do not break LaTeX syntax\n\
-s --ignore-size ignore mismatch of delimiter sizes\n\
--file-line-error format mismatch messages as 'file:line:char:error'\n\
just as many compilers do (default)\n\
--no-file-line-error print messages in more human readable format\n\
-c --config-file=FILE read configuration from FILE (default: ~/%s)\n\
-d --debug add debugging output, repeat for more verbosity\n\
--help display this help and exit\n\
--version output version information and exit\n",
progname, defconfigfilename.c_str());
exit(0);
}
void error(int errnum, const char *format, ...)
{
va_list ap;
va_start(ap,format);
fprintf(stderr,"%s",progname);
if ( format!=NULL ) {
fprintf(stderr,": ");
vfprintf(stderr,format,ap);
}
if ( errnum!=0 ) {
fprintf(stderr,": %s",strerror(errnum));
}
if ( format==NULL && errnum==0 ) {
fprintf(stderr,": unknown error");
}
fprintf(stderr,"\nTry `%s --help' for more information.\n",progname);
va_end(ap);
exit(-1);
}
void debug(int level, const char *format, ...)
{
va_list ap;
if ( debug_level<level ) return;
va_start(ap,format);
fprintf(stderr,"debug: ");
vfprintf(stderr,format,ap);
va_end(ap);
}
size_t line = 1, pos = 0;
size_t currline = 0, currpos = 0;
struct paren {
string open, close, size;
paren_type type;
size_t line, pos;
paren() {}
paren(string _open, string _close, paren_type _type=paren_hard):
open(_open), close(_close), type(_type), line(currline), pos(currpos) {}
paren(const paren& p, string _size):
open(p.open), close(p.close), size(_size), type(p.type),
line(currline), pos(currpos) {}
};
vector<paren> paren_list;
map<string,string> delim_list;
map<string,paren> paren_open, paren_close;
struct paren_stack {
vector<paren> data;
int lasthard;
paren_stack(): data(), lasthard(-1) {}
bool empty() { return data.empty(); }
size_t size() { return data.size(); }
void push(const paren& p)
{
if ( !p.type==paren_soft ) lasthard = data.size();
data.push_back(p);
}
const paren& top()
{
return data.back();
}
void pop()
{
data.pop_back();
while ( lasthard>=(int)data.size() ||
(lasthard>=0 && data[lasthard].type==paren_soft) ) lasthard--;
}
// Check if argument is at top (or highest "hard" parenthesis)
int attop(const string& s, bool soft)
{
if ( soft ) {
return data.size()>0 && s==data.back().close;
} else {
return lasthard>=0 && s==data[lasthard].close;
}
}
// Remove irrelevant elements from stack, e.g. before printing it.
void clean()
{
for(int i=data.size()-1; i>=0; --i) {
paren& p = data[i];
if ( (ignore_soft && p.type==paren_soft) ) {
data.erase(data.begin()+i);
}
}
}
};
int readchar()
{
int c = getc(texfile);
pos++;
if ( c==EOF ) return EOF;
if ( c=='\n' ) { line++; pos = 0; }
return c;
}
int unreadchar(int c)
{
if ( c=='\n' ) { line--; pos = 0; }
pos--;
return ungetc(c,texfile);
}
void message(size_t line, size_t pos, const char *format, ...)
{
va_list ap;
va_start(ap,format);
if ( file_line_error ) {
if ( line!=0 ) printf("%s:%zu:%zu: ",filename,line,pos);
} else {
if ( line!=0 ) printf("Line %zu, char %zu: ",line,pos);
}
vprintf(format,ap);
printf("\n");
va_end(ap);
}
void readconfig(istream& conf)
{
string line;
while ( getline(conf,line) && !conf.fail() ) {
istringstream ss(line);
string command, par_open, par_close, delim, token;
if ( !(ss >> command) || command.substr(0,1)=="#" ) continue;
if ( command=="paren" ) {
ss >> token;
if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str());
if ( ! (ss >> par_open >> par_close) ) error(0,"expected paren open and close");
paren_type type = paren_hard;
if ( ss >> token && token=="soft" ) type = paren_soft;
paren_list.push_back(paren(par_open,par_close,type));
} else if ( command=="delimsize" ) {
ss >> token;
if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str());
if ( ! (ss >> delim) ) error(0,"expected a paren delimiter");
delim_list[delim] = delim;
while ( ss >> token ) delim_list[token] = delim;
} else {
error(0,"found unknown command `%s' in configuration file",command.c_str());
}
}
}
int readtoken(string &tok, bool verbatim = false)
{
int c;
if ( (c=readchar())==EOF ) return 0;
currline = line;
currpos = pos;
tok = string(1,c);
// Gobble comments until end of line
if ( !verbatim && c=='%' ) {
while ( c!=EOF && c!='\n' ) c = readchar();
}
if ( c!='\\' ) return 1;
if ( (c=readchar())==EOF ) return 1;
tok += c;
if ( !isalpha(c) ) return 1;
while ( (c=readchar())!=EOF ) {
if ( isalpha(c) ) {
tok += c;
} else {
unreadchar(c);
break;
}
}
if ( (tok=="\\begin" || tok=="\\end") && c=='{' ) {
size_t saveline = currline;
size_t savepos = currpos;
tok += readchar();
string tok2;
while ( readtoken(tok2)!=EOF && tok2!="}" ) tok += tok2;
if ( tok2!="}" ) {
message(saveline,savepos,"unclosed environment delimiter.");
return 0;
}
tok += tok2;
currline = saveline;
currpos = savepos;
}
return 1;
}
void printstack(paren_stack stk)
{
message(0,0,"Parentheses stack contains %zu elements:",stk.size());
while ( !stk.empty() ) {
paren p = stk.top(); stk.pop();
message(p.line,p.pos,"'%s' (requires closing '%s').",
p.open.c_str(),p.close.c_str());
}
}
// Returns 0 on success, or EXIT_NONFATAL or EXIT_FATAL.
int checkfile(char * const fname)
{
string tok, delimsize, delimnext;
paren_stack stk;
bool soft_paren;
bool nonfatal_error = false;
filename = fname;
texfile = fopen(filename,"r");
if ( texfile==NULL ) error(errno,"opening file '%s'",filename);
line = 1, pos = 0;
currline = 0, currpos = 0;
debug(1,"starting to check file '%s'.\n",fname);
while ( readtoken(tok) ) {
debug(2,"%zu:%zu tok = '%s'\n",currline,currpos,tok.c_str());
// (Re)set parenthesis delimiter size:
delimsize = delimnext;
delimnext = ( delim_list.count(tok) ? tok : "" );
// Gobble whitespace after delimiter size tokens
if ( delimsize!="" && isspace(tok[0]) ) {
delimnext = delimsize;
continue;
}
// Skip over \verb and verbatim environments:
if ( tok=="\\verb" ) {
size_t saveline = currline;
size_t savepos = currpos;
char c, endchar = readchar();
if ( endchar==EOF ) {
message(currline,currpos,"Unexpected end of file.");
printstack(stk);
return EXIT_FATAL;
}
debug(1,"%zu:%zu found '%s', searching for end character '%c'\n",
saveline,savepos,tok.c_str(),endchar);
do {
c = readchar();
if ( c==EOF ) {
message(currline,currpos,"Unexpected end of file.");
printstack(stk);
return EXIT_FATAL;
}
} while ( c!=endchar );
debug(1,"%zu:%zu found verbatim end '%c'\n",currline,currpos,c);
continue;
}
if ( tok=="\\begin{verbatim}" ) {
string endtok = "\\end"+tok.substr(6);
debug(1,"%zu:%zu found '%s', searching for end token '%s'\n",
currline,currpos,tok.c_str(),endtok.c_str());
do { readtoken(tok,true); } while ( tok!=endtok );
debug(1,"%zu:%zu found verbatim end '%s'\n",
currline,currpos,tok.c_str());
continue;
}
// First try to pop token from stack, then push;
// then matching of mathmode $$'s works out-of-the-box.
soft_paren = false;
if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft);
if ( !stk.empty() && stk.attop(tok,soft_paren) ) {
// Print warnings for any soft parentheses dropped:
paren p = stk.top(); stk.pop();
while ( p.close!=tok ) {
if ( !ignore_soft ) {
message(p.line,p.pos,
"closing '%s' at %zu:%zu does not match opening '%s'.",
tok.c_str(),currline,currpos,p.open.c_str());
nonfatal_error = true;
}
p = stk.top(); stk.pop();
}
debug(1,"pop '%s' %s %s\n",tok.c_str(),p.size.c_str(),delimsize.c_str());
// Check for same delimiter size:
if ( !ignore_size && delim_list[p.size]!=delim_list[delimsize] ) {
message(currline,currpos,
"size '%s' of '%s' does not match "
"opening size '%s' at %zu:%zu.",
delimsize.c_str(),tok.c_str(),
p.size.c_str(),p.line,p.pos);
nonfatal_error = true;
}
continue;
}
// Special case closing '\right.'
if ( !stk.empty() ) {
paren p = stk.top();
if ( tok=="." && delimsize=="\\right" &&
p.type==paren_soft && delim_list[p.size]=="\\left" ) {
stk.pop();
continue;
}
}
// Now push new open tokens onto the stack:
if ( paren_open.count(tok) ) {
debug(1,"push '%s' %s\n",tok.c_str(),delimsize.c_str());
stk.push(paren(paren_open[tok],delimsize));
continue;
}
if ( tok.substr(0,7)=="\\begin{" ) {
debug(1,"push '%s'\n",tok.c_str());
stk.push(paren(tok,"\\end"+tok.substr(6)));
continue;
}
// Finally, any stack-able token now means a close error:
if ( paren_close.count(tok) || tok.substr(0,5)=="\\end{" ) {
soft_paren = false;
if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft);
if ( !soft_paren || !ignore_soft ) {
if ( stk.empty() ) {
message(currline,currpos,
"closing '%s' without matching open.",
tok.c_str());
nonfatal_error = true;
} else {
paren p = stk.top();
message(p.line,p.pos,
"closing '%s' at %zu:%zu does not match opening '%s'.",
tok.c_str(),currline,currpos,p.open.c_str());
nonfatal_error = true;
}
}
if ( !soft_paren ) {
message(0,0,"Aborting after fatal mismatch in file '%s'.",filename);
printstack(stk);
return EXIT_FATAL;
}
}
}
debug(1,"finished checking file '%s', checking stack.\n",fname);
stk.clean();
if ( !stk.empty() ) {
message(0,0,"Unmatched elements at end of file '%s'.",filename);
printstack(stk);
return EXIT_FATAL;
}
if ( fclose(texfile)!=0 ) error(errno,"closing file '%s'",filename);
if ( nonfatal_error ) return EXIT_NONFATAL;
return 0;
}
int main(int argc, char **argv)
{
string configfilename;
int opt, res;
progname = argv[0];
/* Parse command-line options */
ignore_soft = ignore_size = debug_level = 0;
file_line_error = 1;
show_help = show_version = 0;
opterr = 0;
while ( (opt = getopt_long(argc,argv,"+isc:d",long_opts,(int *) 0))!=-1 ) {
switch ( opt ) {
case 0: /* long-only option */
break;
case 'i':
ignore_soft = 1;
break;
case 's':
ignore_size = 1;
break;
case 'c':
configfilename = string(optarg);
break;
case 'd':
debug_level++;
break;
case ':': /* getopt error */
case '?':
error(0,"unknown option or missing argument `%c'",optopt);
break;
default:
error(0,"getopt returned character code `%c' ??",(char)opt);
}
}
if ( show_help ) usage();
if ( show_version ) version();
if ( argc<=optind ) error(0,"no file(s) specified");
// Read configuration from default file if it exists:
if ( configfilename=="" && getenv("HOME")!=NULL ) {
configfilename = string(getenv("HOME")) + "/" + defconfigfilename;
ifstream test(configfilename.c_str());
if ( !test.good() ) configfilename = "";
}
// For some reason (at least my) GCC doesn't support std::string argument?
ifstream conffile(configfilename.c_str());
if ( configfilename!="" && !conffile.good() ) {
error(errno,"cannot open configuration file `%s'",configfilename.c_str());
}
if ( configfilename!="" ) {
readconfig(conffile);
debug(1,"using configuration file = '%s'\n",configfilename.c_str());
} else {
istringstream confstr(defconfigstring);
readconfig(confstr);
debug(1,"using internal configuration\n");
}
// Initialize map for opening/closing parentheses:
for(size_t i=0; i<paren_list.size(); ++i) {
paren_open [paren_list[i].open] = paren_list[i];
paren_close[paren_list[i].close] = paren_list[i];
}
int error = 0;
for(res=0; optind<argc; ++optind) {
// Print empty/filename line between output for different files:
if ( file_line_error ) {
if ( error>0 ) printf("\n");
} else {
printf("In file %s:\n",argv[optind]);
}
error = checkfile(argv[optind]);
if ( error>=2 ) res++;
if ( !file_line_error ) printf("\n");
}
return res;
}