\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
\"
\" Standard preamble:
\" ========================================================================
de Sp \" Vertical space (when we can't use .PP)
if t .sp .5v
if n .sp
.
de Vb \" Begin verbatim text
ft CW
nf
ne \\$1
.
de Ve \" End verbatim text
ft R
fi
.
\" Set up some character translations and predefined strings.  \*(-- will
\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
\" double quote, and \*(R" will give a right double quote.  \*(C+ will
\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
\" nothing in troff, for use with C<>.
tr \(*W-
ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
ie n \{\
   ds -- \(*W-
   ds PI pi
   if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
   if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
   ds L" ""
   ds R" ""
   ds C` ""
   ds C' ""
'br\}
el\{\
   ds -- \|\(em\|
   ds PI \(*p
   ds L" ``
   ds R" ''
   ds C`
   ds C'
'br\}
\"
\" Escape single quotes in literal strings from groff's Unicode transform.
ie \n(.g .ds Aq \(aq
el       .ds Aq '
\"
\" If the F register is >0, we'll generate index entries on stderr for
\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
\" entries marked with X<> in POD.  Of course, you'll have to process the
\" output yourself in some meaningful fashion.
\"
\" Avoid warning from groff about undefined register 'F'.
de IX
.
nr rF 0
if \n(.g .if rF .nr rF 1
if (\n(rF:(\n(.g==0)) \{\
   if \nF \{\
       de IX
       tm Index:\\$1\t\\n%\t"\\$2"
.
       if !\nF==2 \{\
           nr % 0
           nr F 2
       \}
   \}
\}
rr rF
\"
\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
\" Fear.  Run.  Save yourself.  No user-serviceable parts.
   \" fudge factors for nroff and troff
if n \{\
   ds #H 0
   ds #V .8m
   ds #F .3m
   ds #[ \f1
   ds #] \fP
\}
if t \{\
   ds #H ((1u-(\\\\n(.fu%2u))*.13m)
   ds #V .6m
   ds #F 0
   ds #[ \&
   ds #] \&
\}
   \" simple accents for nroff and troff
if n \{\
   ds ' \&
   ds ` \&
   ds ^ \&
   ds , \&
   ds ~ ~
   ds /
\}
if t \{\
   ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
   ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
   ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
   ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
   ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
   ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
\}
   \" troff and (daisy-wheel) nroff accents
ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
ds 8 \h'\*(#H'\(*b\h'-\*(#H'
ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
ds ae a\h'-(\w'a'u*4/10)'e
ds Ae A\h'-(\w'A'u*4/10)'E
   \" corrections for vroff
if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
   \" for low resolution devices (crt and lpr)
if \n(.H>23 .if \n(.V>19 \
\{\
   ds : e
   ds 8 ss
   ds o a
   ds d- d\h'-1'\(ga
   ds D- D\h'-1'\(hy
   ds th \o'bp'
   ds Th \o'LP'
   ds ae ae
   ds Ae AE
\}
rm #[ #] #H #V #F C
\" ========================================================================
\"
IX Title "ltx2crossrefxml 1"
TH ltx2crossrefxml 1 "2025-07-10" "" "LATEX CROSSREFWARE"
\" For nroff, turn off justification.  Always turn off hyphenation; it makes
\" way too many mistakes in technical documents.
if n .ad l
nh
SH "NAME"
ltx2crossrefxml.pl \- create XML files for submitting to crossref.org
SH "SYNOPSIS"
IX Header "SYNOPSIS"
ltx2crossrefxml [\fB\-\-debug\fR] [\fB\-c\fR \fIconfig_file\fR]  [\fB\-o\fR \fIoutput_file\fR] [\fB\-rpi\-is\-xml\fR]
               \fIlatex_file1\fR \fIlatex_file2\fR ...
SH "OPTIONS"
IX Header "OPTIONS"
IP "\fB\-c\fR \fIconfig_file\fR" 4
IX Item "-c config_file"
Configuration file.  If this file is absent, defaults are used.
See below for its format.
IP "\fB\-o\fR \fIoutput_file\fR" 4
IX Item "-o output_file"
Output file.  If this option is not used, the \s-1XML\s0 is output to stdout.
IP "\fB\-rpi\-is\-xml\fR" 4
IX Item "-rpi-is-xml"
Do not transform author and title input strings, assume they are valid \s-1XML.\s0
IP "\fB\-\-debug\fR" 4
IX Item "--debug"
Output some progress reports.
PP
The usual \f(CW\*(C`\-\-help\*(C'\fR and \f(CW\*(C`\-\-version\*(C'\fR options are also supported. Options
can begin with either \f(CW\*(C`\-\*(C'\fR or \f(CW\*(C`\-\-\*(C'\fR, and can be given in any order.
SH "DESCRIPTION"
IX Header "DESCRIPTION"
For each given \fIlatex_file\fR, this script reads \f(CW\*(C`.rpi\*(C'\fR and (if they
exist) \f(CW\*(C`.bbl\*(C'\fR and \f(CW\*(C`.aux\*(C'\fR files and outputs corresponding \s-1XML\s0 that
can be uploaded to Crossref (<https://crossref.org>). Any extension
of \fIlatex_file\fR is ignored, and \fIlatex_file\fR itself is not read (and
need not even exist).
PP
Each \f(CW\*(C`.rpi\*(C'\fR file specifies the metadata for a single article to be
uploaded to Crossref (a \f(CW\*(C`journal_article\*(C'\fR element in their schema); an
example is below. These files are output by the \f(CW\*(C`resphilosophica\*(C'\fR
package (<https://ctan.org/pkg/resphilosophica>), \f(CW\*(C`aomart\*(C'\fR package
(<https://ctan.org/pkg/aomart>), the TUGboat publication procedure
(<https://tug.org/TUGboat/repository.html>) and other packages. They
can also be created by hand or by whatever other method you implement.
PP
Any \f(CW\*(C`.bbl\*(C'\fR, \f(CW\*(C`.aux\*(C'\fR, and \f(CW\*(C`.bib\*(C'\fR files are used for the citation
information in the output \s-1XML.\s0 See the \s-1CITATIONS\s0 section below.
PP
Unless \f(CW\*(C`\-\-rpi\-is\-xml\*(C'\fR is specified, for all text (authors, title,
citations), standard TeX control sequences are replaced with plain text
or \s-1UTF\-8\s0 or eliminated, as appropriate. The \f(CW\*(C`LaTeX::ToUnicode::convert\*(C'\fR
routine is used for this (<https://ctan.org/pkg/bibtexperllibs>).
Tricky TeX control sequences will almost surely not be handled
correctly.
PP
If \f(CW\*(C`\-\-rpi\-is\-xml\*(C'\fR is given, the author and title strings from the rpi
files are output as-is, assuming they are valid \s-1XML\s0; no checking is
done.
PP
Citation text from \f(CW\*(C`.bbl\*(C'\fR files is always converted from LaTeX to plain
text.
PP
This script just writes an \s-1XML\s0 file. It's up to you to do the
uploading to Crossref; for example, you can use their Java tool
\&\f(CW\*(C`crossref\-upload\-tool.jar\*(C'\fR
(<https://www.crossref.org/education/member\-setup/direct\-deposit\-xml/https\-post>).
PP
For the definition of the Crossref schema currently output by this
script, see
<https://data.crossref.org/reports/help/schema_doc/5.3.1/index.html>
with additional links and information at
<https://www.crossref.org/documentation/schema\-library/metadata\-deposit\-schema\-5\-3\-1/>.
SH "CONFIGURATION FILE FORMAT"
IX Header "CONFIGURATION FILE FORMAT"
The configuration file is read as Perl code. Thus, comment lines
starting with \f(CW\*(C`#\*(C'\fR and blank lines are ignored. The other lines are
typically assignments in the form (spaces are optional):
PP
Vb 1
\&    $variable = value ;
Ve
PP
Usually the value is a \f(CW"string"\fR enclosed in \s-1ASCII\s0 double-quote or
single-quote characters, per Perl syntax. The idea is to specify the
user-specific and journal-specific values needed for the Crossref
upload. The variables which are used are these:
PP
Vb 7
\&    $depositorName = "Depositor Name";
\&    $depositorEmail = \*(Aqdepositor@example.org\*(Aq;
\&    $registrant = \*(AqRegistrant\*(Aq;  # required, organization name
\&    $fullTitle = "FULL TITLE";   # required, journal name
\&    $issn = "1234\-5678";         # required, ISSN
\&    $abbrevTitle = "ABBR. TTL."; # optional, abbreviated journal name
\&    $coden = "CODEN";            # optional
Ve
PP
For a given run, all \f(CW\*(C`.rpi\*(C'\fR data read is assumed to belong to the
journal that is specified in the configuration file. More precisely, the
configuration data is written as a \f(CW\*(C`journal_metadata\*(C'\fR element, with
given \f(CW\*(C`full_title\*(C'\fR, \f(CW\*(C`issn\*(C'\fR, etc., and then each \f(CW\*(C`.rpi\*(C'\fR is written as
\&\f(CW\*(C`journal_issue\*(C'\fR plus \f(CW\*(C`journal_article\*(C'\fR elements.
PP
The configuration file can also define one Perl function:
\&\f(CW\*(C`LaTeX_ToUnicode_convert_hook\*(C'\fR. If it is defined, it is called at the
beginning of the procedure that converts LaTeX text to Unicode, which is
done with the LaTeX::ToUnicode module, from the \f(CW\*(C`bibtexperllibs\*(C'\fR
package (<https://ctan.org/pkg/bibtexperllibs>). The function must
accept one string (the LaTeX text), and return one string (presumably
the transformed string). The standard conversions are then applied to
the returned string, so the configured function need only handle special
cases, such as control sequences particular to the journal at hand.
(See TUGboat's \f(CW\*(C`ltx2crossrefxml\-tugboat.cfg\*(C'\fR for an example.)
SH "RPI FILE FORMAT"
IX Header "RPI FILE FORMAT"
Here's the (relevant part of the) \f(CW\*(C`.rpi\*(C'\fR file corresponding to the
\&\f(CW\*(C`rpsample.tex\*(C'\fR example in the \f(CW\*(C`resphilosophica\*(C'\fR package
(<https://ctan.org/pkg/resphilosophica>):
PP
Vb 10
\&  %authors=Boris Veytsman\eand A. U. Th{\eo }r\eand C. O. R\e"espondent
\&  %title=A Sample Paper:\e\e \eemph  {A Template}
\&  %year=2012
\&  %volume=90
\&  %issue=1\-\-2
\&  %startpage=1
\&  %endpage=1
\&  %doi=10.11612/resphil.A31245
\&  %paperUrl=http://borisv.lk.net/paper12
\&  %publicationType=full_text
Ve
PP
Other lines, some not beginning with %, are ignored (and not shown).
For more details on processing, see the code.
PP
The \f(CW%paperUrl\fR value is what will be associated with the given \f(CW%doi\fR
(output as the \f(CW\*(C`resource\*(C'\fR element). Crossref strongly recommends that
the url be for a so-called landing page, and not directly for a pdf
(<https://www.crossref.org/education/member\-setup/creating\-a\-landing\-page/>).
Special case: if the url is not specified,
and the journal is \fIRes\ Philosophica\fR,
a special-purpose search url using pdcnet.org is returned.
Any other journal must always specify this.
PP
The \f(CW%authors\fR field is split at \f(CW\*(C`\eand\*(C'\fR (ignoring whitespace before
and after), and output as the \f(CW\*(C`contributors\*(C'\fR element, using
\&\f(CW\*(C`sequence="first"\*(C'\fR for the first listed, \f(CW\*(C`sequence="additional"\*(C'\fR for
the remainder. The authors are parsed using \f(CW\*(C`BibTeX::Parser::Author\*(C'\fR
(<https://ctan.org/pkg/bibtexperllibs>).
PP
If the \f(CW%publicationType\fR is not specified, it defaults to
\&\f(CW\*(C`full_text\*(C'\fR, since that has historically been the case; \f(CW\*(C`full_text\*(C'\fR
can also be given explicitly. The other values allowed by the Crossref
schema are \f(CW\*(C`abstract_only\*(C'\fR and \f(CW\*(C`bibliographic_record\*(C'\fR. Finally, if the
value is \f(CW\*(C`omit\*(C'\fR, the \f(CW\*(C`publication_type\*(C'\fR attribute is omitted entirely
from the given \f(CW\*(C`journal_article\*(C'\fR element.
PP
Each \f(CW\*(C`.rpi\*(C'\fR must contain information for only one article, but multiple
files can be read in a single run. It would not be difficult to support
multiple articles in a single \f(CW\*(C`.rpi\*(C'\fR file, but it makes debugging and
error correction easier to keep the input to one article per file.
SS "\s-1MORE ABOUT AUTHOR NAMES\s0"
IX Subsection "MORE ABOUT AUTHOR NAMES"
The three formats for names recognized are (not coincidentally) the same
as BibTeX:
PP
Vb 3
\&   First von Last
\&   von Last, First
\&   von Last, Jr., First
Ve
PP
The forms can be freely intermixed within a single \f(CW%authors\fR line,
separated with \f(CW\*(C`\eand\*(C'\fR (including the backslash). Commas as name
separators are not supported, unlike BibTeX.
PP
In short, you may almost always use the first form; you shouldn't if
either there's a Jr part, or the Last part has multiple tokens but
there's no von part. See the \f(CW\*(C`btxdoc\*(C'\fR (``BibTeXing'' by Oren Patashnik)
document for details. The authors are parsed using
\&\f(CW\*(C`BibTeX::Parser::Author\*(C'\fR (<https://ctan.org/pkg/bibtexperllibs>).
PP
In the \f(CW%authors\fR line of a \f(CW\*(C`.rpi\*(C'\fR file, some secondary directives are
recognized, indicated by \f(CW\*(C`|\*(C'\fR characters. Easiest to explain with an
example:
PP
Vb 1
\&  %authors=|organization|\eLaTeX\e Project Team \eand Alex Brown|orcid=123
Ve
PP
Thus: 1) if \f(CW\*(C`|organization|\*(C'\fR is specified, the author name will be output
as an \f(CW\*(C`organization\*(C'\fR contributor, instead of the usual \f(CW\*(C`person_name\*(C'\fR,
as the Crossref schema requires.
PP
2) If \f(CW\*(C`|orcid=\f(CIvalue\f(CW|\*(C'\fR is specified, the \fIvalue\fR is output as an
\&\f(CW\*(C`ORCID\*(C'\fR element for that \f(CW\*(C`person_name\*(C'\fR.
PP
These two directives, \f(CW\*(C`|organization|\*(C'\fR and \f(CW\*(C`|orcid|\*(C'\fR are mutually
exclusive, because that's how the Crossref schema defines them. The \f(CW\*(C`=\*(C'\fR
sign after \f(CW\*(C`orcid\*(C'\fR is required, while all spaces after the \f(CW\*(C`orcid\*(C'\fR
keyword are ignored. Other than that, the \s-1ORCID\s0 value is output
literally. (E.g., the \s-1ORCID\s0 value of \f(CW123\fR above is clearly invalid,
but it would be output anyway, with no warning.)
PP
Extra \f(CW\*(C`|\*(C'\fR characters, at the beginning or end of the entire \f(CW%authors\fR
string, or doubled in the middle, are accepted and ignored. Whitespace
is ignored around all \f(CW\*(C`|\*(C'\fR characters.
SH "CITATIONS"
IX Header "CITATIONS"
Each \f(CW\*(C`.bbl\*(C'\fR file corresponding to an input \f(CW\*(C`.rpi\*(C'\fR file is read and
used to output a \f(CW\*(C`citation_list\*(C'\fR element for that \f(CW\*(C`journal_article\*(C'\fR in
the output \s-1XML.\s0 If no \f(CW\*(C`.bbl\*(C'\fR file exists for a given \f(CW\*(C`.rpi\*(C'\fR,
no \f(CW\*(C`citation_list\*(C'\fR is output for that article.
PP
The \f(CW\*(C`.bbl\*(C'\fR files are processed to create the \f(CW\*(C`unstructured_citation\*(C'\fR
references defined by Crossref, that is, the contents of the citation
(each paragraph in the \f(CW\*(C`.bbl\*(C'\fR) as a single flat string without markup
of any kind, including font changes.
PP
Bibliography text is unconditionally converted from TeX to \s-1XML,\s0 via the
method described above. It is not unusual for the conversion to be
incomplete or incorrect. It is up to you to check for this; e.g., if any
backslashes or pairs of dollar signs remain in the output, it is most
likely an error.
PP
Furthermore, it is assumed that the \f(CW\*(C`.bbl\*(C'\fR file contains a sequence of
references, each starting with \f(CW\*(C`\ebibitem{\f(CIKEY\f(CW}\*(C'\fR (which itself must be
at the beginning of a line, preceded only by whitespace), and the whole
bibliography ending with \f(CW\*(C`\eend{thebibliography}\*(C'\fR (similarly at the
beginning of a line). A \f(CW\*(C`.bbl\*(C'\fR file not following this format will not
produce useful results. The \f(CW\*(C`.bbl\*(C'\fR file can be created by hand, or with
BibTeX, or any other method, as long as it has this format.
PP
The \f(CW\*(C`key\*(C'\fR attribute for the \f(CW\*(C`citation\*(C'\fR element is taken as the \fI\s-1KEY\s0\fR
argument to the \f(CW\*(C`\ebibitem\*(C'\fR command. The sequential number of the
citation (1, 2, ...) is appended to it. The argument to \f(CW\*(C`\ebibitem\*(C'\fR can be
empty (\f(CW\*(C`\ebibitem{}\*(C'\fR), and the sequence number will be used on its own.
Although TeX will not handle empty \f(CW\*(C`\ebibitem\*(C'\fR keys, it can be
convenient when creating a \f(CW\*(C`.bbl\*(C'\fR purely for Crossref.
PP
The \f(CW\*(C`.rpi\*(C'\fR file is also checked for the bibliography information, in
this same format.
PP
Crossref's structured citations are added as follows, as defined by
their schema
(<https://data.crossref.org/reports/help/schema_doc/5.3.1/common5_3_1_xsd.html#citation>):
If an \f(CW\*(C`.aux\*(C'\fR file is present, it is checked for any \f(CW\*(C`\ebibdata\*(C'\fR
commands. The \f(CW\*(C`bib\*(C'\fR files in these commands are read, and the
information there is used to generate \s-1XML\s0 entries. The script uses
\&\f(CW\*(C`kpsewhich\*(C'\fR to look for the bib files, so the usual BibTeX conventions
for the search paths are followed.
SH "EXAMPLES"
IX Header "EXAMPLES"
Vb 2
\&  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex \e
\&                      \-o result.xml
\&
\&  ltx2crossrefxml.pl \-c myconfig.cfg paper.tex \-o paper.xml
Ve
SH "AUTHOR"
IX Header "AUTHOR"
Boris Veytsman <https://github.com/borisveytsman/crossrefware>
SH "COPYRIGHT AND LICENSE"
IX Header "COPYRIGHT AND LICENSE"
Copyright (C) 2012\-2025 Boris Veytsman
PP
This is free software.  You may redistribute copies of it under the
terms of the \s-1GNU\s0 General Public License (any version)
<https://www.gnu.org/licenses/gpl.html>.  There is \s-1NO WARRANTY,\s0 to the
extent permitted by law.