%%% ====================================================================
%%%  @LaTeX3-report { LaTeX3-L3-007
%%%  filename        = "l3d007.tex",
%%%  archived        = "ctan:/tex-archive/info/ltx3pub/",
%%%  author          = "Justin Ziegler",
%%%  doc-group       = "Project core team",
%%%  title           = "Technical report on Math Font Encoding",
%%%  version         = "2.00",
%%%  date            = "June 1, 1994",
%%%  time            = "18:52:36 GMT",
%%%  status          = "Public, official",
%%%  abstract        = "This is a report of the \LaTeX3
%%%                     Project work on math font encoding.
%%%                     ",
%%%  keywords        = "Math fonts encoding",
%%%  project-address = "LaTeX3 Project            \\
%%%                     c/o Dr. Chris Rowley      \\
%%%                     The Open University       \\
%%%                     Parsifal College          \\
%%%                     Finchley Road             \\
%%%                     London NW3 7BG, England, UK",
%%%  project-tel     = "+44 20 7794 0575",
%%%  project-FAX     = "+44 20 7556 6196",
%%%  project-email   = "[email protected]",
%%%  copyright       = "Copyright (C) 1994 LaTeX3 Project.
%%%                     All rights reserved.
%%%
%%%                     Permission is granted to make and distribute
%%%                     verbatim copies of this publication or of
%%%                     coherent parts from this publication provided
%%%                     this copyright notice and this permission
%%%                     notice are preserved on all copies.
%%%
%%%                     Permission is granted to copy and distribute
%%%                     translations of this publication or of
%%%                     individual items from this publication into
%%%                     another language provided that the translation
%%%                     is approved by the original copyright holders.
%%%
%%%                     No other permissions to copy or distribute this
%%%                     publication in any form are granted and in
%%%                     particular no permission to copy parts of it
%%%                     in such a way as to materially change its
%%%                     meaning.",
%%%  generalinfo     = "To subscribe to the LaTeX3 discussion list:
%%%
%%%                      Send mail to [email protected]
%%%                      with the following line as the body of the
%%%                      message (substituting your own name):
%%%
%%%                        subscribe LaTeX-L First-name Surname
%%%
%%%                     To find out about volunteer work:
%%%
%%%                      look at the document vol-task.tex which can
%%%                      be obtained electronically, see below.
%%%
%%%                     To retrieve project publications electronically:
%%%
%%%                      Project publications are available for
%%%                      retrieval by anonymous ftp from ctan hosts:
%%%                          cam.ctan.org
%%%                          dante.ctan.org
%%%                          tug.ctan.org
%%%                      in the directory /tex-archive/info/ltx3pub.
%%%
%%%                      The file ltx3pub.bib in that directory gives
%%%                      full bibliographical information including
%%%                      abstracts in BibTeX format.  A brief history
%%%                      of the project and a description of its aims
%%%                      is contained in l3d001.tex.
%%%
%%%                     If you only have access to email, and not ftp
%%%                      You may use the ftpmail service.
%%%                      Send a message just containg the word
%%%                          help
%%%                      to [email protected]
%%%                      for more information about this service.
%%%
%%%                     For offers of financial contributions or
%%%                      contributions of computing equipment or
%%%                      software, contact the project at the above
%%%                      address, or the TeX Users Group.
%%%
%%%                     For offers of technical assistance, contact the
%%%                      project at the above address.
%%%
%%%                     For technical enquiries and suggestions, send
%%%                      e-mail to the latex-l list or contact the
%%%                      project at the above address.",
%%%  checksum        = "23322 5582 30455 207129",
%%%  docstring       = "The checksum field above contains a CRC-16
%%%                     checksum as the first value, followed by the
%%%                     equivalent of the standard UNIX wc (word
%%%                     count) utility output of lines, words, and
%%%                     characters.  This is produced by Robert
%%%                     Solovay's checksum utility.",
%%%  }
%%% ====================================================================

\NeedsTeXFormat{LaTeX2e}

\begin{filecontents}{l3d007.sty}
%%% Action macros for this document
%%%

%%% table stuff from testfont whole thing need wrapup!!!!!!

% Do not log characters that are missing from the font being charted.
\tracinglostchars=0

% Scratch registers shared by the chart macros below.  \ncnt holds the
% character code currently being processed, \mcnt is a work register,
% \pcnt receives the result of \testrow, and \dim is used by \reposition.
\newcount\mcnt \newcount\ncnt \newcount\pcnt \newdimen\dim
%JWZ the next two counters can be changed for 128 glyphs or 256 glyphs.
% \maxz is the code at which \evenline ends the chart; \maxiz is the
% last code visited by the loop in \setbaselineskip.
\newcount\maxz \maxz=128
\newcount\maxiz \maxiz=127
% Set by \evenline while it steps over rows that contain no characters.
\newif\ifskipping
% \setbaselineskip: measure the current font.  Box 0 receives the
% characters with codes 0..\maxiz set side by side, and \baselineskip
% becomes the height plus depth of that box plus 6pt.  \ncnt is assigned
% inside the \hbox, so the loop's changes to it are local to the box.
\def\setbaselineskip{\setbox0=\hbox{\ncnt=0
\loop\char\ncnt \ifnum \ncnt<\maxiz \advance\ncnt 1 \repeat}
\baselineskip=6pt \advance\baselineskip\ht0 \advance\baselineskip\dp0 }
% \hours: print the time of day as four digits (hours then minutes).
% \time counts minutes since midnight: \ncnt := \time div 60 (hours),
% \mcnt := \time - 60*\ncnt (minutes).  Both scratch registers are
% overwritten.
\def\hours{\ncnt=\time \divide\ncnt by 60
 \mcnt=\ncnt \multiply\mcnt by -60 \advance\mcnt by \time
 \twodigits\ncnt\twodigits\mcnt}
% \twodigits<number>: print the value of #1, preceded by a `0' when it
% is less than 10.
\def\twodigits#1{\ifnum #1<10 0\fi \number#1}
% \startfont{<font name>}: load #1 as \testfont, select it, and set the
% paragraph parameters from its font dimensions:
%   - \baselineskip via \setbaselineskip;
%   - \rightskip stretch of 20pt if \fontdimen6 (the quad) is below 10pt,
%     otherwise 2em;
%   - \spaceskip  = \fontdimen2 (interword space, no stretch or shrink);
%   - \xspaceskip = \fontdimen2 + \fontdimen7 (extra space).
% The two commented-out lines are a disabled heading line.
\def\startfont#1{\font\testfont=#1
%  \leftline{\scriptsize Test of #1\unskip\ on \today\ at \hours}
%  \medskip
 \testfont \setbaselineskip
 \ifdim\fontdimen6\testfont<10pt \rightskip=0pt plus 20pt
 \else\rightskip=0pt plus 2em \fi
 \spaceskip=\fontdimen2\testfont % space between words (\raggedright)
 \xspaceskip=\fontdimen2\testfont \advance\xspaceskip
 by\fontdimen7\testfont}

% \oct{<digits>}: label for an octal constant.  An \hbox holding a roman
% acute accent over nothing, a -.2em kern, then #1 in italic followed by
% an italic correction and a .05em kern.
\def\oct#1{\hbox{\rm\'{}\kern-.2em\it#1\/\kern.05em}} % octal constant
% \hex{<digits>}: label for a hexadecimal constant.  An \hbox holding a
% roman \H accent over nothing, followed by #1 in typewriter type.
\def\hex#1{\hbox{\rm\H{}\tt#1}} % hexadecimal constant
% \setdigs: derive the row labels for the row that starts at \ncnt.
% \evenline calls it as \expandafter\setdigs\meaning\next, so the input
% is the text of \meaning for a \chardef token.  #1 is everything before
% the double quote and is discarded; #2 is the single token after it,
% stored globally in \h.  \0 and \1 are then set globally to the two
% leading octal digits of \ncnt: \0 = \ncnt div 64 and
% \1 = (\ncnt - 64*\0) div 8.  \mcnt is overwritten.
\def\setdigs#1"#2{\gdef\h{#2}% \h=hex prefix; \0\1=corresponding octal
\mcnt=\ncnt \divide\mcnt by 64 \xdef\0{\the\mcnt}%
\multiply\mcnt by-64 \advance\mcnt by\ncnt \divide\mcnt by 8
\xdef\1{\the\mcnt}}
% \testrow: probe the sixteen characters whose hexadecimal codes are
% \h0..\hF.  They are set in box 0 after a \penalty 1, with \\ locally
% defined as \char"\h so that \\<digit> selects one character.  \pcnt is
% then set globally to \lastpenalty: it is still 1 when nothing was
% appended after the penalty.
\def\testrow{\setbox0=\hbox{\penalty 1\def\\{\char"\h}%
\\0\\1\\2\\3\\4\\5\\6\\7\\8\\9\\A\\B\\C\\D\\E\\F%
\global\pcnt=\lastpenalty}} % \pcnt=1 if none of the characters exist
% \oddline: separator between the two chart lines of one hexadecimal
% row.  It ends the current alignment row, then emits a row with no
% interline glue above or below it: a rule spanning 19 columns, followed
% by the label \hex{\h x}, lowered by 2.3pt and smashed so that it adds
% no height or depth to the row.
\def\oddline{\cr
 \noalign{\nointerlineskip}
 \multispan{19}\hrulefill&
 \setbox0=\hbox{\lower 2.3pt\hbox{\hex{\h x}}}\smash{\box0}\cr
 \noalign{\nointerlineskip}}
% \evenline: find the next group of sixteen characters that is not
% empty, then continue or finish the chart.
% On each pass, while \ncnt < \maxz: \next is \chardef'ed to \ncnt div 16,
% \setdigs reads the labels off \meaning\next, and \testrow probes the
% sixteen characters.  If \testrow reports \pcnt=1 the group is skipped:
% \ncnt is advanced globally by 16 and the loop repeats.
% Afterwards \next is reused as the continuation: \endchart when \ncnt
% has reached \maxz, otherwise \morechart.
\def\evenline{\loop\skippingfalse
\ifnum\ncnt<\maxz \mcnt=\ncnt \divide\mcnt 16 \chardef\next=\mcnt
\expandafter\setdigs\meaning\next \testrow
\ifnum\pcnt=1 \skippingtrue \fi\fi
\ifskipping \global\advance\ncnt 16 \repeat
\ifnum\ncnt=\maxz \let\next=\endchart\else\let\next=\morechart\fi
\next}
% \morechart: typeset one group of sixteen characters as two chart lines
% of eight.  It ends the previous row and draws a rule (with penalty
% 5000 after it), emits a \chartline, the \oddline separator, increases
% \1 by one (globally, via \xdef) for the second octal label, emits the
% second \chartline, and hands control back to \evenline.
\def\morechart{\cr\noalign{\hrule\penalty5000}
\chartline \oddline \mcnt=\1 \advance\mcnt 1 \xdef\1{\the\mcnt}
\chartline \evenline}
% \chartline: one row of the alignment.  The row label \oct{\0\1x} is
% followed by eight character cells; each cell is produced by \: and
% the cells are separated by `&&'.
\def\chartline{&\oct{\0\1x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&}
% \chartstrut: an empty \vbox of height 14pt, lowered by 4.5pt.
% \dofonttable places it at the start of the first template column.
\def\chartstrut{\lower4.5pt\vbox to14pt{}}
% \dofonttable{<font name>}: typeset a chart of font #1.
% Calls \startfont, opens a display with $$, resets \ncnt globally to 0
% and starts an \halign to \hsize.  The template's first column carries
% \chartstrut and switches \tabskip to 0pt plus 10pt; the `&' that opens
% the second template line makes the following pair of columns (a
% centred cell and a \vrule cell) repeat.  The first row holds the
% column labels \oct0..\oct7, after which \evenline takes over.
% The \bgroup and the $$ opened here are closed by \endchart.
\def\dofonttable#1{\startfont{#1}$$\global\ncnt=0
 \halign to\hsize\bgroup
   \chartstrut##\tabskip0pt plus10pt&
   &\hfil##\hfil&\vrule##\cr
   \lower6.5pt\null
   &&&\oct0&&\oct1&&\oct2&&\oct3&&\oct4&&\oct5&&\oct6&&\oct7&\evenline}
% \endchart: finish the chart started by \dofonttable.  It ends the
% current row, draws the closing rule, adds a final row with the column
% labels \hex 8..\hex F, then closes the alignment (\egroup), the display
% ($$) and the paragraph.
\def\endchart{\cr\noalign{\hrule}
 \raise11.5pt\null&&&\hex 8&&\hex 9&&\hex A&&\hex B&
 &\hex C&&\hex D&&\hex E&&\hex F&\cr\egroup$$\par}
% \: typesets one chart cell.  Box 0 receives the character with code
% \ncnt; if it is taller than 7.5pt or deeper than 2.5pt, \reposition
% adjusts the box.  The box is then output and \ncnt is advanced
% globally by one.
% NOTE(review): this \def replaces LaTeX's own \: (medium math space)
% wherever this file is loaded -- confirm that the document does not
% use \: in formulas.
\def\:{\setbox0=\hbox{\char\ncnt}%
 \ifdim\ht0>7.5pt\reposition
 \else\ifdim\dp0>2.5pt\reposition\fi\fi
 \box0\global\advance\ncnt 1 }
% \reposition: default treatment of an oversized character.  Box 0 is
% rewrapped in a \vbox with a 2pt kern above it, and the depth of the
% result is increased by 2pt.  \dim is overwritten.
\def\reposition{\setbox0=\vbox{\kern2pt\box0}\dim=\dp0
 \advance\dim 2pt \dp0=\dim}
% \centerlargechars: switch to an alternative \reposition that puts
% box 0 between two 2pt kerns and centres it with \vcenter in math mode.
\def\centerlargechars{
 \def\reposition{\setbox0=\hbox{$\vcenter{\kern2pt\box0\kern2pt}$}}}

%%% use of code example

%\begin{figure}
%\dofonttable{msbm7}
%\caption{Output of the \texprog{nfssfont.tex} program for the font
%         \texttt{msbm7}}
%\label{fig:testfontmsbm}
%\end{figure}


% \textem{<text>}: emphasise <text>; a thin wrapper around \emph.
\newcommand{\textem}[1]{\emph{#1}}

% \note{<text>}: set <text> as a paragraph of its own, introduced by a
% bold ``Note:'' and separated from the surrounding text by .25cm of
% vertical space above and below.
\newcommand{\note}[1]{%
  \par
  \vskip .25cm
  \textbf{Note:\ }#1
  \par
  \vskip .25cm}

% \bslash: \chardef for the character code of the backslash.  It is not
% used by the macros visible in this part of the file.
\chardef\bslash=`\\
%    Command name
% \cn{<name>}: print a command name.  The leading symbol is taken from
% math mode (\mathchar"026E: class 0, family 2, position "6E --
% presumably the backslash of the symbol font; confirm), followed by
% #1 in typewriter type.  Because it opens math mode with $...$, \cn is
% meant for use in text, not inside a formula.
\newcommand{\cn}[1]{$\mathchar"026E${\tt#1}}
%    File name
% \fn{<name>}: print a file (or font) name in typewriter type.
\newcommand{\fn}[1]{{\tt#1}}

% \jzitem{<heading>}: start a list item whose text opens with <heading>
% in bold, followed by a line break.  The braces after \item form an
% ordinary group, not \item's optional [label] argument.
\def\jzitem#1{\item{\bf #1\\}}

% \thisisnew: margin note flagging a passage as new, set ragged right
% in bold slanted type.
% The note used to wrap its text in \begin{raggedright}...\end{raggedright}
% nested inside \textbf{\textsl{...}}.  LaTeX defines no \endraggedright,
% so that group closed before the paragraph ended, \rightskip was
% restored, and the note came out justified.  The declarations are now
% issued directly, so they are still in force when \marginpar ends the
% paragraph.
\def\thisisnew{\marginpar{\raggedright\bfseries\slshape
                This part is new.}}

% \changedabit: margin note flagging a lightly revised passage, set
% ragged right in bold slanted type.
% The note used to wrap its text in \begin{raggedright}...\end{raggedright}
% nested inside \textbf{\textsl{...}}.  LaTeX defines no \endraggedright,
% so that group closed before the paragraph ended, \rightskip was
% restored, and the note came out justified.  The declarations are now
% issued directly, so they are still in force when \marginpar ends the
% paragraph.
\def\changedabit{\marginpar{\raggedright\bfseries\slshape
       Changed this a little bit.}}

% \seeappendix: margin note pointing out an appendix reference, set
% ragged right in bold slanted type.
% The note used to wrap its text in \begin{raggedright}...\end{raggedright}
% nested inside \textbf{\textsl{...}}.  LaTeX defines no \endraggedright,
% so that group closed before the paragraph ended, \rightskip was
% restored, and the note came out justified.  The declarations are now
% issued directly, so they are still in force when \marginpar ends the
% paragraph.
\def\seeappendix{\marginpar{\raggedright\bfseries\slshape
        Here is an appendix reference.}}

% don't warn about bold typewriter
% For OT1/cmtt, requests for the bx series (upright shape) are mapped to
% cmtt/m/n at every size; `ssub' makes the substitution silent.
% The T1 variant below is disabled.
\DeclareFontShape{OT1}{cmtt}{bx}{n}{ <-> ssub * cmtt/m/n }{}
% \DeclareFontShape {T1}{cmtt}{bx}{n}{ <-> ssub * cmtt/m/n }{}
\end{filecontents}



\begin{filecontents}{l3d007a.tex}
\section{Introduction}
 This document aims to put on paper what could be the backbone or the
 skeleton of a new math encoding for \TeX. This is not the complete
 description of an encoding, but a sort of grid, or global picture of
 what things could look like. This document refers to many glyph groups
 defined in another document by the same author, called ``Towards a
 list of math glyphs''.





\section{A few definitions}

\begin{description}

\item[An ``encoding table''.] This conveys the
traditional meaning of an encoding. That is to say a set of 256
glyphs in a given order. The expression ``encoding table'' is usually
abbreviated to ``encoding''.

\item[A ``slot''.] It is the usual word used for referring
to a position in an encoding, that can contain a glyph. It is usually
an integer between 0 and 255. A slot is
certainly not a family, nor anything to do with it.

\item[A ``math kernel''.] This terminology is used to
specify the fonts that are necessary for the math facility to work
as it is described in most \TeX\ documentation\footnote{Documentation
on \LaTeX, AMS\TeX, etc., also fits in here.}.
In DEK's implementation the math kernel consists of the
families from 0 to 3. On top of the kernel, many other fonts, with
whatever encoding is available, could be optionally loaded and used.

\item[A ``math encoding''.] It is considered here as a whole; not
just one 256-glyph encoding table, but a set of encoding tables.
This concept will be referred to as ``M-encoding''.

\item[The ``core symbols''.] They are made of two
groups. The group of symbols that must live with the default alphabet
for kerning reasons, and the group of symbols that must live with the
default alphabet for design reasons.

\item[The ``default alphabet''.] It is the alphabet that is
used  when a user types \texttt{\$abc\$}. In the present encoding that
produces $abc$.

\item[``Glyph compatibility''.] Two encodings are glyph compatible
when they contain the same glyphs but not in the same positions. As well
as containing the same glyphs, it must also be possible for the
corresponding fonts to contain the same sidebearings and the same
kerning and ligaturing information.

\end{description}





\section{Global policy}
 \subsection{Text in math mode}
  It is generally agreed that for best quality documents, if one wants
  to put text in a math formula, one should switch back into text
  mode, using something like the \cn{text} macro in the AMS
  package. None of the encodings can be expected to support bad
  usage. In a math encoded font, the letters are not kerned in the same
  way as in a text encoded font, and there are no letter ligatures,
  because they are not needed.

 \subsection{A global rule for boldface}
  It has been decided not to mix light face and bold face symbols in
  the same encoding, but to generate a separate boldface version of all
  lightface math encoded fonts when necessary.  This does not make it
  impossible to mix the two: either one can use the
  \texttt{\string\boldsymbol} approach, or one can load an extra bold
  face font in a given family, and have it directly and permanently
  accessible.

 \subsection{Sans serif and typewriter fonts}
 Extra fonts could be designed in sans serif, or in typewriter using
 some of the proposed new encodings. Another solution is to load the
 Cork encoded sans serif fonts (or typewriter fonts) in free
 families. In either case, the new math encoding will not have any
 slots containing specifically sans serif or typewriter glyphs.

 \subsection{Concerning the Euler shapes}
 Euler shapes could be a good example implementation of the new math
 encoding. Thus no Euler glyphs will be included in the new
 encoding. However, it may be useful to use the Euler Fraktur for a
 first implementation example, if the new encoding includes a Fraktur
 or old  German alphabet.


 \subsection{Concerning Cyrillic letters}
 These would be available, but not as part of the math encoding. They
 would be loaded as an extra family, with whatever encoding exists,
 together with suitable \cn{mathchardef}s.

 \subsection{Compatibility with other typesetting systems}
 \subsubsection{Grouping all \TeX\ specific glyphs in one font
 encoding}
 The present \texttt{cmex} font contains glyphs that cannot be used
 by other typesetting systems, because they are set in a strange way.

 The present \texttt{cmsy} font contains one glyph that is set in a
 strange way --- the radical sign, and thus makes that whole font
 unusable for the outer world. It would be a good idea to make sure
 that this does not happen again.

 The \LaTeX 3 project is hoping to set a new standard, that will not
 only be used by \TeX, but by all systems that typeset mathematical
 formulae. If everything goes according to plan, in the next few years
 many math fonts will exist, for many different systems, and they will
 all use the same encoding.
 The fact that they all use the same encoding means that it will be
 very easy to exchange fonts from one system to another. So one day a
 \TeX\ user will be able to take a math font used by Microsoft Word,
 and convert it easily in order to use it with \TeX.

 If \TeX\ specific glyphs are grouped in one font, there will only be
 one problematic font. As it happens, all \TeX\ specific glyphs are
 more or less geometric, so they could be used with more than one
 math font.

 On the other hand, if \TeX\ specific glyphs are spread around in
 many fonts, then many ``imported'' fonts will not be usable by \TeX\
 without major messing about.

 Concerning this problem, the real question is: is the \LaTeX 3 project
 setting a real standard for the next few years, or just
 making another \TeX\ math font encoding?
 If the answer is: ``the \LaTeX 3 project is setting a real standard
 for the next few years'', then \TeX\ specific glyphs must be grouped
 in one font. If that is really not possible,
 then one can maybe consider putting them in two fonts.

 If a font designer designs a math font for Adobe, the work necessary
 for adapting his font to the \TeX\ world should be reduced as much as
 possible.
 Otherwise nobody will provide any fonts for \TeX.

 The \TeX\ specific glyphs that are concerned here could be visually
 compatible with many math fonts.

 Which are the \TeX\ specific glyphs? So far:
 \begin{itemize}
 \item The delimiters.
 \item The large and small `bigops'.
 \item The radicals.
 \end{itemize}


\subsubsection{The space issue}
 To enable easier font exchange between the \TeX\ world and the rest of
 the world, the new math encoding will have a space in position 32
 (decimal) of every encoding table, if it is possible.


 \subsection{General document compatibility}
 It is not worth being totally compatible. A lot of glyph positions
 will change, thus direct \cn{mathchardef}s will not always work.
       Documented names from AMS\LaTeX\ (this includes names from
       \LaTeX, \TeX, and AMS\TeX, and LAMS\TeX) ought to be kept.


 \subsection{Grouping all Plain and \LaTeX\ glyphs in 4 fonts}
 The main reason for this is compatibility. Let us consider a user
 that has typed a document with the present math encoding and has
 saturated the families for this document. If the new math encoding
 does not guarantee Plain and \LaTeX\ glyph compatibility with a
 minimum of 4 fonts, then that document cannot run with the new math
 encoding: not enough families. Thus one should make the first four
 font encodings of the new math encoding glyph compatible with
 the group made by: cmr, cmmi, cmex, cmsy.

 \subsection{Grouping all AMS\TeX\ and AMS\LaTeX\ glyphs in less than
   6 fonts} The main reason for this is compatibility. Let us consider
 a user that has typed a document with the existing AMS\TeX\ or
 \LaTeX\ package, and has saturated the families for this document.
 If the new math encoding does not guarantee AMS\LaTeX\ and AMS\TeX\
 glyph compatibility with less than 6 fonts, then that document cannot
 run with the new math encoding: not enough families. The first 6
 font encodings must be one way glyph compatible with the fonts
 provided in the AMS package.

 \subsection{Replacing \texttt {cmex}}
 The math font group has decided that the encoding due to replace the
 \texttt {cmex} encoding will be designed in such a way that the
 corresponding font can be loaded in three sizes or in one size.

 Loading such a font in three sizes produces better typesetting. But
 the resulting page and line-breaks will not be the same as
 when the font was loaded in one size only. Some people will not like
 that change, in their old documents. But for new documents loading
 the extensibles font in three sizes will be better.

 \subsection{Accents in maths}
 There seems to be an agreement that math accents should not change
 with the font or style of the letter. But it is not a problem to keep
 the accents that are already in plain \TeX\ (and the \fn{cm} fonts) in
 the same position as in the T1 encoding. That will allow them to be
 variable family; thus any T1 encoding could be loaded, and its
 accents used. Since there will be millions of documents using
 bold hats this possibility must be preserved even if by default
 all math accents are non-variable family.


\section{The base: a Cork encoded text font}

Main use: things like \texttt{\string\log}. This would generally be a
Latin font.

If it is a Latin upright font, it would probably also be used by
physicists (and chemists) for operators, and more generally whenever
upright letters are needed.

\textbf{Separating this set from the rest} enables the user to decide
how `log' and `sin', etc. should be typeset. Thus the multiletter
operators can be compatible with the text font, or with the rest of
the math glyphs, or even set in yet another font.

Math mode should not be used for setting text phrases in mathematical
material.  For example:
\begin{verbatim}
$$x=y \quad {\rm is\ a\ direct\ consequence\ of} \quad y=x$$
\end{verbatim}
would be better input as:
\begin{verbatim}
$$x=y \quad\hbox{\rm is a direct consequence of}\quad y=x$$
\end{verbatim}
or, better still:
\begin{verbatim}
$$x=y \quad\text{is a direct consequence of}\quad y=x$$
\end{verbatim}
where \verb|\text| is a macro which sets its argument in horizontal
mode.  This avoids unpleasant surprises such as:
\begin{verbatim}
$$X \quad{\rm is\ a\ sub-object\ of}\quad Y$$
\end{verbatim}
However, for upward compatibility with existing plain \TeX\
documents, it will still be possible to set text phrases in math mode,
as long as they only contain $\langle\mbox{character}\rangle$s of type
Variable.








\section{The ``text symbol'' encoding: the TS encoding}
Here one would put the old style numerals, and most of what
is coming out of the present math encoding. Other symbols could
be added. The Text Symbol encoding is definitely \textem{not part of
 the math kernel.} But since it will contain symbols that
previously were accessed via the math fonts, its
encoding must be supplied. This font will not be loaded in a family. It
will just be loaded as a normal text font\footnote{If some users really
feel the need to load it in a math family, they can.}.


\section{The core: the MC encoding (263)}
Counting: 1,10,1 , 54,5 , 124,14 , 12,24,9 ,9= 263 glyphs

The accents are no longer here. They had no real reason to be here.
Most of them are geometrics anyway. But they do have reasons to be
elsewhere. One of the main consequences of taking these accents out
is that the core can be made more coherent, and more complete.  The MC
encoding would contain:
 \begin{itemize}
       \itemsep -0.1cm

 \item The skewchar in position 0: 1
 \item The core digits: 10
 \item The space character in position 32: 1

 \item The core Latin alphabet, which is the default alphabet, in
   uppercase, and lowercase, together with the dotless i and j: 54

 \item The Latin friends: 5

 \item All the Greek material: 124

 \item The Greek friends, next to the Greek: 14

 \item The core symbols for kerning reasons (punctuation and
   delimiters): 12

 \item The core symbols for design reasons: 24

   More for kerning reasons:
 \item The basic geometric delimiters: 9

   Some new glyphs:
 \item New basic delimiters: 9

 \end{itemize}

Sacrifices can be made in the Greek material, and in the core symbols
for design reasons.


\section{The MX encoding: 243}

Count up: 1 1   78 8   10   24 6  16   7 7   26   47 = 231

The usual extensible characters, together with some new ones, would live
here. Here any characters that have strange \TeX\ features, like big
descenders are included, thus grouping glyphs that are not compatible
with the outer world.

For various reasons discussed in ``Replacing \texttt {cmex}?'' (Same
author), the math font group has decided that the MX encoding will be
designed in a way such that when it is loaded in one size (like in the
present \TeX), everything works OK, and the user can still have
access to the new symbols. However, the MX encoding will produce better
quality typesetting when loaded in three sizes.

Detailed contents of MX:
 \begin{itemize}
       \itemsep -0.1cm

 \item Maybe a skewchar: 1

   The space is questionable here, because MX will not be usable by
   other typesetting systems, see comment:
 \item Maybe a space: 1

 \item Big and extensible \TeX\ delimiters from cmex: 78

   Any characters that have strange \TeX\ features like big descenders:
 \item The radicals: 8

   Technically the following can come out, but then they must go in
   MSP.
   To make this possible one could take the bar accent out of MSP.
 \item Horizontal curly braces: 10

 \item All existing big and small ``bigops'' except the integrals: 24

 \item The existing Plain vertical extensible arrows: 6


   One has to limit the number of wide accents, otherwise there is not
   enough place.
 \item The wide tildes, and the wide hats: 16

 New glyphs:

   It is a little bothering that the following will be separated from
   their small versions, but there is nothing much that can be done
               about it:
 \item The big ``big integral'' family: 7
 \item The small ``big integral'' family: 7

 \item The new big and small `bigops': 26
 \item New multisized and extensible delimiters: 47


 \end{itemize}

Note: all integral glyphs must be kerned with themselves, so that two
integrals following each other can be kerned easily with a little
care. Same for various other glyphs.


\section{The math symbol `privilege' font ``MSP'': 250}
Count up: 1,1, 54 , 18,7,3 , 23 , 8,4,2,4,4 , 20,7, 10 , 8,8,12 ,  14 ,
6,4,5,4,5 , 8,2  , 8 = 250
 \begin{itemize}
       \itemsep -0.1cm

 \item A skewchar in position 0: 1
 \item A spacechar in position 32: 1

 \item The script/calligraphic Latin letter set: 54

   According to AMS statistics, the script/cal are
   used more often than the Blackboard bold.

   Having the script/calligraphic here also achieves backward
   compatibility with the existing \TeX, without loading MS1 and MS2.

 \item The basic accents: 18   %  all basic accents should stay here
 \item The double accents: 7   %  could go elsewhere
 \item The underaccents: 3     %  could go elsewhere

   The following must stay here:
 \item The ``Basic symbols'' group: 23

   The next 5 are needed for compatibility with plain:
 \item The ``Greater than plain'' group: 8
 \item The ``Subset plain'' group: 4
 \item The ``In / ni plain'' group: 2
 \item The ``Sqsubset plain \& ams'' group: 4
 \item The ``Succ without sim plain'' group: 4

 \item The ``Small binops plain'' group: 20
 \item The small ints: 7
   These should probably live with the other `succ' members for design
   reasons:
 \item The ``Succ without sim ams'' group: 10

   The next three make a homogeneous group, and must live with sim.
   Sim itself must live here because of compatibility with Plain:
 \item The ``Greater than with sim'' group: 8
 \item The ``Succ with sim'' group: 8
 \item The ``Sim'' group: 12

   The arrows, for compatibility, (improved a little though):
 \item ``Plain horizontal arrows'': 14 or 10
 \item ``Plain vertical arrows'' : 6 \\
   Does not include the extensible arrows.
   The latter are in MX as before.
 \item ``Plain oblique arrows'': 4\\
   Also called ``Plain other arrows''.
 \item ``Latex arrows'': 5
 \item Plain miscellaneous geometric symbols: 4

   Extras ---~these are new glyphs~--- to improve a little what \TeX\
   can already do:
 \item Extra arrows for use with plain: 5

   The ``lasy'' triangles are included in the AMS fonts and thus are
               included in the following group:
 \item AMS left-right open triangles: 8

   Should live with the ``Plain oblique arrows'':
 \item ``AMS obliques'': 2

   Some new glyphs: Some of this could come out.
 \item Wide accents bar: 8

\end{itemize}







\section{The MS1, MS2, Math-Symbol encodings}
Each of these encodings will contain a set of Latin letters, like for
instance fraktur or blackboard bold, in uppercase or lowercase or
both. In some cases a place should be reserved for a set of matching
numbers too (i.e. Open). The rest would be filled up with symbols.
An MS$_i$ encoding is needed for:

\begin{itemize}
\item An extra script/calligraphic (see the comment below on script and
 calligraphic); the default calligraphic is in the MSP encoding.
\item Open + (Arrows or relations) + other geometrics.
\item Old German.
\end{itemize}


\textbf{Note:} Barbara Beeton writes ``Regarding script vs.
calligraphic, I do understand the difference; however, at AMS I believe
we only very rarely get a request to use both styles in the same paper.

For that there are two possibilities:
\begin{enumerate}
       \item designing one encoding table where the positions A-Z (and
       probably  a-z and 0-9 even if they are not all filled) are
       supposed to contain a  ``calligraphy/script'' set of characters.
       Then there would be  instances of that encoding that would
       contain script chars and others  that would contain calligraphic
       chars.  Suppose our standard would say that this encoding is to
       be used as family 4.  A designer would then choose  one such
       font with this encoding for family 4 (thereby effectively
       deciding  what \texttt{\string\cal} and a lot of other symbols
       look like (the ones whose \cn {mathchardef} points into family
       4)). For those who in addition would like to use another
       script/cal math alphabet: they can then just allocate one of
       the free families. Access to this would then be trivial.

       \item Having two different encodings; one for cal, and one for
       script. The remaining symbols in both encodings would be
       different too. Thus both encodings would need to be part of the
       standard suite of math encoding tables.

       Which solution is preferable depends a bit on the number of
       symbols that ought to go in the standard.''
\end{enumerate}

Also J\"org Knappen writes: ``I strongly support having two different
encodings, one for cal and one for script. If users have the choice
between cal and script, they prefer script (at least in
Mainz\footnote{Maybe Americans prefer it the other way
 round.}). However, the old calligraphic alphabet still
needs to be supported for compatibility reasons.''



\section{The MS1 encoding: 232}
Count up: 1 1 54 10  32  36 30 12 10 21 10 15= 232
 \begin{enumerate}
       \itemsep -0.1cm

   \item A skewchar in position 0: 1
   \item A spacechar in position 32: 1
   \item The BBB alphabet uppercase and lowercase: 54
   \item The BBB digits: 10
   \item The last WIDE ACCENTS: arc, back-to-front vector, and
   double-sided  vector, normal vector: 32

     For AMS inclusion:
   \item The ``AMS horizontal arrows'' group: 22
   \item The AMS other arrows group: 12
   \item The ``Greater than AMS'' group: 30
   \item The ``Subset AMS'' group: 12
   \item ``AMS Equals friends'': 10
   \item ``AMS Miscellaneous geometric symbols'': 21
   \item ``AMS Vdash group'': 10
   \item ``AMS boxes and friends'': 15

     For fun if there is place to spare:
   \item Alan's arrow construction set: ?

 \end{enumerate}








\section{Other requested typefaces}
 \begin{itemize}

 \item In general, users may want MC fonts in arbitrary styles (bold
   sans serif MC for instance) in order to get the Greek letters in
   their favourite styles.

 \item A ``text-like'' italic or slanted for computer science
   identifier names and the like.  This would be Cork encoded and
   optionally loaded.

 \item A ``bold upright'' for use as variables -- e.g. vectors in
   physics notation rather than the arrow over an italic letter. This
   would be Cork encoded, and optionally loaded or accessed via the
   \texttt{\string\boldsymbol} concept in which case no family would be
   required.

 \item Bold italic for use as variables: either optionally loaded as
   a second font with MC or Cork encoding (using only variable family
   symbols) or accessed via something like
   \texttt{\string\boldsymbol}.

 \item Bold Old German (occasional) suggested
   \texttt{\string\boldsymbol} approach.

 \item Bold script (occasional) suggested \texttt{\string\boldsymbol}
   approach.

 \item Sans serif lightface (occasional): optionally loaded Cork
   encoded font.

 \item Sans serif boldface (occasional): optionally loaded Cork
   encoded font.

 \item Bold symbols: either \texttt{\string\boldsymbol} or optionally
   loaded in remaining slots.

 \item Ultra bold symbols: either \texttt{\string\boldsymbol} or
   optionally loaded in remaining slots.

 \item An MC-encoded bold font containing upright bold Latin glyphs,
   plus bold upright and bold slanted Greek.  This would contain all
   of the most commonly requested bold glyphs in one font (rather
   than many more).


 \item A Cyrillic alphabet. Loaded as an extra family, or in text.

 \end{itemize}





\section{Summarising the families used by the proposed YAASP M-encoding}
\begin{enumerate}
\item Family $0$: A Cork encoded upright text font.

\item Family $2$: An MC encoded font containing the default Latin and
 Greek italic+upright, and core symbols...

\item Family $1$: An MSP encoded font containing cal/script and
 accents...

\item Family $3$: An MX encoded font including all extensible glyphs,
 and double sized operators...

\item Family $y$: An MS$1$ encoded symbol font for Open, and arrows
 or relations.

\item Family $z$: An MS$2$ encoded symbol font for Old German.
\end{enumerate}

\textbf{Comments:}
\begin{itemize}
       \item This leaves 10 families free for anything else, (like bold
       or sans...) and makes many symbols available.

       \item The first four encoding tables put together give total
       \TeX, \LaTeX\ compatibility.

       \item The first six give total \TeX, \LaTeX, AMS\TeX,
       AMS\LaTeX, LAMS\TeX\ compatibility.

       \item The six put together: do wonders, using no more font
       families than  the present AMS\TeX.

\end{itemize}

\section{Discussion}

\subsection{Advantages}

For MC: A big advantage here, is kerning. In this encoding kerning
is possible between the Latin default alphabet, and both italic and
upright Greek alphabets. This is necessary for compatibility, and for
tidiness. On top of this both letter sets (in actual fact there are
three) can be kerned with the core symbols that are in the MC
encoding. This last point is the most important, and gives new and
better automatic math spacing. (For compatibility reasons,
the Greek italic must be kerned with the period, the comma, and the
slash.)

The bold version of the MC encoding gives the user access to a lot of
bold letter sets in one go. The global family consumption is therefore
largely reduced: 1 bold font instead of 2 or 3.

Taking the accents away from the letters, means that the accents do
not change when the text face changes, i.e. bold letters and medium
letters get the same accents.

One can get more than compatibility with plain \TeX\ using only 4
families (the same number as standard \TeX\ currently uses).

One can get more than compatibility with AMS\TeX\ using 6 families.
This is less than or equal to the number of families used by AMS\TeX.

The calligraphic alphabet is more used than the open, so putting it
with the accents is a step towards grouping most used glyphs together.

This proposal gives a little room in the MC for free spaces, and good
core material.

With the MSP encoding concept, the MSi encodings can really be
considered as (optional) extensions. Thus somebody who knows he
does not need the arrow kit and the Blackboard bold letter set does not
have to load them. Same for Fraktur.

All the \TeX\ specific glyphs are grouped in MX. Thus all the other
fonts could be used by other typesetting systems.

Using the Cork encoded font in family 0 for things like \cn{log} and
\cn{sin}, means that the Greek users can replace it by a Greek font.
(Apparently Greek mathematicians set these function names
using a Greek alphabet).



\subsection{Disadvantages}

If there is not enough space for all the required symbols,
one can make an MS3 encoding that would contain the other
version of script/cal, together with extra symbols.


\subsection{Comments}

In this proposal the core is really made of two fonts, and the kernel is
made of four.

Comments from Alan about the space slot in MX:
\begin{quote}
 ``MX will be used by \TeX, and the dvi drivers may be outputting to a
 device that does not accept anything but a space in position 32.  So
 if you don't include a space here, then the MX-encoded fonts have to
 be split into two device fonts by the drivers.''
\end{quote}

Comment from Alan about the Cork encoded font:
\begin{quote}
 ``I think it would be good to specify that this is family 0, for
 compatibility with current \TeX\ documents containing explicit
\cn{fam 0}
 (naughty them!) and in order to have filled up slots 0 to 3 rather
 than leaving a gap in family 0.''
\end{quote}
\end{filecontents}

\begin{filecontents}{l3d007b.tex}
\section{Introduction}
 The goal of this document is to try and list the future contents of
 the new math encoding. The glyphs have been divided into groups and
 subgroups.

 \textem {This is a draft and a workbench document.  Some of it is out
 of date, and usually the comments are not for the public.  There are
 lots of spelling mistakes, I know! The reader should be indulgent.}



%something added by bb for euler:
%begin{enumerate}
%\item Cursive (normal) capital letters. not often used.
%\item Cursive (normal) lower-case letters.
%\item Script upper-case.
%\item (Some) script lower-case.
%\item Fraktur (old german) upper-case.
%\item Fraktur (old german) lower-case.
%\item Digits.
%\end{enumerate}
%% bb --
%[bb: I will be happy to make a proposal for this.]


\section{Extra font dimensions}
\begin{itemize}
\itemsep -0.15cm
\item The design size,
\item The default script size,
\item The default scriptscript size,
\item Suggested value for mathsurround (in MC)
\item Math\_axis (in every font)
\item Thin mu skip,
\item Med mu skip,
\item Thick mu skip,
\item Recommended rule weight
\item Baselineskip: leadingheight,
\item Baselineskip: leadingdepth
\item Suggested by JMR: the big and bigg params.
\end{itemize}






\section{Kerning}
Better kerning should be made possible in the Latin math
italic, if it is possible. Normal kerning information is put in the
\fn{.tfm}
file. But in math mode, for things to get kerned as specified in the
\texttt{.tfm} file the left atom must be of ordinary type. If the user
interface redefines everything that must be kerned as being ordinary,
old documents will start looking different, and this is not wanted. To
avoid this, the user interface could define a macro
\texttt{\string\mathkerning\{...\}} that would use the kerning specified
in the \texttt{.tfm} file, without globally making all characters
ordinary.

The following glyphs should be kerned:
\begin{itemize}
\itemsep -0.15cm
\item The spacing of [ and ( and ) and ] followed by letters should
 be adjusted.

\item  The spacing of letters followed by [ ] ( ) should be
 adjusted. This is an  important one.

\item Keep the kerning with\quad . \quad , \quad / \quad for most
 letters !
%\item Keep kerning between = and upright $\Gamma$ ! (I must have
 %dreamt this one) Maybe add kerning for other punctuation characters.
\item At least keep the kerning between d and Y,Z,j,f. Maybe add some
 others: dx, dy, d$\alpha$, d$\theta$, d$\phi$ ...

\item New kerning of the integral with itself. This would only
 be available via a \texttt{\string\mathkerning} macro (see
 previous comment).
\item Kerning of the period with itself,
\item Kerning of the centered period with itself.
\item Jan M.R. is sure that kerning is needed between
 Latin and Greek. More precise information is needed.
\end{itemize}













\section{The following should be taken out of the present math encoding}
\begin{itemize}
\itemsep -0.15cm
\item The old digits: 10.
\item The 2 paragraph signs: \P, \S.
\item The Yen sign: Y.
\item The double dagger sign \ddag.
\item The four card families: $\clubsuit, \heartsuit, \diamondsuit,
 \spadesuit$.
\item The musical signs: $\flat, \natural, \sharp$ ?????????
\item The maltese cross. (AMS)
\item The $\mathchar"017F$ seems not to be needed in maths.
\item The circled R must come out of the math symbols. (AMS)
\item The raised asterisk,
\item The two triangles in cmmi: cmmi'56 '57  ???????
\end{itemize}

All these will be put into the ``Text symbols'' encoding, that would
come in many faces, and be text dependent.
Other glyphs could be put in the ``Text symbols'' font:
\begin{itemize}
\itemsep -0.15cm
\item More numerals,
\item The perthousand sign.
\item Maybe this is a good place for the `fraction' characters from
 adobe.
\item $<$florin$>$, $<$ellipsis$>$ etc.
\item The superior and inferior digits, and put in kerning so that
 $<$onesuperior$>$ $<$fraction$>$ $<$twoinferior$>$ produces a 1/2.
\item The single dagger finds a place here although it is in
 maths as well. This makes them two different symbols, and enables both
 to  have more specific shapes.
\item A real copyright symbol, TM (trademark) and SM (service mark).
\item An interrobang (a combination of ? and !) new. (bb)
\end{itemize}


Alan Jeffrey has worked on the `text symbol font'. Actually it is now
called the companion text font. He has written more on this topic.
``[email protected]''






\section{The Greek glyphs: 124}
The following shapes must be included:
\begin{itemize}
\itemsep -0.15cm
\item All the Upper-case in upright. 24
\item All the Upper-case in italic. 24
\item All the Lower-case in upright. 24
\item All the Lower-case in italic face. 24. So far: $24\times 4=96$
\item All the variable shapes in upright. 10
\item All the variable shapes in italic. 10
\item All the special numeric letters in upright. 3
\item All the special numeric letters in italic. If lack of space
 prefer the italic shapes to the upright ones. 3.
\item Some control glyphs: 2 (probably useless)
\end{itemize}


\subsection{Variable shapes: 10}
This list comes basically from: J\"org Knappen.  They are all listed
here including the ones that are already in the \fn{cm} fonts:
\begin{enumerate}
\itemsep -0.15cm
\item Lower-case Phi,
\item Lower-case Pi,
\item Lower-case Kappa, (AMS)
\item Lower-case beta (new),
\item Lower-case Rho,
\item Lower-case Epsilon,
\item Lower-case Sigma,
\item Lower-case Theta.
\item Upper case chi (new),
\item Upper case for upsilon.
\end{enumerate}

\subsection{Extra letters for numerals: 3}
Source: J\"org Knappen.  They are all listed here including the ones
that are already in the \fn{cm} and \fn{ams} fonts. Must be given in
lower-case.
Upper-case Greek numerals exist, although extremely rare. For the
 sake of completeness one could fill them in. But they are surely not
 the hottest characters needed. (J\"org)
\begin{enumerate}
\itemsep -0.15cm
\item Qoppa (new),
\item Sampi (Sanpi?) (new) (J\"org Knappen),
\item Digamma (AMS).
\end{enumerate}


\subsection{Control glyphs: 2}
\begin{enumerate}
\itemsep -0.15cm
\item An italic control glyph, i.e. the following Greek letter is not
taken from the upright, but from the italic Greek,
\item A variable shape control glyph, i.e. the following Greek letter
is not taken from the normal set of letters, but from the variant shape
set. This will not work for all letters. Thus may not be a good idea.
\end{enumerate}

\note {From Alan about the control slots for Greek, ``Er, I'm not very
 sure about those, since they'll affect kerning.  I'd prefer to have
 the choice between italic / upright made by the document designer.
 And I'm not sure why anyone would want to get at an upper case $\xi$
 by a macro \texttt{\string\uppercasegreek}\{\texttt{\string\xi}\}!''
 --- ``True they will affect the kerning. But one could use them
 differently from what you suggested.  Although I'm not sure it is
 interesting, the ligature mechanism does not have to be visible for
 the user, i.e. he can still type \texttt{\string\Gamma}, which is
 expanded to \texttt{\string\up \string\gamma}.''}







\section{Extra Greek-like material: 14}
This group of glyphs should not really be separated from the rest of
the Greek material.
\begin{enumerate}
\itemsep -0.15cm
 \item An upright partial sign,
 \item An italic partial sign,
 \item An upright partial sign with a slanted bar, AMS \cn{eth}
 \item A \cn{thorn} WASY'151 but this one is not very good. There is a
         better one in dcmr'136. There is one in wslipa10'102 (J\"org)
 \item A barred upright lambda, ?????? (probably not J\"org)
 \item A barred italic lambda, this is preferred. (J\"org)
 \item An upright mho sign (upside down Omega),
 \item The back to front epsilon: AMS "7F \cn{backepsilon},
 \item Arabic letter dal: looks something like a back to front $c$.
 \item Hebrew letter msbm'151,
 \item Hebrew letter msbm'152,
 \item Hebrew letter msbm'153,
 \item The $\aleph$ or \cn{aleph} in position CMSY'100,
 \item The Nabla, $\nabla$ in CMSY'162
\end{enumerate}

The barred signs may be obtained by ligatures, or could be
constructed with kerning. In any case some slots for ligatures must be
left free if possible.








\section{The Latin letters: One set$=54$ glyphs}

We shall assume here that all lower case alphabets contain a dotless `i'
and a dotless `j', so that they can take accents other than a dot.
\begin{itemize}
\itemsep -0.15cm
\item The usual \fn{cmmi} italic shapes. Upper-case and lower-case.
\item The calligraphic shapes. Upper-case and lower-case.
 The lower-case shapes are presently maybe not available.
\item The script shapes. Upper-case and lower-case.
 The lower-case shapes are presently maybe not available.
\item The black board bold shapes. Upper-case and lower-case.
\item The Fraktur style. Upper-case, and lower-case.
\end{itemize}

 \subsection{The calligraphic and/or script styles}
 BB: ``How are ``calligraphic'' and ``script'' different here?  I've
 never seen what Knuth calls calligraphic and what most
 mathematicians call script (the ``curly'' style) used in the same
 context, so they are presumably not distinct from one another in
 actual usage.''

 The two should be included if there is enough space.
 Otherwise one is enough.

 \subsection{A hyphen char ?} These Latin letters are not meant for
 typesetting words. It is assumed that all multi-letter words should be
 typeset using the text fonts, not the math fonts. Thus
 the hyphen character is not needed in the math encoding.


 \subsection{Computer science and identifiers}
 It looks as though the new math encoding will not contain anything
 specially designed for computer science. Computer scientists will have
 to use \texttt {cmti*} in an extra family for long identifiers.

 \subsection{Chemists and chemical formulae} Considering the fact that
 chemists do use a lot of mathematical expressions, they need the total
 math mode as it is. On top of that they need a special mode for
 writing chemical equations. One of the particularities of this
 chemical mode would be the different placing of sub- and
 superscript. A possible implementation is something like
 \cn{EnterChemicalMode} and \cn{ExitChemicalMode}, which would
 in actual fact load a new set of fonts (or only the font in family 2),
 in order to have different font dimensions in family 2.


\section{Latin-like material: 5}
This group should live next to the Latin letter set.
 \begin{enumerate}
       \itemsep -0.15cm
 \item An upright d. This is needed for standard mathematical
 typesetting.

 \item A horizontally barred italic h, for physicists.

 \item A slanted barred italic h, for physicists.

 \item An italic upper-case Vee with a bar, the bar is meant to be
   horizontal. \texttt{[email protected]}

 \item An upright upper-case Vee with a bar, the bar is meant to be
   horizontal, and extends through both sides of the Vee almost like
   a strikeout. \texttt{[email protected]}

 \end{enumerate}



\section{The different ways needed to write numbers}
 \begin{itemize}
       \itemsep -0.15cm
 \item The normal set of numbers in \fn{cmmi}: upright lining.

 \item The blackboard bold numbers. (Used in physics and a field of
 maths. See  Alan J. for more details.) [Note:  presently no
 satisfactory bbb numbers  seem to exist.]

 \end{itemize}



\section{Empty slots?}
Some free slots could be included, so that people can put their
ligatures in when they are trying to convert fonts coming from other
worlds.

Alan J. can give good explanations for this.


\section{Arrows}
Arrow construction should be possible. But to make sure it does not fail
when used in different sizes, every single glyph used for this purpose,
will be \textem{specifically} designed for this use.  \textem{All of
them will be in the same font table}. This does not mean that a given
construction block can't be used for different types of arrows.  This
sort of thing has to be thought of, and forecasted. These construction
blocks must not be used for any other purpose --- like for instance the
equal or minus sign.

All arrows from \fn{cm}, and from \fn{msam/msbm}, should be taken if
necessary. Maybe some others too.

 \subsection{The ``Plain horizontal arrows'' group: 14 (Alan:10)}
 The first 6 are in cmmi'050 to '055:
 \begin{enumerate}
               \itemsep -0.15cm
   \item leftharpoonup
   \item leftharpoondown
   \item rightharpoondown
   \item rightharpoonup
   \item lhook
   \item rhook

     From \fn{cmsy}:
   \item leftarrow '40
   \item Leftarrow '50
   \item leftrightarrow '44
   \item Leftrightarrow '54
   \item rightarrow '41
   \item Rightarrow '51
   \item CMSY'67 this is the \cn{mapstochar}
   \item CMSY'66 the negation sign/slash: 1
  \end{enumerate}

 \subsection{Extra arrows for use with plain arrows: 5 (Alan 5)}
   \begin{enumerate}
               \itemsep -0.15cm

   \item It would be reasonable to add a \cn{mapsfromchar} in order to
   build things like: $<\!\!-\!|$: 1

   \item It would be reasonable to add a \cn{Mapstochar} that could go
   with the double arrows to build things like $|\!=\!>$ : 1

   \item It would be reasonable to add a \cn{Mapsfromchar}
     that could go with the double arrows to build things like
     $<\!=\!\!|$ : 1

   \item A - for extending arrows: 1
   \item A = for extending arrows: 1

 \end{enumerate}


 \subsection{The ``Plain vertical arrows'' group: 6 (Alan 6)}
 \begin{itemize}
       \itemsep -0.15cm
 \item updownarrow cmsy'154
 \item Updownarrow cmsy'155
 \item uparrow cmsy'042
 \item downarrow cmsy'043
 \item Uparrow cmsy'052
 \item Downarrow cmsy'053
 \end{itemize}

 \subsection{Plain vertical extensible arrows: 6 (Alan 6)}
 \begin{enumerate}
       \itemsep -0.15cm
 \item Top single arrow: cmex'170
 \item Bottom single arrow cmex'171
 \item Top double arrow cmex'176
 \item Bottom double arrow cmex'177
 \item Middle double arrow cmex '167
 \item Middle single arrow cmex '077
 \end{enumerate}

 \subsection{Plain extra vertical arrows: 0}
 Nothing added here.


 \subsection{The plain other arrows: 8 (Alan)}
 First the oblique arrows:
 \begin{enumerate}
       \itemsep -0.15cm
 \item CMSY'45
 \item CMSY'46
 \item CMSY'55
 \item CMSY'56

   What else: ?
 \end{enumerate}

 \subsection{The ``Ams obliques'' group: 2}
 \begin{enumerate}
       \itemsep -0.15cm
 \item msbm'36
 \item msbm'37
 \end{enumerate}



 \subsection{The ``Latex arrows'' group: 5}
 The four characters in position LASY'50 to '53 from the lasy font
 (These appear in the wasy font as well) must be put with the arrows.
 They are arrow heads. The squig \cn{arrow} in position '73
 of lasy should also be included: 5


 \subsection{The ``Ams other arrows'' group: 4}
 \begin{itemize}
               \itemsep -0.15cm
   \item Circle arrows MSAM: '10 to '11 :2
   \item Horizontal arrows MSAM:'113 '114 :2
 \end{itemize}

 \subsection{AMS horizontal arrows: 22 (Alan 23)}
 This includes all the horizontal arrows and the negated ones,
 that are listed on page 280 of ``The Joy of \TeX''.
 \begin{enumerate}
       \itemsep -0.15cm
 \item leftarrowtail
 \item leftleftarrows
 \item leftrightarrows
 \item leftrightsquigarrow
 \item leftrightharpoons
 \item Lleftarrow
 \item looparrowleft
 \item looparrowright
 \item nleftarrow
 \item nLeftarrow
 \item nLeftrightarrow
 \item nleftrightarrow
 \item nrightarrow
 \item nRightarrow
 \item rightarrowtail
 \item rightleftarrows
 \item rightleftharpoons
 \item rightrightarrows
 \item rightsquigarrow
 \item Rrightarrow
 \item twoheadleftarrow
 \item twoheadrightarrow
 \end{enumerate}

 \subsection{Ams vertical arrows (Alan: 6) (here: 8)}
 \begin{enumerate}
       \itemsep -0.15cm
 \item MSAM:'24 upuparrows
 \item MSAM:'25 downdownarrows
 \item MSAM:'26 upharpoonright
 \item MSAM:'27 downharpoonright
 \item MSAM:'30 upharpoonleft
 \item MSAM:'31 downharpoonleft
 \item MSAM:'36 Lsh
 \item MSAM:'37 Rsh
 \end{enumerate}


 \subsection{Some control glyphs for access to arrows }
 These do not appear in the \fn{.dvi} file, they simply enable the
 construction of some arrows and slashed arrows using the ligature
 mechanism.

% end of arrows






\section{All sorts of accents}

 \subsection{Basic size accents: 18}
 \begin{itemize}
       \itemsep -0.15cm
 \item All those that are created by macros in the Ams package: the 3
   dotted accent, and the 4 dotted accent.\quad 2

 \item The ones in \TeX: e\char'22\ e\char'23\  e\char'24\ e\char'25
   e\char'26\ e\char'27\ e\char'136\ e\char'137\ e\char'177\ e\char'176
   e$\mathchar"017E$. They all come from \fn{cmr} except for
   the last two from \fn{cmmi}. \quad 11

 \item Extra: a back-to-front vector arrow, \quad 1
 \item Extra: a double sided type vector arrow, \quad 1
 \item Extra: a square bracket used as an accent, \quad 1
 \item Extra: The previous one turned upside down, \quad 1
 \item Extra: an arc is requested by AMS, \quad 1
 \end{itemize}

 \note {The e\char'175\ in \fn{cmr} is not needed in maths, it is just
 a Hungarian accent.}

 \note {The $\mathchar"017F$ seems not to be needed in maths. It
   could be put in the text companion font.}



 \subsection{Double accents: 7}
 \begin{enumerate}
               \itemsep -0.15cm
   \item A bar and a dot on top,
   \item A dot and a bar on top,
   \item 2 dots with a bar on top,
   \item A bar with 2 dots on top,
   \item A hat and a tilde on top,
   \item A hat and bar on top,
   \item A double bar,
 \end{enumerate}

 \note {For the double accents, Spivak and Ralf Rey could do some
   archive research at the AMS. Similar research could be done at the
   APS, and the CUP.}


 \subsection{Variable size accents: $7*8=56$}
 Variable size has meant 5 different sizes until today.
 That number could be raised to 8.
 \note {If the accents are in a font
   loaded in three different sizes, the choice mechanism of
   \cn{mathaccent} will only look in the current style size (unlike
   the delimiter choice mechanism). Thus although one could hope to
   multiply the number of available sizes by three, in actual fact in
   a given style the number of automatically available sizes would not
   be multiplied.  All the same this \textem{would} give better
   results in each style,
   but it would also create \textem{compatibility problems}
   i.e.~formulae heights and widths may change.  Even if not done in an
   automatic way, the user would still have a larger range of accents
   to choose from.
   Compatibility problems could be avoided by redefining
   \cn{mathaccent} to a \cn{mathchoice}. Thus the accents could always
   come from text style, and the accented material could come from the
   current style. But this does not work either. In doing so one would
   no longer be able to take the base accents from the current style.
   Although one could make two macros. See paper ``Replacing
   \texttt{cmex}?'', same author.}

 \begin{enumerate}
               \itemsep -0.15cm
   \item e$\mathchar"017E$ the vector. \quad 8
   \item e\char'176\ the tilde. \quad 8
   \item e\char'136\ the hat. \quad 8
   \item e\char'26\ the bar. \quad 8
   \item Some people request a variable size arc. \quad 8
   \item The back to front vector arrow, \quad 8
   \item The double sided vector arrow, \quad 8
 \end{enumerate}


 \subsection{Under accents: 3 so far}
 Requests exist for the following:
 \begin{enumerate}
               \itemsep -0.15cm
   \item A tilde,
   \item A breve (\char'25)
   \item A bar
 \end{enumerate}

 Like for the double accents, research could be done at the AMS...
 ???????


\section{Core symbols}

The symbols that have some reason to live with the default math
material. There are mainly two reasons for them to be there: one is
kerning, and the other is design similarity.

 \subsection{For kerning reasons: 12}
 \begin{enumerate}
       \itemsep -0.15cm
 \item The period . CMMI
 \item The comma , CMMI
 \item The semi colon ; CMR
 \item The colon : CMR
 \item The exclamation mark ! CMR
 \item The (
 \item and the ) respectively opening-class and closing-class, CMR
 \item The [
 \item and the ] respectively opening-class and closing-class, CMR
 \item The $\{$,
 \item And the $\}$ (design similarity reasons also) in positions '146
 and  '147 of CMSY,
 \item The `/' as a delimiter\footnote {This is not accessible via a
     single key. The key `/' produces the sign $/$ taken from
     \fn{cmmi}.}, and  as a fraction sign, CMR

 \end{enumerate}

 \subsection{Basic geometric delimiters: 9}
 Should go in the core, for kerning reasons, like the other ( ) and [
 ].  If they don't fit in the core, they must go with the basics.  The
 ones listed here are all in CMSY, around '142, and '150.
   \begin{enumerate}
               \itemsep -0.15cm
   \item $\rangle$
   \item $\langle$
   \item $|$
   \item $\|$
   \item $\rceil$
   \item $\lceil$
   \item $\rfloor$
   \item $\lfloor$
   \item The $\mathchar"026E$ in position '156
   \end{enumerate}

    Test: $|f|,\|f\|,\lfloor f\rfloor,$$\lceil f\rceil,\langle
    f\rangle$ $f\lceil,f\lfloor,$$f\langle,f\backslash$ strange that
    no kerning seems to be needed here, whereas it is necessary for
    the bracket.

 \subsection{New basic size delimiters: 9}
 Basic size means the same size as the parentheses and brackets in
 \fn{cmr}. The following is a preference order list of desired new
       delimiters:
 \begin{enumerate}
               \itemsep -0.15cm
   \item A $|||$ for use as $|||f|||$ a norm,

     Semantic brackets: $[\![$ and $]\!]$
   \item The opening semantic bracket,
   \item The closing semantic bracket,

   \item Opening multi set brackets $\{|$
   \item Closing multi set bracket $|\}$

     Unicode contains another style of brackets, they call them
     tortoise shell brackets. They look like:
     \begin{verbatim}
        /
       /
      /
      |
      |
      |
      \
       \
        \
     \end{verbatim}
     These are like parentheses, but with straight lines. No curves.
   \item The opening tortoise shell bracket,
   \item The closing tortoise shell bracket,

     Triangle brackets, something like:
     \begin{verbatim}
         /|
        / |
       /  |
       \  |
        \ |
         \|
     \end{verbatim}
   \item The opening triangle bracket,
   \item The closing triangle bracket.

%    \item The opening turtoise shell in Bbb.
%    \item The closing turtoise shell in Bbb.

 \end{enumerate}



 \subsection{For design similarity reasons: 24}
       All this group must live with the default alphabet for design
       similarity reasons.
 \begin{enumerate}
               \itemsep -0.15cm
   \item The question mark ? must live with the ! CMR
   \item The percent sign \% must live with the ! and ? CMR
   \item The at sign @ must live with the \%  CMR
   \item The \$ sign must live with the @ \% ? ! CMR
   \item The \& must live with \$, \% .. CMR
   \item The \# in CMR

   \item The inverted \&: must be found.

   \item The $\ell$ as a rounded `l'. CMMI
   \item The centered dot $\cdot$ for use as a multiplication sign,
   must live with the period. CMSY
   \item The asterisk $\mathchar"0203$ for use as a multiplication
     sign, in position '003 in CMSY.
   \item The $\propto$ sign must live with @, \%, $\ell$. In position
     '057 of CMSY.
   \item The $'$ or prime in position '060 of CMSY, one cannot
     separate the prime from the prime ligature slots. (2
                       ligatures) Kerning of letters with the prime is
                       not possible, because
     the latter is set in superscript. ????????

   \item The backprime from MSAM'070 should live with the prime. ??????

   \item The $\infty$ sign in position '061,

   \item The $\emptyset$ in position '073 of CMSY,

   \item The \cn{check} mark in MSAM'130, ?????
   \item The \cn{between} double parentheses in MSAM'107
     should go with the normal parentheses.
   \item The $\Re$, could come out if necessary,
   \item The $\Im$ in position '074, and '075 of CMSY, could also
     come out if necessary.
   \item The $\dagger$ or dagger in CMSY'171, ?????
   \item The smile, ???????
   \item The frown from CMMI ????????
   \item Could maybe include the circled S from MSAM'163. ?????
   \item The Weierstrass symbol: $\wp$ only in one style, (could come
   out)
   \item The powerset symbol from
   \texttt{[email protected]}. (could  come out if necessary)
     It looks something like this:
   \begin{verbatim}
         XXXXXXXXXXX
     XXXXXXXX    XXXX
   XXXXX           XXXX
   XXXXX           XXXX
 XXXXXXXX          XXXX
 XXX  XXX          XX
 XX   XXXXXXX  XXXXXX
 XX    XXXXXXXXXXXX
 XX    XXXX
 XX    XXXX
 XX      XXXX
 XX      XXXX
 XXXX    XXXX
 XXXX    XXXX
 XXXX  XXXX
   XXXXXX
   \end{verbatim}
 \end{enumerate}












\section{Symbols from \fn{lasy} that must be kept: }
The first four \fn{lasy} symbols are in \fn{msam}.

The ones in positions '50 through '53 are arrow heads, and are counted
as such in the corresponding group.

Character '60 is in the \fn{msam}.

'61 is not in the \fn{msam}, and should be kept.

Char'62: same as \fn{msam}'03 ?

Char'63: same as \fn{msam}'06?

Lasy'72: same as \fn{msbm}'163 or \fn{msam}'166 ?

Lasy'73: same as \fn{msam}'40 ?

A list of what should be kept from \fn{lasy}:
\begin{itemize}
       \itemsep -0.15cm
       \item Character '61: \quad 1
       \item ???
\end{itemize} \textbf{This makes a total of 1.}



\section{The ``Subset'' groups}
\note {None of these have anything to do with the \cn{sim} glyph.}

 \subsection{The ``subset plain'' group: 4}
 \begin{itemize}
               \itemsep -0.15cm
   \item The $\subseteq$ in position CMSY'022
   \item The $\supseteq$ in position CMSY'023
   \item The $\subset$ in position CMSY'032,
   \item The $\supset$ in position CMSY'033,
 \end{itemize}

 \subsection{The ``subset Ams'' group: 12}
 \begin{itemize}
               \itemsep -0.15cm
   \item From MSBM'040 to MSBM'053 : 12
 \end{itemize}

 \subsection{The ``In/ni plain'' group: 2}
 \begin{enumerate}
               \itemsep -0.15cm
   \item The $\in$ sign in position CMSY'062,
   \item The $\ni$ sign in position CMSY'063,
 \end{enumerate}

 \subsection{The ``sqsubset plain \& Ams'' group: 4}
 These do not have a place anywhere else:
 \begin{enumerate}
               \itemsep -0.15cm
   \item The $\sqsubseteq$, cmsy'166,
   \item The $\sqsupseteq$, cmsy'167,
   \item The sqsubset from MSAM'100,
   \item The sqsupset from MSAM'101,
 \end{enumerate}




\section{The ``Greater than'' group}

 \subsection{The ``Greater than Plain'' group: 8}
 \begin{itemize}
               \itemsep -0.15cm
   \item The $\leq$ in position CMSY'024,
   \item The $\geq$ in position CMSY'025,
   \item The $\ll$ in position CMSY'34,
   \item The $\gg$ in position CMSY'35
   \item $<$ less than CMMI'074,
   \item $>$ Greater than: CMMI'076,
   \item The alternative leq: msam'66
   \item The alternative geq: msam'76
 \end{itemize}

 \subsection{The ``Greater than AMS'' group: 30}
 \begin{itemize}
               \itemsep -0.15cm
   \item From MSBM'000 to '005: 6
   \item From MSBM'010 to '015: 6
   \item From MSBM'024 to '025: 4
   \item From MSBM'154 to '155: 2
   \item From MSAM'060 to '061: 2
   \item From MSAM'065 and '067: 2
   \item From MSAM'075 and '077: 2
   \item From MSAM'121 to '124: 4
   \item From MSAM'156 to '157: 2
 \end{itemize}

 \subsection{The ``greater than with sim'' group: 8}
 \begin{enumerate}
               \itemsep -0.15cm
   \item MSBM'022,
   \item MSBM'023,
   \item MSBM'032,
   \item MSBM'033.
   \item MSAM'046,
   \item MSAM'047,
   \item MSAM'056,
   \item MSAM'057
 \end{enumerate}

 The `shapee' \cn{sim}, and the geometric \cn{sim} are considered to be
 the same glyph, i.e. the difference that is sometimes visible is
 considered  to be a bug.


\section{The ``Succ'' groups}

 \subsection{The ``Succ without sim plain'' group: 4}
   \begin{enumerate}
                       \itemsep -0.15cm
     \item CMSY'026,
     \item CMSY'027,
     \item CMSY'036,
     \item CMSY'037
   \end{enumerate}

 \subsection{The ``Succ without sim AMS'' group: 10}
   \begin{itemize}
                       \itemsep -0.15cm
     \item MSBM'006, '007: 2
     \item MSBM'016, '017: 2
     \item MSBM'026, '027: 2
     \item MSAM'062 - '064: 3
     \item MSAM'074: 1
   \end{itemize}

 \subsection{The ``Succ with sim Ams'' group: 8}
 \begin{itemize}
               \itemsep -0.15cm
   \item MSBM'020, '021: 2
   \item MSBM'030, '031: 2
   \item MSBM'166, '167: 2
   \item MSAM'45,
   \item MSAM'55,
 \end{itemize}

 The `shapee' \cn{sim}, and the geometric \cn{sim} are considered to be
 the same glyph, i.e. the difference that is sometimes visible is
 considered to be a bug.





\section{The ``Sim'' group: 12}
 \begin{enumerate}
\itemsep -0.15cm
   \item sim CMSY'030
   \item approx CMSY '31
   \item simeq CMSY'047
   \item wr CMSY'157
   \item The bold MSBM'034
   \item MSBM'035
   \item MSBM'150
   \item The bold MSBM'163
   \item The bold MSBM'164
   \item MSBM'165
   \item MSAM'166, backsim
   \item MSAM'167, backsimeq
 \end{enumerate}


\section{Binops}
 \subsection{The ``Small binops plain'' group: 20}
 \begin{enumerate}
               \itemsep -0.15cm
   \item cap  CMSY
   \item cup CMSY
   \item uplus CMSY
   \item sqcap CMSY
   \item sqcup CMSY
   \item big circle CMSY
   \item big triangle up CMSY
   \item big triangle down CMSY
   \item vee CMSY
   \item wedge CMSY
   \item oplus CMSY
   \item ominus CMSY
   \item otimes CMSY
   \item oslash CMSY
   \item odot CMSY
   \item amalg CMSY
   \item bullet CMSY
   \item circ CMSY
   \item diamond CMSY
   \item star (5 branches): CMMI'77
 \end{enumerate}

 \subsection{Small plain left right triangles: 2}
 These should be replaced by the ones in the AMS fonts for math usage.

 They are also used as bullets, and should go in the text symbol
 encoding for such a usage.
 \begin{enumerate}
               \itemsep -0.15cm
   \item triangle left: CMMI'56
   \item triangle right CMMI'57
 \end{enumerate}

 \subsection{AMS left right open triangles: 8}
 These are also in LASY.
 \begin{itemize}
       \itemsep -0.15cm
 \item vartriangle left
 \item vartriangle right
 \item triangle left eq
 \item triangle right eq

   The previous four are in \fn{msam}
 \item Same 4 negated in \fn{msbm}: 4
 \end{itemize}

\section{Basic Symbols: 24}
A group of symbols used for typesetting basic mathematics.
These are mainly geometrics. Some have been added for similarity
reasons:
 \begin{enumerate}
       \itemsep -0.15cm
 \item = The equals sign, CMR'075
 \item - The minus sign, CMSY'00
 \item + The plus sign, CMR'053

 \item The $\times$ multiplication sign CMSY'002,
 \item The \cn{divide} sign $\div$ CMSY'004
 \item The \cn{divideontimes} from msbm'076 should live with divide
   and times.
 \item The rtimes from msbm'157 should live with the times.
 \item The ltimes from msbm'156 should live with the times.

 \item The $\pm$ sign in position CMSY'006,
 \item The $\mp$ sign in position CMSY'007,

 \item The $\equiv$ in position CMSY'021, Difficult to separate from
   other similar relations.

 \item The $\forall$ sign in position '070,
 \item The $\exists$ sign in position '071,
 \item The \cn{nexists} sign from msbm'100
 \item The $\neg$ sign CMSY'072,
 \item The \cn{varpropto} from AMS "5F. ????? or should this be left
   as a geometric?????
 \item The varemptyset from MSBM'77, ????

 \item Could go here: the upside down F: Finv from msbm'140 ???????
 \item And the back to front G: Game from Msbm'141 ???????

 \item Unary minus like en dash, could be CMR'173 but I personally
   think it should be shorter.

 \item The \cn{varnothing} from MSBM'77,
 \item smallsetminus from msbm'162

 \item The $\perp$ perp or bot sign in position '077,
 \item top sign CMSY'076
\end{enumerate}




\section{Radical}
 \subsection{The \TeX\ radicals: 10}
 Currently available in \fn{cmex} are:
 \begin{itemize}
               \itemsep -0.15cm
               \item Five radical signs: 5
               \item The vertical bit needed to construct the big
               radical '165 : 1
               \item The top bit of the constructed radical. '166 : 1
 \end{itemize}

 \fn{cmsy} includes the basic size of the radical. It
 has always been loaded in three sizes, and must remain so.
       If it is taken out of \fn{cmsy}, and put in a \fn{cmex}
       replacement,
       then this point must be taken into consideration.

       If the new \fn{cmex} is loaded in one size, it must contain
       three different sizes of the radical in order to stay compatible
       with plain: 3


 \subsection{New radicals: 2}
 Request made by: \texttt { HITT\% [email protected] }.

 One can overload the little vertical extensible module of the
 radical, in order to produce a left quantum operator. For the right
 quantum operator, the glyphs could be available, but the radical
 macro can't be used.
       A specific macro could be designed and it would need two glyphs:
 the top right hand corner, and the repeatable
 vertical bit: 2






\section{The integrals family: 18}

 \subsection{Big `bigops' size: 7}
 \begin{enumerate}
               \itemsep -0.15cm
   \item The single integral.
   \item The double integral. Could be done with kerning if there is
   not enough space.
   \item The triple integral. Could be done with kerning if there is
   not enough space.
   \item The single O integral.
   \item The double O integral.
%    \item The triple O integral. ??????? J\"org says no.
   \item The sigma integral. For physics: J\"org.
   \item The slash integral. For physics: J\"org.
 \end{enumerate}

 \subsection{Small `bigops' size: 7}
 The same as in big `bigops' size.

 \subsection{Small size: 7 }
 This refers to the size of the \cn{smallint} in CMSY.
 \begin{enumerate}
       \itemsep -0.15cm
 \item The single normal integral.
 \item The single O integral.
 \item Double O integral. ???? (J\"org thinks yes)
 \item Double normal integral ???? (J\"org thinks yes) Could be done
   with kerning if there is not enough space.
 \item Triple normal integral ???? (J\"org thinks yes) Could be done
   with kerning if there is not enough space.
 \item The sigma integral.
 \item The slash integral.
 \end{enumerate}

 Mail from HSS:
 \begin{quote} %\fontsize{7}{7}\selectfont
   \cn{doubleoint} is used by Becker in ``electromagnetic fields and
   interactions'' (Dover). I also saw \cn{tripleoint} used in
   electromagnetic theory books although both are somewhat archaic.

   Concerning the small version (in \fn{cmsy}), I suggested this solely
   for reasons of completeness. The need for it is less now that
   \fn{cmex} will be loaded in three sizes. But the small version of
   \cn{int}
   \& \cn{oint} look a bit large when used in inline formulas. The
   \cn{smallint} \& \cn{smalloint} etc. may be a choice for some
   authors in the latter case.

   YH also pointed out, that the upright versions of integrals are very
   common in textbooks. Since the integral sign is one of the most
   common symbols used in maths, it may not be a bad idea to include
   upright versions of *all* integral signs in \fn{cmex} (with
   corresponding small versions in \fn{cmsy}) again for reasons of
   completeness.

 \end{quote}

\section{AMS Vdash group: 10}
\begin{enumerate}
\itemsep -0.15cm
\item MSBM'054 nparallel
\item MSBM'055 nmid
\item MSBM'056 nshortmid
\item MSBM'057 nshortparallel
\item MSBM'061 nVdash
\item MSBM'062 nvDash
\item MSBM'063 nVDash
\item MSBM'160 shortmid
\item MSBM'161 shortparallel
\item MSAM'015 Vdash
\item MSAM'016 Vvdash
\item MSAM'017 vDash
\end{enumerate}

\section{Plain and \fn{lasy} miscellaneous symbols: 6}
\begin{enumerate}
\itemsep -0.15cm
\item CMSY'20
\item CMSY'140
\item CMSY'141

 Should live with the two previous:
\item MSBM'060 What about the back to front version of this ?????
\item LASY'061 the bow tie,
\item LASY'62 the small box. It is smaller than the one in the AMS
 fonts. ????
\end{enumerate}


\section{AMS equals friends: 10}
 \begin{enumerate}
\itemsep -0.15cm
 \item msam'155
 \item msam'154
 \item msam'120
 \item msam'73
 \item msam'72
 \item msam'54
 \item msam'53
 \item msam'52
 \item msam'51
 \item msam'44
 \end{enumerate}


\section{AMS miscellaneous geometric symbols: 21}
\begin{enumerate}
\itemsep -0.15cm
\item msam'174
\item msam'173
\item msam'171
\item msam'170
\item msam'165
\item msam'164
\item msam'161
\item msam'160
\item msam'151
\item msam'150
\item msam'147
\item msam'146
\item msam'141
\item msam'140
\item msam'136
\item msam'135
\item msam'134
\item msam'133
\item msam'132
\item msam'131
\item msam'050
\item msam'005

\end{enumerate}



\section{AMS boxes and friends: 15}
\begin{enumerate}
\itemsep -0.15cm
\item MSAM'000
\item MSAM'001
\item MSAM'002
\item MSAM'003
\item MSAM'004
\item MSAM'006
\item MSAM'007
\item MSAM'014
\item MSAM'106
\item MSAM'110
\item MSAM'111
\item MSAM'112
\item MSAM'115
\item MSAM'116
\item MSAM'117
\end{enumerate}



\section{The horizontal curly braces: 10}
Their design should be the same as the vertical braces. Add two
horizontal extension modules for them, since if they are drawn with
rules, digitization errors may cause them not to line up with the
horizontal brace glyphs. What's more, this would enable the designer
to choose their boldness.

Plus two extra middle bits, so that the designer is not restricted by
the number of slots. Knuth's design could manage with that small number
of glyphs, but other designs may need more.

Count: for the downwards brace: 2 end bits, 1 middle bit, 2 extensible
modules. That makes a total of 5 per curly brace. One up, and one
down: makes 10.



\section{Big and extensible \TeX\ delimiters from \fn{cmex}: 78}
This group includes delimiters that are in \fn{cmex}. And an extra
little extensible module for the \{ and \}.
 \begin{itemize}
       \itemsep -0.15cm

 \item Four different sizes of ( and ): 8

 \item Extensible versions of ( and ): 6 \\
   Left and right extensible modules are '102 and '103. Top and bottom
   are '060, '061, '100, '101.

 \item Four different sizes of [ ]: 8

 \item Extensible version of [ and ]: 6 \\
   The extensible modules, one for the right bracket, and one for the
   left bracket are: '066, '067. The top and bottom pieces are: '062
   to '065.

 \item Four different sizes of \{ and \}\,: 8

 \item Extensible module for \{ and \}\,: 7 \\
   The extensible module ('76) used for the curly braces is very small,
     because it is added twice: once above the middle piece, and once
     below the middle piece. Its height is half that of the parentheses
     extensible  module. Other pieces are: '070 - '075.


 \item An extra extensible module for the \{ and \}\,: 1\\
   There is only one extensible module for both the left and the
   right curly brace in \fn{cm}. This is because the left-right spread
   of a curly brace is symmetrical in \fn{cm}, unlike the parentheses
   for example.
   This may not be the case for other designs.

 \item Four different sizes of $\langle$ and $\rangle$: 8

 \item Four different sizes of $\backslash$ and $/$: 8

 \item Four different sizes of $\lfloor$ and $\rfloor$: 8 \\
   The extensible version is built with the same pieces as the
   extensible brackets.

 \item Four different sizes of $\lceil$ and $\rceil$: 8 \\
   The extensible version is built with the same pieces as the
   extensible brackets.

 \item Extensible vert and parallel: 2 \\
   Extensible versions of the vertical bar and the double vertical
   bar. They are their own extensible modules: '014 and '015.

 \end{itemize}




\section{Bigops}
 \subsection{Old bigops from \TeX: 28}
 There are two glyphs for each bigop.
 \begin{enumerate}
               \itemsep -0.15cm
   \item The sqcup
   \item The circled integral
   \item The circled dot
   \item The circled plus
   \item The circled times
   \item The sums
   \item The prods
   \item The normal integrals
   \item The bigcups
   \item The bigcaps
   \item The U plus
   \item The wedges
   \item The vees
   \item The coprods
 \end{enumerate}

 \subsection{New double sized `bigops': 26 }
 All these should come in two sizes, in the same font, like the present
 \cn{bigcup}. One for display style, and one for text style. That
 makes two glyphs for each one.

 \begin{enumerate}
               \itemsep -0.15cm

   \item A double sized sqcap $\sqcap$ \cn{bigsqcap} (can be found in
               \fn{cspex})

   \item Two sized $\bigcirc$ with $\vee$ inside.  $\bigcirc
     \!\!\!\!\!\vee$ proposed name: \cn{ovee}, and \cn{bigovee}.
     Can be found in \fn{cspex} and \fn{stmary}.

   \item Two sized $\bigcirc$ with $\wedge$ inside.  $\bigcirc
     \!\!\!\!\!\wedge$ proposed name \cn{owedge}, and
     \cn{bigowedge}. Comment from Alan:
     \begin{quote}
       ``As far as I'm aware nobody has *ever* used these glyphs in a
       paper.  I put them in St Mary's Road because I needed them at
       the time, but I shortly abandoned writing the paper they were
       going to be used in.  Please don't include them!  (If we are
       going to, we need to include <ovee> and <owedge> as well as
       <bigovee> and <bigowedge> which are the ones you described.)''
     \end{quote}

   \item Dijkstra choice: $[\!]$ CSPEX

   \item A wide Dijkstra choice. CSPEX . Comment from Alan:
     \begin{quote}
       If this is the glyph I think it is, it's not quite a wide
       Dijkstra choice in shape (although mathematically it's the
       same thing as Dijkstra choice).  The two glyphs are:

       $<$dijkstrachoice$>$ looks remarkably like [ and ] glued
       together.

       $<$oblong$>$ looks like $<$sqcap$>$ but with the square
       completed.

       $<$oblong$>$ is used in CSP in conjunction with $<$sqcap$>$,
       so it's quite important that they look the same.  In
       particular, they need to be of the same width because if they're
       not, formulae sometimes don't line up properly...
     \end{quote}

   \item Parallel \cn{bigparallel} just a double sized version of
     parallel.

   \item Interleaving $|\!|\!|$ : \cn{biginterleaving}
     \begin{quote}
       `Interleaving' and `parallel' are used in (at least) three
       different ways:
       \begin{itemize}
                               %\itemsep -0.15cm
       \item As delimiters $||$foo$||$ and $|||$foo$|||$.  These
         should come in basic-sized and extensible versions.
       \item As binary operators p $||$ q and p $|||$ q.  These can
         be the same glyphs as for the basic-sized delimiters.
       \item As `big' operators $||_i \;p_i$ and $|||_i \;p_i$
         similar to \cn{bigcup}.  These should come in text style and
         display style versions.
     \end{itemize}
     The big operators are not the same glyphs as the extensible
     delimiters.
   \end{quote}

 \item \cn{bigcupdot}: A `U' with a dot in it. Something like:
   $\bigcup\!\!\!\!\cdot$

 \item \cn{bigcapdot}: an upside down `U' with a dot in it. Something
   like: $\bigcap\!\!\!\!\cdot$

 \item An inverted \& called \cn{dnasrepma}

 \item Large operator symbol based on an asterisk sign.

 \item Large operator symbol based on a times sign.

 \item Large operator symbol based on a hash sign.

 \item Large operator symbol based on an ampersand sign.

 \end{enumerate}



\section{Non classified existing symbols}
Here is a list of some symbols that do not have a place elsewhere:
\begin{itemize}
       \itemsep -0.15cm
 \item The different shapes of \# should find a place, although one is
   already in the core group.
\end{itemize}








\section{A list of new glyphs}

The following symbols should be added in the math fonts. Some have
already been designed by various people, so it should be possible to
find them...


 \subsection{Basic size operators: 2}
 Basic size means the same size as the operators in \fn{cmsy}.
 \begin{enumerate}
               \itemsep -0.15cm
   \item Something like \cn{cupdot} and
   \item Something like \cn{capdot} Frank M. can justify these.
 \end{enumerate}


 \subsection{New multi-sized, and extensible delimiters: 47}
 Count: 8, 6,  1,  8, 8,   8,   8, makes 47 .

 A multi-sized delimiter means: 4 sizes for each side : 8 glyphs.
 Plus an extensible version: top, bottom, extension module for both
 sides: 6 glyphs. Sometimes also a middle: 8 glyphs. Total: 16 or 14.
 \begin{enumerate}
               \itemsep -0.15cm

   \item Four sizes of the semantic brackets $[\![$ and $]\!]$: 8

   \item An extensible version of the semantic brackets: 6 \\
     (Top - Middle - Bottom) * 2 makes 6.

   \item An extensible version of $|||$ for use as $|||f|||$ (a
     norm). Just the extension module: 1

   \item 4 sizes of multiset brackets $\{|$ and $|\}$: 8

   \item An extensible version of multi-set brackets: 8 \\
     (Top - Bottom - Middle - extensible module) *2 makes 8.

   Unicode contains another style of brackets, they are called
     tortoise shell brackets. They look like
       \begin{verbatim}
         /
        /
       /
       |
       |
       |
       |
       |
       \
        \
         \
       \end{verbatim}
   \item Four sizes of tortoise shell brackets: 8 \\
     No extensible version. could add them in.


   \item Four sizes of triangle brackets: 8 \\
     They look like:
     \begin{verbatim}
         /|
        / |
       /  |
       \  |
        \ |
         \|
     \end{verbatim}

 \end{enumerate}



\subsection{Geometrics: 21}
\begin{enumerate}
       \itemsep -0.15cm

 \item The \fn{ams} smaller or equal and greater or equal must not be
       forgotten.

 \item Linear `is implied by' if o-- and o--o [bb: There are also
 versions of these with filled-in circles.]

 \item From JMR: something like: $\raise .1cm \hbox{$|$}\!\_$
   maybe the same upside down.

 \item From JMR: Something like: $\_\!\raise .1cm \hbox{$|$}$
   maybe the same upside down.

 \item \texttt{Here are some other symbol I once needed: \cn{opm},
     \cn{omp}.  Why?  There is \cn{oplus} and \cn{ominus} and there
     is +, -, \cn{pm} and \cn{mp}, but there's no \cn{opm} and
     \cn{omp}.}

   \texttt {I once needed them in a context, where \cn{oplus} and
     \cn{ominus} were used as subscripts to indicate symmetric and
     antisymmetric wave functions that were normalized. It is easy to
     specify formulae that include both cases using \cn{pm} and
     \cn{mp}, but suddenly there was no \cn{opm} and \cn{omp}. I
     constructed symbols using the circle from the copyright sign,
     but that was not actually the perfect size.}

\end{enumerate}

\subsection{New arrows}
\begin{itemize}
\itemsep -0.15cm
 \item Alan J. wrote: \cn{arrownot} and \cn{Arrownot}, so that for
      example \cn{arrownot}\cn{mapsto} is visually compatible with
     \cn{nrightarrow}.  Describing the same thing he also wrote: Add
     the `building blocks' for the AMS negated relations, for example a
      \cn{arrownot} to build \cn{nlongrightarrow} and
        \cn{nrightarrowfill}.

 \item The building blocks to make \cn{mapsfrom} $<\!\!-\!|$
   \cn{Mapsto} $|\!=\!>$ and \cn{Mapsfrom} $<\!=\!\!|$

 \item Lfloor, Rfloor, Lceil, Rceil like $\lfloor\!\lfloor$

 \item Arrows with triangles on the end.
$$<\!\!\!|\!\!-\!\!-,\ -\!\!-\!\!|\!\!\!>,\ <\!\!\!|\!\!-\!\!-|\!\!\!>$$

 \item Equals like symbol: $<\!--\!>$ with $==$ underneath.
\end{itemize}


\subsection{Non geometrics: 19}

\begin{enumerate}
\itemsep -0.15cm
%  \item The double circled integral, or the surface integral for
%    physicists.
%  \item A single integral with on top a $\Sigma$:
%    $\displaystyle\int\!\!\!\!\!\!\textstyle\Sigma$ (J\"org)
%  \item A single integral with a slanted dash:
%    $\displaystyle\int\!\!\!\!\!\textstyle-$
%  \item A triple, circled integral.

 \item Possibly something like $\bar{}\!\!($ and $\bar{}\!\!)$ if the
 bar was touching the parentheses.

 \item Banana brackets: look (sort of) like $(\!|$ and $|\!)$.  Or
   they look like bananas if you believe Jeremy... Alan: The St. Mary
   Road font includes samples of them, in a line-drawing style. Since
   I have not seen them in real use, I cannot comment, if this style
   or rather the look of \& in \fn{cmr} is appropriate.

 \item lightning (\fn{wasy}) --- probably does not belong in maths.
   Alan: It is actually used though!  It means `interrupt' in process
   theory, c.f.  Communicating Sequential Processes, Hoare, Prentice
   Hall 1985.  I don't know how widely used outside process theory it
   is though...



%  \item Upright partial derivation sign. In the Greek-like stuff.
%  \item Arabic letter dal: looks something like a back to front $c$.
%    This should live with the Hebrew letters.
%  \item Remember that message forwarded by J\"org. The V-bar, and the
%    parenthesis-bar. [email protected]. The V-bar is in
%    with the Latin like stuff.

 \item Katakana character that looks like a spiral. (bb)

%  \item Here is a proposal from J\"org: Mylona's and Whitney's
%    ligatures: two extra symbs. These would come in italic, and
%    upright like the rest of the Greek stuff. J\"org himself does not
%    like this.  [bb: But we're not talking about text! Why put these
%    in a math font?]  It now looks as though this is not going to be
%    included in the new math encoding.

 \item A lower-case sigma with a long tail that goes a little bit below
 the baseline.

 \item The two versions of the \# hash sign must not be forgotten. It
               seems that one is geometric, and one is not: the slanted
               hash sign and the upright hash sign.


 \item An \cn{inviota} is sometimes requested on the net. I'll send
   you a reference file for it. (J\"org)

 \item More ?

\end{enumerate}
\end{filecontents}



\begin{filecontents}{l3d007c.tex}
% \frameit{<material>}: a macro to put a frame round a box.
% The argument is set in an \hbox and padded by .1cm on all four sides
% (\hskip to the left and right, \vskip above and below); the padded
% material is then enclosed by two \vrule's (the sides) and two \hrule's
% (top and bottom) inside an outer \vbox.
\newcommand{\frameit}[1]{\vbox{\hrule\hbox{\vrule
     \hskip .1cm\vbox{\vskip .1cm\hbox{#1}\vskip .1cm}\hskip .1cm
     \vrule}\hrule}}


\begin{quote} \small
 The placing of mathematical accents in \TeX{} is done with
 the following control sequence: {\tt\string
   \mathaccent"xyzz\{box\}}. {\tt"xyzz} is the code that
 identifies the accent, and `{\tt box}' is the entity to come
 under the accent. The box can be any type of box known by \TeX{}: a
 single letter, a vbox, an hbox, etc... The accent code used is a
 usual math code (refer to any book on \TeX\ for more details).  The
 accent is basically centered over the given box, but there are ways
 of influencing the way the centering is done. The vertical placement
 is as we shall see a little more tricky.
\end{quote}






\section{The accent choosing}

Let us call $x$ the box that is due to be accentuated, and $u$ its
width.
We shall first suppose that $x$ is a single character.\\
\vskip .05cm
\centerline{$x:\qquad \rightarrow$\lower .2cm%
\hbox{\frameit{\huge x}}$\leftarrow$u}
\vskip .2cm

If the accent is part of a list of successors\footnote{See document on
 charlist for more details about successors.}, \TeX{} chooses the one
whose total width is {\em as close as possible} to $u$, but still
{\em smaller or equal} to $u$.

\vskip .25cm \textbf{Note.} A list of successors can only exist
within one single font, and Knuth reminds us of this fact in rule 12:
``[...] has a successor in its font [...]'' This makes me wonder...
The hat and tilde come in their smallest size in cmr position '136 and
'176. The other sizes are in cmex position '142 - '147. The
following:\\
\verb|   $\mathaccent"005E{e}$, ...  ,$\mathaccent"005E{eeee}$|\\
produces: $\mathaccent"005E{e}$ and
$\mathaccent "005E{ee}$ and $\mathaccent "005E{eee}$ and
$\mathaccent "005E{eeee}$. So as expected there is no automatic sizing,
seeing as the hat in cmr is not part of a charlist. Same test with
tilde: \\
\verb|   $\mathaccent "007E{e}$, ...  ,$\mathaccent"007E{eeee}$|\\
produces: $\mathaccent"007E{e}$ and $\mathaccent
"007E{ee}$ and $\mathaccent "007E{eee}$ and $\mathaccent "007E{eeee}$.
In order to get a hat that changes size automatically one must
call the smallest hat in \texttt{cmex} directly , like \cn{widehat}
does. \cn{widehat} is defined as: \cn{mathaccent} \texttt{"0362}.
Family three indicates cmex, and 62 is the hex position of the
smallest hat in \texttt{cmex}. Thus \cn{widehat}\verb|{abda}|
produces: $\widehat{abda}$.





\section{The horizontal placing}

The accent followed by its italic correction is put into a box $y$
whose width we shall call $w$.\\
\vskip .05cm
\centerline{$y:\qquad \rightarrow$\lower .2cm\hbox{\frameit{ acc
{\tt \string\/}}}$\leftarrow\ w$}
\vskip .2cm

Let us call $s$ the kern between the character in box $x$ and the {\em
skewchar\/}\footnote{See below for more details about the skewchar.}
---~taken in that order. The box $y$ is first centered on top of the
box $x$ and then shifted rightwards by the amount $s$.

\textsl {If the box $x$ is {\em not a single character}, the amount
 $s$ is taken equal to zero, therefore the skewchar is ignored and
 the box $y$ is normally centered\footnote{But the box $y$
 contains the italic correction, which influences the centering.}
 over the box $x$.
}
%
%
%
\subsection{The `skewchar'}
The \textem {skewchar} is a specific character that D.E.~Knuth decided
to use ---~in the way described above~--- for placing accents. Each font
should have its own skewchar.

For most characters, the {\tt .tfm} file specifies a particular
kerning of each letter with the skewchar. This is true for the
computer modern fonts, but other font designers may not have used this
feature.

Why choose one skewchar rather than another? This is because the
character $\mathchar"017F$ chosen by Knuth does not have any other
kerning that could have been disturbed by the skewchar kerning. This
choice may not always be good for all fonts, because it depends on
what the character in position '177 is. Thus a font designer might
choose another skewchar and put the necessary kernings in the {\tt
 .tfm} file.  Accent glyphs can be used as skewchars, because they
are usually not subject to kerns from other glyphs.

As long as one uses the {\tt cm} fonts, it does not make much sense to
change the skewchar, unless one wants to get strange effects, or
unless one intends to change the {\tt .tfm} file. One should remember
that an assignment to skewchar is not undone at the end of the group,
but is a part of the global font information. A local change
therefore involves saving the original value, and then restoring it.

\subsection{The italic correction}
The presence of the italic correction here is a little mysterious.
Its effect is to shift the accent to the left compared to the position
it would have without it. One should note that it is still added when
the $x$ box is more than one character.
%My personal theory is that
%whereas the skewchar-kerning enables a general positioning of all
%accents on one character, the italic correction enables an adjustment
%of a particular accent over all characters.


\subsection{Conclusions}
It is useless changing the skewchar unless one also modifies the
{\tt .tfm} file(s), which contain(s) the info for the skewchar kerning,
and for the italic correction.

Accents from any font can be positioned on characters from any other
font if there is a skewchar in the character font.

%
%
%
%
%
%
\section{The vertical placing}
This is a little more tricky.  Here as well one can start
by supposing that the character to be put under the accent is single.
All previous notations are kept. $\chi$ is the x-height
of the accent's font i.e. {\tt\string \fontdimen5}. Let us call $h_x$
the height of the $x$ box:\\
%
\vskip .1cm

\centerline{\vbox{\hbox{$\ \downarrow$} \frameit{\huge x} \kern
   -.2cm\hbox{$\ \uparrow$}
\hbox{$h_x$}}}
\noindent
and $h'_x$ the height of a box $x'$ containing the character to be put
under the accent, together with an empty superscript, and an empty
subscript.\\
\centerline{%
$x':\qquad $ \lower 1.5cm \vbox{\hbox{$\ \downarrow$}%
\frameit{$\mathop{\frameit{\huge x}}\limits^{\frameit{\scriptsize%
super}}_{\frameit{\scriptsize sub}}$}%
\kern-.2cm
\hbox{$\ \uparrow$}%
\hbox{$h'_x$}}%
}%end of centerline
%

\noindent
Now set $\delta$ with the minimum of $\chi$ and $h_x$, and increase it
by $h'_x-h_x$.

The end result is a \cn{vbox} $z$ containing box $y$ (the accent
correctly positioned laterally), followed by a (vertical) kern of
$-\delta$, and then box $x'$.  A normal accent char has the following
aspect:
\frameit{\char127}. This explains why the kern of $-\delta$ is needed.
Without it the gap under the accent would be too big. One can now
understand why an `O' used as an accent over an `e' produces the
following: $\mathaccent"714F{e}$.

If the height $h_z$ of $z$ is smaller than $h_x$ then a kern
is added on top of $z$ in order to make the end height that of $x$.
Finally, the width of $z$ is set to the width of $x$.
%

\textsl{If the character to be accentuated is not single, $\delta$ is
 not increased by $h'_x-h_x$. In other words, one can forget about
 the subscript and superscript.
}%end of textsl

\subsection{Conclusions}
The x-height is used for the vertical placing of accents. This means
that one cannot mix in the same font table glyphs designed with
different x-heights.
\end{filecontents}



\begin{filecontents}{l3d007d.tex}
\begin{quote}\small
The re-encoding of the math fonts cannot be thought of without a
closer look at \TeX's mechanisms for dealing with extensible
characters. This includes all characters that come in different sizes,
all characters that are constructed, and the operators that usually
come in two sizes. The ultimate questions are: how
to implement these characters? What sort of kerning can be
done with them?
\end{quote}












\section{Let us start with the easiest: The operators}

A large operator like $\sum$ will be vertically centered with respect
to the axis when it is typeset. Thus, large operators can be used
with different sizes of text. This vertical adjustment is not made for
symbols of other classes. [The \TeX book p.155]

This is a sum in text: $\sum_{i=0}^{i=n} i$ and this is a prod in text:
$\prod_{j=0}^{j=m} j$.
Same test in display maths:$$ \sum_{i=0}^{i=n}
i\quad \prod_{j=0}^{j=m} j$$

The sum is defined as follows:
\begin{verbatim}\mathchardef\sum = "1350 \end{verbatim}
If \TeX\ is in display style, it looks to see if the character in
position "50 of family 3 (The extensible \fn{cmex} family) has a
successor\footnote{See below for explanations on successors.}.  If it
does then the successor is taken. When not in display style, \TeX\
just takes character "50 from family 3.

\subsection{Conclusions for operators}
Both occurrences of a double sized operator must be in the same font
table, because they are linked by the successor mechanism.

%JWZ commented this out: don't understand.
%It would be a good idea to try and make them all similar sizes so that
%they don't require many different sizes. This is due to font
%restrictions.
%\subsection{Going further}
%One wonders if the choosing is recursive, but presumably it isn't,
%because of the ``if we are in display mode'', which does not do any
%size testing.




\section{How characters can be linked}

\subsection{\texttt{charlists}}
They enable several characters of the same font to be linked together.
By simply typing \texttt{charlist oct"000": oct"020": oct"022":
 oct"040": oct"060"} in the metafont source, one links all the
occurrences (in the \fn{cmex} font) of the left parentheses in
increasing size-order.

They are used for:
\begin{itemize}
\item Linking variable-size delimiters,
\item Linking variable-width accents,
\item Making pairs of operators that come in textsize and
 display size.
\end{itemize}

\subsection{The \texttt{extensible} lists p318 metafont book.}

An extensible glyph is identified with one of its pieces. One has
to decide which piece is going to be used for this identification.
For instance, in \texttt{extensible oct"060": oct"060", 0, oct"100",
 oct"102";} the first \texttt{oct"060"} is the identifier of the whole
 glyph.
The next three characters are the top, middle, and bottom pieces. The
last character position is that of the piece to be repeated as many
times as necessary between the top
and middle, and between the bottom and middle pieces. All three pieces
are optional. When they are not needed, they are replaced with `0'.
But if a zero is put in the repeater position, then character `0' will
effectively be used as a repeater.

\subsection{Restrictions compiled from p318 of the Metafont book}
\begin{itemize}
\item An \texttt{extensible} identifier can only appear at the end of a
 \texttt{charlist}.
\item A kerning/ligtable label can only appear at the end of a charlist.
\item One cannot use an extensible identifier as a kerning label.
\item One cannot use an extensible identifier as a ligature label.
%JWZ commented out the next two lines.
%\item this restriction explains why one specifies the kerning and the
%  ligatures in the same table.
\end{itemize}

\subsection{Conclusions}
Kerning with parentheses is going to be very tricky...

A delimiter is made of two sets of characters that can be in two
separate font tables. The glyphs in these two sets (the delimiters),
can be kerned with the characters that accompany them in their font. So
one must place them correctly.

\note {There are no parentheses in the \fn{cmmi} fonts, so this
 possibility has not been used.}

There is a list of all \fn{cm} extensibles, and charlists in ``Computer
modern typefaces'' p66.


\section{The vertical constructables, or ``those that come in pieces''
 --- \textit{Delimiters}}

\subsection{A few notes}
First information about delimiters p.171 of the \TeX book. They are not
all of the same type. Some are (bigl,bigr) Open/close atoms, and
others are (bigm) rel and (big) ord atoms. On the other hand, a
\texttt{\string\left} \texttt{\string\right} grouping is inner.

When a delimiter gets larger, its height and depth both grow by the
same amount.

In the \fn{cmex} font, most of the vertical constructable glyphs are
below the baseline, in the \texttt{.tfm} file.

\subsection{A first description of the choice mechanism}
A delimiter is defined by a small ``character'' and a ``large
character''. \textbf{These characters can be in different families,
and therefore in different font tables.} We shall call the small
character $a$ in family $f_a$, and the large character $b$ in family
$f_b$.

The search first starts by considering the three different sizes of
char a in its family\footnote{i.e. the scriptscriptsize then the
 scriptsize and finally the textsize. This is done in an optimized
 way: if the current size is bigger than scriptscriptsize, no point
 looking in the scriptscriptsize font, same goes for the other two
 sizes.}. When testing a character in a given font table, its
successors are tested before going to the next bigger font. In other
words, for each member of a family, starting with the smallest, and
going to the biggest, \TeX{} first looks at char $a$ and then at its
successors\footnote{See later explanations.} in the same
font table. If nothing suitable is actually found within the family
$f_a$ a similar search is done in the family $f_b$ based on character
$b$.

The search stops when the character being tested has a sufficient
height plus depth, or when it is extensible\footnote{Obviously in this
 case the appropriate delim has been found, seeing as it can be made
 to any given size bigger than the non-extensible characters.}.

If either of the couples $(a,f_a)$ and $(b,f_b)$ is set to
$(0,0)$ then the corresponding part of the search is bypassed.

If none of the characters actually found are suitable, the biggest is
taken, i.e. the one with the greatest height plus depth.


\subsection{A second description of the choice mechanism by Victor E.}
\TeX\ first tries the small variant, and if that is not satisfactory
(or if the left part of the delimiter code is 000) it tries the large
variant. If trying the large variant does not meet with success \TeX{}
takes the largest delimiter encountered in this search. If no
delimiter at all is found, (which can happen if the right hand part is
also 000), an empty box of width \texttt{\string\nulldelimiterspace}
is taken.

Investigating a variant means in sequence:
\begin{itemize}
\item If the current style is scriptscript style, the scriptscript font
 of the family is tried.
\item If the current style is script style or smaller the script font of
 the family is tried.
\item Otherwise the text font of the family is tried.
\end{itemize}
Note: The plain format puts the \fn{cmex10} font in all three styles of
family three.

Looking for a delimiter at a certain position in a certain font means:
\begin{itemize}
\item If the character is large enough, accept it.
\item If the character is extensible, accept it.
\item Otherwise, if the character has a successor, (the same but
 bigger), try the successor.
\end{itemize}


\subsection{The final width of the delimiter ?}
If the character chosen ends up to be an \textbf{extensible} one,
\textem{the resulting width is that of the repeatable piece.}
Otherwise (in the case of a normal character) the width is that of the
chosen character \textem{plus its italic correction.}


\subsection{Conclusions for delims}
\begin{itemize}
\item The different sizes of a delimiter can be spread in two font
 tables if it is necessary.
\item A given delimiter does not absolutely have to have two sizes.
\item One can adjust the width of the repeatable piece for
 extensibles.
\item One can adjust the italic correction of the normal ``single
 glyph'' delimiters, in order to get it further away from things like
 $f$, $j$, $g$ and $p$. In the present case of `(' (in text
 size) it comes from the text font \fn{cmr*} so obviously has not got
 any italic correction. The vertical bar has not got any either
 (checked in the \fn{.pl} files). This is quite a global solution and
 the italic correction will be added in all cases: if it is small it
 may not bother anybody and should have the right effect
 in most cases.
\end{itemize}





\section{References}
The \TeX book ``Construction of math symbols'':
151 mathchoice: no good;
152: about delimiters and size choosing;
178: using phantom and vphantom - no good;
358: how large operators are assigned in plain, and
  some horizontal constructables;
359: all the 24 delims that can change size and the big and bigg macros;
360-361: nothing.

The \TeX book ``math symbols'': 127-128: nothing, 289: nothing, 290:
interesting things about delims.

The \TeX book ``Mathcode'':
134: tiny little bit at the bottom about mathcodes,
154: a list of the 8 classes and (3) about variable family and (bottom)
  about mathcode,
155: the mathcode "8000 + mathchardef + mathinner,
156: delcodes and delimiter,
157: radical,
214: nothing,
271: nothing, 289: nothing,
319,326: answer to exercises,
344: where all the mathcodes are set so that `1' comes from fam 0 and
`a' from fam 1,
345: where the delcodes for plain tex are set.

The \TeX book ``mathop'': 155 cf mathcode, 178 cf construction of math
symbs, 291 a bit about mathaccent, radical 324-325  361

The \TeX book ``delimiters'':
156: delcode;
157: radical;
214: nothing; 289: nothing; 271: nothing;
290: in the middle ``A delim...''; 345: see at the top;
359: Plain tex definitions of some delimiters;
146: A list of plain tex delims;
147: the bigs;
148-149: details for use of left right;
150: still more extensible chars (bottom not on growing delims);
171: info on the type of atoms made by big bigr bigl bigm;
437: openings and closings;
442: The search of the appropriate delimiter: good.

About radicals rule 11 page 443 appendix G. \TeX book

\TeX book: About operators rule 13 page 443, successors. Interesting
about italic corrections. The way limits are typeset rule 13a.

About parameter usage p447 \TeX book.
About math spacing p170 \TeX book.

\TeX\ for the impatient: p.126 nothing, p.194-196 a list of operators
and a few explanations.

\TeX\ by topic p.194: Large operators and their limits, the choosing
method: good.
\end{filecontents}



\begin{filecontents}{l3d007e.tex}
\begin{quote}\small
% jz -- a little change in the first sentence
 While working on the new math encoding, the writer realised that
 the fact the \texttt{cmex} font is only loaded in one size, and not in
 three like the other math fonts, was going to create a few problems.
 This paper deals with the following topics:
 \begin{itemize}
   \item What is in \texttt{cmex}?
   \item Which special mechanisms does \TeX\ use to access glyphs from
     \texttt{cmex}?
   \item What could be added to \texttt{cmex}?
   \item What could be taken out of \texttt{cmex}?
 \end{itemize}
 The aim of this paper is to help the MFG\footnote{Math Font Group.}
 design the MX encoding as a replacement and improvement of the
 \texttt{cmex} encoding.  \note{Most of what is written in this paper
   is pure theory, and has not been applied or tested.}

 Acknowledgements: thanks to Alan Jeffrey and Barbara Beeton for
 their constructive comments, and help.
\end{quote}

% bb -- suggest adding an introductory section 0 giving a bit of
%       background on knuth and early history; i would make suggestions
%       and vouch for the accuracy of the statements, or offer to be
%       quoted as the source if something is conjecture.

\section{What is in \texttt{cmex}?}
 \subsection{Delimiters}
 \begin{itemize}
 \item Four different sizes of ( ) and extensible versions. Left and
 right extensible modules are '102 and '103.
 \item Four different sizes of [ ] and extensible versions. The
 extensible  modules, one for the right bracket, and one for the left
   bracket are: '066, '067.
 \item Same for \{ and \}\,; the extensible module is: '076.
   \note{The extensible module here is very small, because it is
     added twice: once above the middle piece, and once below the
     middle piece. Its height is half that of the parentheses'
     extensible module. Interesting to see that there is only one
     extensible module for both the left and the right curly brace.
     This is because the left-right extension of a curly
     brace is symmetrical, unlike the parentheses for example.}
 \item Four different sizes of $\langle$ and $\rangle$. No extensible
 version.
 \item Same for $\backslash$ and $/$. No extensible version.
 \item Four different sizes of $\lfloor$ and $\rfloor$ and an
   extensible version. Extensible modules: '066, and '067.
 \item Same for $\lceil$ and $\rceil$. Same extensible modules as the
   previous one.
 \item Glyphs in positions '014 and '015 are the extensible versions
   of the vertical bar and the double vertical bar. They are their
   own extensible modules.
 \end{itemize}

 \subsection{Large operators}
 Large operators come in pairs:
 \begin{itemize}
   \item The sqcup
% bb -- although the sqcap isn't here, might want to note that it
%       isn't but could be added
   \item The circled integral
   \item The circled dot
   \item The circled plus
   \item The circled times
   \item The sums
   \item The prods
   \item The normal integrals
   \item The bigcups
   \item The bigcaps
   \item The U plus
   \item The wedges
   \item The vees
   \item The coprods
 \end{itemize}

 \subsection{Wide accents}
 \begin{itemize}
   \item 3 sizes for the hat
   \item 3 sizes for the tilde
 \end{itemize}

 \subsection{Radicals}
 \begin{itemize}
   \item Five radical signs
   \item The vertical bit needed to construct the big radical: '165
   \item The top bit of the constructed radical: '166
 \end{itemize}

 \subsection{Arrows}
 \begin{itemize}
 \item The three pieces for the construction of the vertical double
   arrow: '167 '176 '177
 \item The three pieces of the vertical single arrow: '077, '170,
   '171
 \end{itemize}

 \subsection{Horizontal curly braces}
 \begin{itemize}
 \item The four pieces for the construction of horizontal curly
   braces: '172 -- '175
 \end{itemize}







\section{\TeX's behavior with \texttt{cmex} glyphs}
 \subsection{Large operators}
 \begin{itemize}
 \item A large operator is vertically centered with respect to the
   math axis. This means that, whatever the surrounding glyph size,
   things will not look too bad.
 \item With the following definition of a large operator:
   \cn{mathchardef} \cn{sum} \texttt{= "1xyy}, if \TeX\ is in \textem
   {display style}, it looks to see if the character in position
   \texttt{"yy} of family \texttt{x} has a successor. If it does then
   the successor is taken. When not in \textem {display style}, \TeX\
   simply takes character \texttt{"yy} from family \texttt{x}. Thus in
   text style, in script style and in scriptscript style the same
   glyph is used.
 \end{itemize}

 \subsection{Vertical delimiters, and friends}
 Radicals are delimiters, and vertical arrows also, so let us only
 speak about delimiters. Here is a quote from Victor Eijkhout's book:
 \begin{quote}
   A delimiter has two codes: a small variant, and a large variant.
   \TeX\ first tries the small variant, and if that is not
   satisfactory (or if the left part of the delimiter code is 000) it
   tries the large variant. If trying the large variant does not meet
   with success \TeX\ takes the largest delimiter encountered in this
   search. If no delimiter at all is found, (which can happen if the
   right hand part is also 000), an empty box of width
   \texttt{\string\nulldelimiterspace} is taken.

   Investigating a variant means in sequence:
   \begin{itemize}
   \item If the current style is scriptscript style, the scriptscript
     font of the family is tried.
   \item If the current style is script style or smaller the script
     font of the family is tried.
   \item Otherwise the text font of the family is tried.
   \end{itemize}

   Looking for a delimiter at a certain position in a certain font
   means:
   \begin{itemize}
   \item If the character is large enough, accept it.
   \item If the character is extensible accept it.
   \item Otherwise, if the character has a successor (the same but
     bigger), try the successor.
   \end{itemize}
 \end{quote}
 Using the three size mechanism probably did not seem necessary to
 Knuth.  Generally large delimiters are used in display style, and not
 in script or scriptscript style. However, they can also be used in
 the small styles.

 \subsection{Wide accents}
 For the choice of accents, \TeX\ only considers one font, but looks
 to see if the current accent has a successor. Unlike the delimiter
 choice mechanism, the accent choice mechanism does not go through all
 three sizes. \TeX\ chooses the accent in such a way that the accent
 width is as close as possible but smaller than the width of the box
 to cover.

 \subsection{First conclusion}
 For operators, a distinction in size is made between text style and
 display style, whereas with the usual automatic size choosing
 mechanism, glyphs in text style and in display style are taken from
 the text size font, and are therefore the same size.



\section{Consequences of loading \texttt{cmex} in 3 different sizes}

 \subsection{Consequences for operators}
 For operators, let us consider two completely separate policies. In
 the one  case \texttt{cmex} is unchanged, and loaded in three
 sizes. In the other case, an imaginary font derived from \fn{cmex}
 called \texttt{cmex}$'$ is loaded in three sizes.  In \fn{cmex}$'$ the
 operators no longer have two sizes, thus glyphs like \cn{bigcup} do
 not have a successor in their font.

       \textem {All the following supposes that no new macros have been
 written. What would \TeX's automatic behavior be?}

 \begin{description}
 \item[If \texttt{cmex}$'$ is loaded in three sizes.] In
   such a situation \TeX\ has a large version of \cn{bigcup} (and
   other operators) in text size, a small version in script size, and
   yet a smaller version in scriptscript size.

   The operators can be centered: no problem.

% jz -- change:
   In script and scriptscript style the resulting `big operators' would
   be smaller than if they were produced with today's standard \TeX,
   and today's standard \texttt{cmex}.  What is more, one would be
   smaller than the other, which is also not the case with today's
   standard \TeX, and today's standard \texttt{cmex}.
% ----

   In display style one would get big operators from the text size
   font: this is acceptable.  \textem {But one would also get a big
     operator in text style,} and that does not conform with today's
   standard \TeX\ behavior.


% jz -- I added encoding:
 \item[If the existing \texttt{cmex} encoding is loaded in three
 sizes.]
   The operators can be centered: no problem.

   In script style, one would get the smallest version of a large
   operator. But coming from a small size font, that will produce
   something very small. In scriptscript style, same behavior as in
   script style, but the result would be even smaller.
% jz -- Added this:
   Thus in script, and in scriptscript style, the large `big
   operators' would never automatically be used.
   Hence the \texttt{cmex}$'$ encoding.
% -- maybe should have said that earlier :-)

   In text style, \TeX\ would produce the small version taken from
   the text size font. In display style \TeX\ would produce the big
   version of operators taken from the text size font.

   So in text style and in display style, there would be no change
   compared to what today's standard \TeX\ produces.
% jz -- changed the phrasing here:
   But script and scriptscript style would produce different
   results.
% --

 \end{description}

 In both cases, things could be improved if macros were written to
 override the present behavior of \cn{bigsqcup}. One could think of
 things like \cn{mathchoice}, but $\ldots$



 \subsection{For vertical delimiters, radicals, vertical arrows}
 Let us start by supposing \TeX\ is in scriptscript style, and it has
 to typeset a large delimiter. One should consider two cases:
 \begin{description}
 \item[The delimiter has an extensible variant.]  In this case the
   search will start in scriptscript size, and continue until \TeX\
   finds the extensible variant of scriptscript size. Then the search
   will stop, and the extensible will be used.  This extensible will
   come from scriptscript size, and therefore probably not look the
   same as it would in today's setup, where all extensibles come from
   text size.
 \item[The delimiter does not have an extensible variant.]  As
   previously, the search starts in scriptscript size. If nothing
   big enough is found in scriptscript size, the search continues in
   script size. If still nothing is found, the search then continues
   in text size. If necessary the biggest delimiter from text size
   will be used. If the search stops in text size, there is no
   difference with what \TeX\ produces today. But if the search stops
   before reaching text size, the chosen delimiter will be different
   from the one \TeX\ would use in the present configuration. Its
   strokes would be finer, and better adapted for use in script style.
 \end{description}

 If one supposes that \TeX\ is in script style, the previous two
 cases also apply, except that every occurrence of `scriptscript' must
 be replaced with `script'. If one supposes that \TeX\ is in text
 style, the result of loading three different sizes of \texttt{cmex}
 would be the same as it is in \TeX's current configuration.

 \subsection{For horizontal curly braces}
 If they are automatically taken from script size, or from script
 script size when necessary, the spacing changes a little,
 because the dimensions in the \texttt {.tfm} files would be
 different. A consequence of this could be different line and page
 breaks.

 However, it would be nice if curly braces did come out
 of the correctly sized fonts. Then their boldness would match the
 surrounding text. But apparently from a macro programming
 point of view things could be difficult, even if the glyphs are
 available and loaded.

 \subsection{For wide accents}
 See first paragraph of previous section.

% jz --
%  I guess that in actual fact, the wide accents were put in
%  \texttt{cmex} because they could not fit any where else.
% jz -- I don't want to start any discussion on that topic so i'm
% taking it out

 If accents were taken from the current size, things could only look
 better.
 The accent width would be closer to that of the material under the
 accent, and the accent's boldness would be better adjusted.

 \note {Unlike the delimiter choice mechanism, the accent choice
   mechanism is restricted to one font, and one size.  It will thus
   not look in text size when it is in script size for instance. So
   in script style, accents will always come out of the script size
   font, and in scriptscript style, accents will always come out of
   the scriptscript size font, etc.$\ldots$}

 \subsection{Conclusion}
% jz -- Changed the conclusion
 Nearly everything in \texttt{cmex} could have lived in a normal three
 sized math font, and maybe that would have been better. The only
 problems would have come from the specific ``big operator'' behavior
 required by Knuth.

 Also one must not forget that Knuth did not want to leave any empty
 slots.
% jz -- added this:

 The reduced amount of memory that was available on the machines with
 which \TeX\ was first used could have been another reason for loading
 \texttt{cmex} in one size only.






% jz -- This section has been changed a bit, and extended.

\section{What could be added to \texttt{cmex}?}
Let us now consider possible evolutions of \texttt{cmex}.
In spite of the terminology ``adding to \texttt{cmex}'', the font
resulting from these evolutions would have a different name.
 \subsection{If the \texttt{cmex} encoded font is loaded in three
 sizes}
 In this case big operators would not produce the usual results, and
 the rest would be slightly different, as stated above.
   \begin{itemize}

   \item One could add wide accents, but one would get slightly
     different (better) results. Thus wide accents would match the
     script and scriptscript styles. Macros could be made available
     as a style option to keep the old behavior, if necessary.

   \item One could increase the number of different sizes for accents.

   \item One could add big delimiters and their extensible versions,
     without any problem! Things will be slightly
     better adjusted in script and scriptscript style. Macros could
     be made (available as a style option) to keep the old behavior,
     if necessary.

   \item One could increase the number of different sizes for
     delimiters, and one could probably reduce the height of the
     extensible module in order to make the growing of delimiters
     more gradual.

   \item One could add some vertical extensible arrows!  Things will be
     slightly better adjusted in script and scriptscript style.

   \item One can add small and large `big operators' without any
     problem!

   \item Big improvement: one could add loads of other glyphs (symbols,
     etc.$\ldots$) that would come in all three sizes.

   \end{itemize}

 \subsection{If the \texttt{cmex} encoded font is only loaded in one
 size}
   \begin{itemize}
   \item One could add big delimiters and their extensible versions
     without any problem!

   \item One could increase the number of different sizes for
     delimiters, and one could reduce the height of the extensible
     module in order to make the growing of delimiters more gradual.

   \item One could add some vertical extensible arrows!

   \item One could add large operators without any problem!

   \item One could add wide accents without any problem, and the
   present behavior of wide accents would not change. But if wide
   accents are meant to match the script and scriptscript styles, then
   wide accents must go in another font that would be loaded in
   different sizes.

   \item One could increase the number of different sizes for accents.

   \item One could add other stuff, but it would only come in one
     size.

   \end{itemize}

 \subsection {If a \texttt{cmex}$'$ encoded font is loaded in three
     sizes}
       The imaginary \texttt{cmex}$'$ encoded font, previously
       described in this document is considered here.

 One could add the same things as when \texttt{cmex} is loaded in three
 sizes. The only difference is: if no macro programming were done, the
 text style and display style will produce the same `big operators'.
 In script and scriptscript style the `big operators' would be in
 different sizes from one another and smaller than those in text
 style.






\section{Conclusions}
If one loads \texttt{cmex} in three different sizes, many things are
improved, and with a \cn{mathchoice} the initial behaviour of large
operators could be kept, or available as a style option.

If \texttt{cmex} is kept in a single size, one must decide whether to
put wide accents in or not.







\section{The beginning of my \texttt{cmex10.pl} file}

\begin{verbatim}
(FAMILY CMEX)
(FACE O 352)
(CODINGSCHEME TEX MATH EXTENSION)
(DESIGNSIZE R 10.0)
(COMMENT DESIGNSIZE IS IN POINTS)
(COMMENT OTHER SIZES ARE MULTIPLES OF DESIGNSIZE)
(CHECKSUM O 37254272422)
(FONTDIMEN
  (SLANT R 0.0)
  (SPACE R 0.0)
  (STRETCH R 0.0)
  (SHRINK R 0.0)
  (XHEIGHT R 0.430555)
  (QUAD R 1.000003)
  (EXTRASPACE R 0.0)
  (DEFAULTRULETHICKNESS R 0.039999)
  (BIGOPSPACING1 R 0.111112)
  (BIGOPSPACING2 R 0.166667)
  (BIGOPSPACING3 R 0.2)
  (BIGOPSPACING4 R 0.6)
  (BIGOPSPACING5 R 0.1)
  )
(CHARACTER O 0 ...
\end{verbatim}



\subsection{Comments about the \texttt{cmex10.pl} file}
\begin{itemize}
\item The xheight is not equal to zero.
\item The space is equal to zero.

\item With the following:
\begin{verbatim}
(CHARACTER O 100
  (CHARWD R 0.875003)
  (CHARHT R 0.039999)
  (CHARDP R 1.760019)
  (VARCHAR
     (TOP O 70)
     (BOT O 73)
     (REP O 76)
     )
  )
\end{verbatim}
that is in the \texttt{.pl} file, one can produce something that looks
like a growing integral:
$$ \left\lmoustache \frac{3.q}{\frac{3\pi.r^2}{3.q.b.c}} \right.$$

\item The pieces used to construct the horizontal curly braces are not
linked in any way.

\item The bottom pieces of the extensible parentheses are overloaded
 for \cn{rmoustache} and \cn{lmoustache}. One of these could be linked
 (charlisted) with the integrals, so that \cn{left}\cn{bigint} could
 produce a growing integral like the delimiters.

\item The bottom pieces of the curly braces ('072 and '073) are also
 overloaded for \cn{lgroup} and \cn{rgroup}.

\item The middle pieces of the curly braces are overloaded for
 \cn{arrowvert} and \cn{Arrowvert}. Other single and double
 extensible bars with different spacing.

\item The extensible module of the curly braces is overloaded for
 \cn{bracevert}. A fat vertical bar.

\item The extensible modules of the parentheses are overloaded to
 produce more fat vertical bars.

\item More overloading: the construction pieces of the extensible
 brackets are also used for the $\lceil \rceil \lfloor \rfloor$. Thus
 the top left bracket piece ('062) identifies the left bracket; the
 top right bracket piece identifies the right bracket; the
 bottom left bracket piece identifies the $\lfloor$ extensible
 version; the bottom right piece identifies the $\rfloor$; the left
 extensible module identifies the $\lceil$; and the right extensible
 module identifies the $\rceil$. This over-loading may not be desired.

\item For the wide accents and the curly braces the depth is nil.

\item All the delimiter glyphs in \texttt{cmex} are set with a very
 small height and a big depth. This is because the radical
 primitive is also used for delimiters. For radicals, the \texttt
 {.tfm} height of the glyph is used to determine the size of the
 \cn{hrule}.

\item The extension modules do not have any height at all. Same for
 the arrow heads.

\item The four integrals have italic corrections.

\item Small versions of operators have a nil height, whereas
 big versions have a small height and a big depth:
 \begin{verbatim}
 (CHARACTER O 116
  (COMMENT This is the small \bigotimes)
  (CHARWD R 1.1111145)
  (CHARDP R 1.000013)
  (NEXTLARGER O 117)
  )
 (CHARACTER O 117
  (COMMENT This is the big \bigotimes)
  (CHARWD R 1.511116)
  (CHARHT R 0.100001)
  (CHARDP R 1.500012)
  )
 \end{verbatim}
 What is more, in the metafont code,
 both big and small versions of bigops are under the baseline.

\item There are no kerns or ligatures in \texttt{cmex}.

\end{itemize}








\section{Characters under the baseline}

\subsection{Which?}
It is understood and agreed that the radical glyphs need to be
virtually completely under the base line. \TeX\ uses their small
height to measure the thickness of the radical's over line. But
delimiters and `bigops' are also placed under the baseline for no
obvious reasons: both types of glyphs are just centered on the maths
axis.

\subsection{`Bigops' and metafont code}
\begin{description}
\item[The metafont padded operator.]
Although both big and small versions of bigops are placed completely
under the baseline (height=0), the big version ends up ---~in the
\texttt {tfm} file, with a non-nil height. Many people agree that the
reason for this is that the metafont code for the large version of
`bigops' contains the \cn {padded} macro. This last places some extra
space around the glyphs.  This extra space would serve for separating
the `bigops' from the limits they may take.

Large integrals do not have any padded macro, and thus in the \texttt
{tfm} file, they are placed completely under the base line.  The
reason for the difference between integrals, and other `bigops' could
be that the limits of the former are usually placed next to the glyph,
and not on top. Even when limits are placed on top of the integral,
the results do not look too bad, because the integral is very narrow.

The reason for `bigops' being set under the baseline is still unknown.
Whether or not they would be correctly centered on the math axis,
if they were placed over the base line is not sure either.

\item[Changing the metrics in the metafont code.] It is interesting to
 see how \TeX\ would manage if the `bigops' were placed over the base
 line.
 The best way to find out, is to change the metafont code of \texttt
 {cmex}, and see$\ldots$ I first did the test on \cn{bigoplus}
 because it was a good candidate ---~simple metafont code. I have
 never used metafont before. I copied all the necessary files into my
 directory, and changed their names to `\texttt {my*}'. I then did
 the following modifications in \texttt {mybigop.mf}
 (look for \%\%\%\%):
 \begin{verbatim}
   cmchar "\textstyle circle-plus operator";
   beginchar(oct"114",20u#,10/6dh#,0); %%%% was ,0,10/6dh#)
   adjust_fit(0,0); pickup pencircle scaled stem;
   lft x6=hround u; x2=w-x6; top y8=h; bot y4=-d; %%%% was top y8=0
   ...

   cmchar "\displaystyle circle-plus operator";
   beginchar(oct"115",27.2u#,14/6dh#,0); padded 1/6dh#;
        %%%% was ,0,10/6dh#)
   adjust_fit(0,0); pickup pencircle scaled curve;
   lft x6=hround u; x2=w-x6; top y8=h; bot y4=-d; %%%% was top y8=0
   ...

 \end{verbatim}

 When I ran Metafont on it there were no problems. After having put
 all the relevant files where they were meant to go, I gave the
 following to \TeX:

 \begin{verbatim}

    Old cmex in text style: $\bigoplus i \coprod$
    Old cmex in display style: $$-\bigoplus i\coprod \mathchar"034D$$

    % change font:
    \font\myfont=myex10
    \textfont3=\myfont
    \scriptfont3=\myfont
    \scriptscriptfont3=\myfont

    New myex in text style: $\bigoplus i \coprod$
    New myex in display style: $$-\bigoplus i\coprod \mathchar"034D$$
    The minus sign gives the height of the math axis, while the bottom
                of the word `base' gives that of the baseline.

    \bye

 \end{verbatim}

 Here is the output:

    Old cmex in text style: $-\bigoplus i \coprod$ \\
    Old cmex in display style:
        $$-\bigoplus i\coprod \mathchar"034D base$$

    % change font:
    %\font\myfont=myex10
    %\textfont3=\myfont
    %\scriptfont3=\myfont
    %\scriptscriptfont3=\myfont

    New myex in text style: $-\bigoplus i \coprod$ \\
    New myex in display style:
        $$-\bigoplus i\coprod \mathchar"034D base$$
    The minus sign gives the height of the math axis, while the bottom
                of the word `base' gives the baseline.

 The results are quite obvious: in both cases the \cn{bigoplus} is
 correctly centered. On the 300 dpi printer I have here there is no
 visible difference. What is more the \cn{mathchar"034D} proves that
 the metrics of the \cn{bigoplus} have changed: in the first case the
 glyph is placed under the baseline, and in the second case it is
 placed over the baseline. The difference is not visible if this
 document is printed with the wrong fonts.
\end{description}
\end{filecontents}



\documentclass{l3ms002}

\usepackage{l3d007}
\usepackage{ifthen}

\setlength{\emergencystretch}{2em}

\hyphenation{pre-sent fa-mi-ly}

\renewcommand{\abstractname}{\Large Acknowledgement}


\typeout{******************************************}
\typeout{* }
\typeout{* This document makes use of three fonts}
\typeout{* which you might not have on your system.}
\typeout{* These are:}
\typeout{* \space\space ecrm1000 \space\space T1 encoded Computer Modern}
\typeout{* \space\space msam10   \space\space AMS symbol font A}
\typeout{* \space\space msbm10   \space\space AMS symbol font B}
\typeout{* }
\typeout{* In that case type <RETURN> to the error message}
\typeout{* The output will not contain the font charts but}
\typeout{* will otherwise be readable.}
\typeout{* }
\typeout{* The document will also produce a number of underfull}
\typeout
      {* and overfull boxes. Please ignore them (or volunteer to help)}
\typeout{* }
\typeout{******************************************}

\newcounter{sleep}
\whiledo{\value{sleep}<1000}{\stepcounter{sleep}}

\begin{document}





\title{Technical Report on Math Font Encoding}
\author{Justin Ziegler}
\date{Started on June 13, 1993\\
                       Last change: June 1, 1994\\
                       Organisational updates: August 23, 2000\\
     Printed: \today \\
     Filename: \fn{l3d007.tex} }

\maketitle



\newcommand{\NFSS}{\textsf{NFSS}}

\chapter*{Foreword}

I'm pleased to present the final report on ``Math Font Encoding''
produced by Justin Ziegler for the \LaTeX3 project to the public.

Justin has worked for three months at the Johannes Gutenberg
University Mainz. His work was generously sponsored by GUTenberg
(The French \TeX{} Users Group) and by the ZDV of the University
of Mainz (Data Processing Center), the latter providing Justin with
office space and taking care of the administrative details.

In the past years a lot of work went into integrating  new fonts into
the \TeX{} system. Only five years ago, typesetting with \TeX{}
basically meant typesetting in Computer Modern. Nowadays many users can
choose  (at least theoretically) from several thousands of fonts.
Today, \NFSS{} is the standard font selection in \LaTeX{} and due to
this mechanism and the fontinst-package by Alan Jeffrey virtually every
PostScript font, in fact, every font for which a \texttt{tfm}-file can
be obtained, can be used, out of the box, with \LaTeX.

But for these thousands of text fonts there are only five font families for
use in math formulas
to go with them. Even worse, each of these math font sets is encoded
in a different way, making it nearly impossible even for an expert \TeX{}
user to use different fonts for math in different jobs.

The work undertaken by Justin is the first of several steps to solve
the problems at hand, the final goal being the development of a system
that allows the user
to change math fonts as painlessly as it is now possible with text
fonts.

Based on Justin's analysis and his proposal, the \LaTeX3 Project is now
undertaking to provide a prototype implementation for math fonts,
starting with the Computer Modern fonts as well as the Euler Math fonts.
We expect this implementation to be available for public usage
during 1995.


\begin{flushright}
Mainz, December 6, 1994\\[5pt]
Frank Mittelbach\\
Technical Director \LaTeX3 Project
\end{flushright}




\begin{abstract}

\vskip1cm
 I wish to thank the many people without whom my stay in
 Germany would not have been possible, and the work I did would not
 have been done. This includes:

 \begin{description}
 \item[GUTenberg] who financed my stay;

 \item[Ehoud Ahronovitz] for helping me with the administrative side
   of things, for giving me the opportunity of coming here, and
   spending extra time with me to make sure that everything went
   all right;

 \item[Frank Mittelbach] for his friendly welcome, for the
   organization, time and guidance;

       \item[Bernard Gaulle] the past president of GUTenberg, for the
               organization and logistics;

 \item[Klaus Merle] for lending all the material that I used;

 \item[Chris Rowley] for the organization, and help;

 \item[Stefan Steffens] for answering patiently all my stupid
   questions, and helping me integrate Mainz and the university;

 \item[The \LaTeX3 project] which partially financed my stay in England
   for the Aston conference;

 \begin{sloppypar}
 \item[Barbara Beeton and Alan Jeffrey] who commented on my papers, and
   answered more stupid questions;
 \end{sloppypar}

 \item[J\"org Knappen] who gave me advice on the project, and
   with whom I discovered the Mainz night life;

 \item[All the computer center employees] for making my stay more
   pleasant;

 \begin{sloppypar}
 \item[All the people who took the trouble to answer my mail,] for
 their time  and effort;
 \end{sloppypar}

 \item[Donald E. Knuth] who created \TeX.

 \end{description}

\end{abstract}


\setcounter{tocdepth}{1}
\tableofcontents




%                                This is the introduction




\chapter{Introduction / Overview}

\section{The technical environment}
I worked in the ZDV of the university of Mainz in Germany.  In German
ZDV stands for ``Zentrum f\"ur Datenverarbeitung'', which means Data
Processing Center.
This is where the main --- soft and hardware --- maintenance team
works.

I worked on an X-terminal like a lot of other people in the university.
For writing my documents I used GNU Emacs together with the
AUC\TeX\ package.



\section{A few Definitions}%    ***************** first section
       %jwzinria: one could get rid of this subsection ?
 \subsection{\TeX: a page description language}
 The best definition I can find for \TeX\ is: ``one third compiler,
 one third interpreter, and one third word processor''. It was
 written in $\fam 1 1975$ by D.~E.~Knuth and a group of students.
 One of its main features is its portability. A document written
 on one machine can be used on another machine.  Knuth also
 insisted that \TeX\ would not change. So a document written in $\fam
 1 1980$
 is still usable in $\fam 1 1990$.

 The language defined by \TeX\ is very specific, in so far as it is
 designed to describe a page layout. \TeX\ processes the page like a
 rectangle, or more exactly like a box, that can be filled with
 smaller boxes. These smaller boxes can similarly be filled with
 smaller boxes, and so on. The smallest box one can get is a
 vertical~/~horizontal line, or a character (a glyph), or just some
 space. \TeX\ has variables in which one can put boxes, or different
 types of numbers. One can define functions ---~usually called
 macros~--- in a way similar to lisp. The if-then-else statement is
 there, and combined with recursion it can be used to make loops.

 In spite of its limitations due to its specificity, \TeX{} defines
 a Turing machine. The syntax is very disagreeable, but one can get
 used to it: somebody wrote a BASIC interpreter in \TeX. The only
 difference between \TeX{} and a usual compiler, is that \TeX{} stops
 the compilation when it gets to the pcode, and just puts it into a
 file.  This file, called the device independent file, can then be
 sent to a printer, a screen, or any other printing device.

 Today many people use \TeX. All \TeX\ users have got together and
 created TUG: \TeX\ Users Group.

 \subsection{Plain \TeX} \label{plain-tex}
       %jwzinria: one could get rid of this subsection ?
 Plain \TeX\ is the standard set of macros and definitions that comes
 with \TeX. It is written in \TeX.

 \subsection{\LaTeX{}: a document description language}
       %jwzinria: one could get rid of this subsection ?
 Just as \TeX\ is a language to describe pages, \LaTeX\ is a
 language designed for describing whole documents, and their logical
 structure. The idea is that it lets the user concentrate on the
 contents of the document rather than the formatting commands
 necessary for the document to look good. Thus it uses the
 logical mark-up concept. It was written by Leslie Lamport in 1985.
 Technically, \LaTeX\ is ``only'' a cluster of macros written in \TeX.
 This means that a \LaTeX\ user has still got access to most of the
 \TeX\ language.  \LaTeX\ includes the following facilities:
 \begin{itemize}
      \item Cross referencing.
      \item Automatic construction of a table of contents.
      \item Automatic construction of an index.
      \item Bibliography referencing.
      \item Basically the same math mode as \TeX.
 \end{itemize}



 \subsection{The \LaTeX3 project}
       %jwzinria: one could get rid of this subsection ?
 During the 1989 TUG conference at Stanford, the decision was taken
 to produce an improved and expanded version of \LaTeX, that was
 going to be called \LaTeX3.

 The major difference in the new version will be the addition of a
 good interface through which designers can specify how classes of
 documents should be formatted.

 Frank Mittelbach is the technical director of the project; he and
 Chris Rowley are responsible for the management.


 \subsection{Fonts, glyphs, and slots}
 \TeX\ would not be able to produce any nice documents if it did not
 have any fonts. One cannot get a nice looking `A' or `$\mathcal{A}$',
 or any other letter if nobody has previously designed it.

 All \TeX\ really does, is produce a file that contains a set of
 instructions. Each instruction looks like the following: ``place
 here the picture that is in such and such a file, in position number
 $x$.'' \textem{The files that contain all the pictures (the letters
   and other symbols), are called ``fonts''. All the pictures that are
   in a font are called ``glyphs''.} Every glyph in a given font has
 a specific and known position. \textem{I shall use the word ``slot''
   to refer to a given position in a font.} Some slots can be empty,
 but most of them contain a glyph.




 \subsection{Font encodings} \label {about-encoding}
 When \TeX\ refers to the glyph number $x$, it must know which glyph
 is in position number $x$. This knowledge is contained in the
 encoding.  In some cases one could say that the letters are in the
 ASCII order. But this is not sufficient, because the
 ASCII code does not include all the glyphs that people wish
 to put in their documents. Therefore, one must link every single
 font with a given encoding, and make the encoding known by \TeX.
 Many different encodings exist, sometimes even for the same group
 of glyphs. But there are also many fonts that use the same encoding.

 A mathematical definition of an encoding could be the following:
 \textem{An encoding is a set of glyph names in a given order.}

 \subsection{The ``Computer Modern Fonts''}
 When D.~E.~Knuth created \TeX, he also created a set of fonts called
 the
 \textem{Computer Modern Fonts}. Most of them were based on an encoding
 that is called the Computer Modern Encoding throughout this document.

       All file names of Computer Modern Fonts start with the two
       letters `\texttt{cm}'.


 \subsection{Metafont: a font description language}
 Metafont is a language / program especially designed to describe glyph
 shapes, and more generally whole fonts.  It was used to generate all
 the  ``Computer modern'' fonts. The Metafont user must describe or
 ``program'' the curves for each glyph. Then Metafont produces an array
 of black and white dots for each glyph. The dots can be made as small
 as necessary to fit the precision of the printing device.


 \subsection{\TeX\ version 3}
 In the beginning of $\fam 1 1990$, under a lot of pressure (from the
 \TeX\ User Community), D.~E.~Knuth produced a new version of \TeX.
 \TeX\ version 3 was born. The main improvements were the following:

 \begin{itemize}
 \item Up to 256 glyphs per font. The previous versions of \TeX\ could
   only use the first 128 glyphs of a font.


 \item Virtual fonts. A normal font has all its
   glyphs in a file, and this file is in actual fact the font.
   Virtual fonts enable people to group 256 glyphs taken from many
   different fonts, and make \TeX\ think it is using one normal font.
   For instance, one could make a virtual font with lowercase letters
   in bold, and uppercase letters in italic. The user would work as
   if he was using one font, but the results would in actual fact be
   a combination of two fonts. A very good example implementation of
   virtual fonts is the creation of ``Small Caps'' fonts: the
   uppercase letters could come from a roman upright font at 12
   points, whereas the lowercase ones could come from a roman
   upright at 10 points.

   Virtual fonts enable still more ingenious things, like replacing
   glyphs with a set of \TeX\ macros.  One can then
   consider, for example, automatic raising or lowering of some
   letters.

 \item Better hyphenation. \TeX\ version 3 can have up to 256
   different hyphenation tables, and can produce good automatic
   hyphenation even when a word contains accents. The latter was not
   possible in previous versions. More generally the hyphenation
   mechanisms have been improved.

 \item The new ligature mechanism is more powerful. The result of a
   ligature is no longer only one glyph, but can be a set of
   glyphs...

 \item Special ligatures can be done at the beginning and at the end
   of words. Thus when a given letter is at the end of a word, its
   shape can be different from the shape it would have in the middle
   of a word.

 \item Better automatic adjusting of interword space.

 \item More little details that make everybody happy...
 \end{itemize}


 \subsection{The ``DC Fonts''}
 Although D.~E.~Knuth included a lot of ``European glyphs'' in his
 Computer
 Modern fonts, more were needed. In $\fam 1 1989$ \TeX\ users got
 together in Cork, and designed some new fonts called the \textem{DC
 Fonts}.
 Thanks to the new features  of \TeX\ version 3 (256 glyphs per font
       encoding),
 DC fonts included for example more special letters for Catalan and
 Scandinavian languages.

       The DC Fonts used what is now called the \textem{Cork encoding.}
       All DC  fonts file names start with `\texttt{dc}'.


\section{My work}
%\section{My work}
One of \TeX's nicest features is its ability to typeset mathematical
formulae. There has now been over ten years of experience typesetting
mathematical material with \TeX.  During this time, \TeX's math mode
has been used to set a wide variety of material, including traditional
mathematics, categorical diagrams, chemical reactions, computer
programs and textual material such as `$5\frac12\%$' or `$\fam0M^{lle}$'.

In recent years, with the arrival of the Cork standard for typesetting
European text, and the Virtual Font standard, the fonts available for
use in \TeX\ have radically changed. The current situation is that
there are over 14,000 text fonts available for use in \TeX, but only
five math fonts:
\begin{itemize}
\item Computer Modern
\item Computer Concrete with Euler
\item Lucida Math
\item Lucida New Math
\item Math Time
\end{itemize}
Each of these fonts uses a different encoding, and each comes with its own
selection of \TeX\ macros.

Although the Cork encoding is rapidly being established as the
standard encoding for European Latin text, there is no similar
encoding for mathematics.  The result is:
\begin{itemize}
\item complex macro packages for using each math font.
\item it is difficult to set mathematics with Cork text, since the Cork
  encoding does not include the uppercase Greek.
\item installing PostScript math fonts such as Mathematical Pi is very
  difficult.
\end{itemize}

Furthermore, the present math encoding includes glyphs like old-style
digits, and game card suits $(\spadesuit)$ that just do not belong in
a math encoding. On the other hand, many new glyphs have been
designed and should be included in the math encoding.

To solve these problems, a new math encoding, using all the power of
\TeX\ version 3, is needed. For this reason I have been trying to
re-organize all the glyphs that are needed to typeset mathematical
formulae with \TeX, according to various technical constraints.

The new math encoding that I am helping to produce is hopefully going
to be part of the \LaTeX3 package, and comes as one of the general
improvements of \LaTeX.

First I learnt to use \TeX. In a second stage, I had to study and
understand the technical constraints that apply on the grouping of
mathematical glyphs in a font. Only then could I actually start
thinking about which glyphs should go where. I intensively used \LaTeX\
---~so that I permanently had an up to date record of what had been
done ---~and email, to communicate with the people I
was working with.


















\chapter{The \TeX nicalities of math typesetting}



\section{A brief description of \TeX's math facility}
 \begin{description}
 \item[Logical markup like \LaTeX.] For the design of \TeX's user
   interface, one of Knuth's concerns was that in the source code of
   a mathematical document the formulae should be readable in a
   linear manner. Thus when a mathematician thinks, he says to
   himself: ``$n$ over $n-1$'', and when a \TeX nician works, in order
   to produce the result $\frac{n}{n-1}$ he just has to type:
   \verb|n \over {n-1}|.

   The user is no longer bothered by trying to get this bit of text
   higher than this other bit of text.  He just gives \TeX\ the
   logical meaning of what should be typeset, and it is correctly
   placed.

 \item[The two math modes.] There are two ways to enter \TeX's math
   mode, which produce slightly different results with the same
   input. One mode is called the \textem{display mode,} and produces
   \textem{display style,} while the other is called \textem{text
     mode,} and produces \textem{text style.} The following input:\\
   \verb|    $ \int_0^1 \frac{1}{x}\;dx $| \\ produces
   \textem{text style:} $\int_0^1 \frac{1}{x}\;dx$, which can be
   mixed with text, whereas \\
   \verb|    $$ \int_0^1 \frac {1}{x} \;dx $$| \\ produces \textem
   {display style:}
   $$ \int_0^1 \frac {1}{x} \;dx $$ which is automatically
   centered and surrounded by space.

 \item[Automatic size change according to meaning.] When the user
   says to \TeX: ``this letter is a superscript'', or ``this number is
   a subscript'', \TeX\ automatically typesets the letter (or the
   number) in a smaller font size. \TeX\ does that same size adjustment
   for setting limits on glyphs like $\sum$, or $\smallint$.

 \item[Automatic placing for sub/superscript and for limits.] At the
   same time as \TeX\ changes size automatically when the user
   specifies a sub- or superscript, \TeX\ also raises and lowers the
   resulting text. When placing limits over a $\sum$, for example,
   \TeX\ automatically centers them over the sum:
   $$\sum_{i=0}^{i=n} i = \frac {n(n+1)} {2}$$

 \item[Size change for big operators.] One can see in the previous
   example that the two $\sum$ signs (one in the text and one in the
   example), are not set in the same size. \TeX\ changes the size of
   some big operators when they are set in a centered environment
   like that example is. The integral also changes size.

 \item[Automatic spacing and math classes.] As one can see in the
   previous example, \TeX\ also spaces various glyphs in a special
   way. For instance the space around the $+$ sign is quite large,
   whereas the space between the $n$ and the open parentheses is
   comparatively reduced. Turning off the automatic mathematical
   spacing for the $+$ sign would produce the following: $(n \mathord
   + 1)$ versus $(n+1)$.

   From a \TeX nical point of view, the math spacing is done by
   dividing all mathematical glyphs into classes. For each class
   \TeX\ has different spacing rules. Thus a class 1 glyph followed
   by a class 2 glyph would not induce the same spacing as a class
   1 followed by a class 3. There is no point in giving all the spacing
   rules here. The different classes are listed below\footnote{Thanks
     to Victor Eijkout for the comments.}:
   \begin{enumerate}
   \item \textem{Ordinary:} lowercase Greek characters, and those
     symbols that are just `symbols';
   \item \textem{Large operators:} integral and sum signs, and `big'
     objects such as \cn{bigcap}, or \cn{bigotimes}. Large operators
     are centered vertically, and they may behave differently in text
     style, and in display style\footnote{See below for
       explanations.}.
   \item \textem{Binary operators:} plus, minus, and look
     alikes;
   \item \textem{Binary relations:} equal, less than, subset, and
     friends;
   \item \textem{Opening symbol:} opening brace, bracket,
     parentheses, etc$\ldots$
   \item \textem{Closing symbol:} closing brace, etc$\ldots$
   \item \textem{Punctuation:} most punctuation marks, with an
     exception or two;
   \item \textem{Variable family:} described further on in section
     \ref {var-fam}.
   \end{enumerate}

 \item[More symbols/glyphs.] Last but not least, \TeX's math facility
   gives the user easy access to special symbols: Greek letters,
   $\aleph$\footnote{$\aleph$ is a Hebrew letter, not a Greek one.},
   $\cap$, $\subset$, and many others that are often used in
   mathematical formulae.

 \end{description}


\section{Math styles}
 When Knuth wrote ``The \TeX book'', he extended the `display', and
 `text style' terminology. If \TeX\ is typesetting sub- or superscript
 material, one says that it is in \textem{script style.} Furthermore,
 if \TeX\ is typesetting sub- or superscript when it is already in
 script style, one says that it is in \textem {scriptscript style.} The
 style terminology must not be confused with the size terminology that
 is described further on: text size, script size, and scriptscript
 size.


\section{Font families}
 \subsection{What are font families? / a definition}
 In math mode, \TeX\ does not load fonts in the same way as it does
 in text mode. For maths, Knuth thought best to organize the fonts in
 families, and give each family a number. One font family can contain
 three fonts.

 The normal use is to load in a single family the same font in
 three different sizes. One size for the main text, one size for
 superscript and subscript, and one size for the exceptional
 super-superscript, or super-subscript. A good example ought to make
 things clear: $$\int_0^\infty\;e^{\alpha.x^\alpha}\;dx = \;\;?$$ It is
 clear that the $x$ is smaller than the $e$, and that the second
 $\alpha$ is smaller than the first, which is the same size as the $x$.

 \subsection{The organization of mathematical glyphs}
 In the present version of \TeX\ the mathematically used glyphs are
 organized in 4 families:
 \begin{description}
 \item[Family 0: Computer Modern Roman (\texttt {cmr})] This is a
   normal upright roman text font. It is loaded in a math family
   in order to typeset things like $\log$ or $\sin$. The other reason
   for which it is loaded into a math font family is that it
   contains the uppercase Greek alphabet, so that the user can
   typeset $\Psi$ and $\Gamma$, or even $\Upsilon$. A few other
   symbols are also taken from \texttt{cmr}: `;' `=' `( )' `[ ]' `:'
   `+' $\ldots$ See figure in appendix \ref
   {app-fonts}.

 \item[Family 1: Computer Modern Math Italic (\texttt {cmmi})] The
   \texttt {cmmi} font is one of the special math fonts. For a
   non-expert user, its letters look just like normal \textit {italic}
   letters. But in actual fact they are slightly different in their
   shapes, especially the lowercase. The reason for the letters being
   different is so that the variable $a$ can be easily differentiated
   from the article `a' used in ``a horse'' for example.

   Whereas \texttt {cmit}\footnote {The normal italic Computer
     Modern font.} contains ligatures, \texttt{cmmi} does not, and
   includes instead the Greek lowercase and uppercase alphabets in
   italic.

   A strange feature of \fn{cmmi} is that it contains some
   old style digits. Thus one can write $\fam1 1789$ or $\fam1 1942$
   which are quite different from 1789 and 1942. But these digits are
   never used in maths, so they do not belong in a font that is
   designed for use in maths.

   The \fn{cmmi} font also includes some other useful\footnote
   {Only for scientists though.} symbols / glyphs that one can see on
   the corresponding figure in appendix \ref {app-fonts}.

 \item[Family 2: Computer Modern Symbols (\texttt {cmsy})] One can
   find in this font the calligraphic alphabet that some scientists
   use: $\mathcal{A B C D E F G H}$ $\mathcal{ I J K L M N O}$ $\mathcal{ P Q R S
     T U V W X Y Z}$; plus lots of other symbols that only
   mathematicians could want to use: $\cap \cup \ominus \otimes
   \bigtriangleup \exists \; \forall \subset \le \succ \leftarrow
   \ldots$ See figure in appendix \ref
   {app-fonts}.

 \item[Family 3: Computer Modern Extensibles (\texttt {cmex})] All
   three sizes in this family are the same.  \texttt {cmex} mainly
   contains symbols that change size, automatically.
   One can produce:
   \[ \left\{
   \begin{array}{ll}
   u(x,y,z,t) & = u_{0}(x,y,t) + U(x,y,z,t) \\
   v(x,y,z,t) & = v_{0}(x,y,t) + V(x,y,z,t) \\
   w(x,y,z,t) & = w_{0}(x,y,t) + W(x,y,z,t) \\
   w'(x,y,z,t) & = w'_{0}(x,y,t) + W'(x,y,z,t)
   \end{array}
   \right. \]\label {extens-example}%
   with four or ten lines, and the `\{' will get bigger and bigger of
   its own accord, without the user specifying anything more. \texttt
   {cmex} also contains wide accents, so one can produce: $\widehat{a}$
   $\widehat{ar}$ $\widehat{arg}$. I have previously spoken about the
   automatic size change of some operators, whether in text, or in
   display, style. These double sized `big operators' are in \texttt
   {cmex}:
   $\bigcap$ and $\coprod$ in text style, and in display style:
   $$\bigcup \mbox{ and } \coprod \mbox{ and } \ldots$$
   The total contents of \fn {cmex} is
   shown in a figure in appendix \ref {app-fonts}.

   Most of the glyphs in \texttt{cmex} have a strange metric
   particularity, that makes them \TeX\ specific. Thus no other
   typesetting system can use those glyphs. Vice versa \TeX\ could not
   use those glyphs if they were made for another typesetting system.
   I spent a certain amount of time trying to understand all the
   tricks hidden in \texttt {cmex}, and wrote a document on the topic
   (see appendix \ref {app-rep-cmex}). The math font group was then
   able to take decisions concerning the replacement of \fn
   {cmex}.

 \item[The AMS symbol fonts: \texttt {msam} and \texttt {msbm}.] Many
   more mathematical glyphs, and an extra blackboard bold alphabet.
   They are not part of the standard \TeX, and are not loaded
   automatically in a family, but they are used on many sites. They
   were designed for the AMS (American Mathematical Society), for use with
   \TeX, and are now very widely spread. Their contents are shown in
   figures in appendix \ref {app-fonts}.

 \end{description}


 \subsection {How does \TeX\ identify glyphs? }
   \begin{description}

   \item[Glyph names.] In Plain (see section \ref {plain-tex}) many
     glyph names are defined. They refer to some of the numerous
     glyphs \TeX\ can typeset.

     The user can also define his own names for glyphs. To a
     glyph name must be associated a family number, and a position
     in the given family. On top of that \TeX\ likes to know which
     class the glyph belongs to. As well as the classes that have
     already been defined, there is an extra one:

   \item[The \textem {`variable family'} class and the \cn{fam}
     variable.] \label{var-fam} This class has nothing to do with
     spacing, and, to my mind, treating it as a class is one of
     Knuth's mistakes. It is used in particular for letters, but it
     could have other uses. If the calligraphic, upright, and italic
     letters all have the same position in their respective fonts,
     one does not want to define a different name for each letter in
     each shape. Instead, \TeX\ has a \cn{fam} variable, that
     contains the number of the current family where glyphs should be
     taken from. So when a glyph is of class \textem {`variable
       family'}, it is taken from the family number \cn{fam}. But
     that is not enough. Sometimes the \cn{fam} variable can be
     equal to $-1$, and there is no family number $-1$.  In such a
     case a default family number is used. So together with the class
     and the position, one can assign the default family number for
     each glyph name. When a glyph is not defined as being variable
     family, it always comes from the same family, and its family
     number is linked to its name in the same way as the class
     number.

     Example: when the user enters math mode, \cn{fam} is equal to
     $-1$, the letters come from the default family. By typing:
     \verb|$abda$| which produces `$abda$', one can see that the
     default family for letters is family number 1 (See family
     descriptions). If the user assigns the family variable to $0$
     then the letters will come from family $0$. Thus
     \verb|$\fam0 abda$| produces `$\fam0 abda$'. (See family
     descriptions).
   \end{description}







\section{Font metric files: The ``\texttt{.tfm}'' files}
 \subsection{A theoretical overview}
 When \TeX\ is typesetting a page, and making all the calculations
 that are necessary for this, it does not need the actual picture of
 the glyphs. All \TeX\ needs at this stage is the dimensions of the
 glyphs, and other numerical data. That information is in the
 ``\texttt{.tfm}''\footnote{\texttt{tfm} stands for ``\TeX\ font metric
   file.''} files, and every font has one.  Without it, the font is
 unusable as far as \TeX\footnote{There are some slight exceptions to
   this rule: in some cases a given font can use another font's
   ``\texttt{.tfm}'' file. But the visual results are not very good.}
   is  concerned.

 For mathematical typesetting \TeX\ uses all the information that a
 ``\texttt {.tfm}'' file can give. One of the first things I had to do
 was to study and understand the machinery hidden in the math fonts
 ``\texttt {.tfm}'' files. From a general point of view a font metrics
 file can contain the following data: \footnote {This is not
   restricted to \TeX.  Although the file formats may be slightly
   different, PostScript type fonts and others use similar
   metric files.  One can find programs to convert the files
   from one format to another.}

 \begin{description}
 \item [Font dimensions.] These are global parameters
   for the whole font. In a normal text font one would find the
   slant (positive on an italic or slanted font), the size of the
   interword space, other interword spacing parameters, more general
   spacing parameters, and the x-height. The latter is the height of
   the `x' glyph, and is used for correct accent positioning.

   The fonts in family 2 and 3 are a little special as far as font
   dimensions are concerned. \TeX\ looks in family 2 and 3 for more
   font dimensions than usual. This extra information is used for
   special math spacing.

 \item [Glyph dimensions.] Each glyph has a height, a width and a
   depth specified in the ``\texttt{.tfm}'' file. The height of the
   box that surrounds a glyph is equal to the height of the glyph
   plus the depth of the glyph, whereas the width is that of the
   glyph. I think it is important to say that \textem{these values
     are theoretical, and can be quite different from the real size
     of the glyph.} Thus some glyphs are bigger than their box. A
   good example of this is the italic `f': \textit{f}. The top right
   end, and the bottom left end stick out of the box. The right hand
   side of a given glyph box is also the left hand side of the
   next\footnote{The box on the right of the first one.} glyph box.

 \item [Kerns.] \label {about-kerning} They are necessary for the
   \textem{automatic adjustment } of the spacing between two glyphs.
   Many non professional electronic typesetting systems have for a
   long time ignored this refinement of traditional typesetting. The
   problem is the following: for visual comfort all the letters of
   the alphabet cannot be spaced in the same manner. For instance
   when an `A' is followed by a `V', the two letters must be brought
   closer together to produce `AV' versus `A{V}'. In other cases
   letters must be separated a little to produce `aj' versus `a{j}',
   or `f!' versus `f{!}'. Otherwise the spacing does not look correct
   compared to the spacing of surrounding letters.  In the
   ``\texttt{.tfm}'' file, for each glyph one can specify kerns with
   every other glyph \textem{of the font}. \textem{When two glyphs
     that are kerned in the ``\texttt{.tfm}'' file are found side by
     side in the right order, \TeX\ automatically brings them closer
     together, or farther away.}

 \item [Ligatures.] \label {about-ligs} Here again, the idea is to
   improve visual comfort, and reading. Some letters when followed by
   other particular letters do not look right. In this case the two
   glyphs side by side must be replaced by another glyph that will
   look much better. This is called a ligature. The best and very
   well known example occurs when an `f' glyph is followed by an `i'
   glyph.  The non-ligatured glyphs look like `f{i}', and
   \textem{\TeX\ automatically replaces} them with the ligature that
   looks like `fi'. In the ``Collection La Pleiade'', one can see many
   other ligatures if one looks hard enough.

   In \TeX\ version 3 the concept of ligatures is more general. It
   can use more than two letters, and has other interesting new
   features.

 \item [Italic corrections.] For this I can only quote Frank
   Mittelbach:
   \begin{quote}
     ``At the points where one switches from slanted or italic to
     upright, the glyphs usually come too close together, especially
     if the last slanted/italic glyph has an ascender\footnote{Here
       is something that has not been defined. The following letters
       have ascenders: l,k,h,f,t,b,d, in lowercase. One can guess
       what descenders are.}. The proper amount of extra
     white space that should be added at this boundary is called the
     `italic correction'. Its value depends on individual glyph
     shape, and is therefore stored in the ``\texttt{.tfm}'' file for
     each glyph. [...]  For an upright font the italic corrections
     are usually null. [...]  In slanted and italic fonts, the italic
     corrections are usually positive...''
   \end{quote}
   Example: in the word {\it dif}ferent, the first f runs into the
   second one. Whereas in the word \textem{dif}ferent, a little space
   is left between the two f's. That space is the f's italic
   correction.

 \item [`Skewchar' kerning.]  The skewchar is a specific character
   that is used for placing mathematical accents. In math mode, when
   an accent is placed on a glyph, the accent is first centered on
   top of the glyph's box, and then shifted rightwards by the amount
   of the kern between the glyph and the skewchar.

   Each font should have its own skewchar. For most characters, the
   ``{\tt .tfm}'' file specifies the kerning of each letter with its
   skewchar. This is true for the computer modern fonts, but other
   font designers may have chosen not to use this feature.

   Why choose one skewchar rather than another? This is because the
   character $\mathchar"017F$ chosen by Knuth does not have any other
   kerning that could have been disturbed by the skewchar kerning.
   This choice may not always be good for all fonts, because it
   depends on what the character in position '127 is.  Thus a font
   designer might choose another skewchar and put the necessary
   kernings in the ``{\tt .tfm}'' file. Accent glyphs can be used as
   skewchars, because they are not usually subject to kerning from
   other glyphs.


 \begin{sloppypar}
 \item[Charlists.]  \label {about-charlists} Charlists enable several
   characters in a font to be linked together.  The \texttt {cmex}
   font uses charlists a lot: by just typing \texttt{charlist oct
     "000": oct "020": oct "022": oct "040": oct "060"} in the
   metafont source code, one links in order of increasing size all
   the left parentheses that are in the font. Thus with this
   information contained in the ``\texttt {.tfm}'' file, \TeX\ can
   find the parenthesis that has the correct size for what is
   currently being typeset.

   Charlists are used for:
   \begin{itemize}
     \item Linking variable-size delimiters,
     \item Linking variable-width accents,
     \item Pairing the ``big operators'' that are typeset in
       different sizes in display style, and text style.
   \end{itemize}
 \end{sloppypar}

 \item[Extensibles.] \label {about-extens} Extensible glyphs can
   change size vertically (not horizontally), according to the
   context. A good example is given in section \ref {extens-example}
   where the \{ grows automatically.

   \begin{sloppypar}
   An extensible glyph is identified with one of its pieces. One simply
   has to decide which piece is going to be used for this
   identification.  In the following example: \texttt {extensible
     oct"060": oct"060", 0, oct"100", oct"102";} --- which appears in
   the metafont code of \texttt {cmex}, the first oct"060" is the
   identifier of the whole extensible glyph.  The next three
   characters are the top, middle, and bottom pieces of the glyph
   whose identifier is oct"060". The last character code is that
   of the piece to be repeated as many times as necessary between the
   top and middle, and between the bottom and middle pieces. All
   pieces are optional except the repeatable piece.
   \end{sloppypar}

   This mechanism is also used for the construction of the radical
   sign. But it only works for glyphs that grow vertically.
   Therefore the horizontal braces and the horizontal extendable
   arrows cannot use this facility.

\end{description}




 \subsection{Example: analysis of `\texttt{cmmi}' metrics}
 %\footnotetext{\texttt{cmmi} stands for Computer Modern Math italic,
 %  and is the default font for typesetting mathematics.}
 I shall use here the usual \TeX\ notation for writing octal numbers.
 Thus all numbers preceded by a little quote sign like '77 are in
 octal.
 \begin{itemize}
 \item Most characters in `\texttt{cmmi}' are kerned with the
   skewchar.

 \item Many Greek uppercase and lowercase letters are kerned to: `.'
   `,' and `/' respectively '72, '73, and '75. This takes us right up
   to position '50.

 \item Characters from '50 to '73 are not kerned at all. This
   includes: funny horizontal half arrows, two hooks for the arrow
   construction set, two triangles, the old style digits, the `.', the
   `,' and the `$<$'.

 \item The `$/$' sign is kerned with 1\footnote {The digit.}, A, M,
   N, Y, Z. Nothing to say about `$<$' and $*$ and $\partial$.

 \item Then come the uppercase Latin letters. They are not kerned
   among each other.  They are not kerned either with the lowercase
   letters. Just like the Greek letters, some of them are kerned with
   `.' `,' `$/$'.

 \item In my \fn{.pl} file, it looks as though N and X have got two
   different kerns with '75.  (Not yet any explanation for this.) The
   3 musical signs are not kerned with anything. The horizontal
   parentheses are not either.

 \item The lowercase Latin letters are not kerned with each other,
   except `$d$' that is kerned with $Y, Z, j, f$. Some of them are
   kerned with `,' `.'  `$/$' in a way similar to that of uppercase
   letters.

 \item The last characters are not kerned at all.
 \end{itemize}

 For compatibility reasons, all these kerns will have to be in the
 new encoding.

 See appendix \ref {app-rep-cmex} for a complete description and
 analyses of \fn{cmex10.tfm}.


%  \subsection{Large operators}
%  \subsection{Delimiters}


















\chapter{Dividing all the glyphs into groups}

\section{More vocabulary}\label{MoreVocab}
 \begin{description}
 \item[An ``encoding table''.] This conveys the traditional meaning
   of an encoding (see section \ref {about-encoding}). That is to say
   a set of 256 glyphs in a given order.  The expression ``encoding
   table'' is usually abbreviated: ``encoding''.

 \item[A ``slot'':] the usual word used for referring to a
   position in an encoding. A slot can contain a glyph, or be empty.
   It is represented by an integer between 0 and 255. A slot is
   \textem{not} a family in spite of the usage some people make of this
   word.

 \item[The ``math kernel''.] This terminology is used to specify the
   minimal group of fonts that is necessary for the math facility to
   work, as described in the \TeX\ documentation\footnote{And
     \LaTeX, AMS\TeX, etc., documentation.}.  In D.~E.~K.'s
   package (Plain) the math kernel consists of the families
   numbered from 0 to 3. Together with the kernel, many other fonts can
   optionally be loaded and used.

 \item[A ``math encoding'':] considered here as a
   whole. Not just one 256-glyph encoding table, but a set of $x$
   encoding tables, where $x$ is greater than or equal to the number of
   fonts in the math kernel.  I will sometimes refer to this concept
   with the abbreviation ``M-encoding''.

 \item[The ``default alphabet'':] the alphabet that is used
   when a user types \texttt{\$abc\$}. With Plain \TeX's math
   encoding that produces $abc$.

 \item[``Glyph compatibility'':] two encodings (or M-encodings) are
   glyph compatible, if they contain the same glyphs. The latter do
   not systematically have to be in the same positions. However
   identical glyphs must have the same
   metrics. The kerning and ligaturing information must
   also be identical in both M-encodings.


\end{description}

\section{General approach}
Taking all the glyphs one by one, and putting them in a font encoding
would have been too easy, and above all not satisfactory.  Instead one
must divide all the necessary glyphs into groups and subgroups, and then
try to match groups in individual encoding tables according to all
the constraints.

For instance, a typical group is the Latin alphabet: it includes the
uppercase letters A-Z, and the lowercase letters a-z.
Mathematicians often use accents on letters. For this they
need a dotless `i' (looks like: `$\i$') and a dotless `j'
(looks like: `$\j$') with every Latin alphabet. Thus the Latin
alphabet group contains the uppercase and lowercase letters, the
dotless `i' and the dotless `j'.

The grouping is based on different types of constraints: some
technical, and some based on glyph usage. These are detailed in the next
section.

\section{Grouping constraints}
Before being able to group the glyphs, grouping rules had to be
established.\footnote {I've put in appendices
\ref {app-accents}, \ref {app-rep-cmex}, and \ref {app-delims}, three
of the documents that I wrote for this purpose.}

At first I did not realise the importance of the design similarity
constraint for the person designing the font. Thus the first two
proposals did not really take it into account at all.

 \begin{description}
   \item [Kerning.] See section \ref {about-kerning} for a definition
   of  kerning. The kerning information for a given font can be found
       in its \texttt{tfm} file.  Thus two glyphs from the same
       encoding table can be kerned together, but the letter `f'
       belonging in a given encoding table, cannot be kerned with a
       glyph (the open parenthesis for instance) belonging in another
       encoding table. A group resulting from this constraint is: the
       group of glyphs that must be kerned with the default alphabet.
       This group and the alphabet will have to live in the same
       encoding table. This constraint is considered to be one of the
       most important.

       In fact this type of grouping is not so much grouping together
       all the glyphs that must be kerned, but putting together in one
       group the glyphs that need to be kerned with another group, in
       order to facilitate the counting.

 \item[Ligaturing.] See section \ref {about-ligs} for a definition of
       ligaturing.  In a similar manner to kerning, ligatures require
        that various glyphs live in the same encoding. If the letter
       `f' is to be ligatured with the letter `i' and produce the `fi'
       ligature, then those three glyphs `f',`i', and `fi' must live
       in the same encoding. In actual fact ligatures are not really
       used in math fonts. But they may be necessary one day. So
       empty slots should be left for ligatures where possible.

 \item[Design similarity:] another reason for which the letter A must
 live in the same font encoding as the letter B, and all the other
 letters. All the glyphs in a normal text encoding are designed to be
 visually compatible with each other. This should also be the case in a
 math encoding.  But all the compatible glyphs cannot live in the same
 font. There are simply too many of them. So one has to make a
 choice. Which glyphs must be alike? A lot of groups result from this
 constraint, which even comes into play when putting the groups
 together into encodings. A good example is the sim group.  `Sim' is
 the name given to the glyph: $\sim$. Many mathematical symbols contain
 such a sim. $\approx$ cannot be separated from $\sim$ because they
 must look alike, and for that they must be designed by the same
 person.  Even more, the $\sim$ and the $\approx$ should be produced in
 metafont using the same sub-routine, with the same parameters. This
 also explains why it is important that the letters of a given style
 all live together.


 \item[Charlists.] The reader is advised to re-read section \ref
   {about-charlists} if he no longer remembers what charlists are. The
   information that such and such a glyph is part of a charlist is in
   the \texttt{tfm} file.  Therefore charlists are also restricted to
   one font. Because of this all glyphs that are intended to be
   linked in a charlist must be put in the same font. Concerned
   by this restriction are:
   \begin{itemize}
   \item Wide accents, which are linked with a charlist in order of
     increasing size,
   \item Big delimiters: same as accents,
   \item The two sizes of big operators which are linked,
   \item All the different sized radicals.
   \end{itemize}

 \item[Extensibles.] In case of memory deficiency the reader is
   advised to take another look at the relevant passage in section
   \ref {about-extens}. As for charlists, the extensible
   information is part of the \texttt {tfm} file. The different
   pieces of an extensible glyph must therefore live in the same
   font. Concerned by this restriction are:

   \begin{itemize}
     \item Extensible delimiters (not all delimiters are extensible).
       This constraint is doubled by the fact that an extensible
       delimiter is often the last element of a charlist. Thus many
       glyphs must live together.

     \item Radicals: the last element of the radical charlist is an
                  extensible: it grows as high as necessary. In
                  the same way as delimiters, the glyphs used to
                  build the extensible radical are a subgroup of
                  the radical charlist group, and therefore must
                  live with the other members of the charlist.

     \item Vertical arrows or bars\footnote {But not horizontal
         arrows.}.
   \end{itemize}

 \item[Constructed symbols.] Some glyphs in a font are especially
   designed to be put next to each other. Good examples are the
   horizontal arrows, and the horizontal curly braces. Because of
   their horizontal characteristic, the extensible mechanism cannot
   be used. So the $$\underbrace {\mathrm {horizontal\ curly\ brace}}$$
   is built up with abutting glyphs. These glyphs must be of the same
   weight, and very well adjusted in order to fit together properly.
   They must therefore live in the same font.

\end{description}



\section {Constraint importance}
The design constraint is less important than the kerning constraint,
whereas keeping empty slots for ligaturing has very little
importance compared to the two former constraints.

Charlists' and extensible lists' members \textbf {must} stay together,
without exception. One could establish the following order of
importance:

 \begin{enumerate}
 \item Extensibles,
 \item Charlists,
 \item Constructed symbols,
 \item Kerning,
 \item Ligatures,
 \item Design similarities,
 \item Empty slots for ligatures.
 \end{enumerate}



\section{A few groups}

\begin{itemize}
\item The Greek letter sets,
\item The Greek-like glyphs,
\item The Latin letter set,
\item The Latin-like material,
\item The digits,
\item The vertical arrows,
\item The horizontal arrows,
\item The accents, wide, double, underaccents,
\item The core symbols: must live with the default alphabet,
\item The subset group,
\item The greater than group,
\item etc ...
\end{itemize}

A lot of the above groups were still divided into smaller groups in
order to make things fit in the encoding tables. Compromises had to be
made, in order to respect the constraints set by compatibility.



\chapter{Making encoding tables}
Similarly to the constraints governing the grouping, the constraints
governing the division into encoding tables listed below were not at
all obvious, and had to be thought of, and fully understood.

The construction of encoding tables largely depends on the main goals of
a new math encoding.

\section{The constraints of group grouping}
\begin{description}

 \item[Glyphs access.] (alphabets, variable family) This is
   another technical constraint due to the way \TeX\ accesses glyphs.
   It is also a user interface constraint, because the idea is to
   make alphabets easily accessible to the user.

 Due to the variable family mechanism (explained in section \ref
 {var-fam}), it is very practical for the user that font
 encodings contain only one alphabet. Thus when the fonts are
 loaded into the families, different letters can be accessed by
 changing the \cn{fam} variable, and typing the usual letters on
 the keyboard. For instance, when \cn{fam} is equal to $-1$, the
 default family is used. When \cn{fam} equals 2 the user can
 get the script alphabet. For this the user need only type
 \verb|$\fam=2 A,B,C$| and the letters $\cal{A,B,C}$ are
 produced.

 The alternative would be to have many alphabets in one encoding.
 In that case, to access script letters A,B,C for example, the user
 would have to type \verb|$\scriptA,\scriptB$|. That would be much more
 difficult to read, and less practical.

 This constraint --~due to glyph access~-- sets the shape of the whole
 M-encoding and has a very high priority.

 \item[Font access.] This only concerns the font that will replace
   \texttt {cmex}. For compatibility reasons, the math font group
   decided that it would be reasonable to try and replace \texttt
   {cmex} by a font that can be loaded in one size, \textem {and} in
   three sizes. Therefore the \texttt{cmex} replacement can only
   take:
   \begin{itemize}
   \item Wide accents,
   \item Big delimiters,
   \item Big Operators,
   \item Radicals (with a small change),
   \item Vertical extensible arrows.
   \end{itemize}
   It would be too long to justify the decision here, but the relevant
   document is in appendix \ref {app-rep-cmex}. One of the consequences
   of this is that one cannot put an alphabet in \texttt {cmex}'s
   replacement encoding. An alphabet must be available in all three
   sizes.
   Other glyphs are also victims of this limitation.

 \item[Kerning.] Obviously, if glyphs in two separate groups must be
               kerned, then those two groups must live together.

 \item[Design similarity.] This is a designer's constraint and
 therefore has low priority. Because of this low priority, it often
 happens that big design similarity groups are subdivided into smaller
 ones. In such cases one must try, insofar as is possible, to put the
 smaller groups back together.

 \item[Bold face.] Mathematicians and physicists often use boldface
   glyphs. These can either be directly available in some of the
   encodings, whereby the encoding will specify: here should go a
   bold uppercase `A' ---~and that could be next to a non-bold glyph;
   or none of the encoding tables specify whether or not the glyphs
   are bold, and a bold version of the whole M-encoding or of each
   encoding table can be made ---~as with text fonts.

          To reduce the total number of glyphs in the M-encoding, the
          second possibility has been chosen. But this induces another
          constraint on the global M-encoding: the individual encoding
          tables must be designed in such a way that the most commonly
          used bold glyphs are put together.

 \begin{sloppypar}
       \item[Compatibility with other font-using programs.] Since the
          invention of ASCII code, the first 32 slots of fonts were
          often not used for glyphs, but reserved for control
          codes. Today many programs are still not designed to use the
          first 32 slots of a font. Thus fonts should not contain any
          glyphs in those slots.
          But this would be a big waste for \TeX, because it can use
          glyphs in slots below 32.

          However, if the glyphs in the critical slots do not have any
          kerning relationship with other glyphs in the font, then the
          former can be put in another font, and be used with little
          difficulty even in problematic software. This seemed a fairly
          good compromise, so it was decided to fill slots below 32
          with glyphs that do not have any kerning with the others, and
          could thus be separated from them.

   On the same lines: some programs are unable to use fonts that do
   not have a space in position 32. To solve this problem, only one
   slot is concerned, so it was decided to include a space in every
   font. This should not be a problem.

 \item[Grouping \TeX\ specific glyphs: another compatibility
   issue.] \hfil
   The present \texttt{cmex} font/encoding contains glyphs
   that cannot be used by other typesetting systems, because they
   are set in a strange way. Similarly \texttt{cmsy} contains one
   glyph that is set in a strange way: the radical sign.  Therefore
   the whole of \texttt{cmsy} is unusable for other programs. Such a
   mistake must not be reproduced.

       It is hoped that the new \TeX\ math encoding will set a
       standard, that will not only be used by \TeX, but by all
       systems that typeset mathematical formulae. If everything goes
       according to plan, in the next few years many math fonts will
       exist, for many different systems, and they will all use the
       same M-encoding. Thus it will be very easy to use the same
       fonts on different systems. One day a \TeX\ user will be able
       to take a mathematical font from Microsoft Word, and convert it
       easily in order to use it with \TeX.

   If \TeX\ specific glyphs are grouped in one font, there will only
   be one problematic font. As it happens, all \TeX\ specific glyphs
   are more or less geometric, so they could be used next to
   different math fonts. On the other hand, if \TeX\ specific glyphs
   are spread around in many fonts, then many ``imported'' fonts will
   not be usable by \TeX\ without major changes.

   From a commercial point of view, if a font designer creates a math
   font for Adobe, the work necessary for adapting it to \TeX\ must
   be reduced to the minimum.  Otherwise nobody will provide any new
   math fonts for \TeX.

   \TeX\ specific glyphs are the following:
   \begin{itemize}
   \item The delimiters,
   \item The large and small `bigops',
   \item The radicals.
   \end{itemize}


 \item[Compatibility with Plain and \LaTeX.] Let us consider a user
   that has typed a document with the present math encoding, and in
   so doing has saturated the available families. If the new math
   encoding does not guarantee Plain and \LaTeX\ glyph compatibility
   with a \textem {maximum of 4 fonts,} then the document will
   not be able to run with the new math encoding: not enough
   families. Thus one should make the first four encoding tables of
   the global M-encoding glyph compatible with the Computer
   Modern cluster: cmr, cmmi, cmex, and cmsy.


 \item[Compatibility with AMS\TeX, AMS\LaTeX, and LAMS\TeX.]  Let us
   consider this time a user that has typed a document with the
   existing AMS\TeX\ or AMS\LaTeX\ package, and in so doing has
   saturated the available families.  If the new math encoding does
   not give AMS\LaTeX\ and AMS\TeX\ glyph compatibility with less
   than 6 encoding tables, then that document will not run with the
   new math encoding, for lack of family reasons.

   The first 6 encoding tables must be one way glyph compatible with
   the fonts provided in the AMS packages.
 \end{sloppypar}

 \item[Trying to give the Plain \TeX\ user a logical cluster of new
   glyphs.] No comment.

\end{description}

\section{The Aston-LC math encoding}
This is one of the proposals first thought of, but it is
not the one finally chosen, because it had many problems.

LC stands for Latin core. The main characteristic is the separation of
the Greek letter sets from the Latin ones.  In keeping these two sets
separate, we give the Greek letters an identity of their own, thus
making them quite independent of the rest. The idea goes in the
direction of orthogonal grouping. All the encodings that contain
letters would have them in the Cork encoding positions, thus making
access very simple. In fact this positioning concept will be taken
farther: Cork encoded glyphs that are in the new encoding, will keep
their Cork position.

\subsection{The encoding tables}
 \begin{description}
 \item[The text symbols: the TS encoding.] Here would be included the
 old  style numerals, and most of what is to be taken out of the
 present math encoding, because it does not belong with the rest of the
   math glyphs. Other symbols could be added in this encoding.

   This encoding is not part of the M-encoding, but it will contain
   symbols that previously were accessed via the math fonts. In
   normal usage, this font will not be loaded in a family. It will
   simply be loaded as a normal text font.

 \item[The base: a Cork encoded latin text font.] The main use of
   this font would be to typeset function names like \texttt
   {\string\log}. The idea being that the user can actually choose
   this font among the existing Cork encoded fonts. Thus `sin' can
   actually be typeset in the same style as the text, or in another
   special style to match the rest of the math glyphs.

 \item[The core: the MC encoding.] It would not contain any Greek
   glyphs (unlike \texttt {cmmi}). The basic accents (only one size)
   would be here, next to the default numerals. It would also include
   all the upper and lowercase default latin alphabet, all
   of the symbols that are most commonly used, and glyphs that must
   be kerned with the default alphabet.

 \item[The Greek alphabets: the MG encoding.] This encoding table
   would contain all the upper and lowercase Greek letters in
   upright and italic, plus some variable shape Greek letters, also
   in upright and italic, and some numeric Greek letters. Any other
   Greek related glyphs would also live in MG. If place is still
   available, one could include some symbols. An advantage of
   putting the italic Greek and upright Greek together, is that both
   are often requested in medium and in bold weight.

 \item[The extensibles: the MX encoding.] This encoding would look
   very much like the present \texttt {cmex} encoding: the usual
   extensible characters, together with some new ones. It could
   include any characters that have strange \TeX\ features like big
   descenders. Thus glyphs that are not compatible with the
   outer world would be kept together.

 \begin{sloppypar}
 \item[The math symbols: the MS1, MS2, MS3... encodings.] Each of
   these encodings would contain a set of Latin letters, like for
   instance script or blackboard bold, in upper or lowercase, or
   both, together with a set of matching accents if needed. In some
   cases a place should also be reserved for a set of matching
   numbers.  The rest would be filled up with symbols. There could be
   an  MS$_i$ encoding for:
   \begin{itemize}
   \item Calligraphic,
   \item Script,
   \item Open,
   \item Old German (Fraktur).
   \end{itemize}
 \end{sloppypar}

 \end{description}



\subsection{Other requested typefaces}
 \begin{itemize}
 \item A ``text-like'' italic or slanted font for computer science
   identifier-names and the like.  This would be Cork encoded.
 \item A ``bold upright'' for use as variables -- e.g. vectors in
   physics notation rather than the arrow over an italic letter. This
   would be Cork encoded.
 \item Bold italic for use as variables: an MC or Cork encoding.
 \item Bold Old german (occasional).
 \item Bold script (occasional).
 \item Sans serif lightface (occasional): Cork encoded font.
 \item Sans serif boldface (occasional): Cork encoded font.
 \item Bold symbols: the same encodings loaded in bold.
 \item Ultra bold symbols: the same encodings loaded in ultra bold.
\end{itemize}



\subsection{Summarizing the family occupation}
The following encodings are needed in the kernel:

\begin{enumerate}
\item A Cork encoded upright text font.
\item An MC encoded font containing the default alphabet, digits,
 accents, and symbols.
\item An MS$_1$ encoded symbol font for calligraphic/script.
\item An MX encoded extensible font.
\item An MG encoded font for Greek italic and upright.
\item An MS$_2$ encoded symbol font for Open and symbols.
\item An MS$_3$ encoded symbol font for Old german and symbols.
\end{enumerate}

This occupies 7 families, and leaves 9 free for anything else, (like
bold or sans...) and makes many symbols available.




\subsection{Pros and cons}
This proposal did not respect the limit of 4 and 6 families
(compatibility with Plain\TeX\ and AMS\TeX), nor did it enable the Latin
and Greek to be kerned together, nor could the Greek be kerned with the
same symbols as the Latin alphabet, unless these were
repeated. Generally, to get the equivalent of Plain \TeX, one would have
had to load 5 families, and to get the functionalities of AMS\TeX, one
would have needed to load 7 families.

One of the advantages was the orthogonality of the individual encoding
tables, i.e. there were no strange mixes like Latin and Greek, or
anything of the sort.

The main reasons for rejecting this proposal are:
\begin{itemize}
\item it is a big family consumer. In particular bold Latin and Greek
 would occupy two extra families, and they are frequently requested.
\item it does not enable kerning between the Greek and punctuation
 which is needed for compatibility ---~the punctuation is in
 a separate encoding table from the Greek.
\end{itemize}

The next proposal is more attractive...



\section{The Aston LGC math encoding}

LGC stands for Latin Greek core.  One of the main features of this
proposal is that the Greek and Latin alphabets have been put
together. In one font they could be upright, and in the other they could
be italic. A good reason for doing things this way is that the font
dimension called slant may give a few unexpected problems if italic and
non italic glyphs are mixed in the way that they would have been in the
Aston LC math encoding.


\subsection{The encoding tables}
\begin{description}

\begin{sloppypar}
\item[The text symbols: the TS encoding.] This would be the same as in
 the previous proposal, and would be used in a similar manner.

\item[The base: a Cork encoded latin text font.] This would be the same
 as in the previous proposal, and it would be used in a similar
 manner.
\end{sloppypar}

\item[The core: the LG encoding.] Instead of the MC encoding (in the
 previous proposal), the core could be duplicated. Once in upright,
 and once in italic.  The LG encoding would contain one instance
 of both Latin and Greek letter sets. So two LG encoded fonts would
 be used (upright, and italic).

 As far as the other slots are concerned, they could be filled in
 with the most used math symbols (similarly to the MC encoding),
 these would then appear once in upright, and once in italic. An
 alternative to such a duplication would be to make an LG1 encoding
 that would contain different symbols from an LG2 encoding, and these
 would always be in upright, whereas the letters would be specified
 as italic in LG1, and upright in LG2.

 \textbf{Note.} The user could choose whether he wants to load both LG1
 and LG2, or only one of the two.


\item[The extensibles: the MX encoding.] It would be the same as in
 the previous proposal, and it would be used in a similar manner.

\item[The math symbols: the MS1, MS2, MS3... encodings.] {\sloppy These
   would be the same as in the previous proposal, and they would be
   used in a similar manner.}

\end{description}

\subsection{And the rest?}
Similarly to the previous proposal, many other fonts could be loaded
in all the free families.


\subsection{Summarising the Family occupation}
\begin{enumerate}
 \item An LG encoded font containing Latin and Greek italic. (This
   could be LG1 if necessary. See explanations above.)
 \item An MS$_1$ encoded symbol font for calligraphic/script.
 \item An MX encoded extensibles font.
 \item An LG encoded font containing Latin and Greek upright. (This
   could be LG2 if necessary. See explanations above.)
 \item An MS$_2$ encoded font for Open and symbols.
 \item An MS$_3$ encoded font for Old German and symbols.
\end{enumerate}

Only 6 families are occupied. This leaves 10 families free for
anything else, (like bold or sans...)  and makes many symbols
available.



\subsection{Pros and cons}

One of the advantages of this proposal is that kerning can be done
between Latin and Greek (as long as they are in the same shape), and
between Greek and other symbols present in the encoding such as
punctuation. Also when bold is requested, one gets the bold Latin and
the bold Greek in the same font table, which again consumes fewer
families than having the two separate.

This proposal occupies fewer families than the previous one.

Reasons for abandoning the Aston LGC math encoding:
\begin{itemize}
 \item The user must be able to choose the look of his log, sin, and
   friends.  He may want them to be either text compatible, or
   compatible with the other math alphabets and the rest of the math
   glyphs in general. The choice must be left open, and the math font
   designer must not impose his decision on the user.
 \item A solution to the previous problem is to include another
   font for this purpose, as in the previous proposal. But then the
   family occupation rises up to 7, and three Latin alphabets are
   loaded, of which one (the LG upright) is probably not going to be
   used much. Thus a lot of precious space is wasted.
 \item The `Yaasp' proposal is much more attractive.
\end{itemize}




\chapter{The proposed YAASP encoding}
\label{app-yaasp}
\begin{quote}
  This chapter is the final proposal that was made. It is also the body
  of the official document that was produced. The reader may find some
  similarities with the previous sections, for instance some of the
  definitions can be found in section \ref {MoreVocab}. Also a lot of
  the points discussed in the global policy section have already been
  discussed.

\end{quote}

\input{l3d007a.tex}



\chapter{The glyph groups}

\input{l3d007b.tex}




\chapter{Final conclusions}

The `Yaasp' proposal, which is the final proposal made, is given in
chapter~\ref{app-yaasp}.

Working on the \LaTeX3 project in Mainz was very interesting for many
reasons:
\begin{itemize}
\item People next to me were working on net management and system
 maintenance. I used email intensively for communicating
 with other people working on the project. I used tar files and
 other programs to send large amounts of information to other people.

 It was a very good introduction to the network oriented studies I
 will be doing in my last year. I don't think that I really knew before
 what the network was.  Now I have a better idea.

\item I installed a test version of NFSS2. It was a good
 introduction to software installing, and enabled me to discover a
 few more UNIX tools. I hope I will be able to use this knowledge for
 installing various packages for \TeX\ and especially emacs in the
 Ecole des Mines de Saint Etienne.


\item The work I did was to a large extent research work and thus
 involved many topics for which no previous experience was
 available. For this reason the work seemed to go slowly, and we often
 had to go back to the drawing board and re-think points that we
 thought were already finished and done with. All of this, of course,
 was made worse by the fact that a lot of the communication was done
 via email.

  However, the final result was very positive. By the end of the three
  months, a complete proposal for a new math font set-up was produced.
  For Frank Mittelbach, technical director of the \LaTeX 3 project,
  this is a good achievement, and a big step forward. The next stage is
  to try and implement the proposal, and start testing it.


\item Last but not least, I greatly improved my English and my German.
 I learnt about another country, about its educational system, and
 about its habits, which one can only grasp by working in the country.
 I learnt how to integrate in a foreign environment, and how to deal
 with a few distressing problems: it was not easy to keep calm when
 my car packed up the week-end before I had planned to go back to
 France.  In one's own country garage mechanics aren't easy people to
 deal with; matters get even worse when it is in a foreign country.

\end{itemize}

The whole experience was very enriching in many domains. The difficulty
I had in speaking German proved to me that the teaching of languages in
the \emph{Ecole des Mines de Saint Etienne} is not good enough, and
must be improved and given more importance.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% appendix starts here.
\appendix


\chapter{Analysing \TeX's positioning of \cn{mathaccent}s}
\label {app-accents}
\input{l3d007c.tex}


\chapter{A close look at extensible characters}
\label {app-delims}
\input{l3d007d.tex}


\chapter{Replacing \texttt{cmex} ?}
\label{app-rep-cmex}
\input{l3d007e.tex}






\chapter{Fonts and font encodings}
\label{app-fonts}

The first 4 figures given on the next few pages show the standard fonts
used in plain \TeX\ for maths.
\begin{itemize}
\item \textbf{Computer Modern Roman:} loaded in family 0, shown in
 figure~\ref{fnt-tbl-cmr}.
\item \textbf{Computer Modern Math Italic:} loaded in family 1, shown in
 figure~\ref{fnt-tbl-cmmi}.
\item \textbf{Computer Modern SYmbols:} loaded in family 2, shown in
 figure~\ref{fnt-tbl-cmsy}.
\item \textbf{Computer Modern EXtensibles:} loaded in family 3, shown in
  figure~\ref{fnt-tbl-cmex}.
\end{itemize}
%
Figure~\ref{fnt-tbl-dcr} shows the DC-encoding with which the new
math encoding is designed to live. Unlike the \fn{cmr} encoding, the
Cork encoding does not include any Greek glyphs; this prevents its use
in family 0 for maths. But an upright text font is needed in family 0,
for mixing sub- and super-script in text. This problem has until now
prevented the widespread adoption of the DC-fonts.

The next two fonts, shown in figures~\ref{fnt-tbl-msam}
and~\ref{fnt-tbl-msbm}, are the AMS fonts, designed especially for use in
maths.
%
\begin{figure}[b]
 \dofonttable{cmr10}
 \caption{The \fn{cmr} encoding: 128 glyphs.}
 \label{fnt-tbl-cmr}
\end{figure}
%
\begin{figure}[b]
 \dofonttable{cmmi10}
 \caption{The \fn{cmmi} encoding: 128 glyphs.}
 \label{fnt-tbl-cmmi}
\end{figure}
%
\begin{figure}[b]
 \dofonttable{cmsy10}
 \caption{The \fn{cmsy} encoding: 128 glyphs.}
 \label{fnt-tbl-cmsy}
\end{figure}
%
\begin{figure}[b]
 \dofonttable{cmex10}
 \caption{The \fn{cmex} encoding: 128 glyphs.}
 \label{fnt-tbl-cmex}
\end{figure}
%
\begin{figure}[b]
 \dofonttable{msam10}
 \caption{The \fn{msam} encoding: 128 glyphs.}
 \label{fnt-tbl-msam}
\end{figure}
%
\begin{figure}[b]
 \dofonttable{msbm10}
 \caption{The \fn{msbm} encoding: 128 glyphs.}
 \label{fnt-tbl-msbm}
\end{figure}
%
\maxz=256 \maxiz=255
\begin{figure}[b]
 \dofonttable{ecrm1000}
 %\dofonttable{cmssdc10}
 \caption{The \fn{dcr} encoding: 256 glyphs.}
 \label{fnt-tbl-dcr}
\end{figure}
%

%\begin{figure}[b]
%  \dofonttable{cspex10}
%  \caption{The \fn{cspex} and Saint Mary Road encodings.}
%  \label{fnt-tbl-mary}
%\end{figure}



%\chapter{Requirements analysis}
%\label{app-require}











%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% end
\end{document}
\chapter{Daily detail of what I did (for personal record)}

\begin{itemize}

\jzitem{First week}
A lot of reading: Tex for the impatient...
Getting accustomed to the local environment: Emacs Bash Mwm...
Getting Telnet and mail connection with France.
Started my social report.
Looked a little at the code of latex.
Played around with latex. (wrote two letters)
Played around with the Tex boxes.
Read the chapter 7 from Frank's coming book.
Started to read a little more about the math fonts in \TeX.

\jzitem{Sunday 13th June}
Started this report. Got familiar with Frank's integrated documenting
system. Got to letter f in the tex index of commands. (evening)

\jzitem{Monday 14th June}
Trying to install NFSS2 in my environment.
Read a lot of the installation documentation.
Found the punk font, it was here.
Read to letter k of the tex index.(evening)

\jzitem{Tuesday 15th June}
Last details of installing nfss2.
Testing various features of NFSS2.
Playing with nfss2: first page of my report.
Read to letter p of the tex index.

\jzitem{Wednesday 16th June}
Still correcting a few problems with the installation of nfss2.
Installing the punk font in NfSS2:
made file nfpunk.dst based on nfpandor.dst.

\jzitem{Thursday 17th June}
Still installing the punk font in NfSS2.
Read mail about maths.
Went to get my car.
End of afternoon with Frank.
Read to letter t of the tex index.

\jzitem{Friday 18th June : time flies}
Ended the installation of the punk fonts.
Read the article from tugboat about the punk fonts.
Did the documentation for the pk fonts.
Played around with NFSS2 and texlatex: getting my presentation page ok.

\jzitem{Saturday 19th June}
Ended the tex index.
Read more about char in the Tex by topic.

\jzitem{Sunday 20th June}
Read still more about the Tex fonts and math fonts.
Tested char and loops =$>$ loops make problems in latex.
Tried PostScript fonts.

\jzitem{Monday 21st June} Tried to install the dunhill font with
nfss2: OK.  Maybe make a ``\texttt{.sty}'' file like punk. I find it
is a nice font.  Used raise and negative kerns for a joke.  Reading
more on math fonts.

\jzitem{Tuesday 22nd June}
Still more reading on mathfonts.-Families-mathcode-mathchar-greek
and testing. Finally solved all problems due to the loop.

\jzitem{Wednesday 23rd June}
Testing on skewchar, and more reading about maths.

\jzitem{Thursday 24th June}
More testing with skewchar.
Meeting with FMI

\jzitem{Friday 25th June}
All day : writing the paper about accents.

\jzitem{Week end}
nothing about tex or latex
oh yes: reading about fonts. (cahier gutenberg)

\jzitem{Monday 28th June}
Morning more testing with accents.
Afternoon: Meeting with FMI and Jorg K.

\jzitem{Tuesday 29th June}
Writing a summary of yesterday, and trying to figure out how to do the
work.

\jzitem{Wednesday 30th June}
Reinstalling NFSS2. Sending NFSS2 to Estonia. Reading all the mail
printed by Joerg.

\jzitem{Thursday 1st July}
More thinking about maths. Sending mail to people. Reading about the
Tex files.

\jzitem{Friday 2nd July}
More mailing.

\jzitem{Monday 5th July}
More mailing, meeting with Frank.

\jzitem{Tuesday 6th July}
More mailing, reading about the extensible chars.

\jzitem{Wednesday 7th July}
More mailing. Sending stuff to joerg. Dealing with Aston.
Continuing The paper on extensible chars.

\end{itemize}
\end{document}


% Local Variables:
% mode: latex
% TeX-master: t
% TeX-command-default: "LaTeX2+"
% End: