%% This OTP, part of the Makor2 package for typesetting Hebrew with
%% Omega, is responsible for the minimal contextual analysis that
%% Hebrew demands.  That is, where appropriate, consonants at the ends
%% of words are converted to final forms.

%% This OTP handles the special case of lamed or lameddagesh followed
%% by the holam dot.

%% Also, this OTP converts the pattern `_a<guttural>' at a word end to
%% `<guttural>a'.

%% Character widths of the streams this OTP reads and writes.  Per the
%% Omega OTP file format these numbers are the size of one character in
%% octets, so both the input and the output are single-byte streams.
input:
       1;
output:
       1;
aliases:
       %% Named character classes for the rules in the expressions
       %% section.  Every value is a decimal character code; `a-b' is an
       %% inclusive range, `|' is alternation and `^(...)' is the
       %% complement of a class.

       %% Here is a subset of the gutturals (the alias keeps its original
       %% spelling because the rules refer to it by this name)...
       GUTTERAL        = (103 | 114) % het and ayin
                       ;
       %% Marker characters.  In the rules, an underscore after a FINAL
       %% consonant suppresses the final form and a circumflex forces it;
       %% the underscore also introduces the patah+guttural pattern.
       USCORE          = 95 % underscore character
                       ;
       CIRCUM          = 94 % circumflex char
                       ;
       %% Consonants that have a final form.  The rules compute the final
       %% form as (code - 1), so each code listed here must sit directly
       %% above the code of its final form.
       FINAL           = (107|110|112|116|118|%khaf,mem,nun,phe,tsadi
                          155|158|160|164|166) % dagesh forms
                       ;
       %% Vowels.  PATAH (71) and HOLAMDOT (73) both fall inside the
       %% first VOWEL range; SHEVA (74) is the gap between the first two
       %% ranges and is kept as a separate alias.
       PATAH           = 71
                       ;
       VOWEL           = (65-73|75-77|182-187) % vwls EXCEPT sheva
                       ;
       SHEVA           = 74
                       ;
       %% HOLAM is not referenced by any rule visible in this file; its
       %% code (85) is also listed in VALID.
       HOLAM           = 85
                       ;
       %% Any vowel, sheva included.
       ALLVWL          = ({SHEVA}|{VOWEL})
                       ;
       TROPE   = (209-225|227-239) % cantorial trope
               ;
       %% The four lamed variants that are merged with a following
       %% HOLAMDOT by the first group of rules.
       LAMED   = 108
               ;
       LAMEDDOT        = 156
               ;
       BLAMED  = 140 % bent lamed
               ;
       BLAMEDDOT       = 172
               ;
       HOLAMDOT        = 73
               ;
       %% TSADI (also a member of FINAL) has its own word-end rule.
       TSADI   = 118
               ;
       %% RQUOTE is not referenced by name in the visible rules; the
       %% tsadi rule emits the literal code 39 directly.
       RQUOTE  = 39
               ;
%% Here we identify the characters which are valid parts of words.
%% INVALID is the complement of exactly the same list, so the two lists
%% must be edited together.  Neither alias is referenced by the rules
%% visible in this file.
       VALID   =  (85|96-122|128-135|140|144-173|176-181|189|192-199)
               ;
       INVALID = ^(85|96-122|128-135|140|144-173|176-181|189|192-199)
               ;
       %% SPACE is not referenced by the rules visible in this file.
       SPACE   = 32
               ;
       %% Punctuation that may directly follow a word-final consonant
       %% (see the DOTORCOLON rule in the expressions section).
       DOTORCOLON      = (46|58) % period or colon
                       ;
expressions:
%% Notation used on the right-hand sides below (Omega OTP syntax):
%%   \n            the n-th character of the matched input
%%   #(expr)       the character whose code is the value of expr
%%   \(* + n)      the matched input without its first n characters
%%   \(* + n - m)  the matched input without its first n and last m
%%   <0,1>         (left-hand side) zero or one occurrence
%% An empty right-hand side deletes the matched input.
%% The special-case rules are deliberately listed before the general
%% ones; do not reorder them without checking the matching order.

%% Hebrew has no word initial forms, so we can get rid of the start-of-word
%% markers right away...  (The start marker is 254 followed by an opening
%% parenthesis; the end marker used further down is a closing parenthesis
%% followed by 254.)
         254 `('         =>
                         ;
%% Let us get the LAMED stuff out of the way: each lamed variant followed
%% by the holam dot is replaced by a single combined character.
       {LAMED} {HOLAMDOT}      => 180
               ;
       {LAMEDDOT} {HOLAMDOT}   => 196
               ;
       {BLAMED} {HOLAMDOT}     => 173
               ;
       {BLAMEDDOT} {HOLAMDOT}  => 189
               ;

%% Here is the convention for guttural+patah at the word end: the input
%% underscore, patah, guttural is emitted as guttural, patah and the
%% underscore is dropped.  Note that the pattern itself does not test for
%% the end-of-word marker; the underscore is the only trigger.
       {USCORE}{PATAH}{GUTTERAL}
                       =>\3 \2
                       ;
%% There are two kinds of vowels---regular vowels, and trope.  The trope
%% may precede or follow the vowels.  Usually, there is one trope (at most)
%% per syllable, but it is possible for there to be two.  This is why the
%% word-end rules below allow an optional trope on either side of an
%% optional vowel.

%% First, we handle the final-suppressing machinery: a FINAL consonant
%% followed by an underscore is emitted unchanged and the underscore is
%% dropped.
       {FINAL}{USCORE} => \1
                       ;
%% Now for the enforced finals: a FINAL consonant followed by a circumflex
%% is replaced by its final form (code - 1) and the circumflex is dropped.
       {FINAL}{CIRCUM} => #(\1 - 1)
                       ;
%% Now, the finals.  But first, another special case: a tsadi followed by
%% SHEVA directly before the end-of-word marker is emitted as final tsadi
%% (code - 1) followed by the literal code 39 (a single right quote, the
%% value of the RQUOTE alias); the end-of-word marker is consumed.
%% NOTE(review): presumably the right quote reaches this OTP encoded as
%% SHEVA -- confirm against the OTPs that run before this one.
       {TSADI}{SHEVA} `)' 254   => #(\1 - 1) 39
                       ;
%% Because of some bug or other in Omega1.15, it is not possible for
%% m2contest.otp to be as general as it should be.  Consequently,
%% we need the following rule, which can hopefully be removed when
%% Omega is fixed...
%% It treats a period or colon after the (optional) trope/vowel marks as a
%% word end: the consonant becomes final and everything after it,
%% including the punctuation, is copied through.
       {FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>{DOTORCOLON}
                               => #(\1 - 1) \(*  + 1 )
                               ;
%% The general word-end rule.  The end of a word is recognised by the
%% explicit end-of-word marker (closing parenthesis followed by 254), not
%% by inspecting the next character.  The consonant becomes final, the
%% optional trope/vowel marks are copied through, and the two marker
%% characters are dropped.
       {FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>`)' 254
                       => #(\1 - 1 ) \(*  + 1 - 2)
                       ;
%% Finally, strip off any remaining end-of-word markers...
       `)' 254         =>
                       ;