%% This OTP, part of the Makor2 package for typesetting Hebrew with
%% Omega, is responsible for the minimal contextual analysis that
%% Hebrew demands. That is, where appropriate, consonants at the ends
%% of words are converted to final forms.
%% This OTP handles the special case of lamed or lameddagesh followed
%% by the holam dot.
%% Also, this OTP converts the pattern `_a<guttural>' at a word end to
%% `<guttural>a'.
%% Character sizes for the two streams; both are declared as 1
%% (presumably bytes per character -- confirm against the Omega OTP manual).
input:  1;
output: 1;
aliases:
%% ---- Punctuation and control characters --------------------------------
USCORE     = 95;        % underscore character
CIRCUM     = 94;        % circumflex character
RQUOTE     = 39;        % single right quote
SPACE      = 32;
DOTORCOLON = (46|58);   % period or colon

%% ---- Consonants ---------------------------------------------------------
%% Only a subset of the gutturals is listed here: het and ayin.
GUTTERAL  = (103|114);
%% Consonants that take a final form: khaf, mem, nun, phe, tsadi on the
%% first line, their dagesh forms on the second.
FINAL     = (107|110|112|116|118|
             155|158|160|164|166);
TSADI     = 118;
LAMED     = 108;
LAMEDDOT  = 156;
BLAMED    = 140;        % bent lamed
BLAMEDDOT = 172;

%% ---- Vowels and trope ---------------------------------------------------
PATAH    = 71;
HOLAMDOT = 73;
HOLAM    = 85;
SHEVA    = 74;
VOWEL    = (65-73|75-77|182-187);   % every vowel EXCEPT sheva
ALLVWL   = ({SHEVA}|{VOWEL});       % sheva plus the vowels above
TROPE    = (209-225|227-239);       % cantorial trope

%% ---- Word membership ----------------------------------------------------
%% VALID lists the characters that may form part of a word; INVALID is the
%% complement of exactly the same set.
VALID    =  (85|96-122|128-135|140|144-173|176-181|189|192-199);
INVALID  = ^(85|96-122|128-135|140|144-173|176-181|189|192-199);
expressions:
%% A start-of-word marker is the pair 254 `('. Hebrew has no word-initial
%% forms, so these markers are of no use here and are deleted immediately.
254 `(' => ;
%% Lamed special case: any lamed variant immediately followed by the holam
%% dot is replaced by a single output code.
{LAMED}     {HOLAMDOT} => 180;   % plain lamed
{LAMEDDOT}  {HOLAMDOT} => 196;   % lamed with dagesh
{BLAMED}    {HOLAMDOT} => 173;   % bent lamed
{BLAMEDDOT} {HOLAMDOT} => 189;   % bent lamed with dagesh
%% Guttural + patah convention for word ends: the input `_' patah guttural
%% is emitted as guttural followed by patah; the underscore is dropped.
{USCORE} {PATAH} {GUTTERAL} => \3 \2;
%% There are two `kinds' of vowels---regular vowels, and trope. The trope
%% may precede or follow the vowels. Usually there is at most one trope per
%% syllable, but two are possible.
%%
%% Explicit overrides come first.
%% A trailing underscore suppresses the final form: keep the letter as is.
{FINAL} {USCORE} => \1;
%% A trailing circumflex enforces the final form: emit the letter's code
%% minus one.
{FINAL} {CIRCUM} => #(\1 - 1);
%% Now, the finals. But first, another special case:
%% tsadi at the end of a word followed by a
%% single right quote. The tsadi takes its final form (code minus one) and
%% the quote (39) is kept; the end-of-word marker `)' 254 is consumed.
%% NOTE(review): this rule used to match {SHEVA} instead of {RQUOTE}, which
%% contradicted the description above and turned a word-final tsadi+sheva
%% into final tsadi + quote. This assumes the quote reaches this OTP inside
%% the word markers -- confirm against the OTP that inserts them.
{TSADI}{RQUOTE} `)' 254 => #(\1 - 1) 39
;
%% Workaround: because of some bug or other in Omega1.15, m2contest.otp
%% cannot be as general as it should be, so a period or colon is treated
%% here as ending the word. This rule can hopefully be removed once Omega
%% is fixed.
%% Pattern: a FINAL consonant, then optionally a trope, a vowel (sheva
%% included) and a second trope, then a period or colon.
%% Output: the consonant's code minus one, then everything matched after
%% the first character, punctuation included.
{FINAL} {TROPE}<0,1> {ALLVWL}<0,1> {TROPE}<0,1> {DOTORCOLON}
    => #(\1 - 1) \(* + 1);
%% General case. Words can be ended by invalids, spaces, or the end of
%% input; here the end is recognised by the marker `)' 254.
%% Pattern: a FINAL consonant, then optionally a trope, a vowel (sheva
%% included) and a second trope, then the end-of-word marker.
%% Output: the consonant's code minus one, then the matched text without
%% its first character and without the two marker characters at the end.
{FINAL} {TROPE}<0,1> {ALLVWL}<0,1> {TROPE}<0,1> `)' 254
    => #(\1 - 1) \(* + 1 - 2);
%% Last of all, delete any end-of-word marker (`)' 254) that no earlier
%% rule consumed.
`)' 254 => ;