% This change extends patgen's pattern generation algorithm to deal with up to
% 10 different hyphen classes. The new algorithm has one new integer
% parameter, the number of hyphen classes (between 2 and 10). This parameter
% can be specified in columns 8 and 9 of the first line of the translate
% file. patgen will now produce pattern values up to 63.

@x l.54
@d banner=='This is PATGEN, Version 2.3' {printed when the program starts}
@y
@d banner=='This is PATGEN, Version 2.3 (with multiple hyphen classes)'
        {printed when the program starts; the suffix marks this extension}
@z

@x l.588
@!max_val=10; {maximum number of levels$+1$, also used to denote bad patterns}
@y
@!max_val=64; {maximum number of levels$+1$, also used to denote bad patterns}
  {raised from 10, so that pattern values may now need two digits}
@z

@x l.603
if max_val>10 then bad:=5;
@y
if max_val>100 then bad:=5; {pattern values are written with at most two digits}
@z

@x l.1107
print_ln('left_hyphen_min = ',left_hyphen_min:1,
        ', right_hyphen_min = ',right_hyphen_min:1,
        ', ',imax-edge_of_word:1,' letters');
@y
print_ln('left_hyphen_min = ',left_hyphen_min:1,
        ', right_hyphen_min = ',right_hyphen_min:1,
        ', hyphen_class_num = ',hyphen_class_num:1,
        ', ',imax-edge_of_word:1,' letters'); {now also shows the class count}
@z

@x l.1113
@ @<Globals...@>=
@!imax: internal_code; {largest |internal_code| assigned so far}
@!left_hyphen_min, @!right_hyphen_min: dot_type;

@ @<Set up default...@>=
begin left_hyphen_min:=2; right_hyphen_min:=3;
@y

@ The hyphen class that a pattern value stands for is its remainder modulo
|hyphen_class_num|. The code using this macro treats a result of~0 as
``no hyphen''.

@d hyphen_class(#)==(# mod hyphen_class_num)

@<Globals...@>=
@!imax: internal_code; {largest |internal_code| assigned so far}
@!left_hyphen_min, @!right_hyphen_min: dot_type;
@!hyphen_class_num: 2..10; {number of hyphen classes, at most 10; default is 2}

@ @<Set up default...@>=
begin left_hyphen_min:=2; right_hyphen_min:=3;
hyphen_class_num:=2; {the default number of hyphen classes}
@z

@x
If the values specified for \.{\\lefthyphenmin} and \.{\\righthyphenmin}
are invalid (e.g., blank) new values are read from the terminal.
@y
Another addition is that columns 8 and~9 may optionally contain a value for
\.{\\hyphenclassnum}. If both columns are blank the default value is kept,
so a translate file that does not use these columns remains valid.

If the values specified for \.{\\lefthyphenmin} or \.{\\righthyphenmin}
are invalid (e.g., blank), or if a non-blank value given for
\.{\\hyphenclassnum} is invalid, new values are read from the
terminal.
@z
@x l.1148
if (n>=1)and(n<max_dot) then right_hyphen_min:=n@+
else bad:=true;
@y
if (n>=1)and(n<max_dot) then right_hyphen_min:=n@+
else bad:=true;
if (buf[8]<>' ')or(buf[9]<>' ') then {an explicit value was given}
  begin n:=0; {stays 0, and is thus rejected, unless the columns are well formed}
  if xclass[buf[9]]=digit_class then
    if buf[8]=' ' then n:=xint[buf[9]]
    else if xclass[buf[8]]=digit_class then n:=10*xint[buf[8]]+xint[buf[9]];
  if (n>=2)and(n<=10) then hyphen_class_num:=n@+
  else bad:=true;
  end; {blank columns keep the default |hyphen_class_num|}
@z

@x l.1358
@!more_to_come: boolean;
@y
@!more_to_come: boolean;
@!off_count: array [1..9] of integer; {weighted counts of the difference}
  {between the found and the wanted hyphen class; the slots are, in order,}
  {off by $<-3$, $-3$, $-2$, $-1$, $0$, $1$, $2$, $3$, $>3$}
@z

@x l.1480
@ @<Output this pattern@>=
begin  for d:=0 to pat_len do hval[d]:=0;
 repeat  d:=hyf_dot(h);
   if hval[d]<hyf_val(h) then hval[d]:=hyf_val(h);
   h:=hyf_nxt(h);
 until h=0;
 if hval[0]>0 then write(patout,xdig[hval[0]]);
 for d:=1 to pat_len do
 begin  write_letter(pat[d])(patout); write(patout,xext[pat[d]]);
   if hval[d]>0 then write(patout,xdig[hval[d]]);
 end;
 write_ln(patout);
end
@y
@ Since we have increased |max_val|, a pattern value may need two digits.
The tens digit is written only when it is nonzero, and a value of zero is
not written at all.

@<Output this pattern@>=
begin  for d:=0 to pat_len do hval[d]:=0;
 repeat  d:=hyf_dot(h);
   if hyf_val(h)>hval[d] then hval[d]:=hyf_val(h); {keep the largest value}
   h:=hyf_nxt(h);
 until h=0;
 if hval[0]>0 then
 begin if hval[0]>=10 then write(patout,xdig[hval[0] div 10]);
   write(patout,xdig[hval[0] mod 10]);
 end;
 for d:=1 to pat_len do
 begin  write_letter(pat[d])(patout); write(patout,xext[pat[d]]);
   if hval[d]>0 then
   begin if hval[d]>=10 then write(patout,xdig[hval[d] div 10]);
     write(patout,xdig[hval[d] mod 10]);
   end;
 end;
 write_ln(patout);
end
@z

@x l.1502
@!dots: array[word_index] of hyf_type; {current hyphens}
@y
@!hclass: array[word_index] of digit; {wanted hyphen class at each position}
  {of the current word; 0 where no hyphen is wanted}
@z

@x
applied to all following words (until the next global word weight).  A
digit at some intercharacter position indicates a weight for that position
only.

The |read_word| procedure scans a line of input representing a word,
and places the letters into the array |word|, with |word[1]=word[wlen]=
edge_of_word|.  The dot appearing between |word[dpos]| and |word[dpos+1]|
is placed in |dots[dpos]|, and the corresponding dot weight in
|dotw[dpos]|.
@y
applied to all following words (until the next global word weight).  A
digit at some intercharacter position that is not preceded by a hyphen
character at that position indicates a weight for that position only.
A digit that follows the hyphen character indicates the hyphen class of
the hyphen wanted at this position; a hyphen character without such a digit
stands for class~1.

The |read_word| procedure scans a line of input representing a word,
and places the letters into the array |word|, with |word[1]=word[wlen]=
edge_of_word|.  The class of the hyphen appearing between |word[dpos]| and
|word[dpos+1]| is placed in |hclass[dpos]|, and the corresponding dot weight in
|dotw[dpos]|.
@z

@x l.1537
@p procedure read_word;
label done, found;
var c: text_char;
@!t: trie_pointer;
begin read_buf(dictionary);
word[1]:=edge_of_word; wlen:=1; buf_ptr:=0;
repeat incr(buf_ptr); c:=buf[buf_ptr];
 case xclass[c] of
 space_class: goto found;
 digit_class:
   if wlen=1 then {global word weight}
     begin if xint[c]<>word_wt then wt_chg:=true;
     word_wt:=xint[c];
     end
   else dotw[wlen]:=xint[c]; {dot weight}
 hyf_class: dots[wlen]:=xint[c]; {record the dot |c|}
 letter_class: {record the letter |c|}
   begin incr(wlen);
   if wlen=max_len then
     begin print_buf; overflow('word length=',max_len:1);
     end;
   word[wlen]:=xint[c]; dots[wlen]:=no_hyf; dotw[wlen]:=word_wt;
   end;
 escape_class: {record a multi-character sequence starting with |c|}
   begin incr(wlen);
   if wlen=max_len then
     begin print_buf; overflow('word length=',max_len:1);
     end;
   get_letter(word[wlen]); dots[wlen]:=no_hyf; dotw[wlen]:=word_wt;
   end;
 invalid_class: bad_input('Bad character');
@.Bad character@>
 end;
until buf_ptr=max_buf_len;
found: incr(wlen); word[wlen]:=edge_of_word;
end;
@y
@p procedure read_word;
label done, found;
var c: text_char;
i: word_index; {index used to clear |hclass|}
@!t: trie_pointer;
begin read_buf(dictionary);
word[1]:=edge_of_word; wlen:=1; buf_ptr:=0;
for i:=0 to max_len do hclass[i]:=0; {no hyphen is wanted anywhere yet}
repeat incr(buf_ptr); c:=buf[buf_ptr];
 case xclass[c] of
 space_class: goto found;
 digit_class:
   if wlen=1 then {global word weight}
     begin if xint[c]<>word_wt then wt_chg:=true;
     word_wt:=xint[c];
     end
   else if hclass[wlen]>0 then begin {a hyphen was seen here: digit is its class}
        {NOTE(review): a class digit of 0 resets |hclass[wlen]| to 0}
        if hyphen_class_num>xint[c] then hclass[wlen]:=xint[c]
        else error('unexpected hyphen class!');
   end else dotw[wlen]:=xint[c]; {dot weight}
 hyf_class:
   begin
       hclass[wlen]:=1; {class 1 unless a class digit follows}
   end;
 letter_class: {record the letter |c|}
   begin
   incr(wlen);
   if wlen=max_len then
     begin print_buf; overflow('word length=',max_len:1);
     end;
   word[wlen]:=xint[c]; hclass[wlen]:=0; dotw[wlen]:=word_wt;
   end;
 escape_class: {record a multi-character sequence starting with |c|}
   begin incr(wlen);
   if wlen=max_len then
     begin print_buf; overflow('word length=',max_len:1);
     end;
   get_letter(word[wlen]); dotw[wlen]:=word_wt;
     {|hclass[wlen]| is still 0 from the clearing loop above}
   end;
 invalid_class: bad_input('Bad character');
@.Bad character@>
 end;
until buf_ptr=max_buf_len;
found: incr(wlen); word[wlen]:=edge_of_word;
end;
@z

@x l.1628
@ The |change_dots| procedure updates the |dots| array representing the
printing values of the hyphens.  Initially, hyphens (and correctly
found hyphens) in the word list are represented by |is_hyf| whereas
non-hyphen positions (and erroneous hyphens) are represented by |no_hyf|. A
Here these values are increased by one for each hyphen found by the
current patterns, thus changing |no_hyf| into |err_hyf| and |is_hyf|
into |found_hyf|. The routine also collects statistics about the number
of good, bad, and missed hyphens.

@d incr_wt(#)==Incr(#)(dotw[dpos])

@p procedure change_dots;
var dpos: word_index;
begin  for dpos:=wlen-hyf_max downto hyf_min do
 begin if odd(hval[dpos]) then incr(dots[dpos]);
 if dots[dpos]=found_hyf then incr_wt(good_count)
 else if dots[dpos]=err_hyf then incr_wt(bad_count)
 else if dots[dpos]=is_hyf then incr_wt(miss_count);
 end;
end;
@y
@ The |change_dots| procedure is named after an array called |dots|, holding
the printing values of the hyphens, that it once had to update. That array
is gone; only the name has been kept.

For each position from |wlen-hyf_max| down to |hyf_min| the routine compares
the hyphen class given by |hval| with the wanted class in |hclass|. It
collects statistics about the number of good, bad, and missed hyphens, and
counts how far the two classes are apart.

@d incr_wt(#)==Incr(#)(dotw[dpos])

@p procedure change_dots;
var dpos: word_index;
have: integer; {class given by |hval[dpos]|}
want: integer; {class recorded in |hclass[dpos]|}
diff: integer; {the difference |have-want|}
begin  for dpos:=wlen-hyf_max downto hyf_min do
 begin have:=hyphen_class(hval[dpos]); want:=hclass[dpos];
   {good/bad/miss statistics}
   if have=0 then
     begin if want>0 then incr_wt(miss_count);
     end
   else if have=want then incr_wt(good_count)
   else incr_wt(bad_count);
   {off statistics}
   if have+want>0 then
     begin diff:=have-want;
     if diff<-3 then incr_wt(off_count[1])
     else if diff>3 then incr_wt(off_count[9])
     else incr_wt(off_count[diff+5]);
     end;
 end;
end;
@z

@x l.1653
@ The following procedure outputs the word as hyphenated by the current
patterns, including any word weights. Hyphens inhibited by the values of
\.{\\lefthyphenmin} and \.{\\righthyphenmin} are output as well.

@p procedure output_hyphenated_word;
var dpos: word_index;@/
@!l: triec_pointer; {for |write_letter|}
begin if wt_chg then {output global word weight}
 begin write(pattmp,xdig[word_wt]); wt_chg:=false
 end;
for dpos:=2 to wlen-2 do
 begin write_letter(word[dpos])(pattmp); write(pattmp,xext[word[dpos]]);
 if dots[dpos]<>no_hyf then write(pattmp,xhyf[dots[dpos]]);
 if dotw[dpos]<>word_wt then write(pattmp,xdig[dotw[dpos]]);
 end;
write_letter(word[wlen-1])(pattmp); write_ln(pattmp,xext[word[wlen-1]]);
end;
@y
@ The following procedure outputs the word as hyphenated by the current
patterns, including the found hyphen classes. A correct hyphen is shown with
|found_hyf|, an incorrect one with |err_hyf|; a class greater than~1 is
appended as a digit. Hyphens inhibited by the values of
\.{\\lefthyphenmin} and \.{\\righthyphenmin} are {\it not} shown.

Hyphens are looked up only at the positions |hyf_min..wlen-hyf_max|. Should
the procedure be called for a word so short that this range is empty, every
letter is still written exactly once and |hval| is not consulted.

@p procedure output_hyphenated_word;
var dpos: word_index;@/
@!l: triec_pointer; {for |write_letter|}
@!have: integer; {hyphen class found at the current position}
begin
 for dpos:=2 to wlen-1 do begin
   write_letter(word[dpos])(pattmp);
   write(pattmp,xext[word[dpos]]);
   if (dpos>=hyf_min)and(dpos<=wlen-hyf_max) then begin
     have:=hyphen_class(hval[dpos]);
     if have>0 then begin
       if have=hclass[dpos] then write(pattmp,xhyf[found_hyf])
       else write(pattmp,xhyf[err_hyf]);
       if have>1 then write(pattmp,xdig[have]); {class 1 is implied}
     end;
   end;
 end;
 write_ln(pattmp,'');
end;
@z

@x l.1702
@ The globals |good_dot| and |bad_dot| will be set to |is_hyf| and
|no_hyf|, or |err_hyf| and |found_hyf|, depending on whether the current
level is odd or even, respectively. The globals |dot_min|, |dot_max|,
and |dot_len| are analogous to |hyf_min|, |hyf_max|, and |hyf_len|
defined earlier.

@<Globals...@>=
@!good_dot, @!bad_dot: hyf_type; {good and bad hyphens at current level}
@!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots}

@ @<Prepare to read dictionary@>=
if procesp then
 begin dot_min:=pat_dot; dot_max:=pat_len-pat_dot;
 if dot_min<hyf_min then dot_min:=hyf_min;
 if dot_max<hyf_max then dot_max:=hyf_max;
 dot_len:=dot_min+dot_max;
 if odd(hyph_level) then
   begin good_dot:=is_hyf; bad_dot:=no_hyf;
   end
 else begin good_dot:=err_hyf; bad_dot:=found_hyf;
   end;
 end;
@y
@ The globals |dot_min|, |dot_max|, and |dot_len| are the counterparts of
|hyf_min|, |hyf_max|, and |hyf_len| defined earlier. Neither |dot_min| nor
|dot_max| is allowed to fall below the corresponding hyphen limit.

@<Globals...@>=
@!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots}

@ @<Prepare to read dictionary@>=
if procesp then
 begin if pat_dot<hyf_min then dot_min:=hyf_min
 else dot_min:=pat_dot;
 if pat_len-pat_dot<hyf_max then dot_max:=hyf_max
 else dot_max:=pat_len-pat_dot;
 dot_len:=dot_min+dot_max;
 end;
@z

@x
   @!goodp: boolean;
@y
   @!goodp: boolean;
   have,get: integer; {hyphen classes: the one given by |hval|, and the one}
     {given by |hyph_level|}
@z

@x l.1729
@<Check this dot position...@>=
if no_more[dpos] then goto continue;
if dots[dpos]=good_dot then goodp:=true else
if dots[dpos]=bad_dot then goodp:=false else goto continue;
@y
@<Check this dot position...@>=
if no_more[dpos] then goto continue;
have:=hyphen_class(hval[dpos]); {class currently given by |hval[dpos]|}
get:=hyphen_class(hyph_level); {class corresponding to the current level}
if abs(get-hclass[dpos])<abs(have-hclass[dpos]) then goodp:=true
  {the level's class is closer to the wanted class}
else if abs(get-hclass[dpos])>abs(have-hclass[dpos]) then goodp:=false
  {the level's class is farther from the wanted class}
else goto continue; {same distance: skip this position}
@z

@x l.1750
@p procedure do_dictionary;
begin  good_count:=0; bad_count:=0; miss_count:=0;
@y
@p procedure do_dictionary;
var i: integer; {index into |off_count|}
begin
 miss_count:=0; bad_count:=0; good_count:=0;
 for i:=1 to 9 do off_count[i]:=0; {clear all nine counters}
@z

@x l.1771
 if (good_count+miss_count)>0 then
   print_ln((100*good_count/(good_count+miss_count)):1:2,' %, ',
     (100*bad_count/(good_count+miss_count)):1:2,' %, ',
     (100*miss_count/(good_count+miss_count)):1:2,' %');
@y
 if (good_count+miss_count)>0 then
   print_ln((100*good_count/(good_count+miss_count)):1:2,' %, ',
     (100*bad_count/(good_count+miss_count)):1:2,' %, ',
     (100*miss_count/(good_count+miss_count)):1:2,' %');
 {the nine |off_count| entries follow in index order, always printed}
 print_ln('off by <-3, -3,...: ',
          off_count[1]:1, ', ',
          off_count[2]:1, ', ',
          off_count[3]:1, ', ',
          off_count[4]:1, ', ',
          off_count[5]:1, ', ',
          off_count[6]:1, ', ',
          off_count[7]:1, ', ',
          off_count[8]:1, ', ',
          off_count[9]:1);
@z

@x l.1835
 digit_class:
   begin d:=xint[c];
   if d>=max_val then bad_input('Bad hyphenation value');
@.Bad hyphenation value@>
   if d>max_pat then max_pat:=d;
   hval[pat_len]:=d;
   end;
@y
 digit_class:
   begin d:=xint[c];
   if xclass[buf[buf_ptr+1]]=digit_class then begin
       incr(buf_ptr);
       c:=buf[buf_ptr];
       d:=10*d+xint[c];
   end;
   if d>=max_val then bad_input('Bad hyphenation value');
@.Bad hyphenation value@>
   if d>max_pat then max_pat:=d;
   hval[pat_len]:=d;
   end;
@z