% This change extends patgen's pattern generation algorithm to deal with up to
% 10 different hyphen classes. The new algorithm has one new integer
% parameter, the number of hyphen classes (between 2 and 10). This parameter
% can be specified in columns 8 and 9 of the first line of the translate
% file. patgen will now produce values up to 63.
@x l.54
@d banner=='This is PATGEN, Version 2.3' {printed when the program starts}
@y
@d banner=='This is PATGEN, Version 2.3 (with multiple hyphen classes)'
{printed when the program starts; the suffix identifies this variant}
@z
@x l.588
@!max_val=10; {maximum number of levels$+1$, also used to denote bad patterns}
@y
@!max_val=64; {maximum number of levels$+1$, also used to denote bad patterns;
  values above~9 are written and read as two digits}
@z
@x l.603
if max_val>10 then bad:=5;
@y
if max_val>100 then bad:=5; {pattern values are written with at most two digits}
@z
@x
@ @<Set up default...@>=
begin left_hyphen_min:=2; right_hyphen_min:=3;
@y
@ The number of hyphen classes is kept in |hyphen_class_num|. The class
selected by a hyphenation value is that value modulo the number of classes,
so with the default of two classes odd values select a hyphen as before.
@d hyphen_class(#)==(# mod hyphen_class_num)
@<Globals...@>=
@!hyphen_class_num: 2..10; {allow up to 10 hyphen classes, default is 2}
@ @<Set up default...@>=
begin left_hyphen_min:=2; right_hyphen_min:=3;
hyphen_class_num:=2;
@z
@x
If the values specified for \.{\\lefthyphenmin} and \.{\\righthyphenmin}
are invalid (e.g., blank) new values are read from the terminal.
@y
Another addition is that columns 8 and~9 may optionally contain a value for
\.{\\hyphenclassnum}.
If the values specified for \.{\\lefthyphenmin}, \.{\\righthyphenmin} and
\.{\\hyphenclassnum} are invalid (e.g., blank) new values are read from the
terminal.
@z
@x l.1148
if (n>=1)and(n<max_dot) then right_hyphen_min:=n@+
else bad:=true;
@y
if (n>=1)and(n<max_dot) then right_hyphen_min:=n@+
else bad:=true;
{columns 8 and 9 are optional: when both are blank the default value of
 |hyphen_class_num| is kept and the line is not flagged as bad}
if (buf[8]<>' ')or(buf[9]<>' ') then
  begin if buf[8]=' ' then n:=0
  else if xclass[buf[8]]=digit_class then n:=xint[buf[8]]@+
  else bad:=true;
  if xclass[buf[9]]=digit_class then n:=10*n+xint[buf[9]]@+
  else bad:=true;
  if (n>=2)and(n<=10) then hyphen_class_num:=n@+
  else bad:=true;
  end;
@z
@x l.1358
@!more_to_come: boolean;
@y
@!more_to_come: boolean;
@!off_count: array [1..9] of integer;
{ weighted counts of positions, indexed by found class minus wanted class:
  off by $<-3$, $-3$, $-2$, $-1$, $0$, $1$, $2$, $3$, $>3$ }
@z
@x l.1480
@ @<Output this pattern@>=
begin for d:=0 to pat_len do hval[d]:=0;
repeat d:=hyf_dot(h);
if hval[d]<hyf_val(h) then hval[d]:=hyf_val(h);
h:=hyf_nxt(h);
until h=0;
if hval[0]>0 then write(patout,xdig[hval[0]]);
for d:=1 to pat_len do
begin write_letter(pat[d])(patout); write(patout,xext[pat[d]]);
if hval[d]>0 then write(patout,xdig[hval[d]]);
end;
write_ln(patout);
end
@y
@ Hyphenation values may now exceed~9, so every nonzero value is written as
one or two decimal digits; a zero value is not written at all.
@<Output this pattern@>=
begin for d:=0 to pat_len do hval[d]:=0;
repeat d:=hyf_dot(h);
  if hyf_val(h)>hval[d] then hval[d]:=hyf_val(h); {keep the largest value}
  h:=hyf_nxt(h);
until h=0;
if hval[0]>0 then
  begin if hval[0]>=10 then write(patout,xdig[hval[0] div 10]);
  write(patout,xdig[hval[0] mod 10]);
  end;
for d:=1 to pat_len do
  begin write_letter(pat[d])(patout); write(patout,xext[pat[d]]);
  if hval[d]>0 then
    begin if hval[d]>=10 then write(patout,xdig[hval[d] div 10]);
    write(patout,xdig[hval[d] mod 10]);
    end;
  end;
write_ln(patout);
end
@z
@x l.1502
@!dots: array[word_index] of hyf_type; {current hyphens}
@y
@!hclass: array[word_index] of digit; {current wanted hyphen classes,
  zero where no hyphen is wanted}
@z
@x
applied to all following words (until the next global word weight). A
digit at some intercharacter position indicates a weight for that position
only.
The |read_word| procedure scans a line of input representing a word,
and places the letters into the array |word|, with |word[1]=word[wlen]=
edge_of_word|. The dot appearing between |word[dpos]| and |word[dpos+1]|
is placed in |dots[dpos]|, and the corresponding dot weight in
|dotw[dpos]|.
@y
applied to all following words (until the next global word weight). A
digit at the beginning of some intercharacter position indicates a weight for
that position only. A digit at the end of some intercharacter position
indicates the hyphen class for the hyphen wanted at this position.
The |read_word| procedure scans a line of input representing a word,
and places the letters into the array |word|, with |word[1]=word[wlen]=
edge_of_word|. The class of the hyphen appearing between |word[dpos]| and
|word[dpos+1]| is placed in |hclass[dpos]|, and the corresponding dot weight in
|dotw[dpos]|.
@z
@x l.1537
@p procedure read_word;
label done, found;
var c: text_char;
@!t: trie_pointer;
begin read_buf(dictionary);
word[1]:=edge_of_word; wlen:=1; buf_ptr:=0;
repeat incr(buf_ptr); c:=buf[buf_ptr];
case xclass[c] of
space_class: goto found;
digit_class:
if wlen=1 then {global word weight}
begin if xint[c]<>word_wt then wt_chg:=true;
word_wt:=xint[c];
end
else dotw[wlen]:=xint[c]; {dot weight}
hyf_class: dots[wlen]:=xint[c]; {record the dot |c|}
letter_class: {record the letter |c|}
begin incr(wlen);
if wlen=max_len then
begin print_buf; overflow('word length=',max_len:1);
end;
word[wlen]:=xint[c]; dots[wlen]:=no_hyf; dotw[wlen]:=word_wt;
end;
escape_class: {record a multi-character sequence starting with |c|}
begin incr(wlen);
if wlen=max_len then
begin print_buf; overflow('word length=',max_len:1);
end;
get_letter(word[wlen]); dots[wlen]:=no_hyf; dotw[wlen]:=word_wt;
end;
invalid_class: bad_input('Bad character');
@.Bad character@>
end;
until buf_ptr=max_buf_len;
found: incr(wlen); word[wlen]:=edge_of_word;
end;
@y
@p procedure read_word;
{scans one dictionary line into |word|, |hclass| and |dotw|}
label done, found;
var c: text_char;
i: word_index;
@!t: trie_pointer;
begin read_buf(dictionary);
word[1]:=edge_of_word; wlen:=1; buf_ptr:=0;
for i:=0 to max_len do hclass[i]:=0; {no hyphen is wanted until one is read}
repeat incr(buf_ptr); c:=buf[buf_ptr];
case xclass[c] of
space_class: goto found;
digit_class:
if wlen=1 then {global word weight}
begin if xint[c]<>word_wt then wt_chg:=true;
word_wt:=xint[c];
end
else if hclass[wlen]>0 then begin {a digit after a hyphen selects its class}
if hyphen_class_num>xint[c] then hclass[wlen]:=xint[c]
else error('unexpected hyphen class!'); {class is not below |hyphen_class_num|}
end else dotw[wlen]:=xint[c]; {dot weight}
hyf_class:
begin
hclass[wlen]:=1; {every hyphen character starts out as class 1}
end;
letter_class: {record the letter |c|}
begin
incr(wlen);
if wlen=max_len then
begin print_buf; overflow('word length=',max_len:1);
end;
word[wlen]:=xint[c]; hclass[wlen]:=0; dotw[wlen]:=word_wt;
end;
escape_class: {record a multi-character sequence starting with |c|}
begin incr(wlen);
if wlen=max_len then
begin print_buf; overflow('word length=',max_len:1);
end;
get_letter(word[wlen]); dotw[wlen]:=word_wt;
{|hclass[wlen]| is still zero from the initial loop}
end;
invalid_class: bad_input('Bad character');
@.Bad character@>
end;
until buf_ptr=max_buf_len;
found: incr(wlen); word[wlen]:=edge_of_word;
end;
@z
@x l.1628
@ The |change_dots| procedure updates the |dots| array representing the
printing values of the hyphens. Initially, hyphens (and correctly
found hyphens) in the word list are represented by |is_hyf| whereas
non-hyphen positions (and erroneous hyphens) are represented by |no_hyf|. A
Here these values are increased by one for each hyphen found by the
current patterns, thus changing |no_hyf| into |err_hyf| and |is_hyf|
into |found_hyf|. The routine also collects statistics about the number
of good, bad, and missed hyphens.
@d incr_wt(#)==Incr(#)(dotw[dpos])
@p procedure change_dots;
var dpos: word_index;
begin for dpos:=wlen-hyf_max downto hyf_min do
begin if odd(hval[dpos]) then incr(dots[dpos]);
if dots[dpos]=found_hyf then incr_wt(good_count)
else if dots[dpos]=err_hyf then incr_wt(bad_count)
else if dots[dpos]=is_hyf then incr_wt(miss_count);
end;
end;
@y
@ The |change_dots| procedure keeps its historical name although the |dots|
array it once updated no longer exists. For every legal hyphen position it
compares the class found by the current patterns with the wanted class and
adds the position's weight to the good, bad, or missed count, and to the
matching entry of |off_count|.
@d incr_wt(#)==Incr(#)(dotw[dpos])
@p procedure change_dots;
var dpos: word_index;
have: integer; {class found by the current patterns}
want: integer; {class wanted by the dictionary}
delta: integer; {found class minus wanted class}
begin for dpos:=wlen-hyf_max downto hyf_min do
  begin have:=hyphen_class(hval[dpos]); want:=hclass[dpos];
  {good/bad/miss statistics}
  if have=0 then
    begin if want>0 then incr_wt(miss_count);
    end
  else if have=want then incr_wt(good_count)
  else incr_wt(bad_count);
  {off statistics, only where a hyphen is found or wanted}
  if have+want>0 then
    begin delta:=have-want;
    if delta<-3 then incr_wt(off_count[1])
    else if delta>3 then incr_wt(off_count[9])
    else incr_wt(off_count[delta+5]);
    end;
  end;
end;
@z
@x l.1653
@ The following procedure outputs the word as hyphenated by the current
patterns, including any word weights. Hyphens inhibited by the values of
\.{\\lefthyphenmin} and \.{\\righthyphenmin} are output as well.
@p procedure output_hyphenated_word;
var dpos: word_index;@/
@!l: triec_pointer; {for |write_letter|}
begin if wt_chg then {output global word weight}
begin write(pattmp,xdig[word_wt]); wt_chg:=false
end;
for dpos:=2 to wlen-2 do
begin write_letter(word[dpos])(pattmp); write(pattmp,xext[word[dpos]]);
if dots[dpos]<>no_hyf then write(pattmp,xhyf[dots[dpos]]);
if dotw[dpos]<>word_wt then write(pattmp,xdig[dotw[dpos]]);
end;
write_letter(word[wlen-1])(pattmp); write_ln(pattmp,xext[word[wlen-1]]);
end;
@y
@ The following procedure outputs the word as hyphenated by the current
patterns, including the found hyphen classes. A correct hyphen is shown with
|found_hyf|, an incorrect one with |err_hyf|; a class greater than~1 is
appended as a digit. Hyphens inhibited by the values of
\.{\\lefthyphenmin} and \.{\\righthyphenmin} are {\it not} shown.
A single loop writes every letter exactly once, so words that are too short
to be hyphenated (|wlen<hyf_min+hyf_max|) are copied without duplicated
letters and without touching positions outside the word.
@p procedure output_hyphenated_word;
var dpos: word_index;@/
@!l: triec_pointer; {for |write_letter|}
@!have: integer; {hyphen class found at |dpos|}
begin
for dpos:=2 to wlen-1 do begin
  write_letter(word[dpos])(pattmp);
  write(pattmp,xext[word[dpos]]);
  {hyphens are reported only inside the legal range}
  if dpos>=hyf_min then if dpos<=wlen-hyf_max then begin
    have:=hyphen_class(hval[dpos]);
    if have>0 then begin
      if have=hclass[dpos] then write(pattmp,xhyf[found_hyf])
      else write(pattmp,xhyf[err_hyf]);
      if have>1 then write(pattmp,xdig[have]);
    end;
  end;
end;
write_ln(pattmp,'');
end;
@z
@x l.1702
@ The globals |good_dot| and |bad_dot| will be set to |is_hyf| and
|no_hyf|, or |err_hyf| and |found_hyf|, depending on whether the current
level is odd or even, respectively. The globals |dot_min|, |dot_max|,
and |dot_len| are analogous to |hyf_min|, |hyf_max|, and |hyf_len|
defined earlier.
@<Globals...@>=
@!good_dot, @!bad_dot: hyf_type; {good and bad hyphens at current level}
@!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots}
@ @<Prepare to read dictionary@>=
if procesp then
begin dot_min:=pat_dot; dot_max:=pat_len-pat_dot;
if dot_min<hyf_min then dot_min:=hyf_min;
if dot_max<hyf_max then dot_max:=hyf_max;
dot_len:=dot_min+dot_max;
if odd(hyph_level) then
begin good_dot:=is_hyf; bad_dot:=no_hyf;
end
else begin good_dot:=err_hyf; bad_dot:=found_hyf;
end;
end;
@y
@ The globals |dot_min|, |dot_max|, and |dot_len| are analogous to |hyf_min|,
|hyf_max|, and |hyf_len| defined earlier.
@<Globals...@>=
@!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots}
@ @<Prepare to read dictionary@>=
if procesp then
begin dot_min:=pat_dot; dot_max:=pat_len-pat_dot;
if dot_min<hyf_min then dot_min:=hyf_min; {raise to at least |hyf_min|}
if dot_max<hyf_max then dot_max:=hyf_max; {raise to at least |hyf_max|}
dot_len:=dot_min+dot_max;
end;
@z
@x l.1729
@<Check this dot position...@>=
if no_more[dpos] then goto continue;
if dots[dpos]=good_dot then goodp:=true else
if dots[dpos]=bad_dot then goodp:=false else goto continue;
@y
@<Check this dot position...@>=
if no_more[dpos] then goto continue;
{a dot is good if the class selected by the current level is nearer to the
 wanted class than the class found so far, bad if it is farther away, and
 of no interest if the distance stays the same; the expressions are written
 out in full so that no additional local variables are required}
if abs(hyphen_class(hyph_level)-hclass[dpos])<
    abs(hyphen_class(hval[dpos])-hclass[dpos]) then goodp:=true
else if abs(hyphen_class(hyph_level)-hclass[dpos])>
    abs(hyphen_class(hval[dpos])-hclass[dpos]) then goodp:=false
else goto continue;
@z
@x l.1750
@p procedure do_dictionary;
begin good_count:=0; bad_count:=0; miss_count:=0;
@y
@p procedure do_dictionary;
var i: integer; {index into |off_count|}
begin good_count:=0; bad_count:=0; miss_count:=0;
for i:=1 to 9 do off_count[i]:=0; {clear the off-by statistics}
@z
@x l.1835
digit_class:
begin d:=xint[c];
if d>=max_val then bad_input('Bad hyphenation value');
@.Bad hyphenation value@>
if d>max_pat then max_pat:=d;
hval[pat_len]:=d;
end;
@y
digit_class:
begin d:=xint[c];
{a second digit directly after the first makes a two-digit value; the
 look-ahead is skipped at the last buffer position so that |buf| is never
 read beyond |max_buf_len| and |buf_ptr| cannot step past the loop limit}
{NOTE(review): |d| must be able to hold values up to 63; confirm its
 declared type in the enclosing procedure}
if buf_ptr<max_buf_len then if xclass[buf[buf_ptr+1]]=digit_class then
  begin incr(buf_ptr);
  c:=buf[buf_ptr];
  d:=10*d+xint[c];
  end;
if d>=max_val then bad_input('Bad hyphenation value');
@.Bad hyphenation value@>
if d>max_pat then max_pat:=d;
hval[pat_len]:=d;
end;
@z