Introduction
Introduction Statistics Contact Development Disclaimer Help
n8.c - 9base - revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log
Files
Refs
README
LICENSE
---
n8.c (10320B)
---
1 #include <u.h>
2 #include "tdef.h"
3 #include "fns.h"
4 #include "ext.h"
5
6 #define HY_BIT 0200 /* stuff in here only works for…
7 /* this value is used (as a literal) in suftab.c…
8 /* to encode possible hyphenation points in suff…
9 /* it could be changed, by widening the tables */
10 /* to be shorts instead of chars. */
11
12 /*
13 * troff8.c
14 *
15 * hyphenation
16 */
17
18 int hexsize = 0; /* hyphenation exception list siz…
19 char *hbufp = NULL; /* base of list */
20 char *nexth = NULL; /* first free slot in list */
21 Tchar *hyend;
22
23 #define THRESH 160 /* digram goodness threshold */
24 int thresh = THRESH;
25
26 int texhyphen(void);
27 static int alpha(Tchar);
28
29 void hyphen(Tchar *wp)
30 {
31 int j;
32 Tchar *i;
33
34 i = wp;
35 while (punct((*i++)))
36 ;
37 if (!alpha(*--i))
38 return;
39 wdstart = i++;
40 while (alpha(*i++))
41 ;
42 hyend = wdend = --i - 1;
43 while (punct((*i++)))
44 ;
45 if (*--i)
46 return;
47 if (wdend - wdstart < 4) /* 4 chars is too short to hyphe…
48 return;
49 hyp = hyptr;
50 *hyp = 0;
51 hyoff = 2;
52
53 /* for now, try exceptions first, then tex (if hyphalg is non-ze…
54 then suffix and digram if tex didn't hyphenate it at all.
55 */
56
57 if (!exword() && !texhyphen() && !suffix())
58 digram();
59
60 /* this appears to sort hyphenation points into increasing order…
61 *hyp++ = 0;
62 if (*hyptr)
63 for (j = 1; j; ) {
64 j = 0;
65 for (hyp = hyptr + 1; *hyp != 0; hyp++) {
66 if (*(hyp - 1) > *hyp) {
67 j++;
68 i = *hyp;
69 *hyp = *(hyp - 1);
70 *(hyp - 1) = i;
71 }
72 }
73 }
74 }
75
76 static int alpha(Tchar i) /* non-zero if really alphabetic */
77 {
78 if (ismot(i))
79 return 0;
80 else if (cbits(i) >= ALPHABET) /* this isn't very elegant…
81 return 0; /* no good way to make sure i i…
82 else /* the call of isalpha */
83 return isalpha(cbits(i));
84 }
85
86 int
87 punct(Tchar i)
88 {
89 if (!i || alpha(i))
90 return(0);
91 else
92 return(1);
93 }
94
95
96 void caseha(void) /* set hyphenation algorithm */
97 {
98 hyphalg = HYPHALG;
99 if (skip())
100 return;
101 noscale++;
102 hyphalg = atoi0();
103 noscale = 0;
104 }
105
106
107 void caseht(void) /* set hyphenation threshold; not in manual! */
108 {
109 thresh = THRESH;
110 if (skip())
111 return;
112 noscale++;
113 thresh = atoi0();
114 noscale = 0;
115 }
116
117
118 char *growh(char *where)
119 {
120 char *new;
121
122 hexsize += NHEX;
123 if ((new = grow(hbufp, hexsize, sizeof(char))) == NULL)
124 return NULL;
125 if (new == hbufp) {
126 return where;
127 } else {
128 int diff;
129 diff = where - hbufp;
130 hbufp = new;
131 return new + diff;
132 }
133 }
134
135
136 void casehw(void)
137 {
138 int i, k;
139 char *j;
140 Tchar t;
141
142 if (nexth == NULL) {
143 if ((nexth = hbufp = grow(hbufp, NHEX, sizeof(char))) ==…
144 ERROR "No space for exception word list." WARN;
145 return;
146 }
147 hexsize = NHEX;
148 }
149 k = 0;
150 while (!skip()) {
151 if ((j = nexth) >= hbufp + hexsize - 2)
152 if ((j = nexth = growh(j)) == NULL)
153 goto full;
154 for (;;) {
155 if (ismot(t = getch()))
156 continue;
157 i = cbits(t);
158 if (i == ' ' || i == '\n') {
159 *j++ = 0;
160 nexth = j;
161 *j = 0;
162 if (i == ' ')
163 break;
164 else
165 return;
166 }
167 if (i == '-') {
168 k = HY_BIT;
169 continue;
170 }
171 *j++ = maplow(i) | k;
172 k = 0;
173 if (j >= hbufp + hexsize - 2)
174 if ((j = growh(j)) == NULL)
175 goto full;
176 }
177 }
178 return;
179 full:
180 ERROR "Cannot grow exception word list." WARN;
181 *nexth = 0;
182 }
183
184
185 int exword(void)
186 {
187 Tchar *w;
188 char *e, *save;
189
190 e = hbufp;
191 while (1) {
192 save = e;
193 if (e == NULL || *e == 0)
194 return(0);
195 w = wdstart;
196 while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*…
197 e++;
198 w++;
199 }
200 if (!*e) {
201 if (w-1 == hyend || (w == wdend && maplow(cbits(…
202 w = wdstart;
203 for (e = save; *e; e++) {
204 if (*e & HY_BIT)
205 *hyp++ = w;
206 if (hyp > hyptr + NHYP - 1)
207 hyp = hyptr + NHYP - 1;
208 w++;
209 }
210 return(1);
211 } else {
212 e++;
213 continue;
214 }
215 } else
216 while (*e++)
217 ;
218 }
219 }
220
221 int
222 suffix(void)
223 {
224 Tchar *w;
225 char *s, *s0;
226 Tchar i;
227 extern char *suftab[];
228
229 again:
230 i = cbits(*hyend);
231 if (!alpha(i))
232 return(0);
233 if (i < 'a')
234 i -= 'A' - 'a';
235 if ((s0 = suftab[i-'a']) == 0)
236 return(0);
237 for (;;) {
238 if ((i = *s0 & 017) == 0)
239 return(0);
240 s = s0 + i - 1;
241 w = hyend - 1;
242 while (s > s0 && w >= wdstart && (*s & 0177) == maplow(c…
243 s--;
244 w--;
245 }
246 if (s == s0)
247 break;
248 s0 += i;
249 }
250 s = s0 + i - 1;
251 w = hyend;
252 if (*s0 & HY_BIT)
253 goto mark;
254 while (s > s0) {
255 w--;
256 if (*s-- & HY_BIT) {
257 mark:
258 hyend = w - 1;
259 if (*s0 & 0100) /* 0100 used in suftab to…
260 continue;
261 if (!chkvow(w))
262 return(0);
263 *hyp++ = w;
264 }
265 }
266 if (*s0 & 040)
267 return(0);
268 if (exword())
269 return(1);
270 goto again;
271 }
272
273 int
274 maplow(int i)
275 {
276 if (isupper(i))
277 i = tolower(i);
278 return(i);
279 }
280
281 int
282 vowel(int i)
283 {
284 switch (i) {
285 case 'a': case 'A':
286 case 'e': case 'E':
287 case 'i': case 'I':
288 case 'o': case 'O':
289 case 'u': case 'U':
290 case 'y': case 'Y':
291 return(1);
292 default:
293 return(0);
294 }
295 }
296
297
298 Tchar *chkvow(Tchar *w)
299 {
300 while (--w >= wdstart)
301 if (vowel(cbits(*w)))
302 return(w);
303 return(0);
304 }
305
306
307 void digram(void)
308 {
309 Tchar *w;
310 int val;
311 Tchar *nhyend, *maxw;
312 int maxval;
313 extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13],…
314 maxw = 0;
315 again:
316 if (!(w = chkvow(hyend + 1)))
317 return;
318 hyend = w;
319 if (!(w = chkvow(hyend)))
320 return;
321 nhyend = w;
322 maxval = 0;
323 w--;
324 while (++w < hyend && w < wdend - 1) {
325 val = 1;
326 if (w == wdstart)
327 val *= dilook('a', cbits(*w), bxh);
328 else if (w == wdstart + 1)
329 val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
330 else
331 val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
332 val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
333 val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
334 if (val > maxval) {
335 maxval = val;
336 maxw = w + 1;
337 }
338 }
339 hyend = nhyend;
340 if (maxval > thresh)
341 *hyp++ = maxw;
342 goto again;
343 }
344
345 int
346 dilook(int a, int b, char t[26][13])
347 {
348 int i, j;
349
350 i = t[maplow(a)-'a'][(j = maplow(b)-'a')/2];
351 if (!(j & 01))
352 i >>= 4;
353 return(i & 017);
354 }
355
356
357 /* here beginneth the tex hyphenation code, as interpreted freely */
358 /* the main difference is that there is no attempt to squeeze space */
359 /* as tightly at tex does. */
360
361 static int texit(Tchar *, Tchar *);
362 static int readpats(void);
363 static void install(char *);
364 static void fixup(void);
365 static int trieindex(int, int);
366
367 static char pats[50000]; /* size ought to be computed dyna…
368 static char *nextpat = pats;
369 static char *trie[27*27]; /* english-specific sizes */
370
371 int texhyphen(void)
372 {
373 static int loaded = 0; /* -1: couldn't find tex f…
374
375 if (hyphalg == 0 || loaded == -1) /* non-zero => tex for …
376 return 0;
377 if (loaded == 0) {
378 if (readpats())
379 loaded = 1;
380 else
381 loaded = -1;
382 }
383 return texit(wdstart, wdend);
384 }
385
386 static int texit(Tchar *start, Tchar *end) /* hyphenate as in tex…
387 {
388 int nw, i, k, equal, cnt[500];
389 char w[500+1], *np, *pp, *wp, *xpp, *xwp;
390
391 w[0] = '.';
392 for (nw = 1; start <= end && nw < 500-1; nw++, start++)
393 w[nw] = maplow(tolower(cbits(*start)));
394 start -= (nw - 1);
395 w[nw++] = '.';
396 w[nw] = 0;
397 /*
398 * printf("try %s\n", w);
399 */
400 for (i = 0; i <= nw; i++)
401 cnt[i] = '0';
402
403 for (wp = w; wp+1 < w+nw; wp++) {
404 for (pp = trie[trieindex(*wp, *(wp+1))]; pp < nextpat; )…
405 if (pp == 0 /* no trie entry */
406 || *pp != *wp /* no match on 1st…
407 || *(pp+1) != *(wp+1)) /* no match on 2n…
408 break; /* so move to ne…
409 equal = 1;
410 for (xpp = pp+2, xwp = wp+2; *xpp; )
411 if (*xpp++ != *xwp++) {
412 equal = 0;
413 break;
414 }
415 if (equal) {
416 np = xpp+1; /* numpat */
417 for (k = wp-w; *np; k++, np++)
418 if (*np > cnt[k])
419 cnt[k] = *np;
420 /*
421 * printf("match: %s %s\n", pp, xpp+1);
422 */
423 }
424 pp += *(pp-1); /* skip over pattern and n…
425 }
426 }
427 /*
428 * for (i = 0; i < nw; i++) printf("%c", w[i]);
429 * printf(" ");
430 * for (i = 0; i <= nw; i++) printf("%c", cnt[i]);
431 * printf("\n");
432 */
433 /*
434 * for (i = 1; i < nw - 1; i++) {
435 * if (i > 2 && i < nw - 3 && cnt[i] % 2)
436 * printf("-");
437 * if (cbits(start[i-1]) != '.')
438 * printf("%c", cbits(start[i-1]));
439 * }
440 * printf("\n");
441 */
442 for (i = 1; i < nw -1; i++)
443 if (i > 2 && i < nw - 3 && cnt[i] % 2)
444 *hyp++ = start + i - 1;
445 return hyp - hyptr; /* non-zero if a hyphen was found */
446 }
447
448 /*
449 This code assumes that hyphen.tex looks like
450 % some comments
451 \patterns{ % more comments
452 pat5ter4ns, 1 per line, SORTED, nothing else
453 }
454 more goo
455 \hyphenation{ % more comments
456 ex-cep-tions, one per line; i ignore this part for now
457 }
458
459 this code is NOT robust against variations. unfortunately,
460 it looks like every local language version of this file has
461 a different format. i have also made no provision for weird
462 characters. sigh.
463 */
464
465 static int readpats(void)
466 {
467 FILE *fp;
468 char buf[200], buf1[200];
469
470 if ((fp = fopen(unsharp(TEXHYPHENS), "r")) == NULL
471 && (fp = fopen(unsharp(DWBalthyphens), "r")) == NULL) {
472 ERROR "warning: can't find hyphen.tex" WARN;
473 return 0;
474 }
475
476 while (fgets(buf, sizeof buf, fp) != NULL) {
477 sscanf(buf, "%s", buf1);
478 if (strcmp(buf1, "\\patterns{") == 0)
479 break;
480 }
481 while (fgets(buf, sizeof buf, fp) != NULL) {
482 if (buf[0] == '}')
483 break;
484 install(buf);
485 }
486 fclose(fp);
487 fixup();
488 return 1;
489 }
490
491 static void install(char *s) /* map ab4c5de to: 12 abcde \0 00405…
492 {
493 int npat, lastpat;
494 char num[500], *onextpat = nextpat;
495
496 num[0] = '0';
497 *nextpat++ = ' '; /* fill in with count later */
498 for (npat = lastpat = 0; *s != '\n' && *s != '\0'; s++) {
499 if (isdigit((uchar)*s)) {
500 num[npat] = *s;
501 lastpat = npat;
502 } else {
503 *nextpat++ = *s;
504 npat++;
505 num[npat] = '0';
506 }
507 }
508 *nextpat++ = 0;
509 if (nextpat > pats + sizeof(pats)-20) {
510 ERROR "tex hyphenation table overflow, tail end ignored"…
511 nextpat = onextpat;
512 }
513 num[lastpat+1] = 0;
514 strcat(nextpat, num);
515 nextpat += strlen(nextpat) + 1;
516 }
517
518 static void fixup(void) /* build indexes of where . a b c ... sta…
519 {
520 char *p, *lastc;
521 int n;
522
523 for (lastc = pats, p = pats+1; p < nextpat; p++)
524 if (*p == ' ') {
525 *lastc = p - lastc;
526 lastc = p;
527 }
528 *lastc = p - lastc;
529 for (p = pats+1; p < nextpat; ) {
530 n = trieindex(p[0], p[1]);
531 if (trie[n] == 0)
532 trie[n] = p;
533 p += p[-1];
534 }
535 /* printf("pats = %d\n", nextpat - pats); */
536 }
537
538 static int trieindex(int d1, int d2)
539 {
540 int z;
541
542 z = 27 * (d1 == '.' ? 0 : d1 - 'a' + 1) + (d2 == '.' ? 0 : d2 - …
543 assert(z >= 0 && z < 27*27);
544 return z;
545 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.