tuc.c - neatvi - [fork] simple vi-type editor with UTF-8 support | |
git clone git://src.adamsgaard.dk/neatvi | |
Log | |
Files | |
Refs | |
README | |
--- | |
tuc.c (21648B) | |
--- | |
1 #include <ctype.h> | |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 #include "vi.h" | |
6 | |
/* number of elements in the array a; a must be an actual array, not a pointer */
#define LEN(a)		(sizeof(a) / sizeof(*(a)))
8 | |
/*
 * Return the byte length of the UTF-8 character starting at s, judged
 * from its first byte only: 0 for the terminating NUL, 1 for ASCII and
 * for bytes that cannot start a character, 2 to 4 for valid lead bytes.
 */
int uc_len(char *s)
{
	int lead = (unsigned char) s[0];
	if ((lead & 0xc0) != 0xc0)	/* ASCII or a stray continuation byte */
		return lead != 0;
	if ((lead & 0xe0) == 0xc0)	/* 110xxxxx */
		return 2;
	if ((lead & 0xf0) == 0xe0)	/* 1110xxxx */
		return 3;
	if ((lead & 0xf8) == 0xf0)	/* 11110xxx */
		return 4;
	return 1;			/* 11111xxx: not a valid lead byte */
}
23 | |
/* count the UTF-8 characters in the NUL-terminated string s */
int uc_slen(char *s)
{
	int count = 0;
	while (*s) {
		/* uc_end() yields the last byte of the current character */
		s = uc_end(s) + 1;
		count++;
	}
	return count;
}
32 | |
/*
 * Decode the UTF-8 character at s and return its Unicode code point.
 *
 * ASCII bytes, stray continuation bytes and lead bytes of 0xf8 and above
 * are returned unchanged.  A multi-byte sequence that is cut short by the
 * terminating NUL also yields its lead byte, so the decoder never reads
 * past the end of the string.  Continuation bytes are otherwise not
 * validated: only their low six bits are used.
 */
int uc_code(char *s)
{
	int c = (unsigned char) s[0];
	if (~c & 0xc0)		/* ASCII or invalid */
		return c;
	if (!s[1])		/* lead byte directly followed by NUL */
		return c;
	if (~c & 0x20)
		return ((c & 0x1f) << 6) | (s[1] & 0x3f);
	if (!s[2])		/* 3- or 4-byte sequence cut after one byte */
		return c;
	if (~c & 0x10)
		return ((c & 0x0f) << 12) | ((s[1] & 0x3f) << 6) |
			(s[2] & 0x3f);
	if (~c & 0x08) {
		if (!s[3])	/* 4-byte sequence cut after two bytes */
			return c;
		return ((c & 0x07) << 18) | ((s[1] & 0x3f) << 12) |
			((s[2] & 0x3f) << 6) | (s[3] & 0x3f);
	}
	return c;
}
47 | |
/*
 * Step back from s over UTF-8 continuation bytes (10xxxxxx) to the first
 * byte of the character containing s; never moves before beg.
 */
char *uc_beg(char *beg, char *s)
{
	for (; s > beg; s--)
		if (((unsigned char) *s & 0xc0) != 0x80)
			break;
	return s;
}
55 | |
/*
 * Return a pointer to the last byte of the UTF-8 character starting at s.
 * For NUL and ASCII that is s itself; otherwise the lead byte (if any) and
 * every following continuation byte are skipped.
 */
char *uc_end(char *s)
{
	unsigned char first = (unsigned char) *s;
	if (first < 0x80)		/* NUL or ASCII: a single byte */
		return s;
	if (first >= 0xc0)		/* step over the lead byte */
		s++;
	while (((unsigned char) *s & 0xc0) == 0x80)
		s++;
	return s - 1;
}
67 | |
/*
 * Return a pointer to the character after the one at s; at the end of the
 * string the pointer to the terminating NUL is returned instead.
 */
char *uc_next(char *s)
{
	char *last = uc_end(s);
	if (*last == '\0')
		return last;
	return last + 1;
}
74 | |
/*
 * Return a pointer to the character before s in the string starting at
 * beg; when s is already at beg, beg is returned.
 */
char *uc_prev(char *beg, char *s)
{
	if (s == beg)
		return beg;
	return uc_beg(beg, s - 1);
}
80 | |
/*
 * Return the start of the last line in s: the byte after the final
 * newline, or s itself when it contains no newline.
 */
char *uc_lastline(char *s)
{
	char *nl = strrchr(s, '\n');
	if (!nl)
		return s;
	return nl + 1;
}
86 | |
/*
 * Allocate and return an array with a pointer to each character in s.
 *
 * *n is set to the number of characters.  The array holds *n + 1 entries;
 * the extra last entry is filled the same way as the others, by advancing
 * with uc_next().  The pointers refer into s itself, so s must outlive the
 * array; the caller frees the array with free().
 *
 * If the allocation fails, *n is set to 0 and NULL is returned.
 */
char **uc_chop(char *s, int *n)
{
	char **chrs;
	int i;
	*n = uc_slen(s);
	chrs = malloc(((size_t) *n + 1) * sizeof(chrs[0]));
	if (!chrs) {
		/* leave the caller with an empty result rather than a bad array */
		*n = 0;
		return NULL;
	}
	for (i = 0; i < *n + 1; i++) {
		chrs[i] = s;
		s = uc_next(s);
	}
	return chrs;
}
100 | |
/*
 * Return a pointer to the character with index off (counting from zero)
 * in s.
 *
 * When off equals the number of characters in s, or is negative, the
 * pointer to the terminating NUL of s is returned.  When off lies past
 * the end, or s is NULL, an empty string literal is returned instead.
 */
char *uc_chr(char *s, int off)
{
	int idx;
	if (!s)
		return "";
	for (idx = 0; *s; idx++) {
		if (idx == off)
			return s;
		s = uc_next(s);
	}
	/* here s points at the NUL and idx is the character count */
	if (off < 0 || idx == off)
		return s;
	return "";
}
111 | |
/*
 * Count the characters that start within the first off bytes of s,
 * stopping early at the terminating NUL.
 */
int uc_off(char *s, int off)
{
	char *limit = s + off;
	int count = 0;
	while (s < limit && *s) {
		s = uc_next(s);
		count++;
	}
	return count;
}
121 | |
/*
 * Return a newly allocated copy of the characters of s from index beg up
 * to, but not including, index end.  Indices are resolved with uc_chr().
 *
 * An index past the end of s is clamped to the end of s, so the result is
 * never built from memory outside s.  An empty string is returned when the
 * range is empty or reversed, or when s is NULL.  The caller frees the
 * result with free().  Returns NULL if the allocation fails.
 */
char *uc_sub(char *s, int beg, int end)
{
	char *sbeg = uc_chr(s, beg);
	char *send = uc_chr(s, end);
	char *lim = s ? s + strlen(s) : NULL;
	size_t len = 0;
	char *r;
	if (lim) {
		/*
		 * uc_chr() returns either a pointer into s or an unrelated
		 * empty string.  A pointer to a NUL byte is therefore either
		 * lim already or that foreign string; map both to lim so the
		 * subtraction below only ever sees pointers into s.
		 */
		if (!*sbeg)
			sbeg = lim;
		if (!*send)
			send = lim;
		if (sbeg < send)
			len = send - sbeg;
	}
	r = malloc(len + 1);
	if (!r)
		return NULL;
	memcpy(r, sbeg, len);
	r[len] = '\0';
	return r;
}
132 | |
/*
 * Return a newly allocated copy of the string s, or NULL if the
 * allocation fails.  The caller frees the copy with free().
 */
char *uc_dup(char *s)
{
	size_t size = strlen(s) + 1;	/* include the terminating NUL */
	char *copy = malloc(size);
	if (!copy)
		return NULL;
	return memcpy(copy, s, size);
}
138 | |
/*
 * Return 1 if the first byte of s is an ASCII whitespace character, else
 * 0.  Bytes above 0x7f never count as space; a NULL s is treated like an
 * empty string.
 */
int uc_isspace(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 0;
	return isspace(c) != 0;
}
144 | |
/*
 * Return 1 if the first byte of s is printable, else 0.  Every byte above
 * 0x7f counts as printable; ASCII bytes are checked with isprint().  A
 * NULL s is treated like an empty string.
 */
int uc_isprint(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 1;
	return isprint(c) != 0;
}
150 | |
/*
 * Return 1 if the first byte of s is alphabetic, else 0.  Every byte above
 * 0x7f counts as alphabetic; ASCII bytes are checked with isalpha().  A
 * NULL s is treated like an empty string.
 */
int uc_isalpha(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 1;
	return isalpha(c) != 0;
}
156 | |
/*
 * Return 1 if the first byte of s is an ASCII decimal digit, else 0.
 * Bytes above 0x7f never count as digits; a NULL s is treated like an
 * empty string.
 */
int uc_isdigit(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 0;
	return isdigit(c) != 0;
}
162 | |
/*
 * Classify the character at c:
 *   0 - whitespace (uc_isspace),
 *   1 - word character: alphabetic, digit or underscore,
 *   2 - anything else.
 *
 * Like the uc_is*() helpers it relies on, a NULL c is accepted; it is
 * classified as 2.
 */
int uc_kind(char *c)
{
	if (uc_isspace(c))
		return 0;
	/* the helpers tolerate NULL, so guard the direct dereference too */
	if (uc_isalpha(c) || uc_isdigit(c) || (c && c[0] == '_'))
		return 1;
	return 2;
}
171 | |
/*
 * Nonzero if the code point ch lies in one of the ranges this file treats
 * as right-to-left:
 *   0x0600-0x06ff, 0x200c-0x200f, 0xfb00-0xfbff, 0xfc00-0xfcff and
 *   0xfe00-0xfeff.
 *
 * All bits above the range are compared as well, so code points beyond
 * 0xffff whose low 16 bits happen to match (such as 0x20600) are not
 * mistaken for members of these ranges.
 *
 * ch is evaluated several times; pass an expression without side effects.
 */
#define UC_R2L(ch)	(((ch) & ~0xff) == 0x0600 || \
			((ch) & ~0x03) == 0x200c || \
			((ch) & ~0xff) == 0xfb00 || \
			((ch) & ~0xff) == 0xfc00 || \
			((ch) & ~0xff) == 0xfe00)
177 | |
/*
 * Characters that can be shaped, with the presentation form to use in
 * each joining position; a zero entry means the character has no such
 * form.
 */
struct achar {
	unsigned c;		/* code point of the character */
	unsigned s;		/* single (isolated) form */
	unsigned i;		/* initial form */
	unsigned m;		/* medial form */
	unsigned f;		/* final form */
};

/* must stay sorted by c: find_achar() searches it with binary search */
static struct achar achars[] = {
	{0x0621, 0xfe80},				/* hamza */
	{0x0622, 0xfe81, 0, 0, 0xfe82},			/* alef with madda above */
	{0x0623, 0xfe83, 0, 0, 0xfe84},			/* alef with hamza above */
	{0x0624, 0xfe85, 0, 0, 0xfe86},			/* waw with hamza above */
	{0x0625, 0xfe87, 0, 0, 0xfe88},			/* alef with hamza below */
	{0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},	/* yeh hamza */
	{0x0627, 0xfe8d, 0, 0, 0xfe8e},			/* alef */
	{0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},	/* beh */
	{0x0629, 0xfe93, 0, 0, 0xfe94},			/* teh marbuta */
	{0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},	/* teh */
	{0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},	/* theh */
	{0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},	/* jeem */
	{0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},	/* hah */
	{0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},	/* khah */
	{0x062f, 0xfea9, 0, 0, 0xfeaa},			/* dal */
	{0x0630, 0xfeab, 0, 0, 0xfeac},			/* thal */
	{0x0631, 0xfead, 0, 0, 0xfeae},			/* reh */
	{0x0632, 0xfeaf, 0, 0, 0xfeb0},			/* zain */
	{0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},	/* seen */
	{0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},	/* sheen */
	{0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},	/* sad */
	{0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},	/* dad */
	{0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},	/* tah */
	{0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},	/* zah */
	{0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},	/* ain */
	{0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},	/* ghain */
	{0x0640, 0x640, 0x640, 0x640},			/* tatweel */
	{0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},	/* feh */
	{0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},	/* qaf */
	{0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},	/* kaf */
	{0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},	/* lam */
	{0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},	/* meem */
	{0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},	/* noon */
	{0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},	/* heh */
	{0x0648, 0xfeed, 0, 0, 0xfeee},			/* waw */
	{0x0649, 0xfeef, 0, 0, 0xfef0},			/* alef maksura */
	{0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},	/* yeh */
	{0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},	/* peh */
	{0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},	/* tcheh */
	{0x0698, 0xfb8a, 0, 0, 0xfb8b},			/* jeh */
	{0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},	/* fkaf */
	{0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},	/* gaf */
	{0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},	/* fyeh */
	{0x200c},					/* ZWNJ */
	{0x200d, 0, 0x200d, 0x200d},			/* ZWJ */
};
232 | |
233 static struct achar *find_achar(int c) | |
234 { | |
235 int h, m, l; | |
236 h = LEN(achars); | |
237 l = 0; | |
238 /* using binary search to find c */ | |
239 while (l < h) { | |
240 m = (h + l) >> 1; | |
241 if (achars[m].c == c) | |
242 return &achars[m]; | |
243 if (c < achars[m].c) | |
244 h = m; | |
245 else | |
246 l = m + 1; | |
247 } | |
248 return NULL; | |
249 } | |
250 | |
251 static int can_join(int c1, int c2) | |
252 { | |
253 struct achar *a1 = find_achar(c1); | |
254 struct achar *a2 = find_achar(c2); | |
255 return a1 && a2 && (a1->i || a1->m) && (a2->f || a2->m); | |
256 } | |
257 | |
258 static int uc_cshape(int cur, int prev, int next) | |
259 { | |
260 int c = cur; | |
261 int join_prev, join_next; | |
262 struct achar *ac = find_achar(c); | |
263 if (!ac) /* ignore non-Arabic characters */ | |
264 return c; | |
265 join_prev = can_join(prev, c); | |
266 join_next = can_join(c, next); | |
267 if (join_prev && join_next) | |
268 c = ac->m; | |
269 if (join_prev && !join_next) | |
270 c = ac->f; | |
271 if (!join_prev && join_next) | |
272 c = ac->i; | |
273 if (!join_prev && !join_next) | |
274 c = ac->c; /* some fonts do not have a glyph for … | |
275 return c ? c : cur; | |
276 } | |
277 | |
/*
 * Return 1 for Arabic combining characters, 0 otherwise.
 *
 * The standard Arabic diacritics, 0x064b-0x0655:
 * + 0x064b: fathatan
 * + 0x064c: dammatan
 * + 0x064d: kasratan
 * + 0x064e: fatha
 * + 0x064f: damma
 * + 0x0650: kasra
 * + 0x0651: shadda
 * + 0x0652: sukun
 * + 0x0653: madda above
 * + 0x0654: hamza above
 * + 0x0655: hamza below
 *
 * In addition 0x0670 (superscript alef) and the range 0xfc5e-0xfc63
 * (shadda ligatures) are accepted.
 */
static int uc_acomb(int c)
{
	if (c >= 0x064b && c <= 0x0655)		/* the standard diacritics */
		return 1;
	if (c >= 0xfc5e && c <= 0xfc63)		/* shadda ligatures */
		return 1;
	if (c == 0x0670)			/* superscript alef */
		return 1;
	return 0;
}
301 | |
/*
 * Write code point c to d as a NUL-terminated UTF-8 sequence of one to
 * four bytes; d must have room for at least five bytes.
 */
static void uc_cput(char *d, int c)
{
	int trail;		/* continuation bytes still to be written */
	if (c > 0xffff) {
		*d++ = 0xf0 | (c >> 18);
		trail = 3;
	} else if (c > 0x7ff) {
		*d++ = 0xe0 | (c >> 12);
		trail = 2;
	} else if (c > 0x7f) {
		*d++ = 0xc0 | (c >> 6);
		trail = 1;
	} else {
		*d++ = c;
		trail = 0;
	}
	/* emit the remaining bits six at a time, most significant first */
	for (; trail > 0; trail--)
		*d++ = 0x80 | ((c >> ((trail - 1) * 6)) & 0x3f);
	*d = '\0';
}
321 | |
/*
 * Shape the Arabic character at s according to its neighbours in the
 * string that starts at beg.
 *
 * Returns NULL when s is at the end of the string or its character does
 * not satisfy UC_R2L().  Otherwise returns the shaped character as UTF-8
 * in a static buffer, which the next call overwrites.
 */
char *uc_shape(char *beg, char *s)
{
	static char out[16];
	int curr = uc_code(s);
	int prev = 0;
	int next = 0;
	int code;
	char *p;
	if (!curr || !UC_R2L(curr))
		return NULL;
	/* nearest character before s that is not an Arabic combining mark */
	for (p = s; p > beg; ) {
		p = uc_beg(beg, p - 1);
		code = uc_code(p);
		if (!uc_acomb(code)) {
			prev = code;
			break;
		}
	}
	/* nearest character after s that is not an Arabic combining mark */
	for (p = s; *p; ) {
		p = uc_next(p);
		code = uc_code(p);
		if (!uc_acomb(code)) {
			next = code;
			break;
		}
	}
	uc_cput(out, uc_cshape(curr, prev, next));
	return out;
}
351 | |
352 static int dwchars[][2] = { | |
353 {0x1100, 0x115f}, {0x11a3, 0x11a7}, {0x11fa, 0x11ff}, {0x2329, 0… | |
354 {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0… | |
355 {0x3000, 0x3029}, {0x3030, 0x303e}, {0x3041, 0x3096}, {0x309b, 0… | |
356 {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x31c0, 0… | |
357 {0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x32fe}, {0x3300, 0… | |
358 {0x4e00, 0xa48c}, {0xa490, 0xa4c6}, {0xa960, 0xa97c}, {0xac00, 0… | |
359 {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfaff}, {0xfe10, 0… | |
360 {0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xff01, 0… | |
361 {0xffe0, 0xffe6}, {0x1f200, 0x1f200}, {0x1f210, 0x1f231}, {0x1f2… | |
362 {0x20000,0x2ffff}, | |
363 }; | |
364 | |
365 static int zwchars[][2] = { | |
366 {0x0300, 0x036f}, {0x0483, 0x0489}, {0x0591, 0x05bd}, {0x05bf, 0… | |
367 {0x05c1, 0x05c2}, {0x05c4, 0x05c5}, {0x05c7, 0x05c7}, {0x0610, 0… | |
368 {0x064b, 0x065e}, {0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0… | |
369 {0x06e7, 0x06e8}, {0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0… | |
370 {0x07a6, 0x07b0}, {0x07eb, 0x07f3}, {0x0816, 0x0819}, {0x081b, 0… | |
371 {0x0825, 0x0827}, {0x0829, 0x082d}, {0x0900, 0x0903}, {0x093c, 0… | |
372 {0x093e, 0x094e}, {0x0951, 0x0955}, {0x0962, 0x0963}, {0x0981, 0… | |
373 {0x09bc, 0x09bc}, {0x09be, 0x09c4}, {0x09c7, 0x09c8}, {0x09cb, 0… | |
374 {0x09d7, 0x09d7}, {0x09e2, 0x09e3}, {0x0a01, 0x0a03}, {0x0a3c, 0… | |
375 {0x0a3e, 0x0a42}, {0x0a47, 0x0a48}, {0x0a4b, 0x0a4d}, {0x0a51, 0… | |
376 {0x0a70, 0x0a71}, {0x0a75, 0x0a75}, {0x0a81, 0x0a83}, {0x0abc, 0… | |
377 {0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0… | |
378 {0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b44}, {0x0b47, 0… | |
379 {0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0… | |
380 {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0… | |
381 {0x0c01, 0x0c03}, {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0… | |
382 {0x0c55, 0x0c56}, {0x0c62, 0x0c63}, {0x0c82, 0x0c83}, {0x0cbc, 0… | |
383 {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0cd5, 0… | |
384 {0x0ce2, 0x0ce3}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d44}, {0x0d46, 0… | |
385 {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, {0x0d82, 0… | |
386 {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0… | |
387 {0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0… | |
388 {0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0… | |
389 {0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0… | |
390 {0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0… | |
391 {0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102b, 0x103e}, {0x1056, 0… | |
392 {0x105e, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106d}, {0x1071, 0… | |
393 {0x1082, 0x108d}, {0x108f, 0x108f}, {0x109a, 0x109d}, {0x135f, 0… | |
394 {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0… | |
395 {0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0… | |
396 {0x1920, 0x192b}, {0x1930, 0x193b}, {0x19b0, 0x19c0}, {0x19c8, 0… | |
397 {0x1a17, 0x1a1b}, {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0… | |
398 {0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, {0x1b80, 0… | |
399 {0x1ba1, 0x1baa}, {0x1c24, 0x1c37}, {0x1cd0, 0x1cd2}, {0x1cd4, 0… | |
400 {0x1ced, 0x1ced}, {0x1cf2, 0x1cf2}, {0x1dc0, 0x1de6}, {0x1dfd, 0… | |
401 {0x200b, 0x200f}, | |
402 {0x20d0, 0x20f0}, {0x2cef, 0x2cf1}, {0x2de0, 0x2dff}, {0x302a, 0… | |
403 {0x3099, 0x309a}, {0xa66f, 0xa672}, {0xa67c, 0xa67d}, {0xa6f0, 0… | |
404 {0xa802, 0xa802}, {0xa806, 0xa806}, {0xa80b, 0xa80b}, {0xa823, 0… | |
405 {0xa880, 0xa881}, {0xa8b4, 0xa8c4}, {0xa8e0, 0xa8f1}, {0xa926, 0… | |
406 {0xa947, 0xa953}, {0xa980, 0xa983}, {0xa9b3, 0xa9c0}, {0xaa29, 0… | |
407 {0xaa43, 0xaa43}, {0xaa4c, 0xaa4d}, {0xaa7b, 0xaa7b}, {0xaab0, 0… | |
408 {0xaab2, 0xaab4}, {0xaab7, 0xaab8}, {0xaabe, 0xaabf}, {0xaac1, 0… | |
409 {0xabe3, 0xabea}, {0xabec, 0xabed}, {0xfb1e, 0xfb1e}, {0xfe00, 0… | |
410 {0xfe20, 0xfe26}, {0x101fd, 0x101fd}, {0x10a01, 0x10a03}, {0x10a… | |
411 {0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, {0x1… | |
412 {0x110b0, 0x110ba}, {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1… | |
413 {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244}, {0xe… | |
414 }; | |
415 | |
/*
 * Inclusive {first, last} code point ranges consulted by uc_isbell() when
 * deciding whether a character is nonprintable.
 *
 * The table is searched by find(), a binary search, so the ranges must
 * stay sorted in ascending order and must not overlap.
 */
static int bchars[][2] = {
	{0x00000, 0x0001f}, {0x00080, 0x0009f}, {0x00300, 0x0036f},
	{0x00379, 0x00379}, {0x00380, 0x00383}, {0x0038d, 0x0038d},
	{0x00483, 0x00489}, {0x00527, 0x00530}, {0x00558, 0x00558},
	{0x00588, 0x00588}, {0x0058c, 0x005bd}, {0x005c1, 0x005c2},
	{0x005c5, 0x005c5}, {0x005c8, 0x005cf}, {0x005ec, 0x005ef},
	{0x005f6, 0x00605}, {0x00611, 0x0061a}, {0x0061d, 0x0061d},
	{0x0064b, 0x0065f}, {0x006d6, 0x006e4}, {0x006e8, 0x006e8},
	{0x006eb, 0x006ed}, {0x0070f, 0x0070f}, {0x00730, 0x0074c},
	{0x007a7, 0x007b0}, {0x007b3, 0x007bf}, {0x007ec, 0x007f3},
	{0x007fc, 0x007ff}, {0x00817, 0x00819}, {0x0081c, 0x00823},
	{0x00826, 0x00827}, {0x0082a, 0x0082f}, {0x00840, 0x00903},
	{0x0093b, 0x0093c}, {0x0093f, 0x0094f}, {0x00952, 0x00957},
	{0x00963, 0x00963}, {0x00974, 0x00978}, {0x00981, 0x00984},
	{0x0098e, 0x0098e}, {0x00992, 0x00992}, {0x009b1, 0x009b1},
	{0x009b4, 0x009b5}, {0x009bb, 0x009bc}, {0x009bf, 0x009cd},
	{0x009d0, 0x009db}, {0x009e2, 0x009e5}, {0x009fd, 0x00a04},
	{0x00a0c, 0x00a0e}, {0x00a12, 0x00a12}, {0x00a31, 0x00a31},
	{0x00a37, 0x00a37}, {0x00a3b, 0x00a58}, {0x00a5f, 0x00a65},
	{0x00a71, 0x00a71}, {0x00a76, 0x00a84}, {0x00a92, 0x00a92},
	{0x00ab1, 0x00ab1}, {0x00aba, 0x00abc}, {0x00abf, 0x00acf},
	{0x00ad2, 0x00adf}, {0x00ae3, 0x00ae5}, {0x00af2, 0x00b04},
	{0x00b0e, 0x00b0e}, {0x00b12, 0x00b12}, {0x00b31, 0x00b31},
	{0x00b3a, 0x00b3c}, {0x00b3f, 0x00b5b}, {0x00b62, 0x00b65},
	{0x00b73, 0x00b82}, {0x00b8b, 0x00b8d}, {0x00b96, 0x00b98},
	{0x00b9d, 0x00b9d}, {0x00ba1, 0x00ba2}, {0x00ba6, 0x00ba7},
	{0x00bac, 0x00bad}, {0x00bbb, 0x00bcf}, {0x00bd2, 0x00be5},
	{0x00bfc, 0x00c04}, {0x00c11, 0x00c11}, {0x00c34, 0x00c34},
	{0x00c3b, 0x00c3c}, {0x00c3f, 0x00c57}, {0x00c5b, 0x00c5f},
	{0x00c63, 0x00c65}, {0x00c71, 0x00c77}, {0x00c81, 0x00c84},
	{0x00c91, 0x00c91}, {0x00cb4, 0x00cb4}, {0x00cbb, 0x00cbc},
	{0x00cbf, 0x00cdd}, {0x00ce2, 0x00ce5}, {0x00cf3, 0x00d04},
	{0x00d11, 0x00d11}, {0x00d3a, 0x00d3c}, {0x00d3f, 0x00d5f},
	{0x00d63, 0x00d65}, {0x00d77, 0x00d78}, {0x00d81, 0x00d84},
	{0x00d98, 0x00d99}, {0x00dbc, 0x00dbc}, {0x00dbf, 0x00dbf},
	{0x00dc8, 0x00df3}, {0x00df6, 0x00e00}, {0x00e34, 0x00e3e},
	{0x00e48, 0x00e4e}, {0x00e5d, 0x00e80}, {0x00e85, 0x00e86},
	{0x00e8b, 0x00e8c}, {0x00e8f, 0x00e93}, {0x00ea0, 0x00ea0},
	{0x00ea6, 0x00ea6}, {0x00ea9, 0x00ea9}, {0x00eb1, 0x00eb1},
	{0x00eb5, 0x00ebc}, {0x00ebf, 0x00ebf}, {0x00ec7, 0x00ecf},
	{0x00edb, 0x00edb}, {0x00edf, 0x00eff}, {0x00f19, 0x00f19},
	{0x00f37, 0x00f37}, {0x00f3e, 0x00f3f}, {0x00f6d, 0x00f84},
	{0x00f87, 0x00f87}, {0x00f8d, 0x00fbd}, {0x00fcd, 0x00fcd},
	{0x00fda, 0x00fff}, {0x0102c, 0x0103e}, {0x01057, 0x01059},
	{0x0105f, 0x01060}, {0x01063, 0x01064}, {0x01068, 0x0106d},
	{0x01072, 0x01074}, {0x01083, 0x0108d}, {0x0109a, 0x0109d},
	{0x010c7, 0x010cf}, {0x010fe, 0x010ff}, {0x0124e, 0x0124f},
	{0x01259, 0x01259}, {0x0125f, 0x0125f}, {0x0128e, 0x0128f},
	{0x012b6, 0x012b7}, {0x012c1, 0x012c1}, {0x012c7, 0x012c7},
	{0x01311, 0x01311}, {0x01317, 0x01317}, {0x0135c, 0x0135f},
	{0x0137e, 0x0137f}, {0x0139b, 0x0139f}, {0x013f6, 0x013ff},
	{0x0169e, 0x0169f}, {0x016f2, 0x016ff}, {0x01712, 0x0171f},
	{0x01733, 0x01734}, {0x01738, 0x0173f}, {0x01753, 0x0175f},
	{0x01771, 0x0177f}, {0x017b5, 0x017d3}, {0x017de, 0x017df},
	{0x017eb, 0x017ef}, {0x017fb, 0x017ff}, {0x0180c, 0x0180d},
	{0x0181a, 0x0181f}, {0x01879, 0x0187f}, {0x018ab, 0x018af},
	{0x018f7, 0x018ff}, {0x0191e, 0x0193f}, {0x01942, 0x01943},
	{0x0196f, 0x0196f}, {0x01976, 0x0197f}, {0x019ad, 0x019c0},
	{0x019c9, 0x019cf}, {0x019dc, 0x019dd}, {0x01a18, 0x01a1d},
	{0x01a56, 0x01a7f}, {0x01a8b, 0x01a8f}, {0x01a9b, 0x01a9f},
	{0x01aaf, 0x01b04}, {0x01b35, 0x01b44}, {0x01b4d, 0x01b4f},
	{0x01b6c, 0x01b73}, {0x01b7e, 0x01b82}, {0x01ba2, 0x01bad},
	{0x01bbb, 0x01bff}, {0x01c25, 0x01c3a}, {0x01c4b, 0x01c4c},
	{0x01c81, 0x01cd2}, {0x01cd5, 0x01ce8}, {0x01cf2, 0x01cff},
	{0x01dc1, 0x01dff}, {0x01f17, 0x01f17}, {0x01f1f, 0x01f1f},
	{0x01f47, 0x01f47}, {0x01f4f, 0x01f4f}, {0x01f5a, 0x01f5a},
	{0x01f5e, 0x01f5e}, {0x01f7f, 0x01f7f}, {0x01fc5, 0x01fc5},
	{0x01fd5, 0x01fd5}, {0x01ff0, 0x01ff1}, {0x01fff, 0x01fff},
	{0x0200c, 0x0200f}, {0x02029, 0x0202e}, {0x02061, 0x0206f},
	{0x02073, 0x02073}, {0x02095, 0x0209f}, {0x020ba, 0x020ff},
	{0x0218b, 0x0218f}, {0x023ea, 0x023ff}, {0x02428, 0x0243f},
	{0x0244c, 0x0245f}, {0x026e2, 0x026e2}, {0x026e5, 0x026e7},
	{0x02705, 0x02705}, {0x0270b, 0x0270b}, {0x0274c, 0x0274c},
	{0x02753, 0x02755}, {0x02760, 0x02760}, {0x02796, 0x02797},
	{0x027bf, 0x027bf}, {0x027cd, 0x027cf}, {0x02b4e, 0x02b4f},
	{0x02b5b, 0x02bff}, {0x02c5f, 0x02c5f}, {0x02cf0, 0x02cf8},
	{0x02d27, 0x02d2f}, {0x02d67, 0x02d6e}, {0x02d71, 0x02d7f},
	{0x02d98, 0x02d9f}, {0x02daf, 0x02daf}, {0x02dbf, 0x02dbf},
	{0x02dcf, 0x02dcf}, {0x02ddf, 0x02dff}, {0x02e33, 0x02e7f},
	{0x02ef4, 0x02eff}, {0x02fd7, 0x02fef}, {0x02ffd, 0x02fff},
	{0x0302b, 0x0302f}, {0x03097, 0x0309a}, {0x03101, 0x03104},
	{0x0312f, 0x03130}, {0x031b8, 0x031bf}, {0x031e5, 0x031ef},
	{0x032ff, 0x032ff}, {0x04db7, 0x04dbf}, {0x09fcd, 0x09fff},
	{0x0a48e, 0x0a48f}, {0x0a4c8, 0x0a4cf}, {0x0a62d, 0x0a63f},
	{0x0a661, 0x0a661}, {0x0a670, 0x0a672}, {0x0a675, 0x0a67d},
	{0x0a699, 0x0a69f}, {0x0a6f1, 0x0a6f1}, {0x0a6f9, 0x0a6ff},
	{0x0a78e, 0x0a7fa}, {0x0a806, 0x0a806}, {0x0a823, 0x0a827},
	{0x0a82d, 0x0a82f}, {0x0a83b, 0x0a83f}, {0x0a879, 0x0a881},
	{0x0a8b5, 0x0a8cd}, {0x0a8db, 0x0a8f1}, {0x0a8fd, 0x0a8ff},
	{0x0a927, 0x0a92d}, {0x0a948, 0x0a95e}, {0x0a97e, 0x0a983},
	{0x0a9b4, 0x0a9c0}, {0x0a9da, 0x0a9dd}, {0x0a9e1, 0x0a9ff},
	{0x0aa2a, 0x0aa3f}, {0x0aa4c, 0x0aa4f}, {0x0aa5b, 0x0aa5b},
	{0x0aa7c, 0x0aa7f}, {0x0aab2, 0x0aab4}, {0x0aab8, 0x0aab8},
	{0x0aabf, 0x0aabf}, {0x0aac3, 0x0aada}, {0x0aae1, 0x0abbf},
	{0x0abe4, 0x0abea}, {0x0abed, 0x0abef}, {0x0abfb, 0x0abff},
	{0x0d7a5, 0x0d7af}, {0x0d7c8, 0x0d7ca}, {0x0d7fd, 0x0f8ff},
	{0x0fa2f, 0x0fa2f}, {0x0fa6f, 0x0fa6f}, {0x0fadb, 0x0faff},
	{0x0fb08, 0x0fb12}, {0x0fb19, 0x0fb1c}, {0x0fb37, 0x0fb37},
	{0x0fb3f, 0x0fb3f}, {0x0fb45, 0x0fb45}, {0x0fbb3, 0x0fbd2},
	{0x0fd41, 0x0fd4f}, {0x0fd91, 0x0fd91}, {0x0fdc9, 0x0fdef},
	{0x0fdff, 0x0fe0f}, {0x0fe1b, 0x0fe2f}, {0x0fe67, 0x0fe67},
	{0x0fe6d, 0x0fe6f}, {0x0fefd, 0x0ff00}, {0x0ffc0, 0x0ffc1},
	{0x0ffc9, 0x0ffc9}, {0x0ffd1, 0x0ffd1}, {0x0ffd9, 0x0ffd9},
	{0x0ffde, 0x0ffdf}, {0x0ffef, 0x0fffb}, {0x0ffff, 0x0ffff},
	{0x10027, 0x10027}, {0x1003e, 0x1003e}, {0x1004f, 0x1004f},
	{0x1005f, 0x1007f}, {0x100fc, 0x100ff}, {0x10104, 0x10106},
	{0x10135, 0x10136}, {0x1018c, 0x1018f}, {0x1019d, 0x101cf},
	{0x101fe, 0x1027f}, {0x1029e, 0x1029f}, {0x102d2, 0x102ff},
	{0x10324, 0x1032f}, {0x1034c, 0x1037f}, {0x103c4, 0x103c7},
	{0x103d7, 0x103ff}, {0x1049f, 0x1049f}, {0x104ab, 0x107ff},
	{0x10807, 0x10807}, {0x10836, 0x10836}, {0x1083a, 0x1083b},
	{0x1083e, 0x1083e}, {0x10860, 0x108ff}, {0x1091d, 0x1091e},
	{0x1093b, 0x1093e}, {0x10941, 0x109ff}, {0x10a02, 0x10a0f},
	{0x10a18, 0x10a18}, {0x10a35, 0x10a3f}, {0x10a49, 0x10a4f},
	{0x10a5a, 0x10a5f}, {0x10a81, 0x10aff}, {0x10b37, 0x10b38},
	{0x10b57, 0x10b57}, {0x10b74, 0x10b77}, {0x10b81, 0x10bff},
	{0x10c4a, 0x10e5f}, {0x10e80, 0x11082}, {0x110b1, 0x110ba},
	{0x110c2, 0x11fff}, {0x12370, 0x123ff}, {0x12464, 0x1246f},
	{0x12475, 0x12fff}, {0x13430, 0x1cfff}, {0x1d0f7, 0x1d0ff},
	{0x1d128, 0x1d128}, {0x1d166, 0x1d169}, {0x1d16e, 0x1d182},
	{0x1d186, 0x1d18b}, {0x1d1ab, 0x1d1ad}, {0x1d1df, 0x1d1ff},
	{0x1d243, 0x1d244}, {0x1d247, 0x1d2ff}, {0x1d358, 0x1d35f},
	{0x1d373, 0x1d3ff}, {0x1d49d, 0x1d49d}, {0x1d4a1, 0x1d4a1},
	{0x1d4a4, 0x1d4a4}, {0x1d4a8, 0x1d4a8}, {0x1d4ba, 0x1d4ba},
	{0x1d4c4, 0x1d4c4}, {0x1d50b, 0x1d50c}, {0x1d51d, 0x1d51d},
	{0x1d53f, 0x1d53f}, {0x1d547, 0x1d549}, {0x1d6a6, 0x1d6a7},
	{0x1d7cd, 0x1d7cd}, {0x1d801, 0x1efff}, {0x1f02d, 0x1f02f},
	{0x1f095, 0x1f0ff}, {0x1f10c, 0x1f10f}, {0x1f130, 0x1f130},
	{0x1f133, 0x1f13c}, {0x1f140, 0x1f141}, {0x1f144, 0x1f145},
	{0x1f148, 0x1f149}, {0x1f150, 0x1f156}, {0x1f159, 0x1f15e},
	{0x1f161, 0x1f178}, {0x1f17d, 0x1f17e}, {0x1f181, 0x1f189},
	{0x1f18f, 0x1f18f}, {0x1f192, 0x1f1ff}, {0x1f202, 0x1f20f},
	{0x1f233, 0x1f23f}, {0x1f24a, 0x1ffff}, {0x2a6d8, 0x2a6ff},
	{0x2b736, 0x2f7ff}, {0x2fa1f, 0x10ffff},
};
551 | |
/*
 * Return 1 if c falls inside one of the n inclusive {first, last} ranges
 * in tab, 0 otherwise.  tab is searched with binary search and therefore
 * has to be sorted; it must contain at least one range.
 */
static int find(int c, int tab[][2], int n)
{
	int lo = 0;
	int hi = n - 1;		/* search the closed range [lo, hi] */
	if (c < tab[0][0])	/* below the first range */
		return 0;
	while (lo <= hi) {
		int mid = (hi + lo) / 2;
		if (c < tab[mid][0])
			hi = mid - 1;
		else if (c > tab[mid][1])
			lo = mid + 1;
		else
			return 1;
	}
	return 0;
}
570 | |
571 /* double-width characters */ | |
572 static int uc_isdw(int c) | |
573 { | |
574 return c >= 0x1100 && find(c, dwchars, LEN(dwchars)); | |
575 } | |
576 | |
577 /* zero-width and combining characters */ | |
578 static int uc_iszw(int c) | |
579 { | |
580 return c >= 0x0300 && find(c, zwchars, LEN(zwchars)); | |
581 } | |
582 | |
/*
 * Return the display width of the character at s: 0 for zero-width and
 * combining characters, 2 for double-width characters, 1 otherwise.
 */
int uc_wid(char *s)
{
	int code = uc_code(s);
	if (uc_iszw(code))
		return 0;
	if (uc_isdw(code))
		return 2;
	return 1;
}
590 | |
591 /* nonprintable characters */ | |
592 int uc_isbell(char *s) | |
593 { | |
594 int c = (unsigned char) *s; | |
595 if (c == ' ' || c == '\t' || c == '\n' || (c <= 0x7f && isprint(… | |
596 return 0; | |
597 c = uc_code(s); | |
598 return uc_iszw(c) || find(c, bchars, LEN(bchars)); | |
599 } | |
600 | |
/*
 * Return nonzero if the character at s is an Arabic combining character
 * as defined by uc_acomb().  Space, tab, newline and printable ASCII are
 * rejected without decoding.
 */
int uc_iscomb(char *s)
{
	int first = (unsigned char) *s;
	if (first == ' ' || first == '\t' || first == '\n')
		return 0;
	if (first <= 0x7f && isprint(first))
		return 0;
	return uc_acomb(uc_code(s));
}