Introduction
Introduction Statistics Contact Development Disclaimer Help
tuc.c - neatvi - [fork] simple vi-type editor with UTF-8 support
git clone git://src.adamsgaard.dk/neatvi
Log
Files
Refs
README
---
tuc.c (21648B)
---
1 #include <ctype.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include "vi.h"
6
/* number of elements in array a; a must be a real array, not a pointer */
#define LEN(a)		(sizeof(a) / sizeof((a)[0]))
8
/*
 * Length in bytes of the utf-8 character starting at s, judged from its
 * first byte only.  Returns 0 for the terminating NUL and 1 for ASCII,
 * stray continuation bytes and invalid lead bytes.
 */
int uc_len(char *s)
{
	int lead = (unsigned char) s[0];
	if ((lead & 0xc0) != 0xc0)	/* ASCII or a continuation byte */
		return lead != 0;
	if ((lead & 0xe0) == 0xc0)	/* 110xxxxx */
		return 2;
	if ((lead & 0xf0) == 0xe0)	/* 1110xxxx */
		return 3;
	if ((lead & 0xf8) == 0xf0)	/* 11110xxx */
		return 4;
	return 1;			/* 11111xxx: not a valid lead byte */
}
23
/* count the utf-8 characters in the NUL-terminated string s */
int uc_slen(char *s)
{
	int count = 0;
	while (*s) {
		s = uc_end(s) + 1;	/* step past the last byte of this character */
		count++;
	}
	return count;
}
32
/*
 * The unicode codepoint of the utf-8 character at s.
 *
 * ASCII bytes, stray continuation bytes and invalid lead bytes are returned
 * unchanged.  A lead byte that is not followed by the continuation bytes it
 * announces (for instance a sequence cut short by the end of the string) is
 * also returned unchanged; decoding stops at the first byte that is not a
 * continuation byte, so nothing beyond the terminating NUL is ever read.
 */
int uc_code(char *s)
{
	int c = (unsigned char) s[0];
	int code;	/* bits gathered so far */
	int n;		/* continuation bytes announced by the lead byte */
	int i;
	if (~c & 0xc0)		/* ASCII or invalid */
		return c;
	if (~c & 0x20) {
		n = 1;
		code = c & 0x1f;
	} else if (~c & 0x10) {
		n = 2;
		code = c & 0x0f;
	} else if (~c & 0x08) {
		n = 3;
		code = c & 0x07;
	} else {
		return c;	/* 11111xxx is not a lead byte */
	}
	for (i = 1; i <= n; i++) {
		if (((unsigned char) s[i] & 0xc0) != 0x80)
			return c;	/* truncated or malformed sequence */
		code = (code << 6) | (s[i] & 0x3f);
	}
	return code;
}
47
/*
 * Step back from s over continuation bytes to the first byte of the
 * character containing s; never moves before beg.
 */
char *uc_beg(char *beg, char *s)
{
	for (; s > beg; s--)
		if (((unsigned char) *s & 0xc0) != 0x80)
			break;
	return s;
}
55
/*
 * Return a pointer to the last byte of the character at s.  For NUL and
 * ASCII this is s itself; otherwise the lead byte (if s is on one) and any
 * continuation bytes that follow are skipped.
 */
char *uc_end(char *s)
{
	unsigned char first = (unsigned char) *s;
	if (first < 0x80)		/* NUL or ASCII */
		return s;
	if ((first & 0xc0) == 0xc0)	/* lead byte */
		s++;
	while (((unsigned char) *s & 0xc0) == 0x80)
		s++;
	return s - 1;
}
67
/*
 * Return a pointer to the character after the one at s; at the end of the
 * string the terminating NUL itself is returned.
 */
char *uc_next(char *s)
{
	char *last = uc_end(s);
	if (*last == '\0')
		return last;
	return last + 1;
}
74
/* return a pointer to the character before s, or beg when s is already at beg */
char *uc_prev(char *beg, char *s)
{
	if (s == beg)
		return beg;
	return uc_beg(beg, s - 1);
}
80
/* return the text following the last newline of s, or s if it has no newline */
char *uc_lastline(char *s)
{
	char *nl = strrchr(s, '\n');
	if (nl == NULL)
		return s;
	return nl + 1;
}
86
/*
 * Allocate and return an array holding a pointer to the start of each
 * character of s.  *n is set to the number of characters; the array has
 * *n + 1 entries, the extra one being filled by one more uc_next() step.
 * The entries point into s, so the caller frees only the array.
 * Returns NULL if the allocation fails.
 */
char **uc_chop(char *s, int *n)
{
	char **chrs;
	int i;
	*n = uc_slen(s);
	chrs = malloc((*n + 1) * sizeof(chrs[0]));
	if (!chrs)	/* do not write through a failed allocation */
		return NULL;
	for (i = 0; i < *n + 1; i++) {
		chrs[i] = s;
		s = uc_next(s);
	}
	return chrs;
}
100
/*
 * Return a pointer to character number off (counting from zero) of s.
 * A negative off, or off equal to the number of characters, yields the
 * terminating NUL of s.  A NULL s or an off past the end yields a pointer
 * to a separate empty string.
 */
char *uc_chr(char *s, int off)
{
	int idx;
	if (!s)
		return "";
	for (idx = 0; *s; idx++) {
		if (idx == off)
			return s;
		s = uc_next(s);
	}
	if (off < 0 || idx == off)
		return s;
	return "";
}
111
/* the number of characters that begin before byte offset off of s */
int uc_off(char *s, int off)
{
	char *limit = s + off;
	int count = 0;
	while (s < limit && *s) {
		s = uc_next(s);
		count++;
	}
	return count;
}
121
/*
 * Return a newly allocated copy of characters beg (inclusive) to end
 * (exclusive) of s; the caller frees it.  A negative offset, like an offset
 * equal to the number of characters, refers to the end of s.  The result is
 * an empty string when s is NULL, when the range is empty or inverted, or
 * when either offset lies past the end of s.  Returns NULL if the
 * allocation fails.
 *
 * The offsets are located in a single walk over s instead of through
 * uc_chr(): uc_chr() returns a pointer to a separate "" for out-of-range
 * offsets, and comparing or subtracting that pointer against one into s
 * gives a meaningless length.
 */
char *uc_sub(char *s, int beg, int end)
{
	char *sbeg = NULL;	/* start of character beg, if inside s */
	char *send = NULL;	/* start of character end, if inside s */
	char *p = s;
	char *r;
	int len = 0;
	int i;
	for (i = 0; p; i++) {
		if (i == beg)
			sbeg = p;
		if (i == end)
			send = p;
		if (!*p)	/* the terminating NUL is index "length" */
			break;
		p = uc_next(p);
	}
	if (p && beg < 0)	/* negative offsets mean the end of s */
		sbeg = p;
	if (p && end < 0)
		send = p;
	if (sbeg && send && sbeg <= send)
		len = send - sbeg;
	r = malloc(len + 1);
	if (!r)
		return NULL;
	if (len)
		memcpy(r, sbeg, len);
	r[len] = '\0';
	return r;
}
132
/* return a malloc()ed copy of s, or NULL if the allocation fails */
char *uc_dup(char *s)
{
	size_t size = strlen(s) + 1;
	char *copy = malloc(size);
	if (copy == NULL)
		return NULL;
	return strcpy(copy, s);
}
138
/* 1 if the first byte of s is ASCII whitespace; 0 otherwise, including for NULL and non-ASCII bytes */
int uc_isspace(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 0;
	return isspace(c) != 0;
}
144
/* 1 if the first byte of s is non-ASCII or a printable ASCII character; 0 otherwise, including for NULL */
int uc_isprint(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 1;
	return isprint(c) != 0;
}
150
/* 1 if the first byte of s is non-ASCII or an ASCII letter; 0 otherwise, including for NULL */
int uc_isalpha(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 1;
	return isalpha(c) != 0;
}
156
/* 1 if the first byte of s is an ASCII digit; 0 otherwise, including for NULL and non-ASCII bytes */
int uc_isdigit(char *s)
{
	int c = 0;
	if (s)
		c = (unsigned char) *s;
	if (c > 0x7f)
		return 0;
	return isdigit(c) != 0;
}
162
/*
 * Classify the character at c: 0 for whitespace, 1 for word characters
 * (alphabetic, digit or '_'), 2 for everything else.
 */
int uc_kind(char *c)
{
	int word;
	if (uc_isspace(c))
		return 0;
	word = uc_isalpha(c) || uc_isdigit(c) || c[0] == '_';
	return word ? 1 : 2;
}
171
/*
 * Nonzero if code point ch lies in one of the ranges this file treats as
 * right-to-left: 0x0600-0x06ff, 0x200c-0x200f, 0xfb00-0xfbff, 0xfc00-0xfcff
 * and 0xfe00-0xfeff.  The masks keep every bit above the tested field, so
 * code points beyond 0xffff (e.g. 0x10627) do not alias these ranges.
 * ch is evaluated more than once.
 */
#define UC_R2L(ch)	(((ch) & ~0xff) == 0x0600 || \
			((ch) & ~0x03) == 0x200c || \
			((ch) & ~0xff) == 0xfb00 || \
			((ch) & ~0xff) == 0xfc00 || \
			((ch) & ~0xff) == 0xfe00)
177
/*
 * List of characters that can be shaped, sorted by code point (field c) so
 * that find_achar() can binary-search it.  Each entry gives the code points
 * of the letter's presentation forms; a zero (or omitted) form means the
 * entry has no such form.
 */
static struct achar {
	unsigned c;		/* unicode code point of the base letter */
	unsigned s;		/* single (isolated) form */
	unsigned i;		/* initial form */
	unsigned m;		/* medial form */
	unsigned f;		/* final form */
} achars[] = {
	{0x0621, 0xfe80},				/* hamza */
	{0x0622, 0xfe81, 0, 0, 0xfe82},			/* alef madda */
	{0x0623, 0xfe83, 0, 0, 0xfe84},			/* alef hamza above */
	{0x0624, 0xfe85, 0, 0, 0xfe86},			/* waw hamza */
	{0x0625, 0xfe87, 0, 0, 0xfe88},			/* alef hamza below */
	{0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},	/* yeh hamza */
	{0x0627, 0xfe8d, 0, 0, 0xfe8e},			/* alef */
	{0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},	/* beh */
	{0x0629, 0xfe93, 0, 0, 0xfe94},			/* teh marbuta */
	{0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},	/* teh */
	{0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},	/* theh */
	{0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},	/* jeem */
	{0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},	/* hah */
	{0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},	/* khah */
	{0x062f, 0xfea9, 0, 0, 0xfeaa},			/* dal */
	{0x0630, 0xfeab, 0, 0, 0xfeac},			/* thal */
	{0x0631, 0xfead, 0, 0, 0xfeae},			/* reh */
	{0x0632, 0xfeaf, 0, 0, 0xfeb0},			/* zain */
	{0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},	/* seen */
	{0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},	/* sheen */
	{0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},	/* sad */
	{0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},	/* dad */
	{0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},	/* tah */
	{0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},	/* zah */
	{0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},	/* ain */
	{0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},	/* ghain */
	{0x0640, 0x640, 0x640, 0x640},			/* tatweel */
	{0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},	/* feh */
	{0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},	/* qaf */
	{0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},	/* kaf */
	{0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},	/* lam */
	{0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},	/* meem */
	{0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},	/* noon */
	{0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},	/* heh */
	{0x0648, 0xfeed, 0, 0, 0xfeee},			/* waw */
	{0x0649, 0xfeef, 0, 0, 0xfef0},			/* alef maksura */
	{0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},	/* yeh */
	{0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},	/* peh */
	{0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},	/* tcheh */
	{0x0698, 0xfb8a, 0, 0, 0xfb8b},			/* jeh */
	{0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},	/* fkaf */
	{0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},	/* gaf */
	{0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},	/* fyeh */
	{0x200c},					/* ZWNJ */
	{0x200d, 0, 0x200d, 0x200d},			/* ZWJ */
};
232
233 static struct achar *find_achar(int c)
234 {
235 int h, m, l;
236 h = LEN(achars);
237 l = 0;
238 /* using binary search to find c */
239 while (l < h) {
240 m = (h + l) >> 1;
241 if (achars[m].c == c)
242 return &achars[m];
243 if (c < achars[m].c)
244 h = m;
245 else
246 l = m + 1;
247 }
248 return NULL;
249 }
250
251 static int can_join(int c1, int c2)
252 {
253 struct achar *a1 = find_achar(c1);
254 struct achar *a2 = find_achar(c2);
255 return a1 && a2 && (a1->i || a1->m) && (a2->f || a2->m);
256 }
257
258 static int uc_cshape(int cur, int prev, int next)
259 {
260 int c = cur;
261 int join_prev, join_next;
262 struct achar *ac = find_achar(c);
263 if (!ac) /* ignore non-Arabic characters */
264 return c;
265 join_prev = can_join(prev, c);
266 join_next = can_join(c, next);
267 if (join_prev && join_next)
268 c = ac->m;
269 if (join_prev && !join_next)
270 c = ac->f;
271 if (!join_prev && join_next)
272 c = ac->i;
273 if (!join_prev && !join_next)
274 c = ac->c; /* some fonts do not have a glyph for …
275 return c ? c : cur;
276 }
277
/*
 * return nonzero for Arabic combining characters
 *
 * The standard Arabic diacritics:
 * + 0x064b: fathatan
 * + 0x064c: dammatan
 * + 0x064d: kasratan
 * + 0x064e: fatha
 * + 0x064f: damma
 * + 0x0650: kasra
 * + 0x0651: shadda
 * + 0x0652: sukun
 * + 0x0653: madda above
 * + 0x0654: hamza above
 * + 0x0655: hamza below
 * + 0x0670: superscript alef
 *
 * The range 0xfc5e-0xfc63 (shadda ligatures) is accepted as well.
 */
static int uc_acomb(int c)
{
	if (c == 0x0670)			/* superscript alef */
		return 1;
	if (c >= 0x064b && c <= 0x0655)		/* the standard diacritics */
		return 1;
	return c >= 0xfc5e && c <= 0xfc63;	/* shadda ligatures */
}
301
/*
 * Write code point c to d as utf-8 followed by a NUL; d must have room for
 * up to four encoded bytes plus the terminator.
 */
static void uc_cput(char *d, int c)
{
	int shift;	/* bit position just above the next continuation byte */
	if (c > 0xffff) {
		*d++ = 0xf0 | (c >> 18);
		shift = 18;
	} else if (c > 0x7ff) {
		*d++ = 0xe0 | (c >> 12);
		shift = 12;
	} else if (c > 0x7f) {
		*d++ = 0xc0 | (c >> 6);
		shift = 6;
	} else {
		*d++ = c;
		shift = 0;
	}
	while (shift > 0) {
		shift -= 6;
		*d++ = 0x80 | ((c >> shift) & 0x3f);
	}
	*d = '\0';
}
321
/*
 * Shape the Arabic character at s according to its neighbours, skipping
 * Arabic combining marks on either side; beg is the start of the string.
 * Returns NULL when s is at a NUL or its code point fails UC_R2L();
 * otherwise returns the utf-8 result in a static buffer that the next call
 * overwrites.
 */
char *uc_shape(char *beg, char *s)
{
	static char out[16];
	int curr = uc_code(s);
	int prev = 0;
	int next = 0;
	int code;
	char *p;
	if (!curr || !UC_R2L(curr))
		return NULL;
	/* nearest preceding character that is not a combining mark */
	for (p = s; p > beg; ) {
		p = uc_beg(beg, p - 1);
		code = uc_code(p);
		if (!uc_acomb(code)) {
			prev = code;
			break;
		}
	}
	/* nearest following character that is not a combining mark */
	for (p = s; *p; ) {
		p = uc_next(p);
		code = uc_code(p);
		if (!uc_acomb(code)) {
			next = code;
			break;
		}
	}
	uc_cput(out, uc_cshape(curr, prev, next));
	return out;
}
351
352 static int dwchars[][2] = {
353 {0x1100, 0x115f}, {0x11a3, 0x11a7}, {0x11fa, 0x11ff}, {0x2329, 0…
354 {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0…
355 {0x3000, 0x3029}, {0x3030, 0x303e}, {0x3041, 0x3096}, {0x309b, 0…
356 {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x31c0, 0…
357 {0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x32fe}, {0x3300, 0…
358 {0x4e00, 0xa48c}, {0xa490, 0xa4c6}, {0xa960, 0xa97c}, {0xac00, 0…
359 {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfaff}, {0xfe10, 0…
360 {0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xff01, 0…
361 {0xffe0, 0xffe6}, {0x1f200, 0x1f200}, {0x1f210, 0x1f231}, {0x1f2…
362 {0x20000,0x2ffff},
363 };
364
365 static int zwchars[][2] = {
366 {0x0300, 0x036f}, {0x0483, 0x0489}, {0x0591, 0x05bd}, {0x05bf, 0…
367 {0x05c1, 0x05c2}, {0x05c4, 0x05c5}, {0x05c7, 0x05c7}, {0x0610, 0…
368 {0x064b, 0x065e}, {0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0…
369 {0x06e7, 0x06e8}, {0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0…
370 {0x07a6, 0x07b0}, {0x07eb, 0x07f3}, {0x0816, 0x0819}, {0x081b, 0…
371 {0x0825, 0x0827}, {0x0829, 0x082d}, {0x0900, 0x0903}, {0x093c, 0…
372 {0x093e, 0x094e}, {0x0951, 0x0955}, {0x0962, 0x0963}, {0x0981, 0…
373 {0x09bc, 0x09bc}, {0x09be, 0x09c4}, {0x09c7, 0x09c8}, {0x09cb, 0…
374 {0x09d7, 0x09d7}, {0x09e2, 0x09e3}, {0x0a01, 0x0a03}, {0x0a3c, 0…
375 {0x0a3e, 0x0a42}, {0x0a47, 0x0a48}, {0x0a4b, 0x0a4d}, {0x0a51, 0…
376 {0x0a70, 0x0a71}, {0x0a75, 0x0a75}, {0x0a81, 0x0a83}, {0x0abc, 0…
377 {0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0…
378 {0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b44}, {0x0b47, 0…
379 {0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0…
380 {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0…
381 {0x0c01, 0x0c03}, {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0…
382 {0x0c55, 0x0c56}, {0x0c62, 0x0c63}, {0x0c82, 0x0c83}, {0x0cbc, 0…
383 {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0cd5, 0…
384 {0x0ce2, 0x0ce3}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d44}, {0x0d46, 0…
385 {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, {0x0d82, 0…
386 {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0…
387 {0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0…
388 {0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0…
389 {0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0…
390 {0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0…
391 {0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102b, 0x103e}, {0x1056, 0…
392 {0x105e, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106d}, {0x1071, 0…
393 {0x1082, 0x108d}, {0x108f, 0x108f}, {0x109a, 0x109d}, {0x135f, 0…
394 {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0…
395 {0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0…
396 {0x1920, 0x192b}, {0x1930, 0x193b}, {0x19b0, 0x19c0}, {0x19c8, 0…
397 {0x1a17, 0x1a1b}, {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0…
398 {0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, {0x1b80, 0…
399 {0x1ba1, 0x1baa}, {0x1c24, 0x1c37}, {0x1cd0, 0x1cd2}, {0x1cd4, 0…
400 {0x1ced, 0x1ced}, {0x1cf2, 0x1cf2}, {0x1dc0, 0x1de6}, {0x1dfd, 0…
401 {0x200b, 0x200f},
402 {0x20d0, 0x20f0}, {0x2cef, 0x2cf1}, {0x2de0, 0x2dff}, {0x302a, 0…
403 {0x3099, 0x309a}, {0xa66f, 0xa672}, {0xa67c, 0xa67d}, {0xa6f0, 0…
404 {0xa802, 0xa802}, {0xa806, 0xa806}, {0xa80b, 0xa80b}, {0xa823, 0…
405 {0xa880, 0xa881}, {0xa8b4, 0xa8c4}, {0xa8e0, 0xa8f1}, {0xa926, 0…
406 {0xa947, 0xa953}, {0xa980, 0xa983}, {0xa9b3, 0xa9c0}, {0xaa29, 0…
407 {0xaa43, 0xaa43}, {0xaa4c, 0xaa4d}, {0xaa7b, 0xaa7b}, {0xaab0, 0…
408 {0xaab2, 0xaab4}, {0xaab7, 0xaab8}, {0xaabe, 0xaabf}, {0xaac1, 0…
409 {0xabe3, 0xabea}, {0xabec, 0xabed}, {0xfb1e, 0xfb1e}, {0xfe00, 0…
410 {0xfe20, 0xfe26}, {0x101fd, 0x101fd}, {0x10a01, 0x10a03}, {0x10a…
411 {0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, {0x1…
412 {0x110b0, 0x110ba}, {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1…
413 {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244}, {0xe…
414 };
415
/*
 * Inclusive {first, last} code point ranges that uc_isbell() reports as
 * nonprintable.  The ranges are in ascending order because find() searches
 * the table by bisection.
 */
static int bchars[][2] = {
	{0x00000, 0x0001f}, {0x00080, 0x0009f}, {0x00300, 0x0036f},
	{0x00379, 0x00379}, {0x00380, 0x00383}, {0x0038d, 0x0038d},
	{0x00483, 0x00489}, {0x00527, 0x00530}, {0x00558, 0x00558},
	{0x00588, 0x00588}, {0x0058c, 0x005bd}, {0x005c1, 0x005c2},
	{0x005c5, 0x005c5}, {0x005c8, 0x005cf}, {0x005ec, 0x005ef},
	{0x005f6, 0x00605}, {0x00611, 0x0061a}, {0x0061d, 0x0061d},
	{0x0064b, 0x0065f}, {0x006d6, 0x006e4}, {0x006e8, 0x006e8},
	{0x006eb, 0x006ed}, {0x0070f, 0x0070f}, {0x00730, 0x0074c},
	{0x007a7, 0x007b0}, {0x007b3, 0x007bf}, {0x007ec, 0x007f3},
	{0x007fc, 0x007ff}, {0x00817, 0x00819}, {0x0081c, 0x00823},
	{0x00826, 0x00827}, {0x0082a, 0x0082f}, {0x00840, 0x00903},
	{0x0093b, 0x0093c}, {0x0093f, 0x0094f}, {0x00952, 0x00957},
	{0x00963, 0x00963}, {0x00974, 0x00978}, {0x00981, 0x00984},
	{0x0098e, 0x0098e}, {0x00992, 0x00992}, {0x009b1, 0x009b1},
	{0x009b4, 0x009b5}, {0x009bb, 0x009bc}, {0x009bf, 0x009cd},
	{0x009d0, 0x009db}, {0x009e2, 0x009e5}, {0x009fd, 0x00a04},
	{0x00a0c, 0x00a0e}, {0x00a12, 0x00a12}, {0x00a31, 0x00a31},
	{0x00a37, 0x00a37}, {0x00a3b, 0x00a58}, {0x00a5f, 0x00a65},
	{0x00a71, 0x00a71}, {0x00a76, 0x00a84}, {0x00a92, 0x00a92},
	{0x00ab1, 0x00ab1}, {0x00aba, 0x00abc}, {0x00abf, 0x00acf},
	{0x00ad2, 0x00adf}, {0x00ae3, 0x00ae5}, {0x00af2, 0x00b04},
	{0x00b0e, 0x00b0e}, {0x00b12, 0x00b12}, {0x00b31, 0x00b31},
	{0x00b3a, 0x00b3c}, {0x00b3f, 0x00b5b}, {0x00b62, 0x00b65},
	{0x00b73, 0x00b82}, {0x00b8b, 0x00b8d}, {0x00b96, 0x00b98},
	{0x00b9d, 0x00b9d}, {0x00ba1, 0x00ba2}, {0x00ba6, 0x00ba7},
	{0x00bac, 0x00bad}, {0x00bbb, 0x00bcf}, {0x00bd2, 0x00be5},
	{0x00bfc, 0x00c04}, {0x00c11, 0x00c11}, {0x00c34, 0x00c34},
	{0x00c3b, 0x00c3c}, {0x00c3f, 0x00c57}, {0x00c5b, 0x00c5f},
	{0x00c63, 0x00c65}, {0x00c71, 0x00c77}, {0x00c81, 0x00c84},
	{0x00c91, 0x00c91}, {0x00cb4, 0x00cb4}, {0x00cbb, 0x00cbc},
	{0x00cbf, 0x00cdd}, {0x00ce2, 0x00ce5}, {0x00cf3, 0x00d04},
	{0x00d11, 0x00d11}, {0x00d3a, 0x00d3c}, {0x00d3f, 0x00d5f},
	{0x00d63, 0x00d65}, {0x00d77, 0x00d78}, {0x00d81, 0x00d84},
	{0x00d98, 0x00d99}, {0x00dbc, 0x00dbc}, {0x00dbf, 0x00dbf},
	{0x00dc8, 0x00df3}, {0x00df6, 0x00e00}, {0x00e34, 0x00e3e},
	{0x00e48, 0x00e4e}, {0x00e5d, 0x00e80}, {0x00e85, 0x00e86},
	{0x00e8b, 0x00e8c}, {0x00e8f, 0x00e93}, {0x00ea0, 0x00ea0},
	{0x00ea6, 0x00ea6}, {0x00ea9, 0x00ea9}, {0x00eb1, 0x00eb1},
	{0x00eb5, 0x00ebc}, {0x00ebf, 0x00ebf}, {0x00ec7, 0x00ecf},
	{0x00edb, 0x00edb}, {0x00edf, 0x00eff}, {0x00f19, 0x00f19},
	{0x00f37, 0x00f37}, {0x00f3e, 0x00f3f}, {0x00f6d, 0x00f84},
	{0x00f87, 0x00f87}, {0x00f8d, 0x00fbd}, {0x00fcd, 0x00fcd},
	{0x00fda, 0x00fff}, {0x0102c, 0x0103e}, {0x01057, 0x01059},
	{0x0105f, 0x01060}, {0x01063, 0x01064}, {0x01068, 0x0106d},
	{0x01072, 0x01074}, {0x01083, 0x0108d}, {0x0109a, 0x0109d},
	{0x010c7, 0x010cf}, {0x010fe, 0x010ff}, {0x0124e, 0x0124f},
	{0x01259, 0x01259}, {0x0125f, 0x0125f}, {0x0128e, 0x0128f},
	{0x012b6, 0x012b7}, {0x012c1, 0x012c1}, {0x012c7, 0x012c7},
	{0x01311, 0x01311}, {0x01317, 0x01317}, {0x0135c, 0x0135f},
	{0x0137e, 0x0137f}, {0x0139b, 0x0139f}, {0x013f6, 0x013ff},
	{0x0169e, 0x0169f}, {0x016f2, 0x016ff}, {0x01712, 0x0171f},
	{0x01733, 0x01734}, {0x01738, 0x0173f}, {0x01753, 0x0175f},
	{0x01771, 0x0177f}, {0x017b5, 0x017d3}, {0x017de, 0x017df},
	{0x017eb, 0x017ef}, {0x017fb, 0x017ff}, {0x0180c, 0x0180d},
	{0x0181a, 0x0181f}, {0x01879, 0x0187f}, {0x018ab, 0x018af},
	{0x018f7, 0x018ff}, {0x0191e, 0x0193f}, {0x01942, 0x01943},
	{0x0196f, 0x0196f}, {0x01976, 0x0197f}, {0x019ad, 0x019c0},
	{0x019c9, 0x019cf}, {0x019dc, 0x019dd}, {0x01a18, 0x01a1d},
	{0x01a56, 0x01a7f}, {0x01a8b, 0x01a8f}, {0x01a9b, 0x01a9f},
	{0x01aaf, 0x01b04}, {0x01b35, 0x01b44}, {0x01b4d, 0x01b4f},
	{0x01b6c, 0x01b73}, {0x01b7e, 0x01b82}, {0x01ba2, 0x01bad},
	{0x01bbb, 0x01bff}, {0x01c25, 0x01c3a}, {0x01c4b, 0x01c4c},
	{0x01c81, 0x01cd2}, {0x01cd5, 0x01ce8}, {0x01cf2, 0x01cff},
	{0x01dc1, 0x01dff}, {0x01f17, 0x01f17}, {0x01f1f, 0x01f1f},
	{0x01f47, 0x01f47}, {0x01f4f, 0x01f4f}, {0x01f5a, 0x01f5a},
	{0x01f5e, 0x01f5e}, {0x01f7f, 0x01f7f}, {0x01fc5, 0x01fc5},
	{0x01fd5, 0x01fd5}, {0x01ff0, 0x01ff1}, {0x01fff, 0x01fff},
	{0x0200c, 0x0200f}, {0x02029, 0x0202e}, {0x02061, 0x0206f},
	{0x02073, 0x02073}, {0x02095, 0x0209f}, {0x020ba, 0x020ff},
	{0x0218b, 0x0218f}, {0x023ea, 0x023ff}, {0x02428, 0x0243f},
	{0x0244c, 0x0245f}, {0x026e2, 0x026e2}, {0x026e5, 0x026e7},
	{0x02705, 0x02705}, {0x0270b, 0x0270b}, {0x0274c, 0x0274c},
	{0x02753, 0x02755}, {0x02760, 0x02760}, {0x02796, 0x02797},
	{0x027bf, 0x027bf}, {0x027cd, 0x027cf}, {0x02b4e, 0x02b4f},
	{0x02b5b, 0x02bff}, {0x02c5f, 0x02c5f}, {0x02cf0, 0x02cf8},
	{0x02d27, 0x02d2f}, {0x02d67, 0x02d6e}, {0x02d71, 0x02d7f},
	{0x02d98, 0x02d9f}, {0x02daf, 0x02daf}, {0x02dbf, 0x02dbf},
	{0x02dcf, 0x02dcf}, {0x02ddf, 0x02dff}, {0x02e33, 0x02e7f},
	{0x02ef4, 0x02eff}, {0x02fd7, 0x02fef}, {0x02ffd, 0x02fff},
	{0x0302b, 0x0302f}, {0x03097, 0x0309a}, {0x03101, 0x03104},
	{0x0312f, 0x03130}, {0x031b8, 0x031bf}, {0x031e5, 0x031ef},
	{0x032ff, 0x032ff}, {0x04db7, 0x04dbf}, {0x09fcd, 0x09fff},
	{0x0a48e, 0x0a48f}, {0x0a4c8, 0x0a4cf}, {0x0a62d, 0x0a63f},
	{0x0a661, 0x0a661}, {0x0a670, 0x0a672}, {0x0a675, 0x0a67d},
	{0x0a699, 0x0a69f}, {0x0a6f1, 0x0a6f1}, {0x0a6f9, 0x0a6ff},
	{0x0a78e, 0x0a7fa}, {0x0a806, 0x0a806}, {0x0a823, 0x0a827},
	{0x0a82d, 0x0a82f}, {0x0a83b, 0x0a83f}, {0x0a879, 0x0a881},
	{0x0a8b5, 0x0a8cd}, {0x0a8db, 0x0a8f1}, {0x0a8fd, 0x0a8ff},
	{0x0a927, 0x0a92d}, {0x0a948, 0x0a95e}, {0x0a97e, 0x0a983},
	{0x0a9b4, 0x0a9c0}, {0x0a9da, 0x0a9dd}, {0x0a9e1, 0x0a9ff},
	{0x0aa2a, 0x0aa3f}, {0x0aa4c, 0x0aa4f}, {0x0aa5b, 0x0aa5b},
	{0x0aa7c, 0x0aa7f}, {0x0aab2, 0x0aab4}, {0x0aab8, 0x0aab8},
	{0x0aabf, 0x0aabf}, {0x0aac3, 0x0aada}, {0x0aae1, 0x0abbf},
	{0x0abe4, 0x0abea}, {0x0abed, 0x0abef}, {0x0abfb, 0x0abff},
	{0x0d7a5, 0x0d7af}, {0x0d7c8, 0x0d7ca}, {0x0d7fd, 0x0f8ff},
	{0x0fa2f, 0x0fa2f}, {0x0fa6f, 0x0fa6f}, {0x0fadb, 0x0faff},
	{0x0fb08, 0x0fb12}, {0x0fb19, 0x0fb1c}, {0x0fb37, 0x0fb37},
	{0x0fb3f, 0x0fb3f}, {0x0fb45, 0x0fb45}, {0x0fbb3, 0x0fbd2},
	{0x0fd41, 0x0fd4f}, {0x0fd91, 0x0fd91}, {0x0fdc9, 0x0fdef},
	{0x0fdff, 0x0fe0f}, {0x0fe1b, 0x0fe2f}, {0x0fe67, 0x0fe67},
	{0x0fe6d, 0x0fe6f}, {0x0fefd, 0x0ff00}, {0x0ffc0, 0x0ffc1},
	{0x0ffc9, 0x0ffc9}, {0x0ffd1, 0x0ffd1}, {0x0ffd9, 0x0ffd9},
	{0x0ffde, 0x0ffdf}, {0x0ffef, 0x0fffb}, {0x0ffff, 0x0ffff},
	{0x10027, 0x10027}, {0x1003e, 0x1003e}, {0x1004f, 0x1004f},
	{0x1005f, 0x1007f}, {0x100fc, 0x100ff}, {0x10104, 0x10106},
	{0x10135, 0x10136}, {0x1018c, 0x1018f}, {0x1019d, 0x101cf},
	{0x101fe, 0x1027f}, {0x1029e, 0x1029f}, {0x102d2, 0x102ff},
	{0x10324, 0x1032f}, {0x1034c, 0x1037f}, {0x103c4, 0x103c7},
	{0x103d7, 0x103ff}, {0x1049f, 0x1049f}, {0x104ab, 0x107ff},
	{0x10807, 0x10807}, {0x10836, 0x10836}, {0x1083a, 0x1083b},
	{0x1083e, 0x1083e}, {0x10860, 0x108ff}, {0x1091d, 0x1091e},
	{0x1093b, 0x1093e}, {0x10941, 0x109ff}, {0x10a02, 0x10a0f},
	{0x10a18, 0x10a18}, {0x10a35, 0x10a3f}, {0x10a49, 0x10a4f},
	{0x10a5a, 0x10a5f}, {0x10a81, 0x10aff}, {0x10b37, 0x10b38},
	{0x10b57, 0x10b57}, {0x10b74, 0x10b77}, {0x10b81, 0x10bff},
	{0x10c4a, 0x10e5f}, {0x10e80, 0x11082}, {0x110b1, 0x110ba},
	{0x110c2, 0x11fff}, {0x12370, 0x123ff}, {0x12464, 0x1246f},
	{0x12475, 0x12fff}, {0x13430, 0x1cfff}, {0x1d0f7, 0x1d0ff},
	{0x1d128, 0x1d128}, {0x1d166, 0x1d169}, {0x1d16e, 0x1d182},
	{0x1d186, 0x1d18b}, {0x1d1ab, 0x1d1ad}, {0x1d1df, 0x1d1ff},
	{0x1d243, 0x1d244}, {0x1d247, 0x1d2ff}, {0x1d358, 0x1d35f},
	{0x1d373, 0x1d3ff}, {0x1d49d, 0x1d49d}, {0x1d4a1, 0x1d4a1},
	{0x1d4a4, 0x1d4a4}, {0x1d4a8, 0x1d4a8}, {0x1d4ba, 0x1d4ba},
	{0x1d4c4, 0x1d4c4}, {0x1d50b, 0x1d50c}, {0x1d51d, 0x1d51d},
	{0x1d53f, 0x1d53f}, {0x1d547, 0x1d549}, {0x1d6a6, 0x1d6a7},
	{0x1d7cd, 0x1d7cd}, {0x1d801, 0x1efff}, {0x1f02d, 0x1f02f},
	{0x1f095, 0x1f0ff}, {0x1f10c, 0x1f10f}, {0x1f130, 0x1f130},
	{0x1f133, 0x1f13c}, {0x1f140, 0x1f141}, {0x1f144, 0x1f145},
	{0x1f148, 0x1f149}, {0x1f150, 0x1f156}, {0x1f159, 0x1f15e},
	{0x1f161, 0x1f178}, {0x1f17d, 0x1f17e}, {0x1f181, 0x1f189},
	{0x1f18f, 0x1f18f}, {0x1f192, 0x1f1ff}, {0x1f202, 0x1f20f},
	{0x1f233, 0x1f23f}, {0x1f24a, 0x1ffff}, {0x2a6d8, 0x2a6ff},
	{0x2b736, 0x2f7ff}, {0x2fa1f, 0x10ffff},
};
551
/*
 * Return 1 if c lies inside one of the n inclusive {first, last} ranges of
 * tab and 0 otherwise.  tab must be sorted in ascending order and hold at
 * least one range, since tab[0] is read before the search starts.
 */
static int find(int c, int tab[][2], int n)
{
	int lo, hi;
	if (c < tab[0][0])	/* below every range */
		return 0;
	for (lo = 0, hi = n - 1; lo <= hi; ) {
		int mid = (lo + hi) / 2;
		if (c < tab[mid][0])
			hi = mid - 1;
		else if (c > tab[mid][1])
			lo = mid + 1;
		else
			return 1;
	}
	return 0;
}
570
571 /* double-width characters */
572 static int uc_isdw(int c)
573 {
574 return c >= 0x1100 && find(c, dwchars, LEN(dwchars));
575 }
576
577 /* zero-width and combining characters */
578 static int uc_iszw(int c)
579 {
580 return c >= 0x0300 && find(c, zwchars, LEN(zwchars));
581 }
582
/* width of the character at s: 0 if zero-width, 2 if double-width, 1 otherwise */
int uc_wid(char *s)
{
	int code = uc_code(s);
	if (uc_iszw(code))
		return 0;
	if (uc_isdw(code))
		return 2;
	return 1;
}
590
591 /* nonprintable characters */
592 int uc_isbell(char *s)
593 {
594 int c = (unsigned char) *s;
595 if (c == ' ' || c == '\t' || c == '\n' || (c <= 0x7f && isprint(…
596 return 0;
597 c = uc_code(s);
598 return uc_iszw(c) || find(c, bchars, LEN(bchars));
599 }
600
/*
 * Combining characters: the result of uc_acomb() for the character at s.
 * Space, tab, newline and printable ASCII are rejected up front without
 * decoding.
 */
int uc_iscomb(char *s)
{
	int byte = (unsigned char) *s;
	if (byte == ' ' || byte == '\t' || byte == '\n')
		return 0;
	if (byte <= 0x7f && isprint(byte))
		return 0;
	return uc_acomb(uc_code(s));
}
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.