rune.c - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
rune.c (4243B) | |
--- | |
1 /* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <[email protected]> | |
2 * | |
3 * Permission is hereby granted, free of charge, to any person obtaining… | |
4 * copy of this software and associated documentation files (the "Softwa… | |
5 * to deal in the Software without restriction, including without limita… | |
6 * the rights to use, copy, modify, merge, publish, distribute, sublicen… | |
7 * and/or sell copies of the Software, and to permit persons to whom the | |
8 * Software is furnished to do so, subject to the following conditions: | |
9 * | |
10 * The above copyright notice and this permission notice shall be includ… | |
11 * all copies or substantial portions of the Software. | |
12 * | |
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRE… | |
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILI… | |
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SH… | |
16 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR … | |
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISI… | |
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
19 * DEALINGS IN THE SOFTWARE. | |
20 */ | |
21 #include "../utf.h" | |
22 | |
/* Smaller of two values; each argument may be evaluated more than once. */
#define MIN(a, b) ((b) > (a) ? (a) : (b))
24 | |
/*
 * Classify byte b by its high bits: yields the total length (1..6) of the
 * sequence that b would start, or 0 when b is a continuation byte
 * (10xxxxxx) or matches none of the lead patterns.
 * Only the low eight bits of b are inspected; b may be evaluated repeatedly.
 */
#define UTFSEQ(b) ( !((b) & 0x80)          ? 1 /* 0xxxxxxx */ \
                  : ((b) & 0xC0) == 0x80   ? 0 /* 10xxxxxx */ \
                  : ((b) & 0xE0) == 0xC0   ? 2 /* 110xxxxx */ \
                  : ((b) & 0xF0) == 0xE0   ? 3 /* 1110xxxx */ \
                  : ((b) & 0xF8) == 0xF0   ? 4 /* 11110xxx */ \
                  : ((b) & 0xFC) == 0xF8   ? 5 /* 111110xx */ \
                  : ((b) & 0xFE) == 0xFC   ? 6 /* 1111110x */ \
                  : 0 )
33 | |
/*
 * Nonzero when c must not be treated as a rune: it is negative, exceeds
 * Runemax, has low 16 bits equal to 0xFFFE or 0xFFFF, or lies in
 * 0xD800..0xDFFF or 0xFDD0..0xFDEF.
 * c may be evaluated several times.
 */
#define BADRUNE(c) ( (c) < 0                           \
                  || (c) > Runemax                     \
                  || ((c) & 0xFFFE) == 0xFFFE          \
                  || ((c) >= 0xD800 && (c) <= 0xDFFF)  \
                  || ((c) >= 0xFDD0 && (c) <= 0xFDEF) )
38 | |
39 int | |
40 runetochar(char *s, const Rune *p) | |
41 { | |
42 Rune r = *p; | |
43 | |
44 switch(runelen(r)) { | |
45 case 1: /* 0aaaaaaa */ | |
46 s[0] = r; | |
47 return 1; | |
48 case 2: /* 00000aaa aabbbbbb */ | |
49 s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ | |
50 s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ | |
51 return 2; | |
52 case 3: /* aaaabbbb bbcccccc */ | |
53 s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ | |
54 s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ | |
55 s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */ | |
56 return 3; | |
57 case 4: /* 000aaabb bbbbcccc ccdddddd */ | |
58 s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ | |
59 s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ | |
60 s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ | |
61 s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ | |
62 return 4; | |
63 default: | |
64 return 0; /* error */ | |
65 } | |
66 } | |
67 | |
68 int | |
69 chartorune(Rune *p, const char *s) | |
70 { | |
71 return charntorune(p, s, UTFmax); | |
72 } | |
73 | |
74 int | |
75 charntorune(Rune *p, const char *s, size_t len) | |
76 { | |
77 unsigned int i, n; | |
78 Rune r; | |
79 | |
80 if(len == 0) /* can't even look at s[0] */ | |
81 return 0; | |
82 | |
83 switch((n = UTFSEQ(s[0]))) { | |
84 case 1: r = s[0]; break; /* 0xxxxxxx */ | |
85 case 2: r = s[0] & 0x1F; break; /* 110xxxxx */ | |
86 case 3: r = s[0] & 0x0F; break; /* 1110xxxx */ | |
87 case 4: r = s[0] & 0x07; break; /* 11110xxx */ | |
88 case 5: r = s[0] & 0x03; break; /* 111110xx */ | |
89 case 6: r = s[0] & 0x01; break; /* 1111110x */ | |
90 default: /* invalid sequence */ | |
91 *p = Runeerror; | |
92 return 1; | |
93 } | |
94 /* add values from continuation bytes */ | |
95 for(i = 1; i < MIN(n, len); i++) | |
96 if((s[i] & 0xC0) == 0x80) { | |
97 /* add bits from continuation byte to rune value | |
98 * cannot overflow: 6 byte sequences contain 31 … | |
99 r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */ | |
100 } | |
101 else { /* expected continuation */ | |
102 *p = Runeerror; | |
103 return i; | |
104 } | |
105 | |
106 if(i < n) /* must have reached len limit */ | |
107 return 0; | |
108 | |
109 /* reject invalid or overlong sequences */ | |
110 if(BADRUNE(r) || runelen(r) < (int)n) | |
111 r = Runeerror; | |
112 | |
113 *p = r; | |
114 return n; | |
115 } | |
116 | |
117 int | |
118 runelen(Rune r) | |
119 { | |
120 if(BADRUNE(r)) | |
121 return 0; /* error */ | |
122 else if(r <= 0x7F) | |
123 return 1; | |
124 else if(r <= 0x07FF) | |
125 return 2; | |
126 else if(r <= 0xFFFF) | |
127 return 3; | |
128 else | |
129 return 4; | |
130 } | |
131 | |
132 size_t | |
133 runenlen(const Rune *p, size_t len) | |
134 { | |
135 size_t i, n = 0; | |
136 | |
137 for(i = 0; i < len; i++) | |
138 n += runelen(p[i]); | |
139 return n; | |
140 } | |
141 | |
142 int | |
143 fullrune(const char *s, size_t len) | |
144 { | |
145 Rune r; | |
146 | |
147 return charntorune(&r, s, len) > 0; | |
148 } |