rune.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
rune.c (3963B) | |
--- | |
/*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
14 #include <stdarg.h> | |
15 #include <string.h> | |
16 #include "plan9.h" | |
17 #include "utf.h" | |
18 | |
/*
 * Bit layout constants for the UTF-8 codec in this file.
 *
 * BitN is the number of payload bits carried by the lead byte of an
 * N-byte sequence and Bitx the number carried by each continuation
 * byte.  The T*, Rune* and mask constants below are all derived from
 * these widths.
 */
enum
{
	Bit1	= 7,
	Bitx	= 6,
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	/* tag bits: the fixed high-order bits of each kind of byte */
	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	/* largest value that fits in an N-byte sequence */
	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */

	Maskx	= (1<<Bitx)-1,			/* 0011 1111: payload bits of a continuation byte */
	Testx	= Maskx ^ 0xFF,			/* 1100 0000: tag bits of a continuation byte */

	/* value stored by chartorune when decoding fails */
	Bad	= Runeerror
};
45 | |
46 int | |
47 chartorune(Rune *rune, char *str) | |
48 { | |
49 int c, c1, c2, c3; | |
50 long l; | |
51 | |
52 /* | |
53 * one character sequence | |
54 * 00000-0007F => T1 | |
55 */ | |
56 c = *(uchar*)str; | |
57 if(c < Tx) { | |
58 *rune = c; | |
59 return 1; | |
60 } | |
61 | |
62 /* | |
63 * two character sequence | |
64 * 0080-07FF => T2 Tx | |
65 */ | |
66 c1 = *(uchar*)(str+1) ^ Tx; | |
67 if(c1 & Testx) | |
68 goto bad; | |
69 if(c < T3) { | |
70 if(c < T2) | |
71 goto bad; | |
72 l = ((c << Bitx) | c1) & Rune2; | |
73 if(l <= Rune1) | |
74 goto bad; | |
75 *rune = l; | |
76 return 2; | |
77 } | |
78 | |
79 /* | |
80 * three character sequence | |
81 * 0800-FFFF => T3 Tx Tx | |
82 */ | |
83 c2 = *(uchar*)(str+2) ^ Tx; | |
84 if(c2 & Testx) | |
85 goto bad; | |
86 if(c < T4) { | |
87 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; | |
88 if(l <= Rune2) | |
89 goto bad; | |
90 *rune = l; | |
91 return 3; | |
92 } | |
93 | |
94 /* | |
95 * four character sequence | |
96 * 10000-10FFFF => T4 Tx Tx Tx | |
97 */ | |
98 if(UTFmax >= 4) { | |
99 c3 = *(uchar*)(str+3) ^ Tx; | |
100 if(c3 & Testx) | |
101 goto bad; | |
102 if(c < T5) { | |
103 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bit… | |
104 if(l <= Rune3) | |
105 goto bad; | |
106 if(l > Runemax) | |
107 goto bad; | |
108 *rune = l; | |
109 return 4; | |
110 } | |
111 } | |
112 | |
113 /* | |
114 * bad decoding | |
115 */ | |
116 bad: | |
117 *rune = Bad; | |
118 return 1; | |
119 } | |
120 | |
121 int | |
122 runetochar(char *str, Rune *rune) | |
123 { | |
124 long c; | |
125 | |
126 /* | |
127 * one character sequence | |
128 * 00000-0007F => 00-7F | |
129 */ | |
130 c = *rune; | |
131 if(c <= Rune1) { | |
132 str[0] = c; | |
133 return 1; | |
134 } | |
135 | |
136 /* | |
137 * two character sequence | |
138 * 00080-007FF => T2 Tx | |
139 */ | |
140 if(c <= Rune2) { | |
141 str[0] = T2 | (c >> 1*Bitx); | |
142 str[1] = Tx | (c & Maskx); | |
143 return 2; | |
144 } | |
145 | |
146 /* | |
147 * three character sequence | |
148 * 00800-0FFFF => T3 Tx Tx | |
149 */ | |
150 if(c > Runemax) | |
151 c = Runeerror; | |
152 if(c <= Rune3) { | |
153 str[0] = T3 | (c >> 2*Bitx); | |
154 str[1] = Tx | ((c >> 1*Bitx) & Maskx); | |
155 str[2] = Tx | (c & Maskx); | |
156 return 3; | |
157 } | |
158 | |
159 /* | |
160 * four character sequence | |
161 * 010000-1FFFFF => T4 Tx Tx Tx | |
162 */ | |
163 str[0] = T4 | (c >> 3*Bitx); | |
164 str[1] = Tx | ((c >> 2*Bitx) & Maskx); | |
165 str[2] = Tx | ((c >> 1*Bitx) & Maskx); | |
166 str[3] = Tx | (c & Maskx); | |
167 return 4; | |
168 } | |
169 | |
170 int | |
171 runelen(long c) | |
172 { | |
173 Rune rune; | |
174 char str[10]; | |
175 | |
176 rune = c; | |
177 return runetochar(str, &rune); | |
178 } | |
179 | |
180 int | |
181 runenlen(Rune *r, int nrune) | |
182 { | |
183 int nb, c; | |
184 | |
185 nb = 0; | |
186 while(nrune--) { | |
187 c = *r++; | |
188 if(c <= Rune1) | |
189 nb++; | |
190 else | |
191 if(c <= Rune2) | |
192 nb += 2; | |
193 else | |
194 if(c <= Rune3 || c > Runemax) | |
195 nb += 3; | |
196 else | |
197 nb += 4; | |
198 } | |
199 return nb; | |
200 } | |
201 | |
202 int | |
203 fullrune(char *str, int n) | |
204 { | |
205 int c; | |
206 | |
207 if(n <= 0) | |
208 return 0; | |
209 c = *(uchar*)str; | |
210 if(c < Tx) | |
211 return 1; | |
212 if(c < T3) | |
213 return n >= 2; | |
214 if(UTFmax == 3 || c < T4) | |
215 return n >= 3; | |
216 return n >= 4; | |
217 } |