mbwc.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
mbwc.c (2272B) | |
--- | |
1 #include <stdlib.h> | |
2 #include "mbwc.h" | |
3 | |
/*
 * Use the FSS-UTF transformation proposed by posix.
 *  We define 4 byte types:
 *  T0  0xxxxxxx  7 free bits
 *  Tx  10xxxxxx  6 free bits
 *  T1  110xxxxx  5 free bits
 *  T2  1110xxxx  4 free bits
 *
 *  Encoding is as follows.
 *  From hex  Thru hex  Sequence  Bits
 *  00000000  0000007F  T0        7
 *  00000080  000007FF  T1 Tx     11
 *  00000800  0000FFFF  T2 Tx Tx  16
 */
18 | |
/*
 * Report the length in bytes of the multibyte character at s,
 * examining at most n bytes.  The answer is whatever mbtowc returns
 * when asked to decode without storing the resulting character.
 */
int
mblen(const char *s, size_t n)
{
	int len;

	len = mbtowc((wchar_t*)0, s, n);
	return len;
}
25 | |
/*
 * Decode the multibyte character at the start of s, looking at no
 * more than n bytes, and store it through pwc when pwc is non-null.
 *
 * Returns 0 when s is null or when the character is NUL, the number
 * of bytes consumed (1, 2 or 3) for any other character, and -1 when
 * n is too small, a continuation byte is malformed, the lead byte is
 * not T0/T1/T2, or the value would have fit in a shorter sequence.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	const unsigned char *b;
	long rune;

	if(s == 0)
		return 0;
	if(n == 0)
		return -1;

	b = (const unsigned char*)s;

	/* T0: a single byte carries the whole character */
	if(b[0] < 0x80) {
		if(pwc != 0)
			*pwc = b[0];
		return b[0] != 0;
	}

	/* every longer form needs a Tx byte right after the lead byte */
	if(n < 2 || (b[1] & 0xC0) != 0x80)
		return -1;
	if((b[0] & 0xE0) == 0xC0) {
		/* T1 Tx: 5 + 6 payload bits */
		rune = ((long)(b[0] & 0x1F) << 6) | (b[1] & 0x3F);
		if(rune < 0x80)
			return -1;	/* overlong: belongs in T0 */
		if(pwc != 0)
			*pwc = rune;
		return 2;
	}

	/* the second Tx byte is validated before the lead byte is classified */
	if(n < 3 || (b[2] & 0xC0) != 0x80)
		return -1;
	if((b[0] & 0xF0) == 0xE0) {
		/* T2 Tx Tx: 4 + 6 + 6 payload bits */
		rune = ((long)(b[0] & 0x0F) << 12)
			| ((long)(b[1] & 0x3F) << 6)
			| (b[2] & 0x3F);
		if(rune < 0x800)
			return -1;	/* overlong: belongs in T1 Tx */
		if(pwc != 0)
			*pwc = rune;
		return 3;
	}

	/* lead byte is neither T1 nor T2 */
	return -1;
}
81 | |
/*
 * Encode wchar into the buffer s and return the number of bytes
 * stored (1, 2 or 3).  Only the low 16 bits of wchar take part in
 * the encoding.  A null s stores nothing and returns 0.
 */
int
wctomb(char *s, wchar_t wchar)
{
	long rune;
	int len, lead, i;

	if(s == 0)
		return 0;

	rune = wchar & 0xFFFF;

	/* choose the sequence length and the marker bits of its first byte */
	if(rune < 0x80) {
		len = 1;
		lead = 0x00;	/* T0 */
	} else if(rune < 0x800) {
		len = 2;
		lead = 0xC0;	/* T1 */
	} else {
		len = 3;
		lead = 0xE0;	/* T2 */
	}

	/* fill the Tx bytes from the end, six bits at a time */
	for(i = len-1; i > 0; i--) {
		s[i] = 0x80 | (rune & 0x3F);
		rune >>= 6;
	}
	/* whatever bits remain belong to the first byte */
	s[0] = lead | rune;

	return len;
}
107 | |
/*
 * Convert the NUL-terminated multibyte string s into wide characters.
 *
 * With a non-null pwcs at most n wide characters are stored; the
 * terminating zero is stored as well when it is reached within n,
 * but it is never counted in the result.  With a null pwcs nothing
 * is stored, n is ignored and the whole string is measured.
 *
 * Returns the number of wide characters converted, or (size_t)-1 as
 * soon as mbtowc rejects a sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;	/* size_t, not int: it is compared with n and returned as size_t */
	int d, c;
	wchar_t w;

	for(i = 0; !pwcs || i < n; i++) {
		c = *s & 0xff;
		if(c == 0) {
			if(pwcs)
				*pwcs = 0;
			break;
		}
		if(c < 0x80) {
			/* plain 7-bit character: no need to call the decoder */
			w = c;
			s++;
		} else {
			/* decode into a local so a null pwcs is never written through */
			d = mbtowc(&w, s, 3);
			if(d <= 0)
				return (d < 0) ? (size_t)-1 : i;
			s += d;
		}
		if(pwcs)
			*pwcs++ = w;
	}
	return i;
}
130 | |
/*
 * Convert the zero-terminated wide string pwcs into multibyte form,
 * storing at most n bytes into s.  Conversion stops after the
 * terminating zero has been stored, when n bytes have been written,
 * or at the first character whose encoding no longer fits; a
 * multibyte character is never stored partially.
 *
 * Returns the number of bytes stored.
 * NOTE(review): when the terminating NUL is stored it is included in
 * the returned count, unlike ISO C wcstombs; kept as is because
 * callers may rely on it.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	int d, i;
	long c;
	char *p, *end;
	char buf[3];

	p = s;
	end = s + n;

	/*
	 * While more than 3 bytes remain any character fits, so encode
	 * straight into the output.  Measuring the remaining room as
	 * end-p avoids forming a pointer before s when n < 3.
	 */
	while(end - p > 3) {
		c = *pwcs++;
		if(c < 0x80)
			*p++ = c;
		else
			p += wctomb(p, c);
		if(c == 0)
			return p-s;
	}

	/* near the end: encode into a scratch buffer and copy only what fits */
	while(p < end) {
		c = *pwcs++;
		d = wctomb(buf, c);
		if(d > end - p)
			break;	/* would exceed n bytes: stop rather than skip it */
		for(i = 0; i < d; i++)
			*p++ = buf[i];
		if(c == 0)
			break;
	}
	return p-s;
}
166 |