tr.c - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
tr.c (6209B) | |
--- | |
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <stdlib.h> | |
3 | |
4 #include "utf.h" | |
5 #include "util.h" | |
6 | |
/*
 * Option flags, switched on by main() while it parses the command line.
 * Objects with static storage duration start out as 0, so no explicit
 * initialiser is needed.
 */
static int cflag,	/* set by -c and -C */
           dflag,	/* set by -d */
           sflag;	/* set by -s */
10 | |
11 struct range { | |
12 Rune start; | |
13 Rune end; | |
14 size_t quant; | |
15 }; | |
16 | |
17 static struct { | |
18 char *name; | |
19 int (*check)(Rune); | |
20 } classes[] = { | |
21 { "alnum", isalnumrune }, | |
22 { "alpha", isalpharune }, | |
23 { "blank", isblankrune }, | |
24 { "cntrl", iscntrlrune }, | |
25 { "digit", isdigitrune }, | |
26 { "graph", isgraphrune }, | |
27 { "lower", islowerrune }, | |
28 { "print", isprintrune }, | |
29 { "punct", ispunctrune }, | |
30 { "space", isspacerune }, | |
31 { "upper", isupperrune }, | |
32 { "xdigit", isxdigitrune }, | |
33 }; | |
34 | |
/*
 * Parsed form of the two set operands, filled in by main() via makeset().
 * Static storage starts out as NULL / 0, which means "empty set, no class".
 */
static struct range *set1, *set2;		/* range arrays */
static size_t set1ranges, set2ranges;		/* entries used in each array */
static int (*set1check)(Rune), (*set2check)(Rune);	/* class predicate, if any */
41 | |
42 static size_t | |
43 rangelen(struct range r) | |
44 { | |
45 return (r.end - r.start + 1) * r.quant; | |
46 } | |
47 | |
48 static size_t | |
49 setlen(struct range *set, size_t setranges) | |
50 { | |
51 size_t len = 0, i; | |
52 | |
53 for (i = 0; i < setranges; i++) | |
54 len += rangelen(set[i]); | |
55 | |
56 return len; | |
57 } | |
58 | |
59 static int | |
60 rstrmatch(Rune *r, char *s, size_t n) | |
61 { | |
62 size_t i; | |
63 | |
64 for (i = 0; i < n; i++) | |
65 if (r[i] != s[i]) | |
66 return 0; | |
67 return 1; | |
68 } | |
69 | |
/*
 * makeset: parse the set operand str into an array of ranges stored in
 * *set and return the number of ranges filled in.
 *
 * str is unescaped in place and converted to runes. *set is allocated
 * with one slot per rune, which bounds the number of ranges. The forms
 * handled are [=c=], [:class:], [c*n], a-b ranges and plain literals.
 *
 * When a [:class:] name is found in classes[], *check is assigned from
 * that table entry and 0 is returned at once; rstr is not freed on that
 * path.
 *
 * NOTE(review): several lines of this listing are cut off (they end in
 * an ellipsis), so the comments only describe the part that is visible.
 */
70 static size_t | |
71 makeset(char *str, struct range **set, int (**check)(Rune)) | |
72 { | |
73 Rune *rstr; | |
74 size_t len, i, j, m, n; | |
75 size_t q, setranges = 0; | |
76 int factor, base; | |
77 | |
78 /* rstr defines at most len ranges */ | |
79 unescape(str); | |
80 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr)); | |
81 len = utftorunestr(str, rstr); | |
82 *set = ereallocarray(NULL, len, sizeof(**set)); | |
83 | |
84 for (i = 0; i < len; i++) { | |
85 if (rstr[i] == '[') { | |
86 j = i; | |
/* look for the next ']' at or after position j */
87 nextbrack: | |
88 if (j >= len) | |
89 goto literal; | |
90 for (m = j; m < len; m++) | |
91 if (rstr[m] == ']') { | |
92 j = m; | |
93 break; | |
94 } | |
/* j still equals i only when the first search found no ']' at all */
95 if (j == i) | |
96 goto literal; | |
97 | |
98 /* CLASSES [=EQUIV=] (skip) */ | |
99 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - … | |
100 if (j - i != 4) | |
101 goto literal; | |
/* single-rune range holding the rune that follows the first '=' */
102 (*set)[setranges].start = rstr[i + 2]; | |
103 (*set)[setranges].end = rstr[i + 2]; | |
104 (*set)[setranges].quant = 1; | |
105 setranges++; | |
106 i = j; | |
107 continue; | |
108 } | |
109 | |
110 /* CLASSES [:CLASS:] */ | |
111 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - … | |
112 for (n = 0; n < LEN(classes); n++) { | |
113 if (rstrmatch(rstr + i + 2, clas… | |
114 *check = classes[n].chec… | |
/* early return with zero ranges; rstr is not freed here */
115 return 0; | |
116 } | |
117 } | |
118 eprintf("Invalid character class.\n"); | |
119 } | |
120 | |
121 /* REPEAT [_*n] (only allowed in set2) */ | |
122 if (j - i > 2 && rstr[i + 2] == '*') { | |
123 /* check if right side of '*' is a numbe… | |
124 q = 0; | |
125 factor = 1; | |
/* a leading '0' right after the '*' selects base 8, otherwise base 10 */
126 base = (rstr[i + 3] == '0') ? 8 : 10; | |
/* accumulate digits from the last one back to the one after '*' */
127 for (n = j - 1; n > i + 2; n--) { | |
128 if (rstr[n] < '0' || rstr[n] > '… | |
/* n == 0 flags a rejected rune; the loop itself stops at i + 2, never at 0 */
129 n = 0; | |
130 break; | |
131 } | |
132 q += (rstr[n] - '0') * factor; | |
133 factor *= base; | |
134 } | |
/* not a valid repeat count: retry with the next ']' further right */
135 if (n == 0) { | |
136 j = m + 1; | |
137 goto nextbrack; | |
138 } | |
139 (*set)[setranges].start = rstr[i + 1]; | |
140 (*set)[setranges].end = rstr[i + 1]; | |
/* q == 0 (count of zero or no digits) falls back to a setlen-based value */
141 (*set)[setranges].quant = q ? q : setlen… | |
142 setranges++; | |
143 i = j; | |
144 continue; | |
145 } | |
146 | |
/* no bracket form matched up to this ']': try the next one */
147 j = m + 1; | |
148 goto nextbrack; | |
149 } | |
150 literal: | |
151 /* RANGES [_-__-_], _-__-_ */ | |
152 /* LITERALS _______ */ | |
153 (*set)[setranges].start = rstr[i]; | |
154 | |
/* "a-b": skip ahead so the rune after '-' becomes the range end below */
155 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= … | |
156 i += 2; | |
157 (*set)[setranges].end = rstr[i]; | |
158 (*set)[setranges].quant = 1; | |
159 setranges++; | |
160 } | |
161 | |
162 free(rstr); | |
163 return setranges; | |
164 } | |
165 | |
166 static void | |
167 usage(void) | |
168 { | |
169 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0); | |
170 } | |
171 | |
/*
 * main: parse the options, build set1 (and set2 when given) with
 * makeset(), then process standard input one rune at a time in a
 * goto-driven loop: "read" fetches the next rune, the code after it
 * decides whether the rune is dropped, passed through or replaced, and
 * "write" applies the squeeze rule before emitting it.
 *
 * The return value is the combined result of fshut() on the streams once
 * efgetrune() reports that there is nothing more to read.
 *
 * NOTE(review): several lines of this listing are cut off (they end in
 * an ellipsis), so the comments only describe the part that is visible.
 */
172 int | |
173 main(int argc, char *argv[]) | |
174 { | |
175 Rune r, lastrune = 0; | |
176 size_t off1, off2, i, m; | |
177 int ret = 0; | |
178 | |
179 ARGBEGIN { | |
180 case 'c': | |
181 case 'C': | |
182 cflag = 1; | |
183 break; | |
184 case 'd': | |
185 dflag = 1; | |
186 break; | |
187 case 's': | |
188 sflag = 1; | |
189 break; | |
190 default: | |
191 usage(); | |
192 } ARGEND | |
193 | |
/* one or two operands; with a single operand exactly one of -d / -s must be set */
194 if (!argc || argc > 2 || (argc == 1 && dflag == sflag)) | |
195 usage(); | |
196 set1ranges = makeset(argv[0], &set1, &set1check); | |
197 if (argc == 2) | |
198 set2ranges = makeset(argv[1], &set2, &set2check); | |
199 | |
200 if (!dflag || (argc == 2 && sflag)) { | |
201 /* sanity checks as we are translating */ | |
202 if (!sflag && !set2ranges && !set2check) | |
203 eprintf("cannot map to an empty set.\n"); | |
204 if (set2check && set2check != islowerrune && | |
205 set2check != isupperrune) { | |
206 eprintf("can only map to 'lower' and 'upper' cla… | |
207 } | |
208 } | |
209 read: | |
/* a zero result from efgetrune ends the run: shut the streams and return */
210 if (!efgetrune(&r, stdin, "<stdin>")) { | |
211 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>… | |
212 return ret; | |
213 } | |
/* squeeze-only mode: nothing to delete or map, go straight to the output step */
214 if (argc == 1 && sflag) | |
215 goto write; | |
/* search the ranges of set1; off1 accumulates the lengths of the ranges passed */
216 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]),… | |
217 if (set1[i].start <= r && r <= set1[i].end) { | |
/* r is in set1: with -d it is dropped, unless -c inverts the selection */
218 if (dflag) { | |
219 if (cflag) | |
220 goto write; | |
221 else | |
222 goto read; | |
223 } | |
/* translating with -c: runes that are in set1 pass through unchanged */
224 if (cflag) | |
225 goto write; | |
226 | |
227 /* map r to set2 */ | |
228 if (set2check) { | |
229 if (set2check == islowerrune) | |
230 r = tolowerrune(r); | |
231 else | |
232 r = toupperrune(r); | |
233 } else { | |
/* off1 becomes the position of r within set1 */
234 off1 += r - set1[i].start; | |
/* positions beyond the end of set2 take the end of its last range */
235 if (off1 > setlen(set2, set2ranges) - 1)… | |
236 r = set2[set2ranges - 1].end; | |
237 goto write; | |
238 } | |
/* find the range of set2 that covers position off1; off2 is where it begins */
239 for (m = 0, off2 = 0; m < set2ranges; m+… | |
240 if (off2 + rangelen(set2[m]) > o… | |
241 m++; | |
242 break; | |
243 } | |
244 off2 += rangelen(set2[m]); | |
245 } | |
/* step m back once before it is used as the index into set2 */
246 m--; | |
247 r = set2[m].start + (off1 - off2) / set2… | |
248 } | |
249 goto write; | |
250 } | |
251 } | |
/* set1 given as a class: same decisions, using the predicate instead of ranges */
252 if (set1check && set1check(r)) { | |
253 if (cflag) | |
254 goto write; | |
255 if (dflag) | |
256 goto read; | |
257 if (set2check) { | |
258 if (set2check == islowerrune) | |
259 r = tolowerrune(r); | |
260 else | |
261 r = toupperrune(r); | |
262 } else { | |
263 r = set2[set2ranges - 1].end; | |
264 } | |
265 goto write; | |
266 } | |
/* r is not in set1; when translating with -c these are the runes that get replaced */
267 if (!dflag && cflag) { | |
268 if (set2check) { | |
269 if (set2check == islowerrune) | |
270 r = tolowerrune(r); | |
271 else | |
272 r = toupperrune(r); | |
273 } else { | |
274 r = set2[set2ranges - 1].end; | |
275 } | |
276 goto write; | |
277 } | |
/* r is not in set1; with -d and -c together these are the runes that get dropped */
278 if (dflag && cflag) | |
279 goto read; | |
280 write: | |
/* squeeze against set1: drop r when it repeats the last rune written and is in set1 */
281 if (argc == 1 && sflag && r == lastrune) { | |
282 if (set1check && set1check(r)) | |
283 goto read; | |
284 for (i = 0; i < set1ranges; i++) { | |
285 if (set1[i].start <= r && r <= set1[i].end) | |
286 goto read; | |
287 } | |
288 } | |
/* with two operands the squeeze test is made against set2 instead */
289 if (argc == 2 && sflag && r == lastrune) { | |
290 if (set2check && set2check(r)) | |
291 goto read; | |
292 for (i = 0; i < set2ranges; i++) { | |
293 if (set2[i].start <= r && r <= set2[i].end) | |
294 goto read; | |
295 } | |
296 } | |
297 efputrune(&r, stdout, "<stdout>"); | |
/* remember what was written so that the next repeat can be squeezed */
298 lastrune = r; | |
299 goto read; | |
300 } | |