| tr.c - sbase - suckless unix tools | |
| git clone git://git.suckless.org/sbase | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| tr.c (6191B) | |
| --- | |
| 1 /* See LICENSE file for copyright and license details. */ | |
| 2 #include <stdlib.h> | |
| 3 | |
| 4 #include "utf.h" | |
| 5 #include "util.h" | |
| 6 | |
/*
 * Command-line switches. Each one is set to 1 by main() when the matching
 * option letter is parsed; objects with static storage start out as zero.
 */
static int cflag; /* -c or -C */
static int dflag; /* -d */
static int sflag; /* -s */
| 10 | |
| 11 struct range { | |
| 12 Rune start; | |
| 13 Rune end; | |
| 14 size_t quant; | |
| 15 }; | |
| 16 | |
| 17 static struct { | |
| 18 char *name; | |
| 19 int (*check)(Rune); | |
| 20 } classes[] = { | |
| 21 { "alnum", isalnumrune }, | |
| 22 { "alpha", isalpharune }, | |
| 23 { "blank", isblankrune }, | |
| 24 { "cntrl", iscntrlrune }, | |
| 25 { "digit", isdigitrune }, | |
| 26 { "graph", isgraphrune }, | |
| 27 { "lower", islowerrune }, | |
| 28 { "print", isprintrune }, | |
| 29 { "punct", ispunctrune }, | |
| 30 { "space", isspacerune }, | |
| 31 { "upper", isupperrune }, | |
| 32 { "xdigit", isxdigitrune }, | |
| 33 }; | |
| 34 | |
/*
 * Parsed operands, filled in by main() through makeset(). For each operand
 * makeset() either stores ranges in the array and returns their count, or
 * sets the predicate and returns 0 when it recognizes a [:class:] expression.
 * Static storage guarantees the pointers start as null and the counts as 0.
 */
static struct range *set1, *set2;
static size_t set1ranges, set2ranges;
static int (*set1check)(Rune), (*set2check)(Rune);
| 41 | |
| 42 static size_t | |
| 43 rangelen(struct range r) | |
| 44 { | |
| 45 return (r.end - r.start + 1) * r.quant; | |
| 46 } | |
| 47 | |
| 48 static size_t | |
| 49 setlen(struct range *set, size_t setranges) | |
| 50 { | |
| 51 size_t len = 0, i; | |
| 52 | |
| 53 for (i = 0; i < setranges; i++) | |
| 54 len += rangelen(set[i]); | |
| 55 | |
| 56 return len; | |
| 57 } | |
| 58 | |
| 59 static int | |
| 60 rstrmatch(Rune *r, char *s, size_t n) | |
| 61 { | |
| 62 size_t i; | |
| 63 | |
| 64 for (i = 0; i < n; i++) | |
| 65 if (r[i] != s[i]) | |
| 66 return 0; | |
| 67 return 1; | |
| 68 } | |
| 69 | |
/*
 * makeset: parse the set description in str into an array of ranges.
 *
 * str is passed through unescape() and converted to a rune string; *set is
 * then allocated with room for one range per rune and filled in from left
 * to right. The return value is the number of ranges stored in *set.
 *
 * When a [:class:] expression names an entry of classes[], the matching
 * predicate is stored through check and the function returns 0 at once.
 *
 * NOTE(review): that early return happens before free(rstr), so the rune
 * buffer is only released on the normal exit path.
 * NOTE(review): several long lines of this function are cut off in this
 * view; comments on those lines describe only the visible part.
 */
| 70 static size_t | |
| 71 makeset(char *str, struct range **set, int (**check)(Rune)) | |
| 72 { | |
| 73 Rune *rstr; | |
| 74 size_t len, i, j, m, n; | |
| 75 size_t q, setranges = 0; | |
| 76 int factor, base; | |
| 77 | |
| 78 /* rstr defines at most len ranges */ | |
| 79 unescape(str); | |
| 80 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr)); | |
| 81 len = utftorunestr(str, rstr); | |
| 82 *set = ereallocarray(NULL, len, sizeof(**set)); | |
| 83 | |
| 84 for (i = 0; i < len; i++) { | |
| 85 if (rstr[i] == '[') { | |
/* i stays on the '['; j is where the search for ']' (re)starts */
| 86 j = i; | |
| 87 nextbrack: | |
| 88 if (j >= len) | |
| 89 goto literal; | |
/* on success both j and m index the ']' that was found */
| 90 for (m = j; m < len; m++) | |
| 91 if (rstr[m] == ']') { | |
| 92 j = m; | |
| 93 break; | |
| 94 } | |
/* j still equal to i: the first scan found no ']', so '[' is a literal */
| 95 if (j == i) | |
| 96 goto literal; | |
| 97 | |
| 98 /* CLASSES [=EQUIV=] (skip) */ | |
| 99 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - … | |
/* only the five-rune form (j - i == 4) is accepted; it yields a one-rune range */
| 100 if (j - i != 4) | |
| 101 goto literal; | |
| 102 (*set)[setranges].start = rstr[i + 2]; | |
| 103 (*set)[setranges].end = rstr[i + 2]; | |
| 104 (*set)[setranges].quant = 1; | |
| 105 setranges++; | |
| 106 i = j; | |
| 107 continue; | |
| 108 } | |
| 109 | |
| 110 /* CLASSES [:CLASS:] */ | |
| 111 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - … | |
/* try every classes[] entry; a hit ends parsing, no hit reaches the eprintf below */
| 112 for (n = 0; n < LEN(classes); n++) { | |
| 113 if (rstrmatch(rstr + i + 2, clas… | |
| 114 *check = classes[n].chec… | |
| 115 return 0; | |
| 116 } | |
| 117 } | |
| 118 eprintf("Invalid character class.\n"); | |
| 119 } | |
| 120 | |
| 121 /* REPEAT [_*n] (only allowed in set2) */ | |
| 122 if (j - i > 2 && rstr[i + 2] == '*') { | |
| 123 /* check if right side of '*' is a numbe… | |
/*
 * The digits between '*' and ']' are read right to left: factor is the
 * place value of the current digit and base is 8 when the rune right
 * after '*' is '0', otherwise 10.
 */
| 124 q = 0; | |
| 125 factor = 1; | |
| 126 base = (rstr[i + 3] == '0') ? 8 : 10; | |
| 127 for (n = j - 1; n > i + 2; n--) { | |
| 128 if (rstr[n] < '0' || rstr[n] > '… | |
/* n = 0 flags a rejected rune; a normal loop exit leaves n == i + 2, never 0 */
| 129 n = 0; | |
| 130 break; | |
| 131 } | |
| 132 q += (rstr[n] - '0') * factor; | |
| 133 factor *= base; | |
| 134 } | |
/* not a valid count: resume the search after this ']' */
| 135 if (n == 0) { | |
| 136 j = m + 1; | |
| 137 goto nextbrack; | |
| 138 } | |
| 139 (*set)[setranges].start = rstr[i + 1]; | |
| 140 (*set)[setranges].end = rstr[i + 1]; | |
/* a count of zero (or none) falls back to a setlen-based value; rest of line not visible */
| 141 (*set)[setranges].quant = q ? q : setlen… | |
| 142 setranges++; | |
| 143 i = j; | |
| 144 continue; | |
| 145 } | |
| 146 | |
/* none of the bracket forms matched up to this ']': look for a later one */
| 147 j = m + 1; | |
| 148 goto nextbrack; | |
| 149 } | |
| 150 literal: | |
| 151 /* RANGES [_-__-_], _-__-_ */ | |
| 152 /* LITERALS _______ */ | |
| 153 (*set)[setranges].start = rstr[i]; | |
| 154 | |
/*
 * When a '-' and a further rune follow (remainder of the condition is
 * cut off here), skip ahead so that end becomes the rune after '-';
 * otherwise i is unchanged and the range is the single rune rstr[i].
 */
| 155 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= … | |
| 156 i += 2; | |
| 157 (*set)[setranges].end = rstr[i]; | |
| 158 (*set)[setranges].quant = 1; | |
| 159 setranges++; | |
| 160 } | |
| 161 | |
| 162 free(rstr); | |
| 163 return setranges; | |
| 164 } | |
| 165 | |
| 166 static void | |
| 167 usage(void) | |
| 168 { | |
| 169 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0); | |
| 170 } | |
| 171 | |
/*
 * main: parse the option flags, build set1 (and set2 when a second operand
 * is given) with makeset(), then process stdin one rune at a time.
 *
 * The processing loop is built from two labels. `read` fetches the next
 * rune and decides its fate; `write` emits the rune (unless -s squeezing
 * drops it) and jumps back to `read`. Jumping directly to `read` discards
 * the current rune. The function returns from inside `read` once
 * efgetrune() returns 0.
 *
 * NOTE(review): several long lines are cut off in this view; comments on
 * those lines describe only the visible part.
 */
| 172 int | |
| 173 main(int argc, char *argv[]) | |
| 174 { | |
| 175 Rune r, lastrune = 0; | |
| 176 size_t off1, off2, i, m; | |
| 177 int ret = 0; | |
| 178 | |
| 179 ARGBEGIN { | |
| 180 case 'c': | |
| 181 case 'C': | |
| 182 cflag = 1; | |
| 183 break; | |
| 184 case 'd': | |
| 185 dflag = 1; | |
| 186 break; | |
| 187 case 's': | |
| 188 sflag = 1; | |
| 189 break; | |
| 190 default: | |
| 191 usage(); | |
| 192 } ARGEND | |
| 193 | |
/*
 * Operand count rules: one or two operands overall; exactly two when -d
 * and -s have the same value; exactly one when -d is set. Taken together
 * the last two clauses reject every invocation that sets both -d and -s.
 */
| 194 if (!argc || argc > 2 || (dflag == sflag && argc != 2) || | |
| 195 (dflag && argc != 1)) | |
| 196 usage(); | |
| 197 | |
| 198 set1ranges = makeset(argv[0], &set1, &set1check); | |
| 199 if (argc == 2) { | |
| 200 set2ranges = makeset(argv[1], &set2, &set2check); | |
| 201 /* sanity checks as we are translating */ | |
| 202 if (!set2ranges && !set2check) | |
| 203 eprintf("cannot map to an empty set.\n"); | |
| 204 if (set2check && set2check != islowerrune && | |
| 205 set2check != isupperrune) { | |
| 206 eprintf("can only map to 'lower' and 'upper' cla… | |
| 207 } | |
| 208 } | |
| 209 read: | |
/* efgetrune() returning 0 ends the program: shut the streams and return */
| 210 if (!efgetrune(&r, stdin, "<stdin>")) { | |
| 211 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>… | |
| 212 return ret; | |
| 213 } | |
/* -s with a single operand: nothing to translate, only squeeze at `write` */
| 214 if (argc == 1 && sflag) | |
| 215 goto write; | |
/* search the set1 ranges for r; off1 accumulates rangelen() of the ranges passed */
| 216 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]),… | |
| 217 if (set1[i].start <= r && r <= set1[i].end) { | |
/* r is in set1: -d drops it, unless -c is also set, in which case it is kept */
| 218 if (dflag) { | |
| 219 if (cflag) | |
| 220 goto write; | |
| 221 else | |
| 222 goto read; | |
| 223 } | |
/* with -c (and no -d) runes that are in set1 pass through unchanged */
| 224 if (cflag) | |
| 225 goto write; | |
| 226 | |
| 227 /* map r to set2 */ | |
| 228 if (set2check) { | |
/* set2 is a class; the checks after makeset() only let lower/upper through */
| 229 if (set2check == islowerrune) | |
| 230 r = tolowerrune(r); | |
| 231 else | |
| 232 r = toupperrune(r); | |
| 233 } else { | |
/* off1 becomes the position of r within set1 */
| 234 off1 += r - set1[i].start; | |
/* position beyond the length of set2: use the end of set2's last range */
| 235 if (off1 > setlen(set2, set2ranges) - 1)… | |
| 236 r = set2[set2ranges - 1].end; | |
| 237 goto write; | |
| 238 } | |
/*
 * Walk set2, summing range lengths in off2, to find the range that
 * covers position off1. The m++ before break is undone by the m--
 * after the loop, so m ends up indexing that range.
 */
| 239 for (m = 0, off2 = 0; m < set2ranges; m+… | |
| 240 if (off2 + rangelen(set2[m]) > o… | |
| 241 m++; | |
| 242 break; | |
| 243 } | |
| 244 off2 += rangelen(set2[m]); | |
| 245 } | |
| 246 m--; | |
| 247 r = set2[m].start + (off1 - off2) / set2… | |
| 248 } | |
| 249 goto write; | |
| 250 } | |
| 251 } | |
/* no range matched; if set1 is a class, test r against its predicate */
| 252 if (set1check && set1check(r)) { | |
| 253 if (cflag) | |
| 254 goto write; | |
| 255 if (dflag) | |
| 256 goto read; | |
| 257 if (set2check) { | |
| 258 if (set2check == islowerrune) | |
| 259 r = tolowerrune(r); | |
| 260 else | |
| 261 r = toupperrune(r); | |
| 262 } else { | |
| 263 r = set2[set2ranges - 1].end; | |
| 264 } | |
| 265 goto write; | |
| 266 } | |
/* r is not in set1 and -c is set without -d: this is the rune to translate */
| 267 if (!dflag && cflag) { | |
| 268 if (set2check) { | |
| 269 if (set2check == islowerrune) | |
| 270 r = tolowerrune(r); | |
| 271 else | |
| 272 r = toupperrune(r); | |
| 273 } else { | |
| 274 r = set2[set2ranges - 1].end; | |
| 275 } | |
| 276 goto write; | |
| 277 } | |
/* r is not in set1 and both -d and -c are set: drop it */
| 278 if (dflag && cflag) | |
| 279 goto read; | |
/* otherwise r falls through to `write` unchanged */
| 280 write: | |
/*
 * Squeeze: a rune equal to the previously written one is dropped when it
 * belongs to set1 (single operand) or set2 (two operands).
 * NOTE(review): lastrune starts at 0, so a leading 0 rune that is in the
 * squeezed set compares equal to it and is dropped.
 */
| 281 if (argc == 1 && sflag && r == lastrune) { | |
| 282 if (set1check && set1check(r)) | |
| 283 goto read; | |
| 284 for (i = 0; i < set1ranges; i++) { | |
| 285 if (set1[i].start <= r && r <= set1[i].end) | |
| 286 goto read; | |
| 287 } | |
| 288 } | |
| 289 if (argc == 2 && sflag && r == lastrune) { | |
| 290 if (set2check && set2check(r)) | |
| 291 goto read; | |
| 292 for (i = 0; i < set2ranges; i++) { | |
| 293 if (set2[i].start <= r && r <= set2[i].end) | |
| 294 goto read; | |
| 295 } | |
| 296 } | |
| 297 efputrune(&r, stdout, "<stdout>"); | |
| 298 lastrune = r; | |
| 299 goto read; | |
| 300 } | |