Introduction
Introduction Statistics Contact Development Disclaimer Help
bidirectional-test.c - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
bidirectional-test.c (15968B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <stddef.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
9 #include "../grapheme.h"
10 #include "util.h"
11
12 struct bidirectional_test {
13 uint_least32_t *cp;
14 size_t cplen;
15 enum grapheme_bidirectional_direction mode[3];
16 size_t modelen;
17 enum grapheme_bidirectional_direction resolved;
18 int_least8_t *level;
19 int_least16_t *reorder;
20 size_t reorderlen;
21 };
22
/*
 * Maps every class name understood by classtocp() to the code point
 * that is substituted for it.
 */
static const struct {
	const char *class;
	const uint_least32_t cp;
} classcpmap[] = {
	{ .class = "L", .cp = UINT32_C(0x0041) },
	{ .class = "AL", .cp = UINT32_C(0x0608) },
	{ .class = "AN", .cp = UINT32_C(0x0600) },
	{ .class = "B", .cp = UINT32_C(0x000A) },
	{ .class = "BN", .cp = UINT32_C(0x0000) },
	{ .class = "CS", .cp = UINT32_C(0x002C) },
	{ .class = "EN", .cp = UINT32_C(0x0030) },
	{ .class = "ES", .cp = UINT32_C(0x002B) },
	{ .class = "ET", .cp = UINT32_C(0x0023) },
	{ .class = "FSI", .cp = UINT32_C(0x2068) },
	{ .class = "LRE", .cp = UINT32_C(0x202A) },
	{ .class = "LRI", .cp = UINT32_C(0x2066) },
	{ .class = "LRO", .cp = UINT32_C(0x202D) },
	{ .class = "NSM", .cp = UINT32_C(0x0300) },
	{ .class = "ON", .cp = UINT32_C(0x0021) },
	{ .class = "PDF", .cp = UINT32_C(0x202C) },
	{ .class = "PDI", .cp = UINT32_C(0x2069) },
	{ .class = "R", .cp = UINT32_C(0x05BE) },
	{ .class = "RLE", .cp = UINT32_C(0x202B) },
	{ .class = "RLI", .cp = UINT32_C(0x2067) },
	{ .class = "RLO", .cp = UINT32_C(0x202E) },
	{ .class = "S", .cp = UINT32_C(0x0009) },
	{ .class = "WS", .cp = UINT32_C(0x000C) },
};

/*
 * Look up the class name made of the first len bytes of str (which
 * does not have to be NUL-terminated at that point) and store the
 * associated code point in *cp.
 *
 * Returns 0 on success. If the name is not in classcpmap, a message
 * is written to stderr, *cp is left untouched and 1 is returned.
 */
static int
classtocp(const char *str, size_t len, uint_least32_t *cp)
{
	const size_t nclasses = sizeof(classcpmap) / sizeof(classcpmap[0]);
	size_t i;

	for (i = 0; i < nclasses; i++) {
		/*
		 * Compare the lengths first: strncmp() over len bytes
		 * alone would accept any prefix of a class name (and
		 * the empty string) as a match.
		 */
		if (strlen(classcpmap[i].class) == len &&
		    !strncmp(str, classcpmap[i].class, len)) {
			*cp = classcpmap[i].cp;
			return 0;
		}
	}
	fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
	        str);

	return 1;
}
68
/*
 * Turn a list of class names separated by single spaces into a newly
 * allocated array of code points (one per name, via classtocp()).
 *
 * An empty string yields *cp = NULL and *cplen = 0. Otherwise *cp and
 * *cplen are always set, even when 1 is returned because a name was
 * not recognized. Returns 0 on success; exits on allocation failure.
 */
static int
parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
{
	const char *tok, *sep;
	size_t n, idx, toklen;

	if (str[0] == '\0') {
		*cp = NULL;
		*cplen = 0;
		return 0;
	}

	/* every space separates two entries, so entries = spaces + 1 */
	n = 1;
	for (tok = str; (sep = strchr(tok, ' ')) != NULL; tok = sep + 1) {
		n++;
	}

	*cplen = n;
	*cp = calloc(n, sizeof(**cp));
	if (*cp == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert each entry in turn */
	idx = 0;
	tok = str;
	do {
		sep = strchr(tok, ' ');
		toklen = (sep != NULL) ? (size_t)(sep - tok) : strlen(tok);

		if (classtocp(tok, toklen, &((*cp)[idx])) != 0) {
			return 1;
		}
		idx++;

		if (sep != NULL) {
			tok = sep + 1;
		}
	} while (sep != NULL);

	return 0;
}
107
/*
 * Parse the first len bytes of str as a level and store it in *level.
 *
 * A single 'x' yields -1, an empty token (len == 0) yields 0, and any
 * other token has to be a decimal number in the range 0..127.
 *
 * Returns 0 on success. On failure a message is written to stderr,
 * *level is left untouched and 1 is returned.
 */
static int
strtolevel(const char *str, size_t len, int_least8_t *level)
{
	size_t i;
	int value;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a level of -1
		 */
		*level = -1;
		return 0;
	}

	if (len > 3) {
		/*
		 * given we can only express (positive) numbers from
		 * 0..127, more than 3 digits means an excess
		 */
		goto toolarge;
	}

	/* check if the string is completely numerical */
	for (i = 0; i < len; i++) {
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr,
			        "strtolevel: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}
	}

	/* at most three digits, so the value fits an int comfortably */
	for (value = 0, i = 0; i < len; i++) {
		value = value * 10 + (str[i] - '0');
	}
	if (value > 127) {
		goto toolarge;
	}
	*level = (int_least8_t)value;

	return 0;
toolarge:
	fprintf(stderr, "strtolevel: '%.*s' is too large.\n", (int)len, str);
	return 1;
}
160
/*
 * Parse the first len bytes of str as a reorder entry and store it in
 * *reorder.
 *
 * A single 'x' yields -1, an empty token (len == 0) yields 0, and any
 * other token has to be a decimal number of at most three digits
 * (0..999).
 *
 * Returns 0 on success. On failure a message is written to stderr,
 * *reorder is left untouched and 1 is returned.
 */
static int
strtoreorder(const char *str, size_t len, int_least16_t *reorder)
{
	size_t i;
	int value;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a reorder of -1
		 */
		*reorder = -1;
		return 0;
	}

	if (len > 3) {
		/*
		 * given we want to only express (positive) numbers from
		 * 0..999 (at most!), more than 3 digits means an excess
		 */
		goto toolarge;
	}

	/* check if the string is completely numerical */
	for (i = 0; i < len; i++) {
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr,
			        "strtoreorder: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}
	}

	/* at most three digits, i.e. at most 999 */
	for (value = 0, i = 0; i < len; i++) {
		value = value * 10 + (str[i] - '0');
	}
	*reorder = (int_least16_t)value;

	return 0;
toolarge:
	fprintf(stderr, "strtoreorder: '%.*s' is too large.\n", (int)len, str);
	return 1;
}
209
/*
 * Turn a list of levels separated by single spaces into a newly
 * allocated array (one entry per token, via strtolevel()).
 *
 * An empty string yields *level = NULL and *levellen = 0. Otherwise
 * *level and *levellen are always set, even when 1 is returned because
 * a token could not be parsed. Returns 0 on success; exits on
 * allocation failure.
 */
static int
parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
{
	const char *tok, *sep;
	size_t n, idx, toklen;

	if (str[0] == '\0') {
		*level = NULL;
		*levellen = 0;
		return 0;
	}

	/* every space separates two entries, so entries = spaces + 1 */
	n = 1;
	for (tok = str; (sep = strchr(tok, ' ')) != NULL; tok = sep + 1) {
		n++;
	}

	*levellen = n;
	*level = calloc(n, sizeof(**level));
	if (*level == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert each entry in turn */
	idx = 0;
	tok = str;
	do {
		sep = strchr(tok, ' ');
		toklen = (sep != NULL) ? (size_t)(sep - tok) : strlen(tok);

		if (strtolevel(tok, toklen, &((*level)[idx])) != 0) {
			return 1;
		}
		idx++;

		if (sep != NULL) {
			tok = sep + 1;
		}
	} while (sep != NULL);

	return 0;
}
249
/*
 * Turn a list of reorder entries separated by single spaces into a
 * newly allocated array (one entry per token, via strtoreorder()).
 *
 * An empty string yields *reorder = NULL and *reorderlen = 0.
 * Otherwise *reorder and *reorderlen are always set, even when 1 is
 * returned because a token could not be parsed. Returns 0 on success;
 * exits on allocation failure.
 */
static int
parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderlen)
{
	const char *tok, *sep;
	size_t n, idx, toklen;

	if (str[0] == '\0') {
		*reorder = NULL;
		*reorderlen = 0;
		return 0;
	}

	/* every space separates two entries, so entries = spaces + 1 */
	n = 1;
	for (tok = str; (sep = strchr(tok, ' ')) != NULL; tok = sep + 1) {
		n++;
	}

	*reorderlen = n;
	*reorder = calloc(n, sizeof(**reorder));
	if (*reorder == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert each entry in turn */
	idx = 0;
	tok = str;
	do {
		sep = strchr(tok, ' ');
		toklen = (sep != NULL) ? (size_t)(sep - tok) : strlen(tok);

		if (strtoreorder(tok, toklen, &((*reorder)[idx])) != 0) {
			return 1;
		}
		idx++;

		if (sep != NULL) {
			tok = sep + 1;
		}
	} while (sep != NULL);

	return 0;
}
289
290 static void
291 bidirectional_test_list_print(const struct bidirectional_test *test,
292 size_t testlen, const char *identifier,
293 const char *progname)
294 {
295 size_t i, j;
296
297 printf("/* Automatically generated by %s */\n"
298 "#include <stdint.h>\n#include <stddef.h>\n\n"
299 "#include \"../grapheme.h\"\n\n",
300 progname);
301
302 printf("static const struct {\n"
303 "\tuint_least32_t *cp;\n"
304 "\tsize_t cplen;\n"
305 "\tenum grapheme_bidirectional_direction *mode;\n"
306 "\tsize_t modelen;\n"
307 "\tenum grapheme_bidirectional_direction resolved;\n"
308 "\tint_least8_t *level;\n"
309 "\tint_least16_t *reorder;\n"
310 "\tsize_t reorderlen;\n} %s[] = {\n",
311 identifier);
312 for (i = 0; i < testlen; i++) {
313 printf("\t{\n");
314
315 printf("\t\t.cp = (uint_least32_t[]){");
316 for (j = 0; j < test[i].cplen; j++) {
317 printf(" UINT32_C(0x%06X)", test[i].cp[j]);
318 if (j + 1 < test[i].cplen) {
319 putchar(',');
320 }
321 }
322 printf(" },\n");
323 printf("\t\t.cplen = %zu,\n", test[i].cplen);
324
325 printf("\t\t.mode = (enum "
326 "grapheme_bidirectional_direction[]){");
327 for (j = 0; j < test[i].modelen; j++) {
328 if (test[i].mode[j] ==
329 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
330 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTIO…
331 "NEUTRAL");
332 } else if (test[i].mode[j] ==
333 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR)…
334 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTIO…
335 } else if (test[i].mode[j] ==
336 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL)…
337 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTIO…
338 }
339 if (j + 1 < test[i].modelen) {
340 putchar(',');
341 }
342 }
343 printf(" },\n");
344 printf("\t\t.modelen = %zu,\n", test[i].modelen);
345
346 printf("\t\t.resolved = ");
347 if (test[i].resolved ==
348 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
349 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_"
350 "NEUTRAL");
351 } else if (test[i].resolved ==
352 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
353 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
354 } else if (test[i].resolved ==
355 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
356 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
357 }
358 printf(",\n");
359
360 printf("\t\t.level = (int_least8_t[]){");
361 for (j = 0; j < test[i].cplen; j++) {
362 printf(" %" PRIdLEAST8, test[i].level[j]);
363 if (j + 1 < test[i].cplen) {
364 putchar(',');
365 }
366 }
367 printf(" },\n");
368
369 printf("\t\t.reorder = ");
370 if (test[i].reorderlen > 0) {
371 printf("(int_least16_t[]){");
372 for (j = 0; j < test[i].reorderlen; j++) {
373 printf(" %" PRIdLEAST16, test[i].reorder…
374 if (j + 1 < test[i].reorderlen) {
375 putchar(',');
376 }
377 }
378 printf(" },\n");
379 } else {
380 printf("NULL,\n");
381 }
382 printf("\t\t.reorderlen = %zu,\n", test[i].reorderlen);
383
384 printf("\t},\n");
385 }
386 printf("};\n");
387 }
388
/* all test cases collected so far and their number */
static struct bidirectional_test *test = NULL;
static size_t testlen = 0;

/* level list of the most recent "@Levels:" line */
static int_least8_t *current_level = NULL;
static size_t current_level_len = 0;

/* reorder list of the most recent "@Reorder:" line */
static int_least16_t *current_reorder = NULL;
static size_t current_reorder_len = 0;
396
397 static int
398 test_callback(const char *file, char **field, size_t nfields, char *comm…
399 void *payload)
400 {
401 char *tmp;
402
403 (void)file;
404 (void)comment;
405 (void)payload;
406
407 /* we either get a line beginning with an '@', or an input line …
408 if (nfields > 0 && field[0][0] == '@') {
409 if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - …
410 tmp = field[0] + sizeof("@Levels:") - 1;
411 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '…
412 tmp++) {
413 ;
414 }
415 free(current_level);
416 parse_level_list(tmp, &current_level,
417 &current_level_len);
418 } else if (!strncmp(field[0],
419 "@Reorder:", sizeof("@Reorder:") - 1…
420 tmp = field[0] + sizeof("@Reorder:") - 1;
421 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '…
422 tmp++) {
423 ;
424 }
425 free(current_reorder);
426 parse_reorder_list(tmp, &current_reorder,
427 &current_reorder_len);
428 } else {
429 fprintf(stderr, "Unknown @-input-line.\n");
430 exit(1);
431 }
432 } else {
433 if (nfields < 2) {
434 /* discard any line that does not have at least …
435 */
436 return 0;
437 }
438
439 /* extend test array */
440 if (!(test = realloc(test, (++testlen) * sizeof(*test)))…
441 fprintf(stderr, "realloc: %s\n", strerror(errno)…
442 exit(1);
443 }
444
445 /* parse field data */
446 parse_class_list(field[0], &(test[testlen - 1].cp),
447 &(test[testlen - 1].cplen));
448
449 /* copy current level- and reorder-arrays */
450 if (!(test[testlen - 1].level =
451 calloc(current_level_len,
452 sizeof(*(test[testlen - 1].level)))…
453 fprintf(stderr, "calloc: %s\n", strerror(errno));
454 exit(1);
455 }
456 memcpy(test[testlen - 1].level, current_level,
457 current_level_len * sizeof(*(test[testlen - 1].le…
458
459 if (!(test[testlen - 1].reorder =
460 calloc(current_reorder_len,
461 sizeof(*(test[testlen - 1].reorder)…
462 fprintf(stderr, "calloc: %s\n", strerror(errno));
463 exit(1);
464 }
465 if (current_reorder != NULL) {
466 memcpy(test[testlen - 1].reorder, current_reorde…
467 current_reorder_len *
468 sizeof(*(test[testlen - 1].reorde…
469 }
470 test[testlen - 1].reorderlen = current_reorder_len;
471
472 if (current_level_len != test[testlen - 1].cplen) {
473 fprintf(stderr,
474 "mismatch between string and level lengt…
475 exit(1);
476 }
477
478 /* parse paragraph-level-bitset */
479 if (strlen(field[1]) != 1) {
480 fprintf(stderr, "malformed paragraph-level-bitse…
481 exit(1);
482 } else if (field[1][0] == '2') {
483 test[testlen - 1].mode[0] =
484 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
485 test[testlen - 1].modelen = 1;
486 } else if (field[1][0] == '3') {
487 /* auto=0 and LTR=1 */
488 test[testlen - 1].mode[0] =
489 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
490 test[testlen - 1].mode[1] =
491 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
492 test[testlen - 1].modelen = 2;
493 } else if (field[1][0] == '4') {
494 test[testlen - 1].mode[0] =
495 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
496 test[testlen - 1].modelen = 1;
497 } else if (field[1][0] == '5') {
498 test[testlen - 1].mode[0] =
499 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
500 test[testlen - 1].mode[1] =
501 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
502 test[testlen - 1].modelen = 2;
503 } else if (field[1][0] == '7') {
504 test[testlen - 1].mode[0] =
505 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
506 test[testlen - 1].mode[1] =
507 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
508 test[testlen - 1].mode[2] =
509 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
510 test[testlen - 1].modelen = 3;
511 } else {
512 fprintf(stderr,
513 "unhandled paragraph-level-bitset %s.\n",
514 field[1]);
515 exit(1);
516 }
517
518 /* the resolved paragraph level is always neutral as the…
519 * file does not specify it */
520 test[testlen - 1].resolved =
521 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
522 }
523
524 return 0;
525 }
526
527 static int
528 character_test_callback(const char *file, char **field, size_t nfields,
529 char *comment, void *payload)
530 {
531 size_t tmp;
532
533 (void)file;
534 (void)comment;
535 (void)payload;
536
537 if (nfields < 5) {
538 /* discard any line that does not have at least 5 fields…
539 return 0;
540 }
541
542 /* extend test array */
543 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
544 fprintf(stderr, "realloc: %s\n", strerror(errno));
545 exit(1);
546 }
547
548 /* parse field data */
549 parse_cp_list(field[0], &(test[testlen - 1].cp),
550 &(test[testlen - 1].cplen));
551 parse_level_list(field[3], &(test[testlen - 1].level), &tmp);
552 parse_reorder_list(field[4], &(test[testlen - 1].reorder),
553 &(test[testlen - 1].reorderlen));
554
555 /* parse paragraph-level-mode */
556 if (strlen(field[1]) != 1) {
557 fprintf(stderr, "malformed paragraph-level-setting.\n");
558 exit(1);
559 } else if (field[1][0] == '0') {
560 test[testlen - 1].mode[0] =
561 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
562 } else if (field[1][0] == '1') {
563 test[testlen - 1].mode[0] =
564 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
565 } else if (field[1][0] == '2') {
566 test[testlen - 1].mode[0] =
567 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
568 } else {
569 fprintf(stderr, "unhandled paragraph-level-setting.\n");
570 exit(1);
571 }
572 test[testlen - 1].modelen = 1;
573
574 /* parse resolved paragraph level */
575 if (strlen(field[2]) != 1) {
576 fprintf(stderr, "malformed resolved paragraph level.\n");
577 exit(1);
578 } else if (field[2][0] == '0') {
579 test[testlen - 1].resolved =
580 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
581 } else if (field[2][0] == '1') {
582 test[testlen - 1].resolved =
583 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
584 } else {
585 fprintf(stderr, "unhandled resolved paragraph level.\n");
586 exit(1);
587 }
588
589 if (tmp != test[testlen - 1].cplen) {
590 fprintf(stderr, "mismatch between string and level lengt…
591 exit(1);
592 }
593
594 return 0;
595 }
596
597 int
598 main(int argc, char *argv[])
599 {
600 (void)argc;
601
602 parse_file_with_callback("data/BidiTest.txt", test_callback, NUL…
603 parse_file_with_callback("data/BidiCharacterTest.txt",
604 character_test_callback, NULL);
605 bidirectional_test_list_print(test, testlen, "bidirectional_test…
606 argv[0]);
607
608 return 0;
609 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.