Properly parse reorder list - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 4a4919e8764d3e88c4e33da537f42a0557a8bcf5 | |
parent 7ddf17bf2f20b598d204f32d441e8ea30765b577 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Fri, 26 May 2023 10:20:32 +0200 | |
Properly parse reorder list | |
It worked all fine for the almost million conformance tests, except for | |
test number 490894, given its length exceeds 127 and thus the reorder | |
levels don't fit in a signed 8-bit-integer. | |
This is now fixed by making it 16 bits and making the parsing even | |
stricter so we will not miss out on errors of this kind in this part of | |
the code again. | |
We now pass all the tests. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M gen/bidirectional-test.c | 110 ++++++++++++++++++++++++++++-… | |
1 file changed, 100 insertions(+), 10 deletions(-) | |
--- | |
diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c | |
@@ -16,7 +16,7 @@ struct bidirectional_test { | |
size_t modelen; | |
enum grapheme_bidirectional_direction resolved; | |
int_least8_t *level; | |
- int_least8_t *reorder; | |
+ int_least16_t *reorder; | |
size_t reorderlen; | |
}; | |
@@ -130,14 +130,15 @@ strtolevel(const char *str, size_t len, int_least8_t *lev… | |
/* check if the string is completely numerical */ | |
for (i = 0; i < len; i++) { | |
if (str[i] < '0' && str[i] > '9') { | |
- fprintf(stderr, "hextocp: '%.*s' is not an integer.\n", | |
+ fprintf(stderr, "strtolevel: '%.*s' is not an integer.… | |
(int)len, str); | |
return 1; | |
} | |
} | |
if (len == 3) { | |
- if (str[0] != '1') { | |
+ if (str[0] != '1' || str[1] > '2' || | |
+ (str[1] == '2' && str[2] > '7')) { | |
goto toolarge; | |
} | |
*level = (str[0] - '0') * 100 + (str[1] - '0') * 10 + | |
@@ -152,11 +153,60 @@ strtolevel(const char *str, size_t len, int_least8_t *lev… | |
return 0; | |
toolarge: | |
- fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str); | |
+ fprintf(stderr, "strtolevel: '%.*s' is too large.\n", (int)len, str); | |
return 1; | |
} | |
static int | |
+strtoreorder(const char *str, size_t len, int_least16_t *reorder) | |
+{ | |
+ size_t i; | |
+ | |
+ if (len == 1 && str[0] == 'x') { | |
+ /* | |
+ * 'x' indicates those characters that are ignored. | |
+ * We indicate this with a reorder of -1 | |
+ */ | |
+ *reorder = -1; | |
+ return 0; | |
+ } | |
+ | |
+ if (len > 3) { | |
+ /* | |
+ * given we want to only express (positive) numbers from | |
+ * 0..999 (at most!), more than 3 digits means an excess | |
+ */ | |
+ goto toolarge; | |
+ } | |
+ | |
+ /* check if the string is completely numerical */ | |
+ for (i = 0; i < len; i++) { | |
+ if (str[i] < '0' && str[i] > '9') { | |
+ fprintf(stderr, "strtoreorder: '%.*s' is not an intege… | |
+ (int)len, str); | |
+ return 1; | |
+ } | |
+ } | |
+ | |
+ if (len == 3) { | |
+ *reorder = (str[0] - '0') * 100 + (str[1] - '0') * 10 + | |
+ (str[2] - '0'); | |
+ } else if (len == 2) { | |
+ *reorder = (str[0] - '0') * 10 + (str[1] - '0'); | |
+ } else if (len == 1) { | |
+ *reorder = (str[0] - '0'); | |
+ } else { /* len == 0 */ | |
+ *reorder = 0; | |
+ } | |
+ | |
+ return 0; | |
+toolarge: | |
+ fprintf(stderr, "strtoreorder: '%.*s' is too large.\n", (int)len, str); | |
+ return 1; | |
+} | |
+ | |
+ | |
+static int | |
parse_level_list(const char *str, int_least8_t **level, size_t *levellen) | |
{ | |
size_t count, i; | |
@@ -196,6 +246,46 @@ parse_level_list(const char *str, int_least8_t **level, si… | |
return 0; | |
} | |
+static int | |
+parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderle… | |
+{ | |
+ size_t count, i; | |
+ const char *tmp1 = NULL, *tmp2 = NULL; | |
+ | |
+ if (strlen(str) == 0) { | |
+ *reorder = NULL; | |
+ *reorderlen = 0; | |
+ return 0; | |
+ } | |
+ | |
+ /* count the number of spaces in the string and infer list length */ | |
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; | |
+ count++, tmp1 = tmp2 + 1) { | |
+ ; | |
+ } | |
+ | |
+ /* allocate resources */ | |
+ if (!(*reorder = calloc((*reorderlen = count), sizeof(**reorder)))) { | |
+ fprintf(stderr, "calloc: %s\n", strerror(errno)); | |
+ exit(1); | |
+ } | |
+ | |
+ /* go through the string again, parsing the reorders */ | |
+ for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { | |
+ tmp2 = strchr(tmp1, ' '); | |
+ if (strtoreorder(tmp1, | |
+ tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), | |
+ &((*reorder)[i]))) { | |
+ return 1; | |
+ } | |
+ if (tmp2 != NULL) { | |
+ tmp1 = tmp2 + 1; | |
+ } | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
static void | |
bidirectional_test_list_print(const struct bidirectional_test *test, | |
size_t testlen, const char *identifier, | |
@@ -215,7 +305,7 @@ bidirectional_test_list_print(const struct bidirectional_te… | |
"\tsize_t modelen;\n" | |
"\tenum grapheme_bidirectional_direction resolved;\n" | |
"\tint_least8_t *level;\n" | |
- "\tint_least8_t *reorder;\n" | |
+ "\tint_least16_t *reorder;\n" | |
"\tsize_t reorderlen;\n} %s[] = {\n", | |
identifier); | |
for (i = 0; i < testlen; i++) { | |
@@ -277,9 +367,9 @@ bidirectional_test_list_print(const struct bidirectional_te… | |
printf("\t\t.reorder = "); | |
if (test[i].reorderlen > 0) { | |
- printf("(int_least8_t[]){"); | |
+ printf("(int_least16_t[]){"); | |
for (j = 0; j < test[i].reorderlen; j++) { | |
- printf(" %" PRIdLEAST8, test[i].reorder[j]); | |
+ printf(" %" PRIdLEAST16, test[i].reorder[j]); | |
if (j + 1 < test[i].reorderlen) { | |
putchar(','); | |
} | |
@@ -300,7 +390,7 @@ static size_t testlen; | |
static int_least8_t *current_level; | |
static size_t current_level_len; | |
-static int_least8_t *current_reorder; | |
+static int_least16_t *current_reorder; | |
static size_t current_reorder_len; | |
static int | |
@@ -332,7 +422,7 @@ test_callback(const char *file, char **field, size_t nfield… | |
; | |
} | |
free(current_reorder); | |
- parse_level_list(tmp, &current_reorder, | |
+ parse_reorder_list(tmp, &current_reorder, | |
&current_reorder_len); | |
} else { | |
fprintf(stderr, "Unknown @-input-line.\n"); | |
@@ -458,7 +548,7 @@ character_test_callback(const char *file, char **field, siz… | |
parse_cp_list(field[0], &(test[testlen - 1].cp), | |
&(test[testlen - 1].cplen)); | |
parse_level_list(field[3], &(test[testlen - 1].level), &tmp); | |
- parse_level_list(field[4], &(test[testlen - 1].reorder), | |
+ parse_reorder_list(field[4], &(test[testlen - 1].reorder), | |
&(test[testlen - 1].reorderlen)); | |
/* parse paragraph-level-mode */ |