Apply clang-format - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit abdc2ba0c764c527aaa2ed9fe42db27d71a10bc2 | |
parent 50efb9a3396588e6e1266f51ec5446a9fa8013ea | |
Author: Laslo Hunhold <[email protected]> | |
Date: Tue, 15 Nov 2022 15:53:56 +0100 | |
Apply clang-format | |
Even though this disrupts the backtrackability of the code a bit, | |
it's better to rip the band aid off now than to push it on into the | |
future. | |
With these changes, formatting is automatically governed and ensured by | |
a simple call to | |
make format | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M benchmark/bidirectional.c | 2 +- | |
M benchmark/case.c | 5 +++-- | |
M benchmark/character.c | 12 ++++++------ | |
M benchmark/line.c | 4 ++-- | |
M benchmark/sentence.c | 7 ++++--- | |
M benchmark/utf8-decode.c | 24 +++++++++++------------- | |
M benchmark/util.c | 25 ++++++++++++------------- | |
M benchmark/util.h | 8 ++++---- | |
M benchmark/word.c | 4 ++-- | |
M gen/bidirectional-test.c | 150 +++++++++++++++++++----------… | |
M gen/bidirectional.c | 144 ++++++++++++++++-------------… | |
M gen/case.c | 79 ++++++++++++++++++-----------… | |
M gen/character.c | 64 ++++++++++++++++-------------… | |
M gen/line.c | 343 +++++++++++++++++------------… | |
M gen/sentence.c | 66 ++++++++++++++++-------------… | |
M gen/util.c | 202 +++++++++++++++++------------… | |
M gen/util.h | 39 ++++++++++++++++-------------… | |
M gen/word.c | 97 ++++++++++++++++-------------… | |
M grapheme.h | 24 ++++++++++++++---------- | |
M src/bidirectional.c | 323 +++++++++++++++++++----------… | |
M src/case.c | 125 ++++++++++++++++++-----------… | |
M src/character.c | 160 ++++++++++++++++-------------… | |
M src/line.c | 108 +++++++++++++++++------------… | |
M src/sentence.c | 44 ++++++++++++++++-------------… | |
M src/utf8.c | 26 +++++++++++++------------- | |
M src/util.c | 51 ++++++++++++++++++-----------… | |
M src/util.h | 26 ++++++++++++++------------ | |
M src/word.c | 95 +++++++++++++++++------------… | |
M test/bidirectional.c | 20 +++++++++++++------- | |
M test/case.c | 331 +++++++++++++++++++----------… | |
M test/character.c | 19 +++++++++---------- | |
M test/line.c | 21 ++++++++------------- | |
M test/sentence.c | 23 ++++++++++------------- | |
M test/utf8-decode.c | 344 +++++++++++++++--------------… | |
M test/utf8-encode.c | 39 ++++++++++++++++-------------… | |
M test/util.c | 44 +++++++++++++++++++----------… | |
M test/util.h | 15 +++++++++++---- | |
M test/word.c | 16 ++++++---------- | |
38 files changed, 1736 insertions(+), 1393 deletions(-) | |
--- | |
diff --git a/benchmark/bidirectional.c b/benchmark/bidirectional.c | |
@@ -5,8 +5,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/bidirectional-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#define NUM_ITERATIONS 100000 | |
diff --git a/benchmark/case.c b/benchmark/case.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/word-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#define NUM_ITERATIONS 10000 | |
@@ -40,7 +40,8 @@ main(int argc, char *argv[]) | |
&(p.srclen))) == NULL) { | |
return 1; | |
} | |
- if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) =… | |
+ if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) == | |
+ NULL) { | |
fprintf(stderr, "calloc: Out of memory\n"); | |
} | |
diff --git a/benchmark/character.c b/benchmark/character.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/character-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#include <utf8proc.h> | |
@@ -28,7 +28,7 @@ libgrapheme(const void *payload) | |
size_t i; | |
for (i = 0; i + 1 < p->buflen; i++) { | |
- (void)grapheme_is_character_break(p->buf[i], p->buf[i+1], | |
+ (void)grapheme_is_character_break(p->buf[i], p->buf[i + 1], | |
&state); | |
} | |
} | |
@@ -41,9 +41,8 @@ libutf8proc(const void *payload) | |
size_t i; | |
for (i = 0; i + 1 < p->buflen; i++) { | |
- (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i], | |
- p->buf_utf8proc[i+1], | |
- &state); | |
+ (void)utf8proc_grapheme_break_stateful( | |
+ p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state); | |
} | |
} | |
@@ -61,7 +60,8 @@ main(int argc, char *argv[]) | |
&(p.buflen))) == NULL) { | |
return 1; | |
} | |
- if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==… | |
+ if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == | |
+ NULL) { | |
fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
exit(1); | |
} | |
diff --git a/benchmark/line.c b/benchmark/line.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/line-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#define NUM_ITERATIONS 10000 | |
@@ -23,7 +23,7 @@ libgrapheme(const void *payload) | |
const struct break_benchmark_payload *p = payload; | |
size_t off; | |
- for (off = 0; off < p->buflen; ) { | |
+ for (off = 0; off < p->buflen;) { | |
off += grapheme_next_line_break(p->buf + off, p->buflen - off); | |
} | |
} | |
diff --git a/benchmark/sentence.c b/benchmark/sentence.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/sentence-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#define NUM_ITERATIONS 100000 | |
@@ -23,8 +23,9 @@ libgrapheme(const void *payload) | |
const struct break_benchmark_payload *p = payload; | |
size_t off; | |
- for (off = 0; off < p->buflen; ) { | |
- off += grapheme_next_sentence_break(p->buf + off, p->buflen - … | |
+ for (off = 0; off < p->buflen;) { | |
+ off += grapheme_next_sentence_break(p->buf + off, | |
+ p->buflen - off); | |
} | |
} | |
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/character-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#include <utf8proc.h> | |
@@ -28,9 +28,8 @@ libgrapheme(const void *payload) | |
size_t ret, off; | |
for (off = 0; off < p->buflen; off += ret) { | |
- if ((ret = grapheme_decode_utf8(p->buf + off, | |
- p->buflen - off, &cp)) > | |
- (p->buflen - off)) { | |
+ if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off, | |
+ &cp)) > (p->buflen - off)) { | |
break; | |
} | |
(void)cp; | |
@@ -48,7 +47,7 @@ libutf8proc(const void *payload) | |
for (off = 0; off < p->buflen; off += (size_t)ret) { | |
if ((ret = utf8proc_iterate(p->buf_utf8proc + off, | |
(utf8proc_ssize_t)(p->buflen - off… | |
- &cp)) < 0) { | |
+ &cp)) < 0) { | |
break; | |
} | |
(void)cp; | |
@@ -64,9 +63,8 @@ main(int argc, char *argv[]) | |
(void)argc; | |
- p.buf = generate_utf8_test_buffer(character_break_test, | |
- LEN(character_break_test), | |
- &(p.buflen)); | |
+ p.buf = generate_utf8_test_buffer( | |
+ character_break_test, LEN(character_break_test), &(p.buflen)); | |
/* convert cp-buffer to stupid custom libutf8proc-uint8-type */ | |
if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { | |
@@ -74,7 +72,7 @@ main(int argc, char *argv[]) | |
exit(1); | |
} | |
for (i = 0; i < p.buflen; i++) { | |
- /* | |
+ /* | |
* even if char is larger than 8 bit, it will only have | |
* any of the first 8 bits set (by construction). | |
*/ | |
@@ -82,11 +80,11 @@ main(int argc, char *argv[]) | |
} | |
printf("%s\n", argv[0]); | |
- run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, | |
- "byte", &baseline, NUM_ITERATIONS, p.buflen); | |
+ run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline, | |
+ NUM_ITERATIONS, p.buflen); | |
run_benchmark(libutf8proc, &p, "libutf8proc ", | |
- "but unsafe (does not detect overlong encodings)", | |
- "byte", &baseline, NUM_ITERATIONS, p.buflen); | |
+ "but unsafe (does not detect overlong encodings)", "byte… | |
+ &baseline, NUM_ITERATIONS, p.buflen); | |
free(p.buf); | |
free(p.buf_utf8proc); | |
diff --git a/benchmark/util.c b/benchmark/util.c | |
@@ -1,7 +1,7 @@ | |
/* See LICENSE file for copyright and license details. */ | |
#include <math.h> | |
-#include <stdlib.h> | |
#include <stdio.h> | |
+#include <stdlib.h> | |
#include <time.h> | |
#include "../gen/types.h" | |
@@ -20,7 +20,8 @@ generate_cp_test_buffer(const struct break_test *test, size_t… | |
*buflen += test[i].cplen; | |
} | |
if (!(buf = calloc(*buflen, sizeof(*buf)))) { | |
- fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\… | |
+ fprintf(stderr, | |
+ "generate_test_buffer: calloc: Out of memory.\n"); | |
exit(1); | |
} | |
for (i = 0, off = 0; i < testlen; i++) { | |
@@ -48,18 +49,18 @@ generate_utf8_test_buffer(const struct break_test *test, si… | |
} | |
(*buflen)++; /* terminating NUL-byte */ | |
if (!(buf = malloc(*buflen))) { | |
- fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\… | |
+ fprintf(stderr, | |
+ "generate_test_buffer: malloc: Out of memory.\n"); | |
exit(1); | |
} | |
for (i = 0, off = 0; i < testlen; i++) { | |
for (j = 0; j < test[i].cplen; j++, off += ret) { | |
- if ((ret = grapheme_encode_utf8(test[i].cp[j], | |
- buf + off, | |
- *buflen - off)) > | |
+ if ((ret = grapheme_encode_utf8( | |
+ test[i].cp[j], buf + off, *buflen - off))… | |
(*buflen - off)) { | |
/* shouldn't happen */ | |
fprintf(stderr, "generate_utf8_test_buffer: " | |
- "Buffer too small.\n"); | |
+ "Buffer too small.\n"); | |
exit(1); | |
} | |
} | |
@@ -77,10 +78,9 @@ time_diff(struct timespec *a, struct timespec *b) | |
} | |
void | |
-run_benchmark(void (*func)(const void *), const void *payload, | |
- const char *name, const char *comment, const char *unit, | |
- double *baseline, size_t num_iterations, | |
- size_t units_per_iteration) | |
+run_benchmark(void (*func)(const void *), const void *payload, const char *nam… | |
+ const char *comment, const char *unit, double *baseline, | |
+ size_t num_iterations, size_t units_per_iteration) | |
{ | |
struct timespec start, end; | |
size_t i; | |
@@ -109,7 +109,6 @@ run_benchmark(void (*func)(const void *), const void *paylo… | |
printf(" avg. %.3es/%s (%.2f%% %s%s%s)\n", diff, unit, | |
fabs(1.0 - diff / *baseline) * 100, | |
(diff < *baseline) ? "faster" : "slower", | |
- comment ? ", " : "", | |
- comment ? comment : ""); | |
+ comment ? ", " : "", comment ? comment : ""); | |
} | |
} | |
diff --git a/benchmark/util.h b/benchmark/util.h | |
@@ -7,10 +7,10 @@ | |
#define LEN(x) (sizeof(x) / sizeof(*(x))) | |
#ifdef __has_attribute | |
- #if __has_attribute(optnone) | |
- void libgrapheme(const void *) __attribute__((optnone)); | |
- void libutf8proc(const void *) __attribute__((optnone)); | |
- #endif | |
+#if __has_attribute(optnone) | |
+void libgrapheme(const void *) __attribute__((optnone)); | |
+void libutf8proc(const void *) __attribute__((optnone)); | |
+#endif | |
#endif | |
uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t, | |
diff --git a/benchmark/word.c b/benchmark/word.c | |
@@ -6,8 +6,8 @@ | |
#include <stdlib.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/word-test.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
#define NUM_ITERATIONS 10000 | |
@@ -23,7 +23,7 @@ libgrapheme(const void *payload) | |
const struct break_benchmark_payload *p = payload; | |
size_t off; | |
- for (off = 0; off < p->buflen; ) { | |
+ for (off = 0; off < p->buflen;) { | |
off += grapheme_next_word_break(p->buf + off, p->buflen - off); | |
} | |
} | |
diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c | |
@@ -3,8 +3,8 @@ | |
#include <inttypes.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
-#include <string.h> | |
#include <stdlib.h> | |
+#include <string.h> | |
#include "../grapheme.h" | |
#include "util.h" | |
@@ -23,29 +23,29 @@ static const struct { | |
const char *class; | |
const uint_least32_t cp; | |
} classcpmap[] = { | |
- { .class = "L", .cp = UINT32_C(0x0041) }, | |
- { .class = "AL", .cp = UINT32_C(0x0608) }, | |
- { .class = "AN", .cp = UINT32_C(0x0600) }, | |
- { .class = "B", .cp = UINT32_C(0x000A) }, | |
- { .class = "BN", .cp = UINT32_C(0x0000) }, | |
- { .class = "CS", .cp = UINT32_C(0x002C) }, | |
- { .class = "EN", .cp = UINT32_C(0x0030) }, | |
- { .class = "ES", .cp = UINT32_C(0x002B) }, | |
- { .class = "ET", .cp = UINT32_C(0x0023) }, | |
+ { .class = "L", .cp = UINT32_C(0x0041) }, | |
+ { .class = "AL", .cp = UINT32_C(0x0608) }, | |
+ { .class = "AN", .cp = UINT32_C(0x0600) }, | |
+ { .class = "B", .cp = UINT32_C(0x000A) }, | |
+ { .class = "BN", .cp = UINT32_C(0x0000) }, | |
+ { .class = "CS", .cp = UINT32_C(0x002C) }, | |
+ { .class = "EN", .cp = UINT32_C(0x0030) }, | |
+ { .class = "ES", .cp = UINT32_C(0x002B) }, | |
+ { .class = "ET", .cp = UINT32_C(0x0023) }, | |
{ .class = "FSI", .cp = UINT32_C(0x2068) }, | |
{ .class = "LRE", .cp = UINT32_C(0x202A) }, | |
{ .class = "LRI", .cp = UINT32_C(0x2066) }, | |
{ .class = "LRO", .cp = UINT32_C(0x202D) }, | |
{ .class = "NSM", .cp = UINT32_C(0x0300) }, | |
- { .class = "ON", .cp = UINT32_C(0x0021) }, | |
+ { .class = "ON", .cp = UINT32_C(0x0021) }, | |
{ .class = "PDF", .cp = UINT32_C(0x202C) }, | |
{ .class = "PDI", .cp = UINT32_C(0x2069) }, | |
- { .class = "R", .cp = UINT32_C(0x05BE) }, | |
+ { .class = "R", .cp = UINT32_C(0x05BE) }, | |
{ .class = "RLE", .cp = UINT32_C(0x202B) }, | |
{ .class = "RLI", .cp = UINT32_C(0x2067) }, | |
{ .class = "RLO", .cp = UINT32_C(0x202E) }, | |
- { .class = "S", .cp = UINT32_C(0x0009) }, | |
- { .class = "WS", .cp = UINT32_C(0x000C) }, | |
+ { .class = "S", .cp = UINT32_C(0x0009) }, | |
+ { .class = "WS", .cp = UINT32_C(0x000C) }, | |
}; | |
static int | |
@@ -59,7 +59,8 @@ classtocp(const char *str, size_t len, uint_least32_t *cp) | |
return 0; | |
} | |
} | |
- fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,… | |
+ fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, | |
+ str); | |
return 1; | |
} | |
@@ -77,8 +78,10 @@ parse_class_list(const char *str, uint_least32_t **cp, size_… | |
} | |
/* count the number of spaces in the string and infer list length */ | |
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+… | |
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; | |
+ count++, tmp1 = tmp2 + 1) { | |
; | |
+ } | |
/* allocate resources */ | |
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) { | |
@@ -89,7 +92,8 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t… | |
/* go through the string again, parsing the classes */ | |
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { | |
tmp2 = strchr(tmp1, ' '); | |
- if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1… | |
+ if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1… | |
+ &((*cp)[i]))) { | |
return 1; | |
} | |
if (tmp2 != NULL) { | |
@@ -135,12 +139,10 @@ strtolevel(const char *str, size_t len, int_least8_t *lev… | |
if (str[0] != '1') { | |
goto toolarge; | |
} | |
- *level = (str[0] - '0') * 100 + | |
- (str[1] - '0') * 10 + | |
- (str[2] - '0'); | |
+ *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 + | |
+ (str[2] - '0'); | |
} else if (len == 2) { | |
- *level = (str[0] - '0') * 10 + | |
- (str[1] - '0'); | |
+ *level = (str[0] - '0') * 10 + (str[1] - '0'); | |
} else if (len == 1) { | |
*level = (str[0] - '0'); | |
} else { /* len == 0 */ | |
@@ -149,8 +151,7 @@ strtolevel(const char *str, size_t len, int_least8_t *level) | |
return 0; | |
toolarge: | |
- fprintf(stderr, "hextocp: '%.*s' is too large.\n", | |
- (int)len, str); | |
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str); | |
return 1; | |
} | |
@@ -167,8 +168,10 @@ parse_level_list(const char *str, int_least8_t **level, si… | |
} | |
/* count the number of spaces in the string and infer list length */ | |
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+… | |
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; | |
+ count++, tmp1 = tmp2 + 1) { | |
; | |
+ } | |
/* allocate resources */ | |
if (!(*level = calloc((*levellen = count), sizeof(**level)))) { | |
@@ -179,7 +182,9 @@ parse_level_list(const char *str, int_least8_t **level, siz… | |
/* go through the string again, parsing the levels */ | |
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { | |
tmp2 = strchr(tmp1, ' '); | |
- if (strtolevel(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp… | |
+ if (strtolevel(tmp1, | |
+ tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), | |
+ &((*level)[i]))) { | |
return 1; | |
} | |
if (tmp2 != NULL) { | |
@@ -199,7 +204,8 @@ bidirectional_test_list_print(const struct bidirectional_te… | |
printf("/* Automatically generated by %s */\n" | |
"#include <stdint.h>\n#include <stddef.h>\n\n" | |
- "#include \"../grapheme.h\"\n\n", progname); | |
+ "#include \"../grapheme.h\"\n\n", | |
+ progname); | |
printf("static const struct {\n" | |
"\tuint_least32_t *cp;\n" | |
@@ -208,7 +214,8 @@ bidirectional_test_list_print(const struct bidirectional_te… | |
"\tsize_t modelen;\n" | |
"\tint_least8_t *level;\n" | |
"\tint_least8_t *reorder;\n" | |
- "\tsize_t reorderlen;\n} %s[] = {\n", identifier); | |
+ "\tsize_t reorderlen;\n} %s[] = {\n", | |
+ identifier); | |
for (i = 0; i < testlen; i++) { | |
printf("\t{\n"); | |
@@ -222,11 +229,13 @@ bidirectional_test_list_print(const struct bidirectional_… | |
printf(" },\n"); | |
printf("\t\t.cplen = %zu,\n", test[i].cplen); | |
- printf("\t\t.mode = (enum grapheme_bidirectional_overrid… | |
+ printf("\t\t.mode = (enum " | |
+ "grapheme_bidirectional_override[]){"); | |
for (j = 0; j < test[i].modelen; j++) { | |
if (test[i].mode[j] == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL) { | |
- printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTR… | |
+ printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_" | |
+ "NEUTRAL"); | |
} else if (test[i].mode[j] == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR"); | |
@@ -279,8 +288,8 @@ static int_least8_t *current_reorder; | |
static size_t current_reorder_len; | |
static int | |
-test_callback(const char *file, char **field, size_t nfields, | |
- char *comment, void *payload) | |
+test_callback(const char *file, char **field, size_t nfields, char *comment, | |
+ void *payload) | |
{ | |
char *tmp; | |
@@ -292,23 +301,31 @@ test_callback(const char *file, char **field, size_t nfie… | |
if (nfields > 0 && field[0][0] == '@') { | |
if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) { | |
tmp = field[0] + sizeof("@Levels:") - 1; | |
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); … | |
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); | |
+ tmp++) { | |
; | |
+ } | |
free(current_level); | |
- parse_level_list(tmp, &current_level, &current_level_l… | |
- } else if (!strncmp(field[0], "@Reorder:", sizeof("@Reorder:")… | |
+ parse_level_list(tmp, &current_level, | |
+ &current_level_len); | |
+ } else if (!strncmp(field[0], | |
+ "@Reorder:", sizeof("@Reorder:") - 1)) { | |
tmp = field[0] + sizeof("@Reorder:") - 1; | |
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); … | |
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); | |
+ tmp++) { | |
; | |
+ } | |
free(current_reorder); | |
- parse_level_list(tmp, &current_reorder, &current_reord… | |
+ parse_level_list(tmp, &current_reorder, | |
+ &current_reorder_len); | |
} else { | |
fprintf(stderr, "Unknown @-input-line.\n"); | |
exit(1); | |
} | |
} else { | |
if (nfields < 2) { | |
- /* discard any line that does not have at least 2 fiel… | |
+ /* discard any line that does not have at least 2 fiel… | |
+ */ | |
return 0; | |
} | |
@@ -321,26 +338,33 @@ test_callback(const char *file, char **field, size_t nfie… | |
/* parse field data */ | |
parse_class_list(field[0], &(test[testlen - 1].cp), | |
&(test[testlen - 1].cplen)); | |
- | |
+ | |
/* copy current level- and reorder-arrays */ | |
- if (!(test[testlen - 1].level = calloc(current_level_len, size… | |
+ if (!(test[testlen - 1].level = | |
+ calloc(current_level_len, | |
+ sizeof(*(test[testlen - 1].level))))) { | |
fprintf(stderr, "calloc: %s\n", strerror(errno)); | |
exit(1); | |
} | |
- memcpy(test[testlen - 1].level, current_level, current_level_l… | |
+ memcpy(test[testlen - 1].level, current_level, | |
+ current_level_len * sizeof(*(test[testlen - 1].level))); | |
- if (!(test[testlen - 1].reorder = calloc(current_reorder_len, … | |
+ if (!(test[testlen - 1].reorder = | |
+ calloc(current_reorder_len, | |
+ sizeof(*(test[testlen - 1].reorder))))) { | |
fprintf(stderr, "calloc: %s\n", strerror(errno)); | |
exit(1); | |
} | |
if (current_reorder != NULL) { | |
memcpy(test[testlen - 1].reorder, current_reorder, | |
- current_reorder_len * sizeof(*(test[testlen - 1… | |
+ current_reorder_len * | |
+ sizeof(*(test[testlen - 1].reorder))); | |
} | |
test[testlen - 1].reorderlen = current_reorder_len; | |
- | |
+ | |
if (current_level_len != test[testlen - 1].cplen) { | |
- fprintf(stderr, "mismatch between string and level len… | |
+ fprintf(stderr, | |
+ "mismatch between string and level lengths.\n"… | |
exit(1); | |
} | |
@@ -349,27 +373,38 @@ test_callback(const char *file, char **field, size_t nfie… | |
fprintf(stderr, "malformed paragraph-level-bitset.\n"); | |
exit(1); | |
} else if (field[1][0] == '2') { | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; | |
test[testlen - 1].modelen = 1; | |
} else if (field[1][0] == '3') { | |
/* auto=0 and LTR=1 */ | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE… | |
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; | |
+ test[testlen - 1].mode[1] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; | |
test[testlen - 1].modelen = 2; | |
} else if (field[1][0] == '4') { | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; | |
test[testlen - 1].modelen = 1; | |
- } else if (field[1][0] == '5') { | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE… | |
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ } else if (field[1][0] == '5') { | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; | |
+ test[testlen - 1].mode[1] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; | |
test[testlen - 1].modelen = 2; | |
} else if (field[1][0] == '7') { | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE… | |
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE… | |
- test[testlen - 1].mode[2] = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; | |
+ test[testlen - 1].mode[1] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; | |
+ test[testlen - 1].mode[2] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; | |
test[testlen - 1].modelen = 3; | |
} else { | |
- fprintf(stderr, "unhandled paragraph-level-bitset %s.\… | |
+ fprintf(stderr, | |
+ "unhandled paragraph-level-bitset %s.\n", | |
+ field[1]); | |
exit(1); | |
} | |
} | |
@@ -414,7 +449,8 @@ character_test_callback(const char *file, char **field, siz… | |
} else if (field[1][0] == '1') { | |
test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RT… | |
} else if (field[1][0] == '2') { | |
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NE… | |
+ test[testlen - 1].mode[0] = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; | |
} else { | |
fprintf(stderr, "unhandled paragraph-level-setting.\n"); | |
exit(1); | |
diff --git a/gen/bidirectional.c b/gen/bidirectional.c | |
@@ -15,118 +15,118 @@ static const struct property_spec bidi_property[] = { | |
{ | |
/* default */ | |
.enumname = "L", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "L", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "L", | |
}, | |
{ | |
.enumname = "AL", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "AL", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "AL", | |
}, | |
{ | |
.enumname = "AN", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "AN", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "AN", | |
}, | |
{ | |
.enumname = "B", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "B", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "B", | |
}, | |
{ | |
.enumname = "BN", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "BN", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "BN", | |
}, | |
{ | |
.enumname = "CS", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "CS", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "CS", | |
}, | |
{ | |
.enumname = "EN", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "EN", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "EN", | |
}, | |
{ | |
.enumname = "ES", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "ES", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "ES", | |
}, | |
{ | |
.enumname = "ET", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "ET", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "ET", | |
}, | |
{ | |
.enumname = "FSI", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "FSI", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "FSI", | |
}, | |
{ | |
.enumname = "LRE", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "LRE", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "LRE", | |
}, | |
{ | |
.enumname = "LRI", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "LRI", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "LRI", | |
}, | |
{ | |
.enumname = "LRO", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "LRO", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "LRO", | |
}, | |
{ | |
.enumname = "NSM", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "NSM", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "NSM", | |
}, | |
{ | |
.enumname = "ON", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "ON", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "ON", | |
}, | |
{ | |
.enumname = "PDF", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "PDF", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "PDF", | |
}, | |
{ | |
.enumname = "PDI", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "PDI", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "PDI", | |
}, | |
{ | |
.enumname = "R", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "R", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "R", | |
}, | |
{ | |
.enumname = "RLE", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "RLE", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "RLE", | |
}, | |
{ | |
.enumname = "RLI", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "RLI", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "RLI", | |
}, | |
{ | |
.enumname = "RLO", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "RLO", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "RLO", | |
}, | |
{ | |
.enumname = "S", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "S", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "S", | |
}, | |
{ | |
.enumname = "WS", | |
- .file = FILE_BIDI_CLASS, | |
- .ucdname = "WS", | |
+ .file = FILE_BIDI_CLASS, | |
+ .ucdname = "WS", | |
}, | |
}; | |
@@ -135,11 +135,12 @@ static struct { | |
uint_least32_t cp_pair; | |
char type; | |
} *b = NULL; | |
+ | |
static size_t blen; | |
static int | |
-bracket_callback(const char *file, char **field, size_t nfields, | |
- char *comment, void *payload) | |
+bracket_callback(const char *file, char **field, size_t nfields, char *comment, | |
+ void *payload) | |
{ | |
(void)file; | |
(void)comment; | |
@@ -189,11 +190,12 @@ post_process(struct properties *prop) | |
} | |
static uint_least8_t | |
-fill_missing(uint_least32_t cp) { | |
+fill_missing(uint_least32_t cp) | |
+{ | |
/* based on the @missing-properties in data/DerivedBidiClass.txt */ | |
- if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) || | |
- (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) || | |
- (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) || | |
+ if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) || | |
+ (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) || | |
+ (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) || | |
(cp >= UINT32_C(0x10800) && cp <= UINT32_C(0x10CFF)) || | |
(cp >= UINT32_C(0x10D40) && cp <= UINT32_C(0x10EBF)) || | |
(cp >= UINT32_C(0x10F00) && cp <= UINT32_C(0x10F2F)) || | |
@@ -203,22 +205,22 @@ fill_missing(uint_least32_t cp) { | |
(cp >= UINT32_C(0x1ED50) && cp <= UINT32_C(0x1EDFF)) || | |
(cp >= UINT32_C(0x1EF00) && cp <= UINT32_C(0x1EFFF))) { | |
return 17; /* class R */ | |
- } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) || | |
- (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) || | |
- (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) || | |
- (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) || | |
- (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) || | |
+ } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) || | |
+ (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) || | |
+ (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) || | |
+ (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) || | |
+ (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) || | |
(cp >= UINT32_C(0x10D00) && cp <= UINT32_C(0x10D3F)) || | |
(cp >= UINT32_C(0x10EC0) && cp <= UINT32_C(0x10EFF)) || | |
- (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) || | |
+ (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) || | |
(cp >= UINT32_C(0x1EC70) && cp <= UINT32_C(0x1ECBF)) || | |
(cp >= UINT32_C(0x1ED00) && cp <= UINT32_C(0x1ED4F)) || | |
(cp >= UINT32_C(0x1EE00) && cp <= UINT32_C(0x1EEFF))) { | |
- return 1; /* class AL */ | |
+ return 1; /* class AL */ | |
} else if (cp >= UINT32_C(0x20A0) && cp <= UINT32_C(0x20CF)) { | |
- return 8; /* class ET */ | |
+ return 8; /* class ET */ | |
} else { | |
- return 0; /* class L */ | |
+ return 0; /* class L */ | |
} | |
} | |
@@ -238,13 +240,11 @@ main(int argc, char *argv[]) | |
fprintf(stderr, "calloc: %s\n", strerror(errno)); | |
exit(1); | |
} | |
- parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, | |
- NULL); | |
+ parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL); | |
- properties_generate_break_property(bidi_property, | |
- LEN(bidi_property), fill_missing, | |
- NULL, post_process, "bidi", | |
- argv[0]); | |
+ properties_generate_break_property(bidi_property, LEN(bidi_property), | |
+ fill_missing, NULL, post_process, | |
+ "bidi", argv[0]); | |
printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t" | |
"BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n" | |
@@ -252,10 +252,12 @@ main(int argc, char *argv[]) | |
"\tuint_least32_t pair;\n};\n\n" | |
"static const struct bracket bidi_bracket[] = {\n"); | |
for (i = 0; i < blen; i++) { | |
- printf("\t{\n\t\t.type = %s,\n\t\t.pair = UINT32_C(0x%06X),\n\… | |
- (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" : | |
- (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : "BIDI_BRACKET_NONE… | |
- b[i].cp_pair); | |
+ printf("\t{\n\t\t.type = %s,\n\t\t.pair = " | |
+ "UINT32_C(0x%06X),\n\t},\n", | |
+ (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" : | |
+ (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : | |
+ "BIDI_BRACKET_NONE", | |
+ b[i].cp_pair); | |
} | |
printf("};\n"); | |
diff --git a/gen/case.c b/gen/case.c | |
@@ -12,28 +12,28 @@ | |
static const struct property_spec case_property[] = { | |
{ | |
.enumname = "OTHER", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "BOTH_CASED_CASE_IGNORABLE", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
- { | |
+ { | |
.enumname = "CASED", | |
- .file = FILE_DCP, | |
- .ucdname = "Cased", | |
+ .file = FILE_DCP, | |
+ .ucdname = "Cased", | |
}, | |
{ | |
.enumname = "CASE_IGNORABLE", | |
- .file = FILE_DCP, | |
- .ucdname = "Case_Ignorable", | |
+ .file = FILE_DCP, | |
+ .ucdname = "Case_Ignorable", | |
}, | |
{ | |
.enumname = "UNCASED", | |
- .file = FILE_DCP, | |
- .ucdname = "Uncased", | |
+ .file = FILE_DCP, | |
+ .ucdname = "Uncased", | |
}, | |
}; | |
@@ -67,12 +67,14 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uin… | |
} | |
static struct properties *prop_upper = NULL, *prop_lower, *prop_title; | |
+ | |
static struct special_case { | |
struct { | |
uint_least32_t *cp; | |
size_t cplen; | |
} upper, lower, title; | |
} *sc = NULL; | |
+ | |
static size_t sclen = 0; | |
static int | |
@@ -89,9 +91,12 @@ unicodedata_callback(const char *file, char **field, size_t … | |
upper = lower = title = cp; | |
- if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &u… | |
- (strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &l… | |
- (nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strl… | |
+ if ((strlen(field[12]) > 0 && | |
+ hextocp(field[12], strlen(field[12]), &upper)) || | |
+ (strlen(field[13]) > 0 && | |
+ hextocp(field[13], strlen(field[13]), &lower)) || | |
+ (nfields >= 15 && strlen(field[14]) > 0 && | |
+ hextocp(field[14], strlen(field[14]), &title))) { | |
return 1; | |
} | |
@@ -126,7 +131,7 @@ specialcasing_callback(const char *file, char **field, size… | |
/* extend special case array */ | |
if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) { | |
fprintf(stderr, "realloc: %s\n", strerror(errno)); | |
- exit(1); | |
+ exit(1); | |
} | |
/* parse field data */ | |
@@ -142,9 +147,12 @@ specialcasing_callback(const char *file, char **field, siz… | |
* special value 0x110000 + (offset in special case array), | |
* even if the special case has length 1 | |
*/ | |
- prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen … | |
- prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen … | |
- prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen … | |
+ prop_upper[cp].property = | |
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); | |
+ prop_lower[cp].property = | |
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); | |
+ prop_title[cp].property = | |
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); | |
return 0; | |
} | |
@@ -165,9 +173,8 @@ main(int argc, char *argv[]) | |
(void)argc; | |
/* generate case property table from the specification */ | |
- properties_generate_break_property(case_property, | |
- LEN(case_property), NULL, | |
- handle_conflict, NULL, "case", | |
+ properties_generate_break_property(case_property, LEN(case_property), | |
+ NULL, handle_conflict, NULL, "case", | |
argv[0]); | |
/* | |
@@ -186,38 +193,46 @@ main(int argc, char *argv[]) | |
} | |
parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback, | |
NULL); | |
- parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callb… | |
- NULL); | |
+ parse_file_with_callback("data/SpecialCasing.txt", | |
+ specialcasing_callback, NULL); | |
/* compress properties */ | |
properties_compress(prop_upper, &comp_upper); | |
properties_compress(prop_lower, &comp_lower); | |
properties_compress(prop_title, &comp_title); | |
- fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%… | |
+ fprintf(stderr, | |
+ "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, " | |
+ "title=%.2f%%\n", | |
argv[0], properties_get_major_minor(&comp_upper, &mm_upper), | |
properties_get_major_minor(&comp_lower, &mm_lower), | |
properties_get_major_minor(&comp_title, &mm_title)); | |
/* print tables */ | |
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n#inc… | |
+ printf("/* Automatically generated by %s */\n#include " | |
+ "<stdint.h>\n#include <stddef.h>\n\n", | |
+ argv[0]); | |
- printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\… | |
+ printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t " | |
+ "cplen;\n};\n\n"); | |
properties_print_lookup_table("upper_major", mm_upper.major, 0x1100); | |
printf("\n"); | |
- properties_print_derived_lookup_table("upper_minor", "int_least32_t", … | |
- mm_upper.minorlen, get_value, co… | |
+ properties_print_derived_lookup_table("upper_minor", "int_least32_t", | |
+ mm_upper.minor, mm_upper.minorle… | |
+ get_value, comp_upper.data); | |
printf("\n"); | |
properties_print_lookup_table("lower_major", mm_lower.major, 0x1100); | |
printf("\n"); | |
- properties_print_derived_lookup_table("lower_minor", "int_least32_t", … | |
- mm_lower.minorlen, get_value, co… | |
+ properties_print_derived_lookup_table("lower_minor", "int_least32_t", | |
+ mm_lower.minor, mm_lower.minorle… | |
+ get_value, comp_lower.data); | |
printf("\n"); | |
properties_print_lookup_table("title_major", mm_title.major, 0x1100); | |
printf("\n"); | |
- properties_print_derived_lookup_table("title_minor", "int_least32_t", … | |
- mm_title.minorlen, get_value, co… | |
+ properties_print_derived_lookup_table("title_minor", "int_least32_t", | |
+ mm_title.minor, mm_title.minorle… | |
+ get_value, comp_title.data); | |
printf("\n"); | |
printf("static const struct special_case upper_special[] = {\n"); | |
diff --git a/gen/character.c b/gen/character.c | |
@@ -9,78 +9,78 @@ | |
static const struct property_spec char_break_property[] = { | |
{ | |
.enumname = "OTHER", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "CONTROL", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "Control", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "Control", | |
}, | |
{ | |
.enumname = "CR", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "CR", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "CR", | |
}, | |
{ | |
.enumname = "EXTEND", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "Extend", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "Extend", | |
}, | |
{ | |
.enumname = "EXTENDED_PICTOGRAPHIC", | |
- .file = FILE_EMOJI, | |
- .ucdname = "Extended_Pictographic", | |
+ .file = FILE_EMOJI, | |
+ .ucdname = "Extended_Pictographic", | |
}, | |
{ | |
.enumname = "HANGUL_L", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "L", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "L", | |
}, | |
{ | |
.enumname = "HANGUL_V", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "V", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "V", | |
}, | |
{ | |
.enumname = "HANGUL_T", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "T", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "T", | |
}, | |
{ | |
.enumname = "HANGUL_LV", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "LV", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "LV", | |
}, | |
{ | |
.enumname = "HANGUL_LVT", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "LVT", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "LVT", | |
}, | |
{ | |
.enumname = "LF", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "LF", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "LF", | |
}, | |
{ | |
.enumname = "PREPEND", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "Prepend", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "Prepend", | |
}, | |
{ | |
.enumname = "REGIONAL_INDICATOR", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "Regional_Indicator", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "Regional_Indicator", | |
}, | |
{ | |
.enumname = "SPACINGMARK", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "SpacingMark", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "SpacingMark", | |
}, | |
{ | |
.enumname = "ZWJ", | |
- .file = FILE_GRAPHEME, | |
- .ucdname = "ZWJ", | |
+ .file = FILE_GRAPHEME, | |
+ .ucdname = "ZWJ", | |
}, | |
}; | |
@@ -90,8 +90,8 @@ main(int argc, char *argv[]) | |
(void)argc; | |
properties_generate_break_property(char_break_property, | |
- LEN(char_break_property), NULL, | |
- NULL, NULL, "char_break", argv[0]); | |
+ LEN(char_break_property), NULL, NUL… | |
+ NULL, "char_break", argv[0]); | |
return 0; | |
} | |
diff --git a/gen/line.c b/gen/line.c | |
@@ -12,8 +12,8 @@ | |
static const struct property_spec line_break_property[] = { | |
{ | |
.enumname = "AL", | |
- .file = FILE_LINE, | |
- .ucdname = "AL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "AL", | |
}, | |
/* | |
* Both extended pictographic and cn are large classes, | |
@@ -32,269 +32,269 @@ static const struct property_spec line_break_property[] =… | |
*/ | |
{ | |
.enumname = "TMP_CN", | |
- .file = FILE_LINE, | |
- .ucdname = "Cn", | |
+ .file = FILE_LINE, | |
+ .ucdname = "Cn", | |
}, | |
{ | |
.enumname = "TMP_EXTENDED_PICTOGRAPHIC", | |
- .file = FILE_EMOJI, | |
- .ucdname = "Extended_Pictographic", | |
+ .file = FILE_EMOJI, | |
+ .ucdname = "Extended_Pictographic", | |
}, | |
/* end of special block */ | |
{ | |
.enumname = "B2", | |
- .file = FILE_LINE, | |
- .ucdname = "B2", | |
+ .file = FILE_LINE, | |
+ .ucdname = "B2", | |
}, | |
{ | |
.enumname = "BA", | |
- .file = FILE_LINE, | |
- .ucdname = "BA", | |
+ .file = FILE_LINE, | |
+ .ucdname = "BA", | |
}, | |
{ | |
.enumname = "BB", | |
- .file = FILE_LINE, | |
- .ucdname = "BB", | |
+ .file = FILE_LINE, | |
+ .ucdname = "BB", | |
}, | |
{ | |
.enumname = "BK", | |
- .file = FILE_LINE, | |
- .ucdname = "BK", | |
+ .file = FILE_LINE, | |
+ .ucdname = "BK", | |
}, | |
{ | |
.enumname = "BOTH_CN_EXTPICT", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "CB", | |
- .file = FILE_LINE, | |
- .ucdname = "CB", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CB", | |
}, | |
{ | |
.enumname = "CL", | |
- .file = FILE_LINE, | |
- .ucdname = "CL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CL", | |
}, | |
{ | |
.enumname = "CM", | |
- .file = FILE_LINE, | |
- .ucdname = "CM", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CM", | |
}, | |
{ | |
.enumname = "CP_WITHOUT_EAW_HWF", | |
- .file = FILE_LINE, | |
- .ucdname = "CP", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CP", | |
}, | |
{ | |
.enumname = "CP_WITH_EAW_HWF", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "CR", | |
- .file = FILE_LINE, | |
- .ucdname = "CR", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CR", | |
}, | |
{ | |
.enumname = "EB", | |
- .file = FILE_LINE, | |
- .ucdname = "EB", | |
+ .file = FILE_LINE, | |
+ .ucdname = "EB", | |
}, | |
{ | |
.enumname = "EM", | |
- .file = FILE_LINE, | |
- .ucdname = "EM", | |
+ .file = FILE_LINE, | |
+ .ucdname = "EM", | |
}, | |
{ | |
.enumname = "EX", | |
- .file = FILE_LINE, | |
- .ucdname = "EX", | |
+ .file = FILE_LINE, | |
+ .ucdname = "EX", | |
}, | |
{ | |
.enumname = "GL", | |
- .file = FILE_LINE, | |
- .ucdname = "GL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "GL", | |
}, | |
{ | |
.enumname = "H2", | |
- .file = FILE_LINE, | |
- .ucdname = "H2", | |
+ .file = FILE_LINE, | |
+ .ucdname = "H2", | |
}, | |
{ | |
.enumname = "H3", | |
- .file = FILE_LINE, | |
- .ucdname = "H3", | |
+ .file = FILE_LINE, | |
+ .ucdname = "H3", | |
}, | |
{ | |
.enumname = "HL", | |
- .file = FILE_LINE, | |
- .ucdname = "HL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "HL", | |
}, | |
{ | |
.enumname = "HY", | |
- .file = FILE_LINE, | |
- .ucdname = "HY", | |
+ .file = FILE_LINE, | |
+ .ucdname = "HY", | |
}, | |
{ | |
.enumname = "ID", | |
- .file = FILE_LINE, | |
- .ucdname = "ID", | |
+ .file = FILE_LINE, | |
+ .ucdname = "ID", | |
}, | |
{ | |
.enumname = "IN", | |
- .file = FILE_LINE, | |
- .ucdname = "IN", | |
+ .file = FILE_LINE, | |
+ .ucdname = "IN", | |
}, | |
{ | |
.enumname = "IS", | |
- .file = FILE_LINE, | |
- .ucdname = "IS", | |
+ .file = FILE_LINE, | |
+ .ucdname = "IS", | |
}, | |
{ | |
.enumname = "JL", | |
- .file = FILE_LINE, | |
- .ucdname = "JL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "JL", | |
}, | |
{ | |
.enumname = "JT", | |
- .file = FILE_LINE, | |
- .ucdname = "JT", | |
+ .file = FILE_LINE, | |
+ .ucdname = "JT", | |
}, | |
{ | |
.enumname = "JV", | |
- .file = FILE_LINE, | |
- .ucdname = "JV", | |
+ .file = FILE_LINE, | |
+ .ucdname = "JV", | |
}, | |
{ | |
.enumname = "LF", | |
- .file = FILE_LINE, | |
- .ucdname = "LF", | |
+ .file = FILE_LINE, | |
+ .ucdname = "LF", | |
}, | |
{ | |
.enumname = "NL", | |
- .file = FILE_LINE, | |
- .ucdname = "NL", | |
+ .file = FILE_LINE, | |
+ .ucdname = "NL", | |
}, | |
{ | |
.enumname = "NS", | |
- .file = FILE_LINE, | |
- .ucdname = "NS", | |
+ .file = FILE_LINE, | |
+ .ucdname = "NS", | |
}, | |
{ | |
.enumname = "NU", | |
- .file = FILE_LINE, | |
- .ucdname = "NU", | |
+ .file = FILE_LINE, | |
+ .ucdname = "NU", | |
}, | |
{ | |
.enumname = "OP_WITHOUT_EAW_HWF", | |
- .file = FILE_LINE, | |
- .ucdname = "OP", | |
+ .file = FILE_LINE, | |
+ .ucdname = "OP", | |
}, | |
{ | |
.enumname = "OP_WITH_EAW_HWF", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "PO", | |
- .file = FILE_LINE, | |
- .ucdname = "PO", | |
+ .file = FILE_LINE, | |
+ .ucdname = "PO", | |
}, | |
{ | |
.enumname = "PR", | |
- .file = FILE_LINE, | |
- .ucdname = "PR", | |
+ .file = FILE_LINE, | |
+ .ucdname = "PR", | |
}, | |
{ | |
.enumname = "QU", | |
- .file = FILE_LINE, | |
- .ucdname = "QU", | |
+ .file = FILE_LINE, | |
+ .ucdname = "QU", | |
}, | |
{ | |
.enumname = "RI", | |
- .file = FILE_LINE, | |
- .ucdname = "RI", | |
+ .file = FILE_LINE, | |
+ .ucdname = "RI", | |
}, | |
{ | |
.enumname = "SP", | |
- .file = FILE_LINE, | |
- .ucdname = "SP", | |
+ .file = FILE_LINE, | |
+ .ucdname = "SP", | |
}, | |
{ | |
.enumname = "SY", | |
- .file = FILE_LINE, | |
- .ucdname = "SY", | |
+ .file = FILE_LINE, | |
+ .ucdname = "SY", | |
}, | |
{ | |
.enumname = "WJ", | |
- .file = FILE_LINE, | |
- .ucdname = "WJ", | |
+ .file = FILE_LINE, | |
+ .ucdname = "WJ", | |
}, | |
{ | |
.enumname = "ZW", | |
- .file = FILE_LINE, | |
- .ucdname = "ZW", | |
+ .file = FILE_LINE, | |
+ .ucdname = "ZW", | |
}, | |
{ | |
.enumname = "ZWJ", | |
- .file = FILE_LINE, | |
- .ucdname = "ZWJ", | |
+ .file = FILE_LINE, | |
+ .ucdname = "ZWJ", | |
}, | |
{ | |
.enumname = "TMP_AI", | |
- .file = FILE_LINE, | |
- .ucdname = "AI", | |
+ .file = FILE_LINE, | |
+ .ucdname = "AI", | |
}, | |
{ | |
.enumname = "TMP_CJ", | |
- .file = FILE_LINE, | |
- .ucdname = "CJ", | |
+ .file = FILE_LINE, | |
+ .ucdname = "CJ", | |
}, | |
{ | |
.enumname = "TMP_XX", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "TMP_MN", | |
- .file = FILE_LINE, | |
- .ucdname = "Mn", | |
+ .file = FILE_LINE, | |
+ .ucdname = "Mn", | |
}, | |
{ | |
.enumname = "TMP_MC", | |
- .file = FILE_LINE, | |
- .ucdname = "Mc", | |
+ .file = FILE_LINE, | |
+ .ucdname = "Mc", | |
}, | |
{ | |
.enumname = "TMP_SA_WITHOUT_MN_OR_MC", | |
- .file = FILE_LINE, | |
- .ucdname = "SA", | |
+ .file = FILE_LINE, | |
+ .ucdname = "SA", | |
}, | |
{ | |
.enumname = "TMP_SA_WITH_MN_OR_MC", | |
- .file = FILE_LINE, | |
- .ucdname = "SA", | |
+ .file = FILE_LINE, | |
+ .ucdname = "SA", | |
}, | |
{ | |
.enumname = "TMP_SG", | |
- .file = FILE_LINE, | |
- .ucdname = "SG", | |
+ .file = FILE_LINE, | |
+ .ucdname = "SG", | |
}, | |
{ | |
.enumname = "TMP_EAW_H", | |
- .file = FILE_EAW, | |
- .ucdname = "H", | |
+ .file = FILE_EAW, | |
+ .ucdname = "H", | |
}, | |
{ | |
.enumname = "TMP_EAW_W", | |
- .file = FILE_EAW, | |
- .ucdname = "W", | |
+ .file = FILE_EAW, | |
+ .ucdname = "W", | |
}, | |
{ | |
.enumname = "TMP_EAW_F", | |
- .file = FILE_EAW, | |
- .ucdname = "F", | |
+ .file = FILE_EAW, | |
+ .ucdname = "F", | |
}, | |
}; | |
@@ -306,23 +306,30 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u… | |
(void)cp; | |
- if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") || | |
- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") || | |
+ if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") || | |
+ !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") || | |
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) || | |
(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") || | |
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") || | |
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) { | |
- if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_E… | |
- !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_E… | |
+ if (!strcmp(line_break_property[prop1].enumname, | |
+ "CP_WITHOUT_EAW_HWF") || | |
+ !strcmp(line_break_property[prop2].enumname, | |
+ "CP_WITHOUT_EAW_HWF")) { | |
target = "CP_WITH_EAW_HWF"; | |
- } else if (!strcmp(line_break_property[prop1].enumname, "OP_WI… | |
- !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_E… | |
+ } else if (!strcmp(line_break_property[prop1].enumname, | |
+ "OP_WITHOUT_EAW_HWF") || | |
+ !strcmp(line_break_property[prop2].enumname, | |
+ "OP_WITHOUT_EAW_HWF")) { | |
target = "OP_WITH_EAW_HWF"; | |
} else { | |
/* ignore EAW for the rest */ | |
- if ((!strcmp(line_break_property[prop1].enumname, "TMP… | |
- !strcmp(line_break_property[prop1].enumname, "TMP… | |
- !strcmp(line_break_property[prop1].enumname, "TMP… | |
+ if ((!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EAW_H") || | |
+ !strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EAW_W") || | |
+ !strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EAW_F"))) { | |
result = prop2; | |
} else { | |
result = prop1; | |
@@ -330,15 +337,19 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u… | |
} | |
} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") || | |
!strcmp(line_break_property[prop1].enumname, "TMP_MC")) || | |
- (!strcmp(line_break_property[prop2].enumname, "TMP_MN") || | |
- !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) { | |
- if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_M… | |
- !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_M… | |
+ (!strcmp(line_break_property[prop2].enumname, "TMP_MN") || | |
+ !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) { | |
+ if (!strcmp(line_break_property[prop1].enumname, | |
+ "SA_WITHOUT_MN_OR_MC") || | |
+ !strcmp(line_break_property[prop2].enumname, | |
+ "SA_WITHOUT_MN_OR_MC")) { | |
target = "SA_WITH_MN_OR_MC"; | |
} else { | |
/* ignore Mn and Mc for the rest */ | |
- if ((!strcmp(line_break_property[prop1].enumname, "TMP… | |
- !strcmp(line_break_property[prop1].enumname, "TMP… | |
+ if ((!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_MN") || | |
+ !strcmp(line_break_property[prop1].enumname, | |
+ "TMP_MC"))) { | |
result = prop2; | |
} else { | |
result = prop1; | |
@@ -346,33 +357,42 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u… | |
} | |
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") || | |
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) { | |
- if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED… | |
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED… | |
+ if (!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC") || | |
+ !strcmp(line_break_property[prop2].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC")) { | |
target = "BOTH_CN_EXTPICT"; | |
} else { | |
/* ignore Cn for all the other properties */ | |
- if (!strcmp(line_break_property[prop1].enumname, "TMP_… | |
+ if (!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_CN")) { | |
result = prop2; | |
} else { | |
result = prop1; | |
} | |
} | |
- } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_… | |
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_… | |
+ } else if (!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC") || | |
+ !strcmp(line_break_property[prop2].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC")) { | |
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") || | |
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) { | |
target = "BOTH_CN_EXTPICT"; | |
} else { | |
- /* ignore Extended_Pictographic for all the other prop… | |
- if (!strcmp(line_break_property[prop1].enumname, "TMP_… | |
+ /* ignore Extended_Pictographic for all the other | |
+ * properties */ | |
+ if (!strcmp(line_break_property[prop1].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC")) { | |
result = prop2; | |
} else { | |
result = prop1; | |
} | |
} | |
} else { | |
- fprintf(stderr, "handle_conflict: Cannot handle conflict %s <-… | |
- line_break_property[prop1].enumname, line_break_proper… | |
+ fprintf(stderr, | |
+ "handle_conflict: Cannot handle conflict %s <- %s.\n", | |
+ line_break_property[prop1].enumname, | |
+ line_break_property[prop2].enumname); | |
exit(1); | |
} | |
@@ -402,27 +422,44 @@ post_process(struct properties *prop) | |
/* post-mapping according to the line breaking algorithm */ | |
for (i = 0; i < UINT32_C(0x110000); i++) { | |
/* LB1 */ | |
- if (!strcmp(line_break_property[prop[i].property].enumname, "T… | |
- !strcmp(line_break_property[prop[i].property].enumname, "T… | |
- !strcmp(line_break_property[prop[i].property].enumname, "T… | |
+ if (!strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_AI") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_SG") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_XX")) { | |
/* map AI, SG and XX to AL */ | |
target = "AL"; | |
- } else if (!strcmp(line_break_property[prop[i].property].enumn… | |
+ } else if (!strcmp(line_break_property[prop[i].property] | |
+ .enumname, | |
+ "TMP_SA_WITH_MN_OR_MC")) { | |
/* map SA (with General_Category Mn or Mc) to CM */ | |
target = "CM"; | |
- } else if (!strcmp(line_break_property[prop[i].property].enumn… | |
+ } else if (!strcmp(line_break_property[prop[i].property] | |
+ .enumname, | |
+ "TMP_SA_WITHOUT_MN_OR_MC")) { | |
/* map SA (without General_Category Mn or Mc) to AL */ | |
target = "AL"; | |
- } else if (!strcmp(line_break_property[prop[i].property].enumn… | |
+ } else if (!strcmp(line_break_property[prop[i].property] | |
+ .enumname, | |
+ "TMP_CJ")) { | |
/* map CJ to NS */ | |
target = "NS"; | |
- } else if (!strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
- !strcmp(line_break_property[prop[i].property].enumn… | |
+ } else if ( | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_CN") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_EXTENDED_PICTOGRAPHIC") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_MN") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_MC") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_EAW_H") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_EAW_W") || | |
+ !strcmp(line_break_property[prop[i].property].enumname, | |
+ "TMP_EAW_F")) { | |
/* map all the temporary classes "residue" to AL */ | |
target = "AL"; | |
} else { | |
@@ -430,14 +467,17 @@ post_process(struct properties *prop) | |
} | |
if (target) { | |
- for (result = 0; result < LEN(line_break_property); re… | |
- if (!strcmp(line_break_property[result].enumna… | |
+ for (result = 0; result < LEN(line_break_property); | |
+ result++) { | |
+ if (!strcmp(line_break_property[result] | |
+ .enumname, | |
target)) { | |
break; | |
} | |
} | |
if (result == LEN(line_break_property)) { | |
- fprintf(stderr, "handle_conflict: Internal err… | |
+ fprintf(stderr, | |
+ "handle_conflict: Internal error.\n"); | |
exit(1); | |
} | |
@@ -451,10 +491,9 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- properties_generate_break_property(line_break_property, | |
- LEN(line_break_property), NULL, | |
- handle_conflict, post_process, | |
- "line_break", argv[0]); | |
+ properties_generate_break_property( | |
+ line_break_property, LEN(line_break_property), NULL, | |
+ handle_conflict, post_process, "line_break", argv[0]); | |
return 0; | |
} | |
diff --git a/gen/sentence.c b/gen/sentence.c | |
@@ -6,78 +6,78 @@ | |
static const struct property_spec sentence_break_property[] = { | |
{ | |
.enumname = "OTHER", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "CR", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "CR", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "CR", | |
}, | |
{ | |
.enumname = "LF", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "LF", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "LF", | |
}, | |
{ | |
.enumname = "EXTEND", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Extend", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Extend", | |
}, | |
{ | |
.enumname = "SEP", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Sep", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Sep", | |
}, | |
{ | |
.enumname = "FORMAT", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Format", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Format", | |
}, | |
{ | |
.enumname = "SP", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Sp", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Sp", | |
}, | |
{ | |
.enumname = "LOWER", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Lower", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Lower", | |
}, | |
{ | |
.enumname = "UPPER", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Upper", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Upper", | |
}, | |
{ | |
.enumname = "OLETTER", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "OLetter", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "OLetter", | |
}, | |
{ | |
.enumname = "NUMERIC", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Numeric", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Numeric", | |
}, | |
{ | |
.enumname = "ATERM", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "ATerm", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "ATerm", | |
}, | |
{ | |
.enumname = "SCONTINUE", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "SContinue", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "SContinue", | |
}, | |
{ | |
.enumname = "STERM", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "STerm", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "STerm", | |
}, | |
{ | |
.enumname = "CLOSE", | |
- .file = FILE_SENTENCE, | |
- .ucdname = "Close", | |
+ .file = FILE_SENTENCE, | |
+ .ucdname = "Close", | |
}, | |
}; | |
@@ -86,9 +86,9 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- properties_generate_break_property(sentence_break_property, | |
- LEN(sentence_break_property), NULL, | |
- NULL, NULL, "sentence_break", argv[… | |
+ properties_generate_break_property( | |
+ sentence_break_property, LEN(sentence_break_property), NULL, | |
+ NULL, NULL, "sentence_break", argv[0]); | |
return 0; | |
} | |
diff --git a/gen/util.c b/gen/util.c | |
@@ -1,13 +1,12 @@ | |
/* See LICENSE file for copyright and license details. */ | |
-#include <stdbool.h> | |
#include <ctype.h> | |
#include <errno.h> | |
#include <inttypes.h> | |
#include <stdbool.h> | |
#include <stddef.h> | |
#include <stdint.h> | |
-#include <stdlib.h> | |
#include <stdio.h> | |
+#include <stdlib.h> | |
#include <string.h> | |
#include "util.h" | |
@@ -21,12 +20,13 @@ struct properties_payload { | |
struct properties *prop; | |
const struct property_spec *spec; | |
uint_least8_t speclen; | |
- int (*set_value)(struct properties_payload *, uint_least32_t, int_leas… | |
- uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, uint_l… | |
+ int (*set_value)(struct properties_payload *, uint_least32_t, | |
+ int_least64_t); | |
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, | |
+ uint_least8_t); | |
}; | |
-struct break_test_payload | |
-{ | |
+struct break_test_payload { | |
struct break_test **test; | |
size_t *testlen; | |
}; | |
@@ -51,8 +51,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp) | |
/* the maximum valid codepoint is 0x10FFFF */ | |
if (len > 6) { | |
- fprintf(stderr, "hextocp: '%.*s' is too long.\n", | |
- (int)len, str); | |
+ fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len, | |
+ str); | |
return 1; | |
} | |
@@ -77,8 +77,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp) | |
} | |
if (*cp > UINT32_C(0x10FFFF)) { | |
- fprintf(stderr, "hextocp: '%.*s' is too large.\n", | |
- (int)len, str); | |
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, | |
+ str); | |
return 1; | |
} | |
@@ -98,8 +98,10 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *… | |
} | |
/* count the number of spaces in the string and infer list length */ | |
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+… | |
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; | |
+ count++, tmp1 = tmp2 + 1) { | |
; | |
+ } | |
/* allocate resources */ | |
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) { | |
@@ -110,7 +112,8 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t … | |
/* go through the string again, parsing the numbers */ | |
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { | |
tmp2 = strchr(tmp1, ' '); | |
- if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),… | |
+ if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), | |
+ &((*cp)[i]))) { | |
return 1; | |
} | |
if (tmp2 != NULL) { | |
@@ -144,8 +147,10 @@ range_parse(const char *str, struct range *range) | |
} | |
void | |
-parse_file_with_callback(const char *fname, int (*callback)(const char *, | |
- char **, size_t, char *, void *), void *payload) | |
+parse_file_with_callback(const char *fname, | |
+ int (*callback)(const char *, char **, size_t, char *, | |
+ void *), | |
+ void *payload) | |
{ | |
FILE *fp; | |
char *line = NULL, **field = NULL, *comment; | |
@@ -182,10 +187,15 @@ parse_file_with_callback(const char *fname, int (*callbac… | |
if (line[i] != '#') { | |
/* extend field buffer, if necessary */ | |
if (++nfields > fieldbufsize) { | |
- if ((field = realloc(field, nfields * | |
- sizeof(*field))) == NULL) { | |
- fprintf(stderr, "parse_file_wi… | |
- "callback: realloc: %s… | |
+ if ((field = realloc( | |
+ field, | |
+ nfields * | |
+ sizeof(*field))) … | |
+ NULL) { | |
+ fprintf(stderr, | |
+ "parse_file_with_" | |
+ "callback: realloc: " | |
+ "%s.\n", | |
strerror(errno)); | |
exit(1); | |
} | |
@@ -209,8 +219,9 @@ parse_file_with_callback(const char *fname, int (*callback)… | |
/* go back whitespace and terminate field there */ | |
if (i > 0) { | |
- for (j = i - 1; line[j] == ' '; j--) | |
+ for (j = i - 1; line[j] == ' '; j--) { | |
; | |
+ } | |
line[j + 1] = '\0'; | |
} else { | |
line[i] = '\0'; | |
@@ -230,7 +241,7 @@ parse_file_with_callback(const char *fname, int (*callback)… | |
/* call callback function */ | |
if (callback(fname, field, nfields, comment, payload)) { | |
fprintf(stderr, "parse_file_with_callback: " | |
- "Malformed input.\n"); | |
+ "Malformed input.\n"); | |
exit(1); | |
} | |
} | |
@@ -257,10 +268,11 @@ properties_callback(const char *file, char **field, size_… | |
for (i = 0; i < p->speclen; i++) { | |
/* identify fitting file and identifier */ | |
- if (p->spec[i].file && | |
- !strcmp(p->spec[i].file, file) && | |
+ if (p->spec[i].file && !strcmp(p->spec[i].file, file) && | |
(!strcmp(p->spec[i].ucdname, field[1]) || | |
- (comment != NULL && !strncmp(p->spec[i].ucdname, comment,… | |
+ (comment != NULL && | |
+ !strncmp(p->spec[i].ucdname, comment, | |
+ strlen(p->spec[i].ucdname)) && | |
comment[strlen(p->spec[i].ucdname)] == ' '))) { | |
/* parse range in first field */ | |
if (range_parse(field[0], &r)) { | |
@@ -287,7 +299,8 @@ properties_compress(const struct properties *prop, | |
uint_least32_t cp, i; | |
/* initialization */ | |
- if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp… | |
+ if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * | |
+ sizeof(*(comp->offset))))) { | |
fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
exit(1); | |
} | |
@@ -296,7 +309,8 @@ properties_compress(const struct properties *prop, | |
for (cp = 0; cp < UINT32_C(0x110000); cp++) { | |
for (i = 0; i < comp->datalen; i++) { | |
- if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*pro… | |
+ if (!memcmp(&(prop[cp]), &(comp->data[i]), | |
+ sizeof(*prop))) { | |
/* found a match! */ | |
comp->offset[cp] = i; | |
break; | |
@@ -308,9 +322,9 @@ properties_compress(const struct properties *prop, | |
* add current properties to data and add the | |
* offset in the offset-table | |
*/ | |
- if (!(comp->data = reallocate_array(comp->data, | |
- ++(comp->datalen), | |
- sizeof(*(comp->dat… | |
+ if (!(comp->data = reallocate_array( | |
+ comp->data, ++(comp->datalen), | |
+ sizeof(*(comp->data))))) { | |
fprintf(stderr, "reallocate_array: %s\n", | |
strerror(errno)); | |
exit(1); | |
@@ -357,8 +371,7 @@ properties_get_major_minor(const struct properties_compress… | |
* and need less storage) | |
*/ | |
for (j = 0; j + 0xFF < mm->minorlen; j++) { | |
- if (!memcmp(&(comp->offset[i << 8]), | |
- &(mm->minor[j]), | |
+ if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]), | |
sizeof(*(comp->offset)) * 0x100)) { | |
break; | |
} | |
@@ -373,9 +386,9 @@ properties_get_major_minor(const struct properties_compress… | |
* in major | |
*/ | |
mm->minorlen += 0x100; | |
- if (!(mm->minor = reallocate_array(mm->minor, | |
- mm->minorlen, | |
- sizeof(*(mm->minor)… | |
+ if (!(mm->minor = | |
+ reallocate_array(mm->minor, mm->minorlen, | |
+ sizeof(*(mm->minor)))))… | |
fprintf(stderr, "reallocate_array: %s\n", | |
strerror(errno)); | |
exit(1); | |
@@ -403,7 +416,7 @@ properties_print_lookup_table(char *name, size_t *data, siz… | |
} | |
} | |
- type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : | |
+ type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : | |
(maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" : | |
(maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" : | |
"uint_least64_t"; | |
@@ -418,21 +431,21 @@ properties_print_lookup_table(char *name, size_t *data, s… | |
} else { | |
printf(",\n\t"); | |
} | |
- | |
} | |
printf("};\n"); | |
} | |
void | |
-properties_print_derived_lookup_table(char *name, char *type, size_t *offset, … | |
- int_least64_t (*get_value)(const struct … | |
- size_t), const void *payload) | |
+properties_print_derived_lookup_table( | |
+ char *name, char *type, size_t *offset, size_t offsetlen, | |
+ int_least64_t (*get_value)(const struct properties *, size_t), | |
+ const void *payload) | |
{ | |
size_t i; | |
printf("static const %s %s[] = {\n\t", type, name); | |
for (i = 0; i < offsetlen; i++) { | |
- printf("%"PRIiLEAST64, get_value(payload, offset[i])); | |
+ printf("%" PRIiLEAST64, get_value(payload, offset[i])); | |
if (i + 1 == offsetlen) { | |
printf("\n"); | |
} else if ((i + 1) % 8 != 0) { | |
@@ -440,7 +453,6 @@ properties_print_derived_lookup_table(char *name, char *typ… | |
} else { | |
printf(",\n\t"); | |
} | |
- | |
} | |
printf("};\n"); | |
} | |
@@ -464,17 +476,19 @@ set_value_bp(struct properties_payload *payload, uint_lea… | |
{ | |
if (payload->prop[cp].property != payload->speclen) { | |
if (payload->handle_conflict == NULL) { | |
- fprintf(stderr, "set_value_bp: " | |
- "Unhandled character break property " | |
+ fprintf(stderr, | |
+ "set_value_bp: " | |
+ "Unhandled character break property " | |
"overwrite for 0x%06X (%s <- %s).\n", | |
- cp, payload->spec[payload->prop[cp]. | |
- property].enumname, | |
+ cp, | |
+ payload->spec[payload->prop[cp].property] | |
+ .enumname, | |
payload->spec[value].enumname); | |
return 1; | |
} else { | |
- value = payload->handle_conflict(cp, | |
- (uint_least8_t)payload->prop[cp].property, | |
- (uint_least8_t)value); | |
+ value = payload->handle_conflict( | |
+ cp, (uint_least8_t)payload->prop[cp].property, | |
+ (uint_least8_t)value); | |
} | |
} | |
payload->prop[cp].property = value; | |
@@ -489,15 +503,13 @@ get_value_bp(const struct properties *prop, size_t offset) | |
} | |
void | |
-properties_generate_break_property(const struct property_spec *spec, | |
- uint_least8_t speclen, | |
- uint_least8_t (*fill_missing)( | |
- uint_least32_t), | |
- uint_least8_t (*handle_conflict)( | |
- uint_least32_t, uint_least8_t, | |
- uint_least8_t), void | |
- (*post_process)(struct properties *), | |
- const char *prefix, const char *argv0) | |
+properties_generate_break_property( | |
+ const struct property_spec *spec, uint_least8_t speclen, | |
+ uint_least8_t (*fill_missing)(uint_least32_t), | |
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, | |
+ uint_least8_t), | |
+ void (*post_process)(struct properties *), const char *prefix, | |
+ const char *argv0) | |
{ | |
struct properties_compressed comp; | |
struct properties_major_minor mm; | |
@@ -537,8 +549,7 @@ properties_generate_break_property(const struct property_sp… | |
if (i == j && spec[i].file) { | |
/* file has not been processed yet */ | |
parse_file_with_callback(spec[i].file, | |
- properties_callback, | |
- &payload); | |
+ properties_callback, &payload… | |
} | |
} | |
@@ -546,7 +557,8 @@ properties_generate_break_property(const struct property_sp… | |
for (i = 0; i < UINT32_C(0x110000); i++) { | |
if (payload.prop[i].property == speclen) { | |
if (fill_missing != NULL) { | |
- payload.prop[i].property = fill_missing((uint_… | |
+ payload.prop[i].property = | |
+ fill_missing((uint_least32_t)i); | |
} else { | |
payload.prop[i].property = 0; | |
} | |
@@ -559,14 +571,16 @@ properties_generate_break_property(const struct property_… | |
} | |
/* compress data */ | |
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",… | |
+ printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", | |
+ argv0); | |
properties_compress(prop, &comp); | |
- fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, | |
- prefix, properties_get_major_minor(&comp, &mm)); | |
+ fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefi… | |
+ properties_get_major_minor(&comp, &mm)); | |
/* prepare names */ | |
- if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(bu… | |
+ if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= | |
+ LEN(buf1)) { | |
fprintf(stderr, "snprintf: String truncated.\n"); | |
exit(1); | |
} | |
@@ -578,9 +592,12 @@ properties_generate_break_property(const struct property_s… | |
prefix_uc[i] = (char)toupper(prefix[i]); | |
} | |
prefix_uc[prefixlen] = '\0'; | |
- if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf… | |
- (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3)… | |
- (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)… | |
+ if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= | |
+ LEN(buf2) || | |
+ (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= | |
+ LEN(buf3) || | |
+ (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= | |
+ LEN(buf4)) { | |
fprintf(stderr, "snprintf: String truncated.\n"); | |
exit(1); | |
} | |
@@ -589,8 +606,9 @@ properties_generate_break_property(const struct property_sp… | |
properties_print_enum(spec, speclen, buf1, buf2); | |
properties_print_lookup_table(buf3, mm.major, 0x1100); | |
printf("\n"); | |
- properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,… | |
- get_value_bp, comp.data); | |
+ properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, | |
+ mm.minorlen, get_value_bp, | |
+ comp.data); | |
/* free data */ | |
free(prop); | |
@@ -625,42 +643,50 @@ break_test_callback(const char *fname, char **field, size… | |
memset(t, 0, sizeof(*t)); | |
/* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */ | |
- for (token = strtok(field[0], " "), i = 0; token != NULL; i++, | |
- token = strtok(NULL, " ")) { | |
+ for (token = strtok(field[0], " "), i = 0; token != NULL; | |
+ i++, token = strtok(NULL, " ")) { | |
if (i % 2 == 0) { | |
/* delimiter or start of sequence */ | |
- if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF… | |
+ if (i == 0 || | |
+ !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */ | |
/* | |
* '÷' indicates a breakpoint, | |
* the current length is done; allocate | |
* a new length field and set it to 0 | |
*/ | |
- if ((t->len = realloc(t->len, | |
- ++t->lenlen * sizeof(*t->len))) == NULL) { | |
- fprintf(stderr, "break_test_" | |
+ if ((t->len = realloc( | |
+ t->len, | |
+ ++t->lenlen * sizeof(*t->len))) == | |
+ NULL) { | |
+ fprintf(stderr, | |
+ "break_test_" | |
"callback: realloc: %s.\n", | |
strerror(errno)); | |
return 1; | |
} | |
t->len[t->lenlen - 1] = 0; | |
} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 … | |
- /* | |
- * '×' indicates a non-breakpoint, do nothing | |
- */ | |
+ /* '×' indicates a non-breakpoint, do nothing… | |
} else { | |
- fprintf(stderr, "break_test_callback: " | |
- "Malformed delimiter '%s'.\n", token); | |
+ fprintf(stderr, | |
+ "break_test_callback: " | |
+ "Malformed delimiter '%s'.\n", | |
+ token); | |
return 1; | |
} | |
} else { | |
/* add codepoint to cp-array */ | |
- if ((t->cp = realloc(t->cp, ++t->cplen * | |
- sizeof(*t->cp))) == NULL) { | |
- fprintf(stderr, "break_test_callback: " | |
- "realloc: %s.\n", strerror(errno)); | |
+ if ((t->cp = realloc(t->cp, | |
+ ++t->cplen * sizeof(*t->cp))) == | |
+ NULL) { | |
+ fprintf(stderr, | |
+ "break_test_callback: " | |
+ "realloc: %s.\n", | |
+ strerror(errno)); | |
return 1; | |
} | |
- if (hextocp(token, strlen(token), &t->cp[t->cplen - 1]… | |
+ if (hextocp(token, strlen(token), | |
+ &t->cp[t->cplen - 1])) { | |
return 1; | |
} | |
if (t->lenlen > 0) { | |
@@ -688,8 +714,7 @@ break_test_callback(const char *fname, char **field, size_t… | |
} | |
void | |
-break_test_list_parse(char *fname, struct break_test **test, | |
- size_t *testlen) | |
+break_test_list_parse(char *fname, struct break_test **test, size_t *testlen) | |
{ | |
struct break_test_payload pl = { | |
.test = test, | |
@@ -703,13 +728,14 @@ break_test_list_parse(char *fname, struct break_test **te… | |
void | |
break_test_list_print(const struct break_test *test, size_t testlen, | |
- const char *identifier, const char *progname) | |
+ const char *identifier, const char *progname) | |
{ | |
size_t i, j; | |
printf("/* Automatically generated by %s */\n" | |
"#include <stdint.h>\n#include <stddef.h>\n\n" | |
- "#include \"../gen/types.h\"\n\n", progname); | |
+ "#include \"../gen/types.h\"\n\n", | |
+ progname); | |
printf("static const struct break_test %s[] = {\n", identifier); | |
for (i = 0; i < testlen; i++) { | |
diff --git a/gen/util.h b/gen/util.h | |
@@ -7,7 +7,7 @@ | |
#include "types.h" | |
-#define LEN(x) (sizeof (x) / sizeof *(x)) | |
+#define LEN(x) (sizeof(x) / sizeof *(x)) | |
struct property_spec { | |
const char *enumname; | |
@@ -34,30 +34,31 @@ struct properties_major_minor { | |
int hextocp(const char *, size_t, uint_least32_t *cp); | |
int parse_cp_list(const char *, uint_least32_t **, size_t *); | |
-void parse_file_with_callback(const char *, int (*callback)(const char *, | |
- char **, size_t, char *, void *), void *payload); | |
+void parse_file_with_callback(const char *, | |
+ int (*callback)(const char *, char **, size_t, | |
+ char *, void *), | |
+ void *payload); | |
-void properties_compress(const struct properties *, struct properties_compress… | |
+void properties_compress(const struct properties *, | |
+ struct properties_compressed *comp); | |
double properties_get_major_minor(const struct properties_compressed *, | |
struct properties_major_minor *); | |
void properties_print_lookup_table(char *, size_t *, size_t); | |
-void properties_print_derived_lookup_table(char *, char *, size_t *, size_t, | |
- int_least64_t (*get_value)(const struct … | |
- size_t), const void *); | |
- | |
-void properties_generate_break_property(const struct property_spec *, | |
- uint_least8_t, uint_least8_t | |
- (*fill_missing)(uint_least32_t), | |
- uint_least8_t | |
- (*handle_conflict)(uint_least32_t, | |
- uint_least8_t, uint_least8_t), | |
- void (*post_process) | |
- (struct properties *), | |
- const char *, const char *); | |
+void properties_print_derived_lookup_table( | |
+ char *, char *, size_t *, size_t, | |
+ int_least64_t (*get_value)(const struct properties *, size_t), | |
+ const void *); | |
+ | |
+void properties_generate_break_property( | |
+ const struct property_spec *, uint_least8_t, | |
+ uint_least8_t (*fill_missing)(uint_least32_t), | |
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, | |
+ uint_least8_t), | |
+ void (*post_process)(struct properties *), const char *, const char *); | |
void break_test_list_parse(char *, struct break_test **, size_t *); | |
-void break_test_list_print(const struct break_test *, size_t, | |
- const char *, const char *); | |
+void break_test_list_print(const struct break_test *, size_t, const char *, | |
+ const char *); | |
void break_test_list_free(struct break_test *, size_t); | |
#endif /* UTIL_H */ | |
diff --git a/gen/word.c b/gen/word.c | |
@@ -11,108 +11,108 @@ | |
static const struct property_spec word_break_property[] = { | |
{ | |
.enumname = "OTHER", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "ALETTER", | |
- .file = FILE_WORD, | |
- .ucdname = "ALetter", | |
+ .file = FILE_WORD, | |
+ .ucdname = "ALetter", | |
}, | |
{ | |
.enumname = "BOTH_ALETTER_EXTPICT", | |
- .file = NULL, | |
- .ucdname = NULL, | |
+ .file = NULL, | |
+ .ucdname = NULL, | |
}, | |
{ | |
.enumname = "CR", | |
- .file = FILE_WORD, | |
- .ucdname = "CR", | |
+ .file = FILE_WORD, | |
+ .ucdname = "CR", | |
}, | |
{ | |
.enumname = "DOUBLE_QUOTE", | |
- .file = FILE_WORD, | |
- .ucdname = "Double_Quote", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Double_Quote", | |
}, | |
{ | |
.enumname = "EXTEND", | |
- .file = FILE_WORD, | |
- .ucdname = "Extend", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Extend", | |
}, | |
{ | |
.enumname = "EXTENDED_PICTOGRAPHIC", | |
- .file = FILE_EMOJI, | |
- .ucdname = "Extended_Pictographic", | |
+ .file = FILE_EMOJI, | |
+ .ucdname = "Extended_Pictographic", | |
}, | |
{ | |
.enumname = "EXTENDNUMLET", | |
- .file = FILE_WORD, | |
- .ucdname = "ExtendNumLet", | |
+ .file = FILE_WORD, | |
+ .ucdname = "ExtendNumLet", | |
}, | |
{ | |
.enumname = "FORMAT", | |
- .file = FILE_WORD, | |
- .ucdname = "Format", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Format", | |
}, | |
{ | |
.enumname = "HEBREW_LETTER", | |
- .file = FILE_WORD, | |
- .ucdname = "Hebrew_Letter", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Hebrew_Letter", | |
}, | |
{ | |
.enumname = "KATAKANA", | |
- .file = FILE_WORD, | |
- .ucdname = "Katakana", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Katakana", | |
}, | |
{ | |
.enumname = "LF", | |
- .file = FILE_WORD, | |
- .ucdname = "LF", | |
+ .file = FILE_WORD, | |
+ .ucdname = "LF", | |
}, | |
{ | |
.enumname = "MIDLETTER", | |
- .file = FILE_WORD, | |
- .ucdname = "MidLetter", | |
+ .file = FILE_WORD, | |
+ .ucdname = "MidLetter", | |
}, | |
{ | |
.enumname = "MIDNUM", | |
- .file = FILE_WORD, | |
- .ucdname = "MidNum", | |
+ .file = FILE_WORD, | |
+ .ucdname = "MidNum", | |
}, | |
{ | |
.enumname = "MIDNUMLET", | |
- .file = FILE_WORD, | |
- .ucdname = "MidNumLet", | |
+ .file = FILE_WORD, | |
+ .ucdname = "MidNumLet", | |
}, | |
{ | |
.enumname = "NEWLINE", | |
- .file = FILE_WORD, | |
- .ucdname = "Newline", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Newline", | |
}, | |
{ | |
.enumname = "NUMERIC", | |
- .file = FILE_WORD, | |
- .ucdname = "Numeric", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Numeric", | |
}, | |
{ | |
.enumname = "REGIONAL_INDICATOR", | |
- .file = FILE_WORD, | |
- .ucdname = "Regional_Indicator", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Regional_Indicator", | |
}, | |
{ | |
.enumname = "SINGLE_QUOTE", | |
- .file = FILE_WORD, | |
- .ucdname = "Single_Quote", | |
+ .file = FILE_WORD, | |
+ .ucdname = "Single_Quote", | |
}, | |
{ | |
.enumname = "WSEGSPACE", | |
- .file = FILE_WORD, | |
- .ucdname = "WSegSpace", | |
+ .file = FILE_WORD, | |
+ .ucdname = "WSegSpace", | |
}, | |
{ | |
.enumname = "ZWJ", | |
- .file = FILE_WORD, | |
- .ucdname = "ZWJ", | |
+ .file = FILE_WORD, | |
+ .ucdname = "ZWJ", | |
}, | |
}; | |
@@ -124,8 +124,10 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, ui… | |
(void)cp; | |
if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") && | |
- !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPH… | |
- (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPH… | |
+ !strcmp(word_break_property[prop2].enumname, | |
+ "EXTENDED_PICTOGRAPHIC")) || | |
+ (!strcmp(word_break_property[prop1].enumname, | |
+ "EXTENDED_PICTOGRAPHIC") && | |
!strcmp(word_break_property[prop2].enumname, "ALETTER"))) { | |
for (result = 0; result < LEN(word_break_property); result++) { | |
if (!strcmp(word_break_property[result].enumname, | |
@@ -150,10 +152,9 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- properties_generate_break_property(word_break_property, | |
- LEN(word_break_property), NULL, | |
- handle_conflict, NULL, "word_break", | |
- argv[0]); | |
+ properties_generate_break_property( | |
+ word_break_property, LEN(word_break_property), NULL, | |
+ handle_conflict, NULL, "word_break", argv[0]); | |
return 0; | |
} | |
diff --git a/grapheme.h b/grapheme.h | |
@@ -18,14 +18,15 @@ enum grapheme_bidirectional_override { | |
size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *); | |
size_t grapheme_encode_utf8(uint_least32_t, char *, size_t); | |
-size_t grapheme_get_bidirectional_embedding_levels(const uint_least32_t *, siz… | |
- enum grapheme_bidirectional… | |
- int_least32_t *, size_t); | |
-size_t grapheme_get_bidirectional_embedding_levels_utf8(const char *, size_t, | |
- enum grapheme_bidirect… | |
- int_least32_t *, size_… | |
+size_t grapheme_get_bidirectional_embedding_levels( | |
+ const uint_least32_t *, size_t, enum grapheme_bidirectional_override, | |
+ int_least32_t *, size_t); | |
+size_t grapheme_get_bidirectional_embedding_levels_utf8( | |
+ const char *, size_t, enum grapheme_bidirectional_override, | |
+ int_least32_t *, size_t); | |
-bool grapheme_is_character_break(uint_least32_t, uint_least32_t, uint_least16_… | |
+bool grapheme_is_character_break(uint_least32_t, uint_least32_t, | |
+ uint_least16_t *); | |
bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *); | |
bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *); | |
@@ -45,9 +46,12 @@ size_t grapheme_next_line_break_utf8(const char *, size_t); | |
size_t grapheme_next_sentence_break_utf8(const char *, size_t); | |
size_t grapheme_next_word_break_utf8(const char *, size_t); | |
-size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,… | |
-size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,… | |
-size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,… | |
+size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *, | |
+ size_t); | |
+size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *, | |
+ size_t); | |
+size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *, | |
+ size_t); | |
size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t); | |
size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t); | |
diff --git a/src/bidirectional.c b/src/bidirectional.c | |
@@ -12,15 +12,18 @@ struct isolate_runner { | |
int_least32_t *buf; | |
size_t buflen; | |
enum bidi_property prev_prop; | |
+ | |
struct { | |
size_t off; | |
enum bidi_property prop; | |
int_least8_t level; | |
} cur; | |
+ | |
struct { | |
size_t off; | |
enum bidi_property prop; | |
} next; | |
+ | |
uint_least8_t paragraph_level; | |
int_least8_t isolating_run_level; | |
enum bidi_property last_strong_type; | |
@@ -57,24 +60,42 @@ struct state { | |
static inline void | |
state_serialize(const struct state *s, int_least32_t *out) | |
{ | |
- *out = (int_least32_t)( | |
- ((((uint_least32_t)(s->paragraph_level)) & 0x01 /* 00000… | |
- ((((uint_least32_t)(s->level + 1)) & 0x7F /* 01111… | |
- ((((uint_least32_t)(s->prop)) & 0x1F /* 00011… | |
- ((((uint_least32_t)(s->bracket - bidi_bracket)) & 0xFF /* 11111… | |
- ((((uint_least32_t)(s->visited)) & 0x01 /* 00000… | |
- ((((uint_least32_t)(s->rawprop)) & 0x1F /* 00011… | |
+ *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) & | |
+ 0x01 /* 00000001 */) | |
+ << 0) | | |
+ ((((uint_least32_t)(s->level + 1)) & | |
+ 0x7F /* 01111111 */) | |
+ << 1) | | |
+ ((((uint_least32_t)(s->prop)) & | |
+ 0x1F /* 00011111 */) | |
+ << 8) | | |
+ ((((uint_least32_t)(s->bracket - bidi_bracket))… | |
+ 0xFF /* 11111111 */) | |
+ << 13) | | |
+ ((((uint_least32_t)(s->visited)) & | |
+ 0x01 /* 00000001 */) | |
+ << 21) | | |
+ ((((uint_least32_t)(s->rawprop)) & | |
+ 0x1F /* 00011111 */) | |
+ << 22)); | |
} | |
static inline void | |
state_deserialize(int_least32_t in, struct state *s) | |
{ | |
- s->paragraph_level = (uint_least8_t)((((uint_least32_t)… | |
- s->level = (int_least8_t)((((uint_least32_t)… | |
- s->prop = (enum bidi_property)((((uint_least32_t)… | |
- s->bracket = bidi_bracket + (uint_least8_t)((((uint_least32_t)… | |
- s->visited = (bool)((((uint_least32_t)… | |
- s->rawprop = (enum bidi_property)((((uint_least32_t)… | |
+ s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) & | |
+ 0x01 /* 00000001 */); | |
+ s->level = (int_least8_t)((((uint_least32_t)in) >> 1) & | |
+ 0x7F /* 01111111 */) - | |
+ 1; | |
+ s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) & | |
+ 0x1F /* 00011111 */); | |
+ s->bracket = | |
+ bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) & | |
+ 0xFF /* 11111111 */); | |
+ s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */… | |
+ s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) & | |
+ 0x1F /* 00011111 */); | |
} | |
static void | |
@@ -171,7 +192,6 @@ isolate_runner_advance(struct isolate_runner *ir) | |
return 1; | |
} | |
- | |
/* shift in */ | |
ir->prev_prop = ir->cur.prop; | |
ir->cur.off = ir->next.off; | |
@@ -188,13 +208,13 @@ isolate_runner_advance(struct isolate_runner *ir) | |
* on the first advancement as the prev_prop holds the sos type, | |
* which can only be either R or L, which are both strong types | |
*/ | |
- if (ir->prev_prop == BIDI_PROP_R || | |
- ir->prev_prop == BIDI_PROP_L || | |
+ if (ir->prev_prop == BIDI_PROP_R || ir->prev_prop == BIDI_PROP_L || | |
ir->prev_prop == BIDI_PROP_AL) { | |
ir->last_strong_type = ir->prev_prop; | |
} | |
- /* initialize next state by going to the next character in the sequenc… | |
+ /* initialize next state by going to the next character in the sequence | |
+ */ | |
ir->next.off = SIZE_MAX; | |
ir->next.prop = NUM_BIDI_PROPS; | |
@@ -210,8 +230,7 @@ isolate_runner_advance(struct isolate_runner *ir) | |
} | |
/* follow BD8/BD9 and P2 to traverse the current sequence */ | |
- if (s.prop == BIDI_PROP_LRI || | |
- s.prop == BIDI_PROP_RLI || | |
+ if (s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || | |
s.prop == BIDI_PROP_FSI) { | |
/* | |
* we encountered an isolate initiator, increment | |
@@ -224,8 +243,7 @@ isolate_runner_advance(struct isolate_runner *ir) | |
if (isolate_level != 1) { | |
continue; | |
} | |
- } else if (s.prop == BIDI_PROP_PDI && | |
- isolate_level > 0) { | |
+ } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) { | |
isolate_level--; | |
/* | |
@@ -250,12 +268,14 @@ isolate_runner_advance(struct isolate_runner *ir) | |
/* we were in the first initializing round */ | |
continue; | |
} else if (s.level == ir->isolating_run_level) { | |
- /* isolate_level-skips have been handled before, we're… | |
+ /* isolate_level-skips have been handled before, we're | |
+ * good */ | |
/* still in the sequence */ | |
ir->next.off = (size_t)i; | |
ir->next.prop = s.prop; | |
} else { | |
- /* out of sequence or isolated, compare levels via eos… | |
+ /* out of sequence or isolated, compare levels via eos | |
+ */ | |
if (MAX(last_isolate_level, s.level) % 2 == 0) { | |
ir->next.prop = BIDI_PROP_L; | |
} else { | |
@@ -286,7 +306,8 @@ isolate_runner_advance(struct isolate_runner *ir) | |
} | |
static void | |
-isolate_runner_set_current_prop(struct isolate_runner *ir, enum bidi_property … | |
+isolate_runner_set_current_prop(struct isolate_runner *ir, | |
+ enum bidi_property prop) | |
{ | |
struct state s; | |
@@ -301,9 +322,9 @@ static inline enum bidi_property | |
get_bidi_property(uint_least32_t cp) | |
{ | |
if (likely(cp <= 0x10FFFF)) { | |
- return (enum bidi_property) | |
- ((bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & | |
- 0x1F /* 00011111 */); | |
+ return (enum bidi_property)( | |
+ (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & | |
+ 0x1F /* 00011111 */); | |
} else { | |
return BIDI_PROP_L; | |
} | |
@@ -320,8 +341,8 @@ get_bidi_bracket_off(uint_least32_t cp) | |
} | |
static size_t | |
-process_isolating_run_sequence(int_least32_t *buf, size_t buflen, | |
- size_t off, uint_least8_t paragraph_level) | |
+process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off, | |
+ uint_least8_t paragraph_level) | |
{ | |
enum bidi_property sequence_prop; | |
struct isolate_runner ir, tmp; | |
@@ -335,7 +356,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
ir.prev_prop == BIDI_PROP_RLI || | |
ir.prev_prop == BIDI_PROP_FSI || | |
ir.prev_prop == BIDI_PROP_PDI) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP… | |
+ isolate_runner_set_current_prop(&ir, | |
+ BIDI_PROP_ON); | |
} else { | |
isolate_runner_set_current_prop(&ir, | |
ir.prev_prop); | |
@@ -371,7 +393,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
} | |
if (ir.prev_prop == BIDI_PROP_AN && | |
- ir.cur.prop == BIDI_PROP_CS && | |
+ ir.cur.prop == BIDI_PROP_CS && | |
ir.next.prop == BIDI_PROP_AN) { | |
isolate_runner_set_current_prop(&ir, BIDI_PROP_AN); | |
} | |
@@ -389,14 +411,19 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
} else if (ir.cur.prop == BIDI_PROP_EN) { | |
/* set the preceding sequence */ | |
if (runsince != SIZE_MAX) { | |
- isolate_runner_init(buf, buflen, runsince, par… | |
+ isolate_runner_init(buf, buflen, runsince, | |
+ paragraph_level, | |
+ (runsince > off), &tmp); | |
while (!isolate_runner_advance(&tmp) && | |
tmp.cur.off < ir.cur.off) { | |
- isolate_runner_set_current_prop(&tmp, … | |
+ isolate_runner_set_current_prop( | |
+ &tmp, BIDI_PROP_EN); | |
} | |
runsince = SIZE_MAX; | |
} else { | |
- isolate_runner_init(buf, buflen, ir.cur.off, p… | |
+ isolate_runner_init(buf, buflen, ir.cur.off, | |
+ paragraph_level, | |
+ (ir.cur.off > off), &tmp); | |
isolate_runner_advance(&tmp); | |
} | |
/* follow the succeeding sequence */ | |
@@ -404,7 +431,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
if (tmp.cur.prop != BIDI_PROP_ET) { | |
break; | |
} | |
- isolate_runner_set_current_prop(&tmp, BIDI_PRO… | |
+ isolate_runner_set_current_prop(&tmp, | |
+ BIDI_PROP_EN); | |
} | |
} else { | |
/* sequence ended */ | |
@@ -439,23 +467,26 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
while (!isolate_runner_advance(&ir)) { | |
if (sequence_end == SIZE_MAX) { | |
- if (ir.cur.prop == BIDI_PROP_B || | |
- ir.cur.prop == BIDI_PROP_S || | |
- ir.cur.prop == BIDI_PROP_WS || | |
- ir.cur.prop == BIDI_PROP_ON || | |
+ if (ir.cur.prop == BIDI_PROP_B || | |
+ ir.cur.prop == BIDI_PROP_S || | |
+ ir.cur.prop == BIDI_PROP_WS || | |
+ ir.cur.prop == BIDI_PROP_ON || | |
ir.cur.prop == BIDI_PROP_FSI || | |
ir.cur.prop == BIDI_PROP_LRI || | |
ir.cur.prop == BIDI_PROP_RLI || | |
ir.cur.prop == BIDI_PROP_PDI) { | |
- /* the current character is an NI (neutral or … | |
+ /* the current character is an NI (neutral or | |
+ * isolate) */ | |
/* scan ahead to the end of the NI-sequence */ | |
- isolate_runner_init(buf, buflen, ir.cur.off, p… | |
+ isolate_runner_init(buf, buflen, ir.cur.off, | |
+ paragraph_level, | |
+ (ir.cur.off > off), &tmp); | |
while (!isolate_runner_advance(&tmp)) { | |
- if (tmp.next.prop != BIDI_PROP_B && | |
- tmp.next.prop != BIDI_PROP_S && | |
- tmp.next.prop != BIDI_PROP_WS && | |
- tmp.next.prop != BIDI_PROP_ON && | |
+ if (tmp.next.prop != BIDI_PROP_B && | |
+ tmp.next.prop != BIDI_PROP_S && | |
+ tmp.next.prop != BIDI_PROP_WS && | |
+ tmp.next.prop != BIDI_PROP_ON && | |
tmp.next.prop != BIDI_PROP_FSI && | |
tmp.next.prop != BIDI_PROP_LRI && | |
tmp.next.prop != BIDI_PROP_RLI && | |
@@ -465,17 +496,17 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
} | |
/* | |
- * check what follows and see if the text has … | |
- * same direction on both sides | |
+ * check what follows and see if the text has | |
+ * the same direction on both sides | |
*/ | |
if (ir.prev_prop == BIDI_PROP_L && | |
tmp.next.prop == BIDI_PROP_L) { | |
sequence_end = tmp.cur.off; | |
sequence_prop = BIDI_PROP_L; | |
- } else if ((ir.prev_prop == BIDI_PROP_R || | |
+ } else if ((ir.prev_prop == BIDI_PROP_R || | |
ir.prev_prop == BIDI_PROP_EN || | |
ir.prev_prop == BIDI_PROP_AN) && | |
- (tmp.next.prop == BIDI_PROP_R || | |
+ (tmp.next.prop == BIDI_PROP_R || | |
tmp.next.prop == BIDI_PROP_EN || | |
tmp.next.prop == BIDI_PROP_AN)) { | |
sequence_end = tmp.cur.off; | |
@@ -486,7 +517,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
if (sequence_end != SIZE_MAX) { | |
if (ir.cur.off <= sequence_end) { | |
- isolate_runner_set_current_prop(&ir, sequence_… | |
+ isolate_runner_set_current_prop(&ir, | |
+ sequence_prop); | |
} else { | |
/* end of sequence, reset */ | |
sequence_end = SIZE_MAX; | |
@@ -498,10 +530,9 @@ process_isolating_run_sequence(int_least32_t *buf, size_t … | |
/* N2 */ | |
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_B || | |
- ir.cur.prop == BIDI_PROP_S || | |
- ir.cur.prop == BIDI_PROP_WS || | |
- ir.cur.prop == BIDI_PROP_ON || | |
+ if (ir.cur.prop == BIDI_PROP_B || ir.cur.prop == BIDI_PROP_S || | |
+ ir.cur.prop == BIDI_PROP_WS || | |
+ ir.cur.prop == BIDI_PROP_ON || | |
ir.cur.prop == BIDI_PROP_FSI || | |
ir.cur.prop == BIDI_PROP_LRI || | |
ir.cur.prop == BIDI_PROP_RLI || | |
@@ -509,10 +540,12 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
/* N2 */ | |
if (ir.cur.level % 2 == 0) { | |
/* even embedding level */ | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP… | |
+ isolate_runner_set_current_prop(&ir, | |
+ BIDI_PROP_L); | |
} else { | |
/* odd embedding level */ | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP… | |
+ isolate_runner_set_current_prop(&ir, | |
+ BIDI_PROP_R); | |
} | |
} | |
} | |
@@ -522,8 +555,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
static uint_least8_t | |
get_paragraph_level(enum grapheme_bidirectional_override override, | |
- bool terminate_on_pdi, | |
- const int_least32_t *buf, size_t buflen) | |
+ bool terminate_on_pdi, const int_least32_t *buf, | |
+ size_t buflen) | |
{ | |
struct state s; | |
int_least8_t isolate_level; | |
@@ -541,8 +574,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) { | |
state_deserialize(buf[bufoff], &s); | |
- if (s.prop == BIDI_PROP_PDI && | |
- isolate_level == 0 && | |
+ if (s.prop == BIDI_PROP_PDI && isolate_level == 0 && | |
terminate_on_pdi) { | |
/* | |
* we are in a FSI-subsection of a paragraph and | |
@@ -552,8 +584,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
} | |
/* BD8/BD9 */ | |
- if ((s.prop == BIDI_PROP_LRI || | |
- s.prop == BIDI_PROP_RLI || | |
+ if ((s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || | |
s.prop == BIDI_PROP_FSI) && | |
isolate_level < MAX_DEPTH) { | |
/* we hit an isolate initiator, increment counter */ | |
@@ -570,8 +601,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
/* P3 */ | |
if (s.prop == BIDI_PROP_L) { | |
return 0; | |
- } else if (s.prop == BIDI_PROP_AL || | |
- s.prop == BIDI_PROP_R) { | |
+ } else if (s.prop == BIDI_PROP_AL || s.prop == BIDI_PROP_R) { | |
return 1; | |
} | |
} | |
@@ -585,13 +615,15 @@ get_paragraph_embedding_levels(enum grapheme_bidirectiona… | |
{ | |
enum bidi_property tmp_prop; | |
struct state s, t; | |
+ | |
struct { | |
int_least8_t level; | |
enum grapheme_bidirectional_override override; | |
bool directional_isolate; | |
} directional_status[MAX_DEPTH + 2], *dirstat = directional_status; | |
+ | |
size_t overflow_isolate_count, overflow_embedding_count, | |
- valid_isolate_count, bufoff, i, runsince; | |
+ valid_isolate_count, bufoff, i, runsince; | |
uint_least8_t paragraph_level; | |
paragraph_level = get_paragraph_level(override, false, buf, buflen); | |
@@ -600,7 +632,8 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_… | |
dirstat->level = (int_least8_t)paragraph_level; | |
dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; | |
dirstat->directional_isolate = false; | |
- overflow_isolate_count = overflow_embedding_count = valid_isolate_coun… | |
+ overflow_isolate_count = overflow_embedding_count = | |
+ valid_isolate_count = 0; | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
state_deserialize(buf[bufoff], &s); | |
@@ -608,79 +641,105 @@ get_paragraph_embedding_levels(enum grapheme_bidirection… | |
again: | |
if (tmp_prop == BIDI_PROP_RLE) { | |
/* X2 */ | |
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid RLE */ | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 != 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA… | |
dirstat->directional_isolate = false; | |
} else { | |
/* overflow RLE */ | |
- overflow_embedding_count += (overflow_isolate_… | |
+ overflow_embedding_count += | |
+ (overflow_isolate_count == 0); | |
} | |
} else if (tmp_prop == BIDI_PROP_LRE) { | |
/* X3 */ | |
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid LRE */ | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 == 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA… | |
dirstat->directional_isolate = false; | |
} else { | |
/* overflow LRE */ | |
- overflow_embedding_count += (overflow_isolate_… | |
+ overflow_embedding_count += | |
+ (overflow_isolate_count == 0); | |
} | |
} else if (tmp_prop == BIDI_PROP_RLO) { | |
/* X4 */ | |
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid RLO */ | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 != 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; | |
dirstat->directional_isolate = false; | |
} else { | |
/* overflow RLO */ | |
- overflow_embedding_count += (overflow_isolate_… | |
+ overflow_embedding_count += | |
+ (overflow_isolate_count == 0); | |
} | |
} else if (tmp_prop == BIDI_PROP_LRO) { | |
/* X5 */ | |
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid LRO */ | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 == 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; | |
dirstat->directional_isolate = false; | |
} else { | |
/* overflow LRO */ | |
- overflow_embedding_count += (overflow_isolate_… | |
+ overflow_embedding_count += | |
+ (overflow_isolate_count == 0); | |
} | |
} else if (tmp_prop == BIDI_PROP_RLI) { | |
/* X5a */ | |
s.level = dirstat->level; | |
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI… | |
+ if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
s.prop = BIDI_PROP_L; | |
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL… | |
+ } else if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
s.prop = BIDI_PROP_R; | |
} | |
state_serialize(&s, &(buf[bufoff])); | |
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid RLI */ | |
valid_isolate_count++; | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 != 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA… | |
dirstat->directional_isolate = true; | |
} else { | |
/* overflow RLI */ | |
@@ -689,22 +748,28 @@ again: | |
} else if (tmp_prop == BIDI_PROP_LRI) { | |
/* X5b */ | |
s.level = dirstat->level; | |
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI… | |
+ if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
s.prop = BIDI_PROP_L; | |
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL… | |
+ } else if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
s.prop = BIDI_PROP_R; | |
} | |
state_serialize(&s, &(buf[bufoff])); | |
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= … | |
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
+ MAX_DEPTH && | |
overflow_isolate_count == 0 && | |
overflow_embedding_count == 0) { | |
/* valid LRI */ | |
valid_isolate_count++; | |
dirstat++; | |
- dirstat->level = (dirstat - 1)->level + ((dirs… | |
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE… | |
+ dirstat->level = | |
+ (dirstat - 1)->level + | |
+ ((dirstat - 1)->level % 2 == 0) + 1; | |
+ dirstat->override = | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA… | |
dirstat->directional_isolate = true; | |
} else { | |
/* overflow LRI */ | |
@@ -712,23 +777,27 @@ again: | |
} | |
} else if (tmp_prop == BIDI_PROP_FSI) { | |
/* X5c */ | |
- if (get_paragraph_level(GRAPHEME_BIDIRECTIONAL_OVERRID… | |
- buf + (bufoff + 1), buflen - (… | |
+ if (get_paragraph_level( | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL, | |
+ true, buf + (bufoff + 1), | |
+ buflen - (bufoff + 1)) == 1) { | |
tmp_prop = BIDI_PROP_RLI; | |
goto again; | |
} else { /* ... == 0 */ | |
tmp_prop = BIDI_PROP_LRI; | |
goto again; | |
} | |
- } else if (tmp_prop != BIDI_PROP_B && | |
- tmp_prop != BIDI_PROP_BN && | |
+ } else if (tmp_prop != BIDI_PROP_B && | |
+ tmp_prop != BIDI_PROP_BN && | |
tmp_prop != BIDI_PROP_PDF && | |
tmp_prop != BIDI_PROP_PDI) { | |
/* X6 */ | |
s.level = dirstat->level; | |
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI… | |
+ if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
s.prop = BIDI_PROP_L; | |
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL… | |
+ } else if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
s.prop = BIDI_PROP_R; | |
} | |
state_serialize(&s, &(buf[bufoff])); | |
@@ -773,9 +842,11 @@ again: | |
} | |
s.level = dirstat->level; | |
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI… | |
+ if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
s.prop = BIDI_PROP_L; | |
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL… | |
+ } else if (dirstat->override == | |
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
s.prop = BIDI_PROP_R; | |
} | |
state_serialize(&s, &(buf[bufoff])); | |
@@ -796,12 +867,9 @@ again: | |
} | |
/* X9 */ | |
- if (tmp_prop == BIDI_PROP_RLE || | |
- tmp_prop == BIDI_PROP_LRE || | |
- tmp_prop == BIDI_PROP_RLO || | |
- tmp_prop == BIDI_PROP_LRO || | |
- tmp_prop == BIDI_PROP_PDF || | |
- tmp_prop == BIDI_PROP_BN) { | |
+ if (tmp_prop == BIDI_PROP_RLE || tmp_prop == BIDI_PROP_LRE || | |
+ tmp_prop == BIDI_PROP_RLO || tmp_prop == BIDI_PROP_LRO || | |
+ tmp_prop == BIDI_PROP_PDF || tmp_prop == BIDI_PROP_BN) { | |
s.level = -1; | |
state_serialize(&s, &(buf[bufoff])); | |
} | |
@@ -811,8 +879,8 @@ again: | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
state_deserialize(buf[bufoff], &s); | |
if (!s.visited && s.level != -1) { | |
- bufoff += process_isolating_run_sequence(buf, buflen, … | |
- paragraph_lev… | |
+ bufoff += process_isolating_run_sequence( | |
+ buf, buflen, bufoff, paragraph_level); | |
} | |
} | |
@@ -823,7 +891,7 @@ again: | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
state_deserialize(buf[bufoff], &s); | |
- if (s.level % 2 == 0 ) { | |
+ if (s.level % 2 == 0) { | |
/* even level */ | |
if (s.prop == BIDI_PROP_R) { | |
s.level += 1; | |
@@ -833,8 +901,7 @@ again: | |
} | |
} else { | |
/* odd level */ | |
- if (s.prop == BIDI_PROP_L || | |
- s.prop == BIDI_PROP_EN || | |
+ if (s.prop == BIDI_PROP_L || s.prop == BIDI_PROP_EN || | |
s.prop == BIDI_PROP_AN) { | |
s.level += 1; | |
} | |
@@ -853,10 +920,8 @@ again: | |
continue; | |
} | |
- if (s.rawprop == BIDI_PROP_WS || | |
- s.rawprop == BIDI_PROP_FSI || | |
- s.rawprop == BIDI_PROP_LRI || | |
- s.rawprop == BIDI_PROP_RLI || | |
+ if (s.rawprop == BIDI_PROP_WS || s.rawprop == BIDI_PROP_FSI || | |
+ s.rawprop == BIDI_PROP_LRI || s.rawprop == BIDI_PROP_RLI || | |
s.rawprop == BIDI_PROP_PDI) { | |
if (runsince == SIZE_MAX) { | |
/* a new run has begun */ | |
@@ -878,8 +943,7 @@ again: | |
runsince = SIZE_MAX; | |
} | |
- if (s.rawprop == BIDI_PROP_S || | |
- s.rawprop == BIDI_PROP_B) { | |
+ if (s.rawprop == BIDI_PROP_S || s.rawprop == BIDI_PROP_B) { | |
s.level = (int_least8_t)paragraph_level; | |
state_serialize(&s, &(buf[bufoff])); | |
} | |
@@ -902,7 +966,8 @@ again: | |
} | |
static size_t | |
-get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override… | |
+get_embedding_levels(HERODOTUS_READER *r, | |
+ enum grapheme_bidirectional_override override, | |
int_least32_t *buf, size_t buflen) | |
{ | |
struct state s; | |
@@ -911,8 +976,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid… | |
if (buf == NULL) { | |
for (; herodotus_read_codepoint(r, true, &cp) == | |
- HERODOTUS_STATUS_SUCCESS;) | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
; | |
+ } | |
/* see below for return value reasoning */ | |
return herodotus_reader_number_read(r); | |
@@ -922,8 +988,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid… | |
* the first step is to determine the bidirectional properties | |
* and store them in the buffer | |
*/ | |
- for (bufoff = 0; herodotus_read_codepoint(r, true, &cp) == | |
- HERODOTUS_STATUS_SUCCESS; bufoff++) { | |
+ for (bufoff = 0; | |
+ herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCES… | |
+ bufoff++) { | |
if (bufoff < buflen) { | |
/* | |
* actually only do something when we have | |
@@ -974,9 +1041,10 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_b… | |
} | |
size_t | |
-grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t … | |
- enum grapheme_bidirectional_overri… | |
- int_least32_t *dest, size_t destle… | |
+grapheme_get_bidirectional_embedding_levels( | |
+ const uint_least32_t *src, size_t srclen, | |
+ enum grapheme_bidirectional_override override, int_least32_t *dest, | |
+ size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
@@ -986,9 +1054,10 @@ grapheme_get_bidirectional_embedding_levels(const uint_le… | |
} | |
size_t | |
-grapheme_get_bidirectional_embedding_levels_utf8(const char *src, size_t srcle… | |
- enum grapheme_bidirectional_o… | |
- int_least32_t *dest, size_t d… | |
+grapheme_get_bidirectional_embedding_levels_utf8( | |
+ const char *src, size_t srclen, | |
+ enum grapheme_bidirectional_override override, int_least32_t *dest, | |
+ size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
diff --git a/src/case.c b/src/case.c | |
@@ -2,8 +2,8 @@ | |
#include <stddef.h> | |
#include <stdint.h> | |
-#include "../grapheme.h" | |
#include "../gen/case.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
static inline enum case_property | |
@@ -11,7 +11,7 @@ get_case_property(uint_least32_t cp) | |
{ | |
if (likely(cp <= UINT32_C(0x10FFFF))) { | |
return (enum case_property) | |
- case_minor[case_major[cp >> 8] + (cp & 0xFF)]; | |
+ case_minor[case_major[cp >> 8] + (cp & 0xFF)]; | |
} else { | |
return CASE_PROP_OTHER; | |
} | |
@@ -45,58 +45,64 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, | |
uint_least32_t cp, tmp_cp; | |
int_least32_t map; | |
- for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCC… | |
+ for (; herodotus_read_codepoint(r, true, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
if (sc == lower_special) { | |
/* | |
- * For the special Final_Sigma-rule (see SpecialCasing… | |
- * which is the only non-localized case-dependent rule, | |
- * we apply a different mapping when a sigma is at the | |
- * end of a word. | |
+ * For the special Final_Sigma-rule (see | |
+ * SpecialCasing.txt), which is the only non-localized | |
+ * case-dependent rule, we apply a different mapping | |
+ * when a sigma is at the end of a word. | |
* | |
* Before: cased case-ignorable* | |
* After: not(case-ignorable* cased) | |
* | |
- * We check the after-condition on demand, but the bef… | |
- * condition is best checked using the "level"-heurist… | |
- * also used in the sentence and line breaking-impleme… | |
+ * We check the after-condition on demand, but the | |
+ * before-condition is best checked using the | |
+ * "level"-heuristic also used in the sentence and line | |
+ * breaking-implementations. | |
*/ | |
- if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER … | |
+ if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER | |
+ SIGMA */ | |
(final_sigma_level == 1 || | |
final_sigma_level == 2)) { | |
/* | |
* check succeeding characters by first skippi… | |
- * all case-ignorable characters and then chec… | |
- * if the succeeding character is cased, inval… | |
- * the after-condition | |
+ * all case-ignorable characters and then | |
+ * checking if the succeeding character is | |
+ * cased, invalidating the after-condition | |
*/ | |
herodotus_reader_copy(r, &tmp); | |
for (prop = NUM_CASE_PROPS; | |
- (s = herodotus_read_codepoint(&tmp, true,… | |
- HERODOTUS_STATUS_SUCCESS; ) { | |
+ (s = herodotus_read_codepoint(&tmp, true, | |
+ &tmp_cp)) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
prop = get_case_property(tmp_cp); | |
if (prop != CASE_PROP_CASE_IGNORABLE && | |
prop != CASE_PROP_BOTH_CASED_CASE_… | |
- break; | |
+ break; | |
} | |
} | |
/* | |
- * Now prop is something other than case-ignor… | |
- * the source-string ended. | |
- * If it is something other than cased, we know | |
+ * Now prop is something other than | |
+ * case-ignorable or the source-string ended. … | |
+ * it is something other than cased, we know | |
* that the after-condition holds | |
*/ | |
if (s != HERODOTUS_STATUS_SUCCESS || | |
(prop != CASE_PROP_CASED && | |
prop != CASE_PROP_BOTH_CASED_CASE_IGNORAB… | |
/* | |
- * write GREEK SMALL LETTER FINAL SIGM… | |
- * destination | |
+ * write GREEK SMALL LETTER FINAL SIGMA | |
+ * to destination | |
+ */ | |
+ herodotus_write_codepoint( | |
+ w, UINT32_C(0x03C2)); | |
+ | |
+ /* reset Final_Sigma-state and continue | |
*/ | |
- herodotus_write_codepoint(w, UINT32_C(… | |
- | |
- /* reset Final_Sigma-state and continu… | |
final_sigma_level = 0; | |
continue; | |
} | |
@@ -110,11 +116,13 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, | |
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) { | |
/* sequence has begun */ | |
final_sigma_level = 1; | |
- } else if ((final_sigma_level == 1 || | |
- final_sigma_level == 2) && | |
- (prop == CASE_PROP_CASE_IGNORABLE || | |
- prop == CASE_PROP_BOTH_CASED_CASE_IGNORABL… | |
- /* case-ignorable sequence begins or continued… | |
+ } else if ( | |
+ (final_sigma_level == 1 || | |
+ final_sigma_level == 2) && | |
+ (prop == CASE_PROP_CASE_IGNORABLE || | |
+ prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE))… | |
+ /* case-ignorable sequence begins or continues | |
+ */ | |
final_sigma_level = 2; | |
} else { | |
/* sequence broke */ | |
@@ -134,8 +142,8 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, | |
} | |
} else { | |
/* we have a simple mapping */ | |
- herodotus_write_codepoint(w, (uint_least32_t) | |
- ((int_least32_t)cp + map)); | |
+ herodotus_write_codepoint( | |
+ w, (uint_least32_t)((int_least32_t)cp + map)); | |
} | |
} | |
@@ -168,14 +176,16 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) | |
for (; (nwb = herodotus_next_word_break(r)) > 0;) { | |
herodotus_reader_push_advance_limit(r, nwb); | |
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO… | |
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
/* check if we have a cased character */ | |
prop = get_case_property(cp); | |
if (prop == CASE_PROP_CASED || | |
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) { | |
break; | |
} else { | |
- /* write the data to the output verbatim, it i… | |
+ /* write the data to the output verbatim, if it | |
+ * permits */ | |
herodotus_write_codepoint(w, cp); | |
/* increment reader */ | |
@@ -199,9 +209,10 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) | |
* we encountered a cased character before the word | |
* break, convert it to titlecase | |
*/ | |
- herodotus_reader_push_advance_limit(r, | |
- herodotus_reader_next_codepoint_break(r)); | |
- to_case(r, w, 0, title_major, title_minor, title_speci… | |
+ herodotus_reader_push_advance_limit( | |
+ r, herodotus_reader_next_codepoint_break(r)); | |
+ to_case(r, w, 0, title_major, title_minor, | |
+ title_special); | |
herodotus_reader_pop_limit(r); | |
} | |
@@ -218,7 +229,8 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) | |
} | |
size_t | |
-grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t… | |
+grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, | |
+ uint_least32_t *dest, size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -230,7 +242,8 @@ grapheme_to_uppercase(const uint_least32_t *src, size_t src… | |
} | |
size_t | |
-grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t… | |
+grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, | |
+ uint_least32_t *dest, size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -242,7 +255,8 @@ grapheme_to_lowercase(const uint_least32_t *src, size_t src… | |
} | |
size_t | |
-grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t… | |
+grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, | |
+ uint_least32_t *dest, size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -254,7 +268,8 @@ grapheme_to_titlecase(const uint_least32_t *src, size_t src… | |
} | |
size_t | |
-grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t … | |
+grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, | |
+ size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -266,7 +281,8 @@ grapheme_to_uppercase_utf8(const char *src, size_t srclen, … | |
} | |
size_t | |
-grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t … | |
+grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, | |
+ size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -278,7 +294,8 @@ grapheme_to_lowercase_utf8(const char *src, size_t srclen, … | |
} | |
size_t | |
-grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t … | |
+grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, | |
+ size_t destlen) | |
{ | |
HERODOTUS_READER r; | |
HERODOTUS_WRITER w; | |
@@ -299,7 +316,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major, | |
uint_least32_t cp; | |
int_least32_t map; | |
- for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUC… | |
+ for (; herodotus_read_codepoint(r, false, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
/* get and handle case mapping */ | |
if (unlikely((map = get_case_offset(cp, major, minor)) >= | |
INT32_C(0x110000))) { | |
@@ -315,7 +333,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major, | |
goto done; | |
} else { | |
/* move forward */ | |
- herodotus_read_codepoint(r, tr… | |
+ herodotus_read_codepoint( | |
+ r, true, &cp); | |
} | |
} else { | |
/* | |
@@ -357,7 +376,8 @@ is_titlecase(HERODOTUS_READER *r, size_t *output) | |
for (; (nwb = herodotus_next_word_break(r)) > 0;) { | |
herodotus_reader_push_advance_limit(r, nwb); | |
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO… | |
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
/* check if we have a cased character */ | |
prop = get_case_property(cp); | |
if (prop == CASE_PROP_CASED || | |
@@ -384,17 +404,20 @@ is_titlecase(HERODOTUS_READER *r, size_t *output) | |
* we encountered a cased character before the word | |
* break, check if it's titlecase | |
*/ | |
- herodotus_reader_push_advance_limit(r, | |
- herodotus_reader_next_codepoint_break(r)); | |
- if (!is_case(r, title_major, title_minor, title_specia… | |
+ herodotus_reader_push_advance_limit( | |
+ r, herodotus_reader_next_codepoint_break(r)); | |
+ if (!is_case(r, title_major, title_minor, title_specia… | |
+ NULL)) { | |
ret = false; | |
goto done; | |
} | |
herodotus_reader_pop_limit(r); | |
} | |
- /* check if the rest of the codepoints in the word are lowerca… | |
- if (!is_case(r, lower_major, lower_minor, lower_special, NULL)… | |
+ /* check if the rest of the codepoints in the word are lowerca… | |
+ */ | |
+ if (!is_case(r, lower_major, lower_minor, lower_special, | |
+ NULL)) { | |
ret = false; | |
goto done; | |
} | |
diff --git a/src/character.c b/src/character.c | |
@@ -16,83 +16,80 @@ struct character_break_state { | |
static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = { | |
[CHAR_BREAK_PROP_OTHER] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
- [CHAR_BREAK_PROP_CR] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ [CHAR_BREAK_PROP_CR] = UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */ | |
[CHAR_BREAK_PROP_EXTEND] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_HANGUL_L] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_HANGUL_V] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_HANGUL_T] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_HANGUL_LV] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_HANGUL_LVT] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_PREPEND] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */ | |
(UINT16_C(0xFFFF) & | |
- ~(UINT16_C(1) << CHAR_BREAK_PROP_CR | | |
- UINT16_C(1) << CHAR_BREAK_PROP_LF | | |
- UINT16_C(1) << CHAR_BREAK_PROP_CONTROL | |
- ) | |
- ), /* GB9b */ | |
+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR | | |
+ UINT16_C(1) << CHAR_BREAK_PROP_LF | | |
+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL)), /* GB9b */ | |
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_SPACINGMARK] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
[CHAR_BREAK_PROP_ZWJ] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ | |
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ | |
}; | |
static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = { | |
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | | |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND, | |
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] = | |
UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC, | |
[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | | |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | | |
UINT16_C(1) << CHAR_BREAK_PROP_ZWJ, | |
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] = | |
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | | |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | | |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND, | |
}; | |
static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = { | |
@@ -113,7 +110,8 @@ get_break_prop(uint_least32_t cp) | |
{ | |
if (likely(cp <= UINT32_C(0x10FFFF))) { | |
return (enum char_break_property) | |
- char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF… | |
+ char_break_minor[char_break_major[cp >> 8] + | |
+ (cp & 0xFF)]; | |
} else { | |
return CHAR_BREAK_PROP_OTHER; | |
} | |
@@ -122,23 +120,27 @@ get_break_prop(uint_least32_t cp) | |
static inline void | |
state_serialize(const struct character_break_state *in, uint_least16_t *out) | |
{ | |
- *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | … | |
- (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8) | … | |
- (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9) | … | |
- (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10); … | |
+ *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */ | |
+ (uint_least16_t)(((uint_least16_t)(in->prop_set)) | |
+ << 8) | /* 9th bit */ | |
+ (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) | |
+ << 9) | /* 10th bit */ | |
+ (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) | |
+ << 10); /* 11th bit */ | |
} | |
static inline void | |
state_deserialize(uint_least16_t in, struct character_break_state *out) | |
{ | |
- out->prop = in & UINT8_C(0xFF); | |
- out->prop_set = in & (UINT16_C(1) << 8); | |
- out->gb11_flag = in & (UINT16_C(1) << 9); | |
+ out->prop = in & UINT8_C(0xFF); | |
+ out->prop_set = in & (UINT16_C(1) << 8); | |
+ out->gb11_flag = in & (UINT16_C(1) << 9); | |
out->gb12_13_flag = in & (UINT16_C(1) << 10); | |
} | |
bool | |
-grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least… | |
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, | |
+ uint_least16_t *s) | |
{ | |
struct character_break_state state; | |
enum char_break_property cp0_prop, cp1_prop; | |
@@ -161,23 +163,26 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas… | |
/* update flags */ | |
state.gb11_flag = | |
flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS * | |
- state.gb11_flag] & | |
+ state.gb11_flag] & | |
UINT16_C(1) << cp1_prop; | |
state.gb12_13_flag = | |
- flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS * | |
- state.gb12_13_flag] & | |
- UINT16_C(1) << cp1_prop; | |
+ flag_update_gb12_13[cp0_prop + | |
+ NUM_CHAR_BREAK_PROPS * | |
+ state.gb12_13_flag] & | |
+ UINT16_C(1) << cp1_prop; | |
/* | |
* Apply grapheme cluster breaking algorithm (UAX #29), see | |
* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_… | |
*/ | |
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) … | |
- (dont_break_gb11[cp0_prop + state.gb11_flag * | |
- NUM_CHAR_BREAK_PROPS] & | |
+ (dont_break_gb11[cp0_prop + | |
+ state.gb11_flag * | |
+ NUM_CHAR_BREAK_PROPS] & | |
(UINT16_C(1) << cp1_prop)) || | |
- (dont_break_gb12_13[cp0_prop + state.gb12_13_flag * | |
- NUM_CHAR_BREAK_PROPS] & | |
+ (dont_break_gb12_13[cp0_prop + | |
+ state.gb12_13_flag * | |
+ NUM_CHAR_BREAK_PROPS] & | |
(UINT16_C(1) << cp1_prop)); | |
/* update or reset flags (when we have a break) */ | |
@@ -198,8 +203,10 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least… | |
* were all set to false | |
*/ | |
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) … | |
- (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_pr… | |
- (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1… | |
+ (dont_break_gb11[cp0_prop] & | |
+ (UINT16_C(1) << cp1_prop)) || | |
+ (dont_break_gb12_13[cp0_prop] & | |
+ (UINT16_C(1) << cp1_prop)); | |
} | |
return !notbreak; | |
@@ -212,7 +219,8 @@ next_character_break(HERODOTUS_READER *r) | |
uint_least32_t cp0 = 0, cp1 = 0; | |
for (herodotus_read_codepoint(r, true, &cp0); | |
- herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCC… | |
+ herodotus_read_codepoint(r, false, &cp1) == | |
+ HERODOTUS_STATUS_SUCCESS; | |
herodotus_read_codepoint(r, true, &cp0)) { | |
if (grapheme_is_character_break(cp0, cp1, &state)) { | |
break; | |
diff --git a/src/line.c b/src/line.c | |
@@ -11,7 +11,8 @@ get_break_prop(uint_least32_t cp) | |
{ | |
if (likely(cp <= UINT32_C(0x10FFFF))) { | |
return (enum line_break_property) | |
- line_break_minor[line_break_major[cp >> 8] + (cp & 0xff… | |
+ line_break_minor[line_break_major[cp >> 8] + | |
+ (cp & 0xff)]; | |
} else { | |
return LINE_BREAK_PROP_AL; | |
} | |
@@ -22,7 +23,7 @@ next_line_break(HERODOTUS_READER *r) | |
{ | |
HERODOTUS_READER tmp; | |
enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop, | |
- last_non_sp_prop, last_non_sp_cm_or_zwj_prop; | |
+ last_non_sp_prop, last_non_sp_cm_or_zwj_prop; | |
uint_least32_t cp; | |
uint_least8_t lb25_level = 0; | |
bool lb21a_flag = false, ri_even = true; | |
@@ -43,8 +44,10 @@ next_line_break(HERODOTUS_READER *r) | |
last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */ | |
last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS; | |
- for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop… | |
- herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCE… | |
+ for (herodotus_read_codepoint(r, true, &cp), | |
+ cp0_prop = get_break_prop(cp); | |
+ herodotus_read_codepoint(r, false, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS; | |
herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) { | |
/* get property of the right codepoint */ | |
cp1_prop = get_break_prop(cp); | |
@@ -59,10 +62,11 @@ next_line_break(HERODOTUS_READER *r) | |
cp0_prop != LINE_BREAK_PROP_ZWJ) { | |
/* | |
* check if the property we are overwriting now is an | |
- * HL. If so, we set the LB21a-flag which depends on t… | |
- * knowledge. | |
+ * HL. If so, we set the LB21a-flag which depends on | |
+ * this knowledge. | |
*/ | |
- lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PR… | |
+ lb21a_flag = | |
+ (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL… | |
/* check regional indicator state */ | |
if (cp0_prop == LINE_BREAK_PROP_RI) { | |
@@ -109,8 +113,7 @@ next_line_break(HERODOTUS_READER *r) | |
* and one (CL | CP) to the left of the middle | |
* spot | |
*/ | |
- if ((lb25_level == 0 || | |
- lb25_level == 1) && | |
+ if ((lb25_level == 0 || lb25_level == 1) && | |
cp0_prop == LINE_BREAK_PROP_NU) { | |
/* sequence has begun */ | |
lb25_level = 1; | |
@@ -118,12 +121,15 @@ next_line_break(HERODOTUS_READER *r) | |
(cp0_prop == LINE_BREAK_PROP_NU || | |
cp0_prop == LINE_BREAK_PROP_SY || | |
cp0_prop == LINE_BREAK_PROP_IS)) { | |
- /* (NU | SY | IS) sequence begins or continued… | |
+ /* (NU | SY | IS) sequence begins or continued | |
+ */ | |
lb25_level = 2; | |
- } else if ((lb25_level == 1 || lb25_level == 2) && | |
- (cp0_prop == LINE_BREAK_PROP_CL … | |
- cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW… | |
- cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HW… | |
+ } else if ( | |
+ (lb25_level == 1 || lb25_level == 2) && | |
+ (cp0_prop == LINE_BREAK_PROP_CL || | |
+ cp0_prop == | |
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || | |
+ cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF))… | |
/* CL or CP at the end of the sequence */ | |
lb25_level = 3; | |
} else { | |
@@ -229,17 +235,19 @@ next_line_break(HERODOTUS_READER *r) | |
/* LB13 (affected by tailoring for LB25, see example 7) */ | |
if (cp1_prop == LINE_BREAK_PROP_EX || | |
(last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU && | |
- (cp1_prop == LINE_BREAK_PROP_CL || | |
+ (cp1_prop == LINE_BREAK_PROP_CL || | |
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || | |
- cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF || | |
- cp1_prop == LINE_BREAK_PROP_IS || | |
+ cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF || | |
+ cp1_prop == LINE_BREAK_PROP_IS || | |
cp1_prop == LINE_BREAK_PROP_SY))) { | |
continue; | |
} | |
/* LB14 */ | |
- if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_E… | |
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_… | |
+ if (last_non_sp_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || | |
+ last_non_sp_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF) { | |
continue; | |
} | |
@@ -251,9 +259,11 @@ next_line_break(HERODOTUS_READER *r) | |
} | |
/* LB16 */ | |
- if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL … | |
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_… | |
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW… | |
+ if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL || | |
+ last_non_sp_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || | |
+ last_non_sp_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_CP_WITH_EAW_HWF) && | |
cp1_prop == LINE_BREAK_PROP_NS) { | |
continue; | |
} | |
@@ -308,7 +318,7 @@ next_line_break(HERODOTUS_READER *r) | |
} | |
/* LB23 */ | |
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && | |
cp1_prop == LINE_BREAK_PROP_NU) { | |
continue; | |
@@ -336,11 +346,11 @@ next_line_break(HERODOTUS_READER *r) | |
/* LB24 */ | |
if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) && | |
- (cp1_prop == LINE_BREAK_PROP_AL || | |
+ (cp1_prop == LINE_BREAK_PROP_AL || | |
cp1_prop == LINE_BREAK_PROP_HL)) { | |
continue; | |
} | |
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && | |
(cp1_prop == LINE_BREAK_PROP_PR || | |
cp1_prop == LINE_BREAK_PROP_PO)) { | |
@@ -362,32 +372,33 @@ next_line_break(HERODOTUS_READER *r) | |
herodotus_reader_copy(r, &tmp); | |
herodotus_read_codepoint(&tmp, true, &cp); | |
if (herodotus_read_codepoint(&tmp, true, &cp) == | |
- HERODOTUS_STATUS_SUCCESS && | |
+ HERODOTUS_STATUS_SUCCESS && | |
(cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || | |
- cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || | |
+ cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || | |
cp1_prop == LINE_BREAK_PROP_HY)) { | |
if (get_break_prop(cp) == LINE_BREAK_PROP_NU) { | |
continue; | |
} | |
} | |
} | |
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW… | |
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HW… | |
+ if ((last_non_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || | |
+ last_non_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) && | |
cp1_prop == LINE_BREAK_PROP_NU) { | |
continue; | |
} | |
- if (lb25_level == 1 && | |
- (cp1_prop == LINE_BREAK_PROP_NU || | |
- cp1_prop == LINE_BREAK_PROP_SY || | |
- cp1_prop == LINE_BREAK_PROP_IS)) { | |
+ if (lb25_level == 1 && (cp1_prop == LINE_BREAK_PROP_NU || | |
+ cp1_prop == LINE_BREAK_PROP_SY || | |
+ cp1_prop == LINE_BREAK_PROP_IS)) { | |
continue; | |
} | |
if ((lb25_level == 1 || lb25_level == 2) && | |
- (cp1_prop == LINE_BREAK_PROP_NU || | |
- cp1_prop == LINE_BREAK_PROP_SY || | |
- cp1_prop == LINE_BREAK_PROP_IS || | |
- cp1_prop == LINE_BREAK_PROP_CL || | |
+ (cp1_prop == LINE_BREAK_PROP_NU || | |
+ cp1_prop == LINE_BREAK_PROP_SY || | |
+ cp1_prop == LINE_BREAK_PROP_IS || | |
+ cp1_prop == LINE_BREAK_PROP_CL || | |
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || | |
cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) { | |
continue; | |
@@ -437,37 +448,37 @@ next_line_break(HERODOTUS_READER *r) | |
} | |
/* LB28 */ | |
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && | |
- (cp1_prop == LINE_BREAK_PROP_AL || | |
+ (cp1_prop == LINE_BREAK_PROP_AL || | |
cp1_prop == LINE_BREAK_PROP_HL)) { | |
continue; | |
} | |
/* LB29 */ | |
if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS && | |
- (cp1_prop == LINE_BREAK_PROP_AL || | |
+ (cp1_prop == LINE_BREAK_PROP_AL || | |
cp1_prop == LINE_BREAK_PROP_HL)) { | |
continue; | |
} | |
/* LB30 */ | |
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL || | |
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || | |
+ last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL || | |
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) && | |
cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) { | |
continue; | |
} | |
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_… | |
- (cp1_prop == LINE_BREAK_PROP_AL || | |
- cp1_prop == LINE_BREAK_PROP_HL || | |
+ if (last_non_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF && | |
+ (cp1_prop == LINE_BREAK_PROP_AL || | |
+ cp1_prop == LINE_BREAK_PROP_HL || | |
cp1_prop == LINE_BREAK_PROP_NU)) { | |
continue; | |
} | |
/* LB30a */ | |
- if (!ri_even && | |
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI && | |
+ if (!ri_even && last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI … | |
cp1_prop == LINE_BREAK_PROP_RI) { | |
continue; | |
} | |
@@ -477,7 +488,8 @@ next_line_break(HERODOTUS_READER *r) | |
cp1_prop == LINE_BREAK_PROP_EM) { | |
continue; | |
} | |
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT… | |
+ if (last_non_cm_or_zwj_prop == | |
+ LINE_BREAK_PROP_BOTH_CN_EXTPICT && | |
cp1_prop == LINE_BREAK_PROP_EM) { | |
continue; | |
} | |
diff --git a/src/sentence.c b/src/sentence.c | |
@@ -6,8 +6,7 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
-struct sentence_break_state | |
-{ | |
+struct sentence_break_state { | |
uint_least8_t aterm_close_sp_level; | |
uint_least8_t saterm_close_sp_parasep_level; | |
}; | |
@@ -17,8 +16,8 @@ get_sentence_break_prop(uint_least32_t cp) | |
{ | |
if (likely(cp <= UINT32_C(0x10FFFF))) { | |
return (uint_least8_t) | |
- sentence_break_minor[sentence_break_major[cp >> 8] + | |
- (cp & 0xff)]; | |
+ sentence_break_minor[sentence_break_major[cp >> 8] + | |
+ (cp & 0xff)]; | |
} else { | |
return SENTENCE_BREAK_PROP_OTHER; | |
} | |
@@ -80,7 +79,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) | |
state->aterm_close_sp_level = 2; | |
} else if ((state->aterm_close_sp_level == 1 || | |
state->aterm_close_sp_level == 2 || | |
- state->aterm_close_sp_level == 3) && | |
+ state->aterm_close_sp_level == 3) && | |
prop == SENTENCE_BREAK_PROP_SP) { | |
/* sp-sequence begins or continued */ | |
state->aterm_close_sp_level = 3; | |
@@ -102,7 +101,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) | |
state->saterm_close_sp_parasep_level = 2; | |
} else if ((state->saterm_close_sp_parasep_level == 1 || | |
state->saterm_close_sp_parasep_level == 2 || | |
- state->saterm_close_sp_parasep_level == 3) && | |
+ state->saterm_close_sp_parasep_level == 3) && | |
prop == SENTENCE_BREAK_PROP_SP) { | |
/* sp-sequence begins or continued */ | |
state->saterm_close_sp_parasep_level = 3; | |
@@ -110,7 +109,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) | |
state->saterm_close_sp_parasep_level == 2 || | |
state->saterm_close_sp_parasep_level == 3) && | |
(prop == SENTENCE_BREAK_PROP_SEP || | |
- prop == SENTENCE_BREAK_PROP_CR || | |
+ prop == SENTENCE_BREAK_PROP_CR || | |
prop == SENTENCE_BREAK_PROP_LF)) { | |
/* ParaSep at the end of the sequence */ | |
state->saterm_close_sp_parasep_level = 4; | |
@@ -146,7 +145,7 @@ next_sentence_break(HERODOTUS_READER *r) | |
/* SB4 */ | |
if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP || | |
- p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR || | |
+ p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR || | |
p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) { | |
break; | |
} | |
@@ -179,7 +178,8 @@ next_sentence_break(HERODOTUS_READER *r) | |
* This is the most complicated rule, requiring | |
* the right-hand-side to satisfy the regular expressi… | |
* | |
- * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )… | |
+ * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* | |
+ * Lower | |
* | |
* which we simply check "manually" given LUT-lookups | |
* are very cheap by starting at the mid_reader. | |
@@ -198,12 +198,12 @@ next_sentence_break(HERODOTUS_READER *r) | |
* match the following condition | |
*/ | |
if (prop == SENTENCE_BREAK_PROP_OLETTER || | |
- prop == SENTENCE_BREAK_PROP_UPPER || | |
- prop == SENTENCE_BREAK_PROP_LOWER || | |
- prop == SENTENCE_BREAK_PROP_SEP || | |
- prop == SENTENCE_BREAK_PROP_CR || | |
- prop == SENTENCE_BREAK_PROP_LF || | |
- prop == SENTENCE_BREAK_PROP_STERM || | |
+ prop == SENTENCE_BREAK_PROP_UPPER || | |
+ prop == SENTENCE_BREAK_PROP_LOWER || | |
+ prop == SENTENCE_BREAK_PROP_SEP || | |
+ prop == SENTENCE_BREAK_PROP_CR || | |
+ prop == SENTENCE_BREAK_PROP_LF || | |
+ prop == SENTENCE_BREAK_PROP_STERM || | |
prop == SENTENCE_BREAK_PROP_ATERM) { | |
break; | |
} | |
@@ -219,8 +219,8 @@ next_sentence_break(HERODOTUS_READER *r) | |
state.saterm_close_sp_parasep_level == 2 || | |
state.saterm_close_sp_parasep_level == 3) && | |
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) { | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM || | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) { | |
continue; | |
} | |
@@ -228,9 +228,9 @@ next_sentence_break(HERODOTUS_READER *r) | |
if ((state.saterm_close_sp_parasep_level == 1 || | |
state.saterm_close_sp_parasep_level == 2) && | |
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP || | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || | |
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) { | |
continue; | |
} | |
@@ -239,9 +239,9 @@ next_sentence_break(HERODOTUS_READER *r) | |
if ((state.saterm_close_sp_parasep_level == 1 || | |
state.saterm_close_sp_parasep_level == 2 || | |
state.saterm_close_sp_parasep_level == 3) && | |
- (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || | |
+ (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || | |
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP || | |
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || | |
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || | |
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) { | |
continue; | |
} | |
diff --git a/src/utf8.c b/src/utf8.c | |
@@ -9,14 +9,14 @@ | |
/* lookup-table for the types of sequence first bytes */ | |
static const struct { | |
- uint_least8_t lower; /* lower bound of sequence first byte */ | |
- uint_least8_t upper; /* upper bound of sequence first byte */ | |
+ uint_least8_t lower; /* lower bound of sequence first byte */ | |
+ uint_least8_t upper; /* upper bound of sequence first byte */ | |
uint_least32_t mincp; /* smallest non-overlong encoded codepoint */ | |
uint_least32_t maxcp; /* largest encodable codepoint */ | |
- /* | |
- * implicit: table-offset represents the number of following | |
- * bytes of the form 10xxxxxx (6 bits capacity each) | |
- */ | |
+ /* | |
+ * implicit: table-offset represents the number … | |
+ * bytes of the form 10xxxxxx (6 bits capacity e… | |
+ */ | |
} lut[] = { | |
[0] = { | |
/* 0xxxxxxx */ | |
@@ -104,8 +104,8 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas… | |
* sequence starter occurs right before a NUL-byte. | |
*/ | |
for (i = 0; 1 + i < len; i++) { | |
- if(!BETWEEN(((const unsigned char *)str)[1 + i], | |
- 0x80, 0xBF)) { | |
+ if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80, | |
+ 0xBF)) { | |
break; | |
} | |
} | |
@@ -124,7 +124,7 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas… | |
* (i.e. between 0x80 (10000000) and 0xBF (10111111)) | |
*/ | |
for (i = 1; i <= off; i++) { | |
- if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) { | |
+ if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) { | |
/* | |
* byte does not match format; return | |
* number of bytes processed excluding the | |
@@ -201,8 +201,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l… | |
* We do not overwrite the mask because we guaranteed earlier | |
* that there are no bits higher than the mask allows. | |
*/ | |
- ((unsigned char *)str)[0] = lut[off].lower | | |
- (uint_least8_t)(cp >> (6 * off)); | |
+ ((unsigned char *)str)[0] = | |
+ lut[off].lower | (uint_least8_t)(cp >> (6 * off)); | |
for (i = 1; i <= off; i++) { | |
/* | |
@@ -211,8 +211,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l… | |
* extract from the properly-shifted value using the | |
* mask 00111111 (0x3F) | |
*/ | |
- ((unsigned char *)str)[i] = 0x80 | | |
- ((cp >> (6 * (off - i))) & 0x3F); | |
+ ((unsigned char *)str)[i] = | |
+ 0x80 | ((cp >> (6 * (off - i))) & 0x3F); | |
} | |
return 1 + off; | |
diff --git a/src/util.c b/src/util.c | |
@@ -37,16 +37,20 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTU… | |
*/ | |
dest->type = src->type; | |
if (src->type == HERODOTUS_TYPE_CODEPOINT) { | |
- dest->src = (src->src == NULL) ? NULL : | |
- ((const uint_least32_t *)(src->src)) + src->off; | |
+ dest->src = | |
+ (src->src == NULL) ? | |
+ NULL : | |
+ ((const uint_least32_t *)(src->src)) + src->of… | |
} else { /* src->type == HERODOTUS_TYPE_UTF8 */ | |
- dest->src = (src->src == NULL) ? NULL : | |
- ((const char *)(src->src)) + src->off; | |
+ dest->src = (src->src == NULL) ? | |
+ NULL : | |
+ ((const char *)(src->src)) + src->off; | |
} | |
if (src->srclen == SIZE_MAX) { | |
dest->srclen = SIZE_MAX; | |
} else { | |
- dest->srclen = (src->off < src->srclen) ? src->srclen - src->o… | |
+ dest->srclen = | |
+ (src->off < src->srclen) ? src->srclen - src->off : 0; | |
} | |
dest->off = 0; | |
dest->terminated_by_null = src->terminated_by_null; | |
@@ -62,8 +66,10 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS… | |
* to release the limit and, instead, we just | |
* prevent any more reads | |
*/ | |
- dest->soft_limit[i] = (src->off < src->soft_limit[i]) ? | |
- src->soft_limit[i] - src->off : 0; | |
+ dest->soft_limit[i] = | |
+ (src->off < src->soft_limit[i]) ? | |
+ src->soft_limit[i] - src->off : | |
+ 0; | |
} | |
} | |
} | |
@@ -141,9 +147,9 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,… | |
*cp = ((const uint_least32_t *)(r->src))[r->off]; | |
ret = 1; | |
} else { /* r->type == HERODOTUS_TYPE_UTF8 */ | |
- ret = grapheme_decode_utf8((const char *)r->src + r->off, | |
- MIN(r->srclen, r->soft_limit[0]) - | |
- r->off, cp); | |
+ ret = grapheme_decode_utf8( | |
+ (const char *)r->src + r->off, | |
+ MIN(r->srclen, r->soft_limit[0]) - r->off, cp); | |
} | |
if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) { | |
@@ -176,8 +182,8 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,… | |
} | |
void | |
-herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, | |
- void *dest, size_t destlen) | |
+herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, void *des… | |
+ size_t destlen) | |
{ | |
w->type = type; | |
w->dest = dest; | |
@@ -212,8 +218,8 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w) | |
* (the last case meaning truncation). | |
*/ | |
if (w->type == HERODOTUS_TYPE_CODEPOINT) { | |
- ((uint_least32_t *)(w->dest)) | |
- [w->first_unwritable_offset] = 0; | |
+ ((uint_least32_t | |
+ *)(w->dest))[w->first_unwritable_offset] = 0; | |
} else { /* w->type == HERODOTUS_TYPE_UTF8 */ | |
((char *)(w->dest))[w->first_unwritable_offset] = '\0'; | |
} | |
@@ -226,8 +232,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w) | |
* byte. | |
*/ | |
if (w->type == HERODOTUS_TYPE_CODEPOINT) { | |
- ((uint_least32_t *)(w->dest)) | |
- [w->destlen - 1] = 0; | |
+ ((uint_least32_t *)(w->dest))[w->destlen - 1] = 0; | |
} else { /* w->type == HERODOTUS_TYPE_UTF8 */ | |
((char *)(w->dest))[w->destlen - 1] = '\0'; | |
} | |
@@ -267,8 +272,8 @@ herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32… | |
if (w->dest != NULL && w->off + ret < w->destlen) { | |
/* we still have enough room in the buffer */ | |
- grapheme_encode_utf8(cp, (char *)(w->dest) + | |
- w->off, w->destlen - w->off); | |
+ grapheme_encode_utf8(cp, (char *)(w->dest) + w->off, | |
+ w->destlen - w->off); | |
} else if (w->first_unwritable_offset == SIZE_MAX) { | |
/* | |
* the first unwritable offset has not been | |
@@ -328,8 +333,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le… | |
/* fill in the two next raw properties (after no-initialization) */ | |
p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop; | |
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, … | |
- HERODOTUS_STATUS_SUCCESS; ) { | |
+ for (i = 0; | |
+ i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
p->raw.next_prop[i++] = p->get_break_prop(cp); | |
} | |
@@ -338,8 +344,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le… | |
/* fill in the two next skip properties (after no-initialization) */ | |
p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop; | |
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true,… | |
- HERODOTUS_STATUS_SUCCESS; ) { | |
+ for (i = 0; | |
+ i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS;) { | |
prop = p->get_break_prop(cp); | |
if (!p->is_skippable_prop(prop)) { | |
p->skip.next_prop[i++] = prop; | |
diff --git a/src/util.h b/src/util.h | |
@@ -10,25 +10,25 @@ | |
#include "../grapheme.h" | |
#undef MIN | |
-#define MIN(x,y) ((x) < (y) ? (x) : (y)) | |
+#define MIN(x, y) ((x) < (y) ? (x) : (y)) | |
#undef MAX | |
-#define MAX(x,y) ((x) > (y) ? (x) : (y)) | |
+#define MAX(x, y) ((x) > (y) ? (x) : (y)) | |
#undef LEN | |
#define LEN(x) (sizeof(x) / sizeof(*(x))) | |
#undef likely | |
#undef unlikely | |
#ifdef __has_builtin | |
- #if __has_builtin(__builtin_expect) | |
- #define likely(expr) __builtin_expect(!!(expr), 1) | |
- #define unlikely(expr) __builtin_expect(!!(expr), 0) | |
- #else | |
- #define likely(expr) (expr) | |
- #define unlikely(expr) (expr) | |
- #endif | |
+#if __has_builtin(__builtin_expect) | |
+#define likely(expr) __builtin_expect(!!(expr), 1) | |
+#define unlikely(expr) __builtin_expect(!!(expr), 0) | |
#else | |
- #define likely(expr) (expr) | |
- #define unlikely(expr) (expr) | |
+#define likely(expr) (expr) | |
+#define unlikely(expr) (expr) | |
+#endif | |
+#else | |
+#define likely(expr) (expr) | |
+#define unlikely(expr) (expr) | |
#endif | |
/* | |
@@ -84,6 +84,7 @@ struct proper { | |
uint_least8_t prev_prop[2]; | |
uint_least8_t next_prop[2]; | |
} raw, skip; | |
+ | |
HERODOTUS_READER mid_reader, raw_reader, skip_reader; | |
void *state; | |
uint_least8_t no_prop; | |
@@ -100,7 +101,8 @@ void herodotus_reader_pop_limit(HERODOTUS_READER *); | |
size_t herodotus_reader_number_read(const HERODOTUS_READER *); | |
size_t herodotus_reader_next_word_break(const HERODOTUS_READER *); | |
size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *); | |
-enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_… | |
+enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, | |
+ uint_least32_t *); | |
void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *, | |
size_t); | |
diff --git a/src/word.c b/src/word.c | |
@@ -6,8 +6,7 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
-struct word_break_state | |
-{ | |
+struct word_break_state { | |
bool ri_even; | |
}; | |
@@ -16,7 +15,8 @@ get_word_break_prop(uint_least32_t cp) | |
{ | |
if (likely(cp <= UINT32_C(0x10FFFF))) { | |
return (uint_least8_t) | |
- word_break_minor[word_break_major[cp >> 8] + (cp & 0xff… | |
+ word_break_minor[word_break_major[cp >> 8] + | |
+ (cp & 0xff)]; | |
} else { | |
return WORD_BREAK_PROP_OTHER; | |
} | |
@@ -26,8 +26,7 @@ static bool | |
is_skippable_word_prop(uint_least8_t prop) | |
{ | |
return prop == WORD_BREAK_PROP_EXTEND || | |
- prop == WORD_BREAK_PROP_FORMAT || | |
- prop == WORD_BREAK_PROP_ZWJ; | |
+ prop == WORD_BREAK_PROP_FORMAT || prop == WORD_BREAK_PROP_ZWJ; | |
} | |
static void | |
@@ -79,22 +78,24 @@ next_word_break(HERODOTUS_READER *r) | |
/* WB3a */ | |
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE || | |
- p.raw.prev_prop[0] == WORD_BREAK_PROP_CR || | |
+ p.raw.prev_prop[0] == WORD_BREAK_PROP_CR || | |
p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) { | |
break; | |
} | |
/* WB3b */ | |
if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE || | |
- p.raw.next_prop[0] == WORD_BREAK_PROP_CR || | |
+ p.raw.next_prop[0] == WORD_BREAK_PROP_CR || | |
p.raw.next_prop[0] == WORD_BREAK_PROP_LF) { | |
break; | |
} | |
/* WB3c */ | |
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ && | |
- (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPH… | |
- p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPIC… | |
+ (p.raw.next_prop[0] == | |
+ WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC || | |
+ p.raw.next_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) { | |
continue; | |
} | |
@@ -112,37 +113,43 @@ next_word_break(HERODOTUS_READER *r) | |
} | |
/* WB5 */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.prev_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.next_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) { | |
continue; | |
} | |
/* WB6 */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.prev_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER || | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER || | |
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && | |
- (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.next_prop[1] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) { | |
continue; | |
} | |
/* WB7 */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER || | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER || | |
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.next_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && | |
- (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.prev_prop[1] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) { | |
continue; | |
} | |
@@ -174,8 +181,9 @@ next_word_break(HERODOTUS_READER *r) | |
} | |
/* WB9 */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.prev_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && | |
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) { | |
continue; | |
@@ -183,15 +191,16 @@ next_word_break(HERODOTUS_READER *r) | |
/* WB10 */ | |
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.next_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) { | |
continue; | |
} | |
/* WB11 */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM || | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM || | |
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && | |
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC && | |
p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) { | |
@@ -200,8 +209,8 @@ next_word_break(HERODOTUS_READER *r) | |
/* WB12 */ | |
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM || | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM || | |
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && | |
p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) { | |
continue; | |
@@ -214,11 +223,12 @@ next_word_break(HERODOTUS_READER *r) | |
} | |
/* WB13a */ | |
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC … | |
- p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA … | |
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.prev_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || | |
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC || | |
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA || | |
p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) && | |
p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) { | |
continue; | |
@@ -226,10 +236,11 @@ next_word_break(HERODOTUS_READER *r) | |
/* WB13b */ | |
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET && | |
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER … | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI… | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER … | |
- p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC … | |
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || | |
+ p.skip.next_prop[0] == | |
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || | |
+ p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || | |
+ p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC || | |
p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) { | |
continue; | |
} | |
diff --git a/test/bidirectional.c b/test/bidirectional.c | |
@@ -25,14 +25,16 @@ main(int argc, char *argv[]) | |
for (i = 0, failed = 0; i < LEN(bidirectional_test); i++) { | |
/*if (i != 490798) | |
- continue;*/ | |
+ continue;*/ | |
for (m = 0; m < bidirectional_test[i].modelen; m++) { | |
ret = grapheme_get_bidirectional_embedding_levels( | |
- bidirectional_test[i].cp, bidirectional_test[i… | |
+ bidirectional_test[i].cp, | |
+ bidirectional_test[i].cplen, | |
bidirectional_test[i].mode[m], lev, levlen); | |
- if (ret != bidirectional_test[i].cplen || ret > levlen… | |
+ if (ret != bidirectional_test[i].cplen || | |
+ ret > levlen) { | |
goto err; | |
} | |
@@ -43,18 +45,22 @@ main(int argc, char *argv[]) | |
} | |
continue; | |
err: | |
- fprintf(stderr, "%s: Failed conformance test %zu (mode… | |
+ fprintf(stderr, | |
+ "%s: Failed conformance test %zu (mode %i) [", | |
argv[0], i, bidirectional_test[i].mode[m]); | |
for (j = 0; j < bidirectional_test[i].cplen; j++) { | |
- fprintf(stderr, " 0x%04" PRIXLEAST32, bidirect… | |
+ fprintf(stderr, " 0x%04" PRIXLEAST32, | |
+ bidirectional_test[i].cp[j]); | |
} | |
fprintf(stderr, " ],\n\tgot ("); | |
for (j = 0; j < ret; j++) { | |
- fprintf(stderr, " %" PRIdLEAST8, (int_least8_t… | |
+ fprintf(stderr, " %" PRIdLEAST8, | |
+ (int_least8_t)lev[j]); | |
} | |
fprintf(stderr, " ),\n\texpected ("); | |
for (j = 0; j < ret; j++) { | |
- fprintf(stderr, " %" PRIdLEAST8, bidirectional… | |
+ fprintf(stderr, " %" PRIdLEAST8, | |
+ bidirectional_test[i].level[j]); | |
} | |
fprintf(stderr, " ).\n"); | |
failed++; | |
diff --git a/test/case.c b/test/case.c | |
@@ -9,10 +9,12 @@ | |
struct unit_test_is_case_utf8 { | |
const char *description; | |
+ | |
struct { | |
const char *src; | |
size_t srclen; | |
} input; | |
+ | |
struct { | |
bool ret; | |
size_t caselen; | |
@@ -21,11 +23,13 @@ struct unit_test_is_case_utf8 { | |
struct unit_test_to_case_utf8 { | |
const char *description; | |
+ | |
struct { | |
const char *src; | |
size_t srclen; | |
size_t destlen; | |
} input; | |
+ | |
struct { | |
const char *dest; | |
size_t ret; | |
@@ -35,57 +39,69 @@ struct unit_test_to_case_utf8 { | |
static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0 }, | |
+ .input = { "", 0 }, | |
.output = { true, 0 }, | |
}, | |
{ | |
.description = "one character, violation", | |
- .input = { "A", 1 }, | |
+ .input = { "A", 1 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation", | |
- .input = { "\xC3\x9F", 2 }, | |
+ .input = { "\xC3\x9F", 2 }, | |
.output = { true, 2 }, | |
}, | |
{ | |
.description = "one character, violation, NUL-terminated", | |
- .input = { "A", SIZE_MAX }, | |
+ .input = { "A", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation, NUL-terminated", | |
- .input = { "\xC3\x9F", SIZE_MAX }, | |
+ .input = { "\xC3\x9F", SIZE_MAX }, | |
.output = { true, 2 }, | |
}, | |
{ | |
.description = "one word, violation", | |
- .input = { "Hello", 5 }, | |
+ .input = { "Hello", 5 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation", | |
- .input = { "gru" "\xC3\x9F" "fOrmel", 11 }, | |
+ .input = { "gru" | |
+ "\xC3\x9F" | |
+ "fOrmel", | |
+ 11 }, | |
.output = { false, 6 }, | |
}, | |
{ | |
.description = "one word, full confirmation", | |
- .input = { "gru" "\xC3\x9F" "formel", 11 }, | |
+ .input = { "gru" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ 11 }, | |
.output = { true, 11 }, | |
}, | |
{ | |
.description = "one word, violation, NUL-terminated", | |
- .input = { "Hello", SIZE_MAX }, | |
+ .input = { "Hello", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation, NUL-terminated… | |
- .input = { "gru" "\xC3\x9F" "fOrmel", SIZE_MAX }, | |
+ .input = { "gru" | |
+ "\xC3\x9F" | |
+ "fOrmel", | |
+ SIZE_MAX }, | |
.output = { false, 6 }, | |
}, | |
{ | |
.description = "one word, full confirmation, NUL-terminated", | |
- .input = { "gru" "\xC3\x9F" "formel", SIZE_MAX }, | |
+ .input = { "gru" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ SIZE_MAX }, | |
.output = { true, 11 }, | |
}, | |
}; | |
@@ -93,57 +109,63 @@ static const struct unit_test_is_case_utf8 is_lowercase_ut… | |
static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0 }, | |
+ .input = { "", 0 }, | |
.output = { true, 0 }, | |
}, | |
{ | |
.description = "one character, violation", | |
- .input = { "\xC3\x9F", 2 }, | |
+ .input = { "\xC3\x9F", 2 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation", | |
- .input = { "A", 1 }, | |
+ .input = { "A", 1 }, | |
.output = { true, 1 }, | |
}, | |
{ | |
.description = "one character, violation, NUL-terminated", | |
- .input = { "\xC3\x9F", SIZE_MAX }, | |
+ .input = { "\xC3\x9F", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation, NUL-terminated", | |
- .input = { "A", SIZE_MAX }, | |
+ .input = { "A", SIZE_MAX }, | |
.output = { true, 1 }, | |
}, | |
{ | |
.description = "one word, violation", | |
- .input = { "hello", 5 }, | |
+ .input = { "hello", 5 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation", | |
- .input = { "GRU" "\xC3\x9F" "formel", 11 }, | |
+ .input = { "GRU" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ 11 }, | |
.output = { false, 3 }, | |
}, | |
{ | |
.description = "one word, full confirmation", | |
- .input = { "HELLO", 5 }, | |
+ .input = { "HELLO", 5 }, | |
.output = { true, 5 }, | |
}, | |
{ | |
.description = "one word, violation, NUL-terminated", | |
- .input = { "hello", SIZE_MAX }, | |
+ .input = { "hello", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation, NUL-terminated… | |
- .input = { "GRU" "\xC3\x9F" "formel", SIZE_MAX }, | |
+ .input = { "GRU" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ SIZE_MAX }, | |
.output = { false, 3 }, | |
}, | |
{ | |
.description = "one word, full confirmation, NUL-terminated", | |
- .input = { "HELLO", SIZE_MAX }, | |
+ .input = { "HELLO", SIZE_MAX }, | |
.output = { true, 5 }, | |
}, | |
}; | |
@@ -151,77 +173,103 @@ static const struct unit_test_is_case_utf8 is_uppercase_… | |
static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0 }, | |
+ .input = { "", 0 }, | |
.output = { true, 0 }, | |
}, | |
{ | |
.description = "one character, violation", | |
- .input = { "\xC3\x9F", 2 }, | |
+ .input = { "\xC3\x9F", 2 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation", | |
- .input = { "A", 1 }, | |
+ .input = { "A", 1 }, | |
.output = { true, 1 }, | |
}, | |
{ | |
.description = "one character, violation, NUL-terminated", | |
- .input = { "\xC3\x9F", SIZE_MAX }, | |
+ .input = { "\xC3\x9F", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one character, confirmation, NUL-terminated", | |
- .input = { "A", SIZE_MAX }, | |
+ .input = { "A", SIZE_MAX }, | |
.output = { true, 1 }, | |
}, | |
{ | |
.description = "one word, violation", | |
- .input = { "hello", 5 }, | |
+ .input = { "hello", 5 }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation", | |
- .input = { "Gru" "\xC3\x9F" "fOrmel", 11 }, | |
+ .input = { "Gru" | |
+ "\xC3\x9F" | |
+ "fOrmel", | |
+ 11 }, | |
.output = { false, 6 }, | |
}, | |
{ | |
.description = "one word, full confirmation", | |
- .input = { "Gru" "\xC3\x9F" "formel", 11 }, | |
+ .input = { "Gru" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ 11 }, | |
.output = { true, 11 }, | |
}, | |
{ | |
.description = "one word, violation, NUL-terminated", | |
- .input = { "hello", SIZE_MAX }, | |
+ .input = { "hello", SIZE_MAX }, | |
.output = { false, 0 }, | |
}, | |
{ | |
.description = "one word, partial confirmation, NUL-terminated… | |
- .input = { "Gru" "\xC3\x9F" "fOrmel", SIZE_MAX }, | |
+ .input = { "Gru" | |
+ "\xC3\x9F" | |
+ "fOrmel", | |
+ SIZE_MAX }, | |
.output = { false, 6 }, | |
}, | |
{ | |
.description = "one word, full confirmation, NUL-terminated", | |
- .input = { "Gru" "\xC3\x9F" "formel", SIZE_MAX }, | |
+ .input = { "Gru" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ SIZE_MAX }, | |
.output = { true, 11 }, | |
}, | |
{ | |
.description = "multiple words, partial confirmation", | |
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", 18 }, | |
+ .input = { "Hello Gru" | |
+ "\xC3\x9F" | |
+ "fOrmel!", | |
+ 18 }, | |
.output = { false, 12 }, | |
}, | |
{ | |
.description = "multiple words, full confirmation", | |
- .input = { "Hello Gru" "\xC3\x9F" "formel!", 18 }, | |
+ .input = { "Hello Gru" | |
+ "\xC3\x9F" | |
+ "formel!", | |
+ 18 }, | |
.output = { true, 18 }, | |
}, | |
{ | |
- .description = "multiple words, partial confirmation, NUL-term… | |
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", SIZE_MAX }, | |
+ .description = | |
+ "multiple words, partial confirmation, NUL-terminated", | |
+ .input = { "Hello Gru" | |
+ "\xC3\x9F" | |
+ "fOrmel!", | |
+ SIZE_MAX }, | |
.output = { false, 12 }, | |
}, | |
{ | |
- .description = "multiple words, full confirmation, NUL-termina… | |
- .input = { "Hello Gru" "\xC3\x9F" "formel!", SIZE_MAX }, | |
+ .description = | |
+ "multiple words, full confirmation, NUL-terminated", | |
+ .input = { "Hello Gru" | |
+ "\xC3\x9F" | |
+ "formel!", | |
+ SIZE_MAX }, | |
.output = { true, 18 }, | |
}, | |
}; | |
@@ -229,72 +277,74 @@ static const struct unit_test_is_case_utf8 is_titlecase_u… | |
static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0, 10 }, | |
+ .input = { "", 0, 10 }, | |
.output = { "", 0 }, | |
}, | |
{ | |
.description = "empty output", | |
- .input = { "hello", 5, 0 }, | |
+ .input = { "hello", 5, 0 }, | |
.output = { "", 5 }, | |
}, | |
{ | |
.description = "one character, conversion", | |
- .input = { "A", 1, 10 }, | |
+ .input = { "A", 1, 10 }, | |
.output = { "a", 1 }, | |
}, | |
{ | |
.description = "one character, no conversion", | |
- .input = { "\xC3\x9F", 2, 10 }, | |
+ .input = { "\xC3\x9F", 2, 10 }, | |
.output = { "\xC3\x9F", 2 }, | |
}, | |
{ | |
.description = "one character, conversion, truncation", | |
- .input = { "A", 1, 0 }, | |
+ .input = { "A", 1, 0 }, | |
.output = { "", 1 }, | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated", | |
- .input = { "A", SIZE_MAX, 10 }, | |
+ .input = { "A", SIZE_MAX, 10 }, | |
.output = { "a", 1 }, | |
}, | |
{ | |
.description = "one character, no conversion, NUL-terminated", | |
- .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
+ .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
.output = { "\xC3\x9F", 2 }, | |
}, | |
{ | |
- .description = "one character, conversion, NUL-terminated, tru… | |
- .input = { "A", SIZE_MAX, 0 }, | |
+ .description = | |
+ "one character, conversion, NUL-terminated, truncation… | |
+ .input = { "A", SIZE_MAX, 0 }, | |
.output = { "", 1 }, | |
}, | |
{ | |
.description = "one word, conversion", | |
- .input = { "wOrD", 4, 10 }, | |
+ .input = { "wOrD", 4, 10 }, | |
.output = { "word", 4 }, | |
}, | |
{ | |
.description = "one word, no conversion", | |
- .input = { "word", 4, 10 }, | |
+ .input = { "word", 4, 10 }, | |
.output = { "word", 4 }, | |
}, | |
{ | |
.description = "one word, conversion, truncation", | |
- .input = { "wOrD", 4, 3 }, | |
+ .input = { "wOrD", 4, 3 }, | |
.output = { "wo", 4 }, | |
}, | |
{ | |
.description = "one word, conversion, NUL-terminated", | |
- .input = { "wOrD", SIZE_MAX, 10 }, | |
+ .input = { "wOrD", SIZE_MAX, 10 }, | |
.output = { "word", 4 }, | |
}, | |
{ | |
.description = "one word, no conversion, NUL-terminated", | |
- .input = { "word", SIZE_MAX, 10 }, | |
+ .input = { "word", SIZE_MAX, 10 }, | |
.output = { "word", 4 }, | |
}, | |
{ | |
- .description = "one word, conversion, NUL-terminated, truncati… | |
- .input = { "wOrD", SIZE_MAX, 3 }, | |
+ .description = | |
+ "one word, conversion, NUL-terminated, truncation", | |
+ .input = { "wOrD", SIZE_MAX, 3 }, | |
.output = { "wo", 4 }, | |
}, | |
}; | |
@@ -302,72 +352,86 @@ static const struct unit_test_to_case_utf8 to_lowercase_u… | |
static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0, 10 }, | |
+ .input = { "", 0, 10 }, | |
.output = { "", 0 }, | |
}, | |
{ | |
.description = "empty output", | |
- .input = { "hello", 5, 0 }, | |
+ .input = { "hello", 5, 0 }, | |
.output = { "", 5 }, | |
}, | |
{ | |
.description = "one character, conversion", | |
- .input = { "\xC3\x9F", 2, 10 }, | |
+ .input = { "\xC3\x9F", 2, 10 }, | |
.output = { "SS", 2 }, | |
}, | |
{ | |
.description = "one character, no conversion", | |
- .input = { "A", 1, 10 }, | |
+ .input = { "A", 1, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
.description = "one character, conversion, truncation", | |
- .input = { "\xC3\x9F", 2, 0 }, | |
+ .input = { "\xC3\x9F", 2, 0 }, | |
.output = { "", 2 }, | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated", | |
- .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
+ .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
.output = { "SS", 2 }, | |
}, | |
{ | |
.description = "one character, no conversion, NUL-terminated", | |
- .input = { "A", SIZE_MAX, 10 }, | |
+ .input = { "A", SIZE_MAX, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
- .description = "one character, conversion, NUL-terminated, tru… | |
- .input = { "\xC3\x9F", SIZE_MAX, 0 }, | |
+ .description = | |
+ "one character, conversion, NUL-terminated, truncation… | |
+ .input = { "\xC3\x9F", SIZE_MAX, 0 }, | |
.output = { "", 2 }, | |
}, | |
{ | |
.description = "one word, conversion", | |
- .input = { "gRu" "\xC3\x9F" "fOrMel", 11, 15 }, | |
+ .input = { "gRu" | |
+ "\xC3\x9F" | |
+ "fOrMel", | |
+ 11, 15 }, | |
.output = { "GRUSSFORMEL", 11 }, | |
}, | |
{ | |
.description = "one word, no conversion", | |
- .input = { "WORD", 4, 10 }, | |
+ .input = { "WORD", 4, 10 }, | |
.output = { "WORD", 4 }, | |
}, | |
{ | |
.description = "one word, conversion, truncation", | |
- .input = { "gRu" "\xC3\x9F" "formel", 11, 5 }, | |
+ .input = { "gRu" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ 11, 5 }, | |
.output = { "GRUS", 11 }, | |
}, | |
{ | |
.description = "one word, conversion, NUL-terminated", | |
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 15 }, | |
+ .input = { "gRu" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ SIZE_MAX, 15 }, | |
.output = { "GRUSSFORMEL", 11 }, | |
}, | |
{ | |
.description = "one word, no conversion, NUL-terminated", | |
- .input = { "WORD", SIZE_MAX, 10 }, | |
+ .input = { "WORD", SIZE_MAX, 10 }, | |
.output = { "WORD", 4 }, | |
}, | |
{ | |
- .description = "one word, conversion, NUL-terminated, truncati… | |
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 5 }, | |
+ .description = | |
+ "one word, conversion, NUL-terminated, truncation", | |
+ .input = { "gRu" | |
+ "\xC3\x9F" | |
+ "formel", | |
+ SIZE_MAX, 5 }, | |
.output = { "GRUS", 11 }, | |
}, | |
}; | |
@@ -375,102 +439,105 @@ static const struct unit_test_to_case_utf8 to_uppercase… | |
static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = { | |
{ | |
.description = "empty input", | |
- .input = { "", 0, 10 }, | |
+ .input = { "", 0, 10 }, | |
.output = { "", 0 }, | |
}, | |
{ | |
.description = "empty output", | |
- .input = { "hello", 5, 0 }, | |
+ .input = { "hello", 5, 0 }, | |
.output = { "", 5 }, | |
}, | |
{ | |
.description = "one character, conversion", | |
- .input = { "a", 1, 10 }, | |
+ .input = { "a", 1, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
.description = "one character, no conversion", | |
- .input = { "A", 1, 10 }, | |
+ .input = { "A", 1, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
.description = "one character, conversion, truncation", | |
- .input = { "a", 1, 0 }, | |
+ .input = { "a", 1, 0 }, | |
.output = { "", 1 }, | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated", | |
- .input = { "a", SIZE_MAX, 10 }, | |
+ .input = { "a", SIZE_MAX, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
.description = "one character, no conversion, NUL-terminated", | |
- .input = { "A", SIZE_MAX, 10 }, | |
+ .input = { "A", SIZE_MAX, 10 }, | |
.output = { "A", 1 }, | |
}, | |
{ | |
- .description = "one character, conversion, NUL-terminated, tru… | |
- .input = { "a", SIZE_MAX, 0 }, | |
+ .description = | |
+ "one character, conversion, NUL-terminated, truncation… | |
+ .input = { "a", SIZE_MAX, 0 }, | |
.output = { "", 1 }, | |
}, | |
{ | |
.description = "one word, conversion", | |
- .input = { "heLlo", 5, 10 }, | |
+ .input = { "heLlo", 5, 10 }, | |
.output = { "Hello", 5 }, | |
}, | |
{ | |
.description = "one word, no conversion", | |
- .input = { "Hello", 5, 10 }, | |
+ .input = { "Hello", 5, 10 }, | |
.output = { "Hello", 5 }, | |
}, | |
{ | |
.description = "one word, conversion, truncation", | |
- .input = { "heLlo", 5, 2 }, | |
+ .input = { "heLlo", 5, 2 }, | |
.output = { "H", 5 }, | |
}, | |
{ | |
.description = "one word, conversion, NUL-terminated", | |
- .input = { "heLlo", SIZE_MAX, 10 }, | |
+ .input = { "heLlo", SIZE_MAX, 10 }, | |
.output = { "Hello", 5 }, | |
}, | |
{ | |
.description = "one word, no conversion, NUL-terminated", | |
- .input = { "Hello", SIZE_MAX, 10 }, | |
+ .input = { "Hello", SIZE_MAX, 10 }, | |
.output = { "Hello", 5 }, | |
}, | |
{ | |
- .description = "one word, conversion, NUL-terminated, truncati… | |
- .input = { "heLlo", SIZE_MAX, 3 }, | |
+ .description = | |
+ "one word, conversion, NUL-terminated, truncation", | |
+ .input = { "heLlo", SIZE_MAX, 3 }, | |
.output = { "He", 5 }, | |
}, | |
{ | |
.description = "two words, conversion", | |
- .input = { "heLlo wORLd!", 12, 20 }, | |
+ .input = { "heLlo wORLd!", 12, 20 }, | |
.output = { "Hello World!", 12 }, | |
}, | |
{ | |
.description = "two words, no conversion", | |
- .input = { "Hello World!", 12, 20 }, | |
+ .input = { "Hello World!", 12, 20 }, | |
.output = { "Hello World!", 12 }, | |
}, | |
{ | |
.description = "two words, conversion, truncation", | |
- .input = { "heLlo wORLd!", 12, 8 }, | |
+ .input = { "heLlo wORLd!", 12, 8 }, | |
.output = { "Hello W", 12 }, | |
}, | |
{ | |
.description = "two words, conversion, NUL-terminated", | |
- .input = { "heLlo wORLd!", SIZE_MAX, 20 }, | |
+ .input = { "heLlo wORLd!", SIZE_MAX, 20 }, | |
.output = { "Hello World!", 12 }, | |
}, | |
{ | |
.description = "two words, no conversion, NUL-terminated", | |
- .input = { "Hello World!", SIZE_MAX, 20 }, | |
+ .input = { "Hello World!", SIZE_MAX, 20 }, | |
.output = { "Hello World!", 12 }, | |
}, | |
{ | |
- .description = "two words, conversion, NUL-terminated, truncat… | |
- .input = { "heLlo wORLd!", SIZE_MAX, 4 }, | |
+ .description = | |
+ "two words, conversion, NUL-terminated, truncation", | |
+ .input = { "heLlo wORLd!", SIZE_MAX, 4 }, | |
.output = { "Hel", 12 }, | |
}, | |
}; | |
@@ -485,14 +552,14 @@ unit_test_callback_is_case_utf8(const void *t, size_t off… | |
size_t caselen = 0x7f; | |
if (t == is_lowercase_utf8) { | |
- ret = grapheme_is_lowercase_utf8(test->input.src, test->input.… | |
- &caselen); | |
+ ret = grapheme_is_lowercase_utf8(test->input.src, | |
+ test->input.srclen, &caselen); | |
} else if (t == is_uppercase_utf8) { | |
- ret = grapheme_is_uppercase_utf8(test->input.src, test->input.… | |
- &caselen); | |
+ ret = grapheme_is_uppercase_utf8(test->input.src, | |
+ test->input.srclen, &caselen); | |
} else if (t == is_titlecase_utf8) { | |
- ret = grapheme_is_titlecase_utf8(test->input.src, test->input.… | |
- &caselen); | |
+ ret = grapheme_is_titlecase_utf8(test->input.src, | |
+ test->input.srclen, &caselen); | |
} else { | |
goto err; | |
@@ -505,10 +572,11 @@ unit_test_callback_is_case_utf8(const void *t, size_t off… | |
return 0; | |
err: | |
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
- "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0, | |
- name, off, test->description, ret ? "true" : "false", | |
- caselen, test->output.ret ? "true" : "false", | |
+ fprintf(stderr, | |
+ "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned (%s, %zu) instead of (%s, %zu)).\n", | |
+ argv0, name, off, test->description, ret ? "true" : "false", | |
+ caselen, test->output.ret ? "true" : "false", | |
test->output.caselen); | |
return 1; | |
} | |
@@ -526,21 +594,25 @@ unit_test_callback_to_case_utf8(const void *t, size_t off… | |
memset(buf, 0x7f, LEN(buf)); | |
if (t == to_lowercase_utf8) { | |
- ret = grapheme_to_lowercase_utf8(test->input.src, test->input.… | |
- buf, test->input.destlen); | |
+ ret = grapheme_to_lowercase_utf8(test->input.src, | |
+ test->input.srclen, buf, | |
+ test->input.destlen); | |
} else if (t == to_uppercase_utf8) { | |
- ret = grapheme_to_uppercase_utf8(test->input.src, test->input.… | |
- buf, test->input.destlen); | |
+ ret = grapheme_to_uppercase_utf8(test->input.src, | |
+ test->input.srclen, buf, | |
+ test->input.destlen); | |
} else if (t == to_titlecase_utf8) { | |
- ret = grapheme_to_titlecase_utf8(test->input.src, test->input.… | |
- buf, test->input.destlen); | |
+ ret = grapheme_to_titlecase_utf8(test->input.src, | |
+ test->input.srclen, buf, | |
+ test->input.destlen); | |
} else { | |
goto err; | |
} | |
/* check results */ | |
if (ret != test->output.ret || | |
- memcmp(buf, test->output.dest, MIN(test->input.destlen, test->outp… | |
+ memcmp(buf, test->output.dest, | |
+ MIN(test->input.destlen, test->output.ret))) { | |
goto err; | |
} | |
@@ -553,9 +625,10 @@ unit_test_callback_to_case_utf8(const void *t, size_t off,… | |
return 0; | |
err: | |
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
- "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", ar… | |
- name, off, test->description, (int)ret, buf, ret, | |
+ fprintf(stderr, | |
+ "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", | |
+ argv0, name, off, test->description, (int)ret, buf, ret, | |
(int)test->output.ret, test->output.dest, test->output.ret); | |
return 1; | |
} | |
@@ -565,16 +638,22 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_ut… | |
- LEN(is_lowercase_utf8), "grapheme_is_lowercase_u… | |
- run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_ut… | |
- LEN(is_uppercase_utf8), "grapheme_is_uppercase_u… | |
- run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_ut… | |
- LEN(is_titlecase_utf8), "grapheme_is_titlecase_u… | |
- run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_ut… | |
- LEN(to_lowercase_utf8), "grapheme_to_lowercase_u… | |
- run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_ut… | |
- LEN(to_uppercase_utf8), "grapheme_to_uppercase_u… | |
- run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_ut… | |
- LEN(to_titlecase_utf8), "grapheme_to_titlecase_u… | |
+ return run_unit_tests(unit_test_callback_is_case_utf8, | |
+ is_lowercase_utf8, LEN(is_lowercase_utf8), | |
+ "grapheme_is_lowercase_utf8", argv[0]) + | |
+ run_unit_tests(unit_test_callback_is_case_utf8, | |
+ is_uppercase_utf8, LEN(is_uppercase_utf8), | |
+ "grapheme_is_uppercase_utf8", argv[0]) + | |
+ run_unit_tests(unit_test_callback_is_case_utf8, | |
+ is_titlecase_utf8, LEN(is_titlecase_utf8), | |
+ "grapheme_is_titlecase_utf8", argv[0]) + | |
+ run_unit_tests(unit_test_callback_to_case_utf8, | |
+ to_lowercase_utf8, LEN(to_lowercase_utf8), | |
+ "grapheme_to_lowercase_utf8", argv[0]) + | |
+ run_unit_tests(unit_test_callback_to_case_utf8, | |
+ to_uppercase_utf8, LEN(to_uppercase_utf8), | |
+ "grapheme_to_uppercase_utf8", argv[0]) + | |
+ run_unit_tests(unit_test_callback_to_case_utf8, | |
+ to_titlecase_utf8, LEN(to_titlecase_utf8), | |
+ "grapheme_to_titlecase_utf8", argv[0]); | |
} | |
diff --git a/test/character.c b/test/character.c | |
@@ -92,12 +92,10 @@ static const struct unit_test_next_break_utf8 next_characte… | |
static int | |
unit_test_callback_next_character_break(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+ const char *name, const char *argv0) | |
{ | |
- return unit_test_callback_next_break(t, off, | |
- grapheme_next_character_break, | |
- name, argv0); | |
+ return unit_test_callback_next_break( | |
+ t, off, grapheme_next_character_break, name, argv0); | |
} | |
static int | |
@@ -105,9 +103,8 @@ unit_test_callback_next_character_break_utf8(const void *t,… | |
const char *name, | |
const char *argv0) | |
{ | |
- return unit_test_callback_next_break_utf8(t, off, | |
- grapheme_next_character_brea… | |
- name, argv0); | |
+ return unit_test_callback_next_break_utf8( | |
+ t, off, grapheme_next_character_break_utf8, name, argv0); | |
} | |
int | |
@@ -116,11 +113,13 @@ main(int argc, char *argv[]) | |
(void)argc; | |
return run_break_tests(grapheme_next_character_break, | |
- character_break_test, LEN(character_break_test)… | |
+ character_break_test, LEN(character_break_test), | |
+ argv[0]) + | |
run_unit_tests(unit_test_callback_next_character_break, | |
next_character_break, LEN(next_character_break), | |
"grapheme_next_character_break", argv[0]) + | |
run_unit_tests(unit_test_callback_next_character_break_utf8, | |
- next_character_break_utf8, LEN(next_character_br… | |
+ next_character_break_utf8, | |
+ LEN(next_character_break_utf8), | |
"grapheme_next_character_break_utf8", argv[0]); | |
} | |
diff --git a/test/line.c b/test/line.c | |
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_line_bre… | |
}; | |
static int | |
-unit_test_callback_next_line_break(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+unit_test_callback_next_line_break(const void *t, size_t off, const char *name, | |
+ const char *argv0) | |
{ | |
- return unit_test_callback_next_break(t, off, | |
- grapheme_next_line_break, | |
+ return unit_test_callback_next_break(t, off, grapheme_next_line_break, | |
name, argv0); | |
} | |
static int | |
unit_test_callback_next_line_break_utf8(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+ const char *name, const char *argv0) | |
{ | |
- return unit_test_callback_next_break_utf8(t, off, | |
- grapheme_next_line_break_utf… | |
- name, argv0); | |
+ return unit_test_callback_next_break_utf8( | |
+ t, off, grapheme_next_line_break_utf8, name, argv0); | |
} | |
int | |
@@ -115,9 +111,8 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- return run_break_tests(grapheme_next_line_break, | |
- line_break_test, LEN(line_break_test), | |
- argv[0]) + | |
+ return run_break_tests(grapheme_next_line_break, line_break_test, | |
+ LEN(line_break_test), argv[0]) + | |
run_unit_tests(unit_test_callback_next_line_break, | |
next_line_break, LEN(next_line_break), | |
"grapheme_next_line_break", argv[0]) + | |
diff --git a/test/sentence.c b/test/sentence.c | |
@@ -92,22 +92,18 @@ static const struct unit_test_next_break_utf8 next_sentence… | |
static int | |
unit_test_callback_next_sentence_break(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+ const char *name, const char *argv0) | |
{ | |
- return unit_test_callback_next_break(t, off, | |
- grapheme_next_sentence_break, | |
- name, argv0); | |
+ return unit_test_callback_next_break( | |
+ t, off, grapheme_next_sentence_break, name, argv0); | |
} | |
static int | |
unit_test_callback_next_sentence_break_utf8(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+ const char *name, const char *argv… | |
{ | |
- return unit_test_callback_next_break_utf8(t, off, | |
- grapheme_next_sentence_break… | |
- name, argv0); | |
+ return unit_test_callback_next_break_utf8( | |
+ t, off, grapheme_next_sentence_break_utf8, name, argv0); | |
} | |
int | |
@@ -116,12 +112,13 @@ main(int argc, char *argv[]) | |
(void)argc; | |
return run_break_tests(grapheme_next_sentence_break, | |
- sentence_break_test, | |
- LEN(sentence_break_test), argv[0]) + | |
+ sentence_break_test, LEN(sentence_break_test), | |
+ argv[0]) + | |
run_unit_tests(unit_test_callback_next_sentence_break, | |
next_sentence_break, LEN(next_sentence_break), | |
"grapheme_next_sentence_break", argv[0]) + | |
run_unit_tests(unit_test_callback_next_sentence_break_utf8, | |
- next_sentence_break_utf8, LEN(next_sentence_brea… | |
+ next_sentence_break_utf8, | |
+ LEN(next_sentence_break_utf8), | |
"grapheme_next_character_break_utf8", argv[0]); | |
} | |
diff --git a/test/utf8-decode.c b/test/utf8-decode.c | |
@@ -8,281 +8,279 @@ | |
#include "util.h" | |
static const struct { | |
- char *arr; /* UTF-8 byte sequence */ | |
- size_t len; /* length of UTF-8 byte sequence */ | |
- size_t exp_len; /* expected length returned */ | |
- uint_least32_t exp_cp; /* expected codepoint returned */ | |
+ char *arr; /* UTF-8 byte sequence */ | |
+ size_t len; /* length of UTF-8 byte sequence */ | |
+ size_t exp_len; /* expected length returned */ | |
+ uint_least32_t exp_cp; /* expected codepoint returned */ | |
} dec_test[] = { | |
{ | |
/* empty sequence | |
- * [ ] -> | |
- * INVALID | |
- */ | |
- .arr = NULL, | |
- .len = 0, | |
+ * [ ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = NULL, | |
+ .len = 0, | |
.exp_len = 0, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid lead byte | |
- * [ 11111101 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xFD }, | |
- .len = 1, | |
+ * [ 11111101 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xFD }, | |
+ .len = 1, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* valid 1-byte sequence | |
- * [ 00000001 ] -> | |
- * 0000001 | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0x01 }, | |
- .len = 1, | |
+ * [ 00000001 ] -> | |
+ * 0000001 | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0x01 }, | |
+ .len = 1, | |
.exp_len = 1, | |
- .exp_cp = 0x1, | |
+ .exp_cp = 0x1, | |
}, | |
{ | |
/* valid 2-byte sequence | |
- * [ 11000011 10111111 ] -> | |
- * 00011111111 | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xC3, 0xBF }, | |
- .len = 2, | |
+ * [ 11000011 10111111 ] -> | |
+ * 00011111111 | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xBF }, | |
+ .len = 2, | |
.exp_len = 2, | |
- .exp_cp = 0xFF, | |
+ .exp_cp = 0xFF, | |
}, | |
{ | |
/* invalid 2-byte sequence (second byte missing) | |
- * [ 11000011 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xC3 }, | |
- .len = 1, | |
+ * [ 11000011 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xC3 }, | |
+ .len = 1, | |
.exp_len = 2, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 2-byte sequence (second byte malformed) | |
- * [ 11000011 11111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xC3, 0xFF }, | |
- .len = 2, | |
+ * [ 11000011 11111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xFF }, | |
+ .len = 2, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 2-byte sequence (overlong encoded) | |
- * [ 11000001 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xC1, 0xBF }, | |
- .len = 2, | |
+ * [ 11000001 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xC1, 0xBF }, | |
+ .len = 2, | |
.exp_len = 2, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* valid 3-byte sequence | |
- * [ 11100000 10111111 10111111 ] -> | |
- * 0000111111111111 | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF }, | |
- .len = 3, | |
+ * [ 11100000 10111111 10111111 ] -> | |
+ * 0000111111111111 | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF }, | |
+ .len = 3, | |
.exp_len = 3, | |
- .exp_cp = 0xFFF, | |
+ .exp_cp = 0xFFF, | |
}, | |
{ | |
/* invalid 3-byte sequence (second byte missing) | |
- * [ 11100000 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0 }, | |
- .len = 1, | |
+ * [ 11100000 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0 }, | |
+ .len = 1, | |
.exp_len = 3, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (second byte malformed) | |
- * [ 11100000 01111111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF }, | |
- .len = 3, | |
+ * [ 11100000 01111111 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F, 0xBF }, | |
+ .len = 3, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (short string, second byte malforme… | |
- * [ 11100000 01111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F }, | |
- .len = 2, | |
+ * [ 11100000 01111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F }, | |
+ .len = 2, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (third byte missing) | |
- * [ 11100000 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF }, | |
- .len = 2, | |
+ * [ 11100000 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF }, | |
+ .len = 2, | |
.exp_len = 3, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (third byte malformed) | |
- * [ 11100000 10111111 01111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F }, | |
- .len = 3, | |
+ * [ 11100000 10111111 01111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0x7F }, | |
+ .len = 3, | |
.exp_len = 2, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (overlong encoded) | |
- * [ 11100000 10011111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF }, | |
- .len = 3, | |
+ * [ 11100000 10011111 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x9F, 0xBF }, | |
+ .len = 3, | |
.exp_len = 3, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 3-byte sequence (UTF-16 surrogate half) | |
- * [ 11101101 10100000 10000000 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 }, | |
- .len = 3, | |
+ * [ 11101101 10100000 10000000 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xED, 0xA0, 0x80 }, | |
+ .len = 3, | |
.exp_len = 3, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* valid 4-byte sequence | |
- * [ 11110011 10111111 10111111 10111111 ] -> | |
- * 011111111111111111111 | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF }, | |
- .len = 4, | |
+ * [ 11110011 10111111 10111111 10111111 ] -> | |
+ * 011111111111111111111 | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF }, | |
+ .len = 4, | |
.exp_len = 4, | |
- .exp_cp = UINT32_C(0xFFFFF), | |
+ .exp_cp = UINT32_C(0xFFFFF), | |
}, | |
{ | |
/* invalid 4-byte sequence (second byte missing) | |
- * [ 11110011 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3 }, | |
- .len = 1, | |
+ * [ 11110011 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3 }, | |
+ .len = 1, | |
.exp_len = 4, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (second byte malformed) | |
- * [ 11110011 01111111 10111111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF }, | |
- .len = 4, | |
+ * [ 11110011 01111111 10111111 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF, 0xBF }, | |
+ .len = 4, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
- /* invalid 4-byte sequence (short string 1, second byte malfor… | |
- * [ 11110011 011111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F }, | |
- .len = 2, | |
+ /* invalid 4-byte sequence (short string 1, second byte | |
+ * malformed) [ 11110011 011111111 ] -> INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F }, | |
+ .len = 2, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
- /* invalid 4-byte sequence (short string 2, second byte malfor… | |
- * [ 11110011 011111111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF }, | |
- .len = 3, | |
+ /* invalid 4-byte sequence (short string 2, second byte | |
+ * malformed) [ 11110011 011111111 10111111 ] -> INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF }, | |
+ .len = 3, | |
.exp_len = 1, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (third byte missing) | |
- * [ 11110011 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF }, | |
- .len = 2, | |
+ * [ 11110011 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF }, | |
+ .len = 2, | |
.exp_len = 4, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (third byte malformed) | |
- * [ 11110011 10111111 01111111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF }, | |
- .len = 4, | |
+ * [ 11110011 10111111 01111111 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F, 0xBF }, | |
+ .len = 4, | |
.exp_len = 2, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (short string, third byte malformed) | |
- * [ 11110011 10111111 01111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F }, | |
- .len = 3, | |
+ * [ 11110011 10111111 01111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F }, | |
+ .len = 3, | |
.exp_len = 2, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (fourth byte missing) | |
- * [ 11110011 10111111 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF }, | |
- .len = 3, | |
+ * [ 11110011 10111111 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF }, | |
+ .len = 3, | |
.exp_len = 4, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (fourth byte malformed) | |
- * [ 11110011 10111111 10111111 01111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F }, | |
- .len = 4, | |
+ * [ 11110011 10111111 10111111 01111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0x7F }, | |
+ .len = 4, | |
.exp_len = 3, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (overlong encoded) | |
- * [ 11110000 10000000 10000001 10111111 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF }, | |
- .len = 4, | |
+ * [ 11110000 10000000 10000001 10111111 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF0, 0x80, 0x81, 0xBF }, | |
+ .len = 4, | |
.exp_len = 4, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
{ | |
/* invalid 4-byte sequence (UTF-16-unrepresentable) | |
- * [ 11110100 10010000 10000000 10000000 ] -> | |
- * INVALID | |
- */ | |
- .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 }, | |
- .len = 4, | |
+ * [ 11110100 10010000 10000000 10000000 ] -> | |
+ * INVALID | |
+ */ | |
+ .arr = (char *)(unsigned char[]) { 0xF4, 0x90, 0x80, 0x80 }, | |
+ .len = 4, | |
.exp_len = 4, | |
- .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT, | |
}, | |
}; | |
@@ -298,12 +296,12 @@ main(int argc, char *argv[]) | |
size_t len; | |
uint_least32_t cp; | |
- len = grapheme_decode_utf8(dec_test[i].arr, | |
- dec_test[i].len, &cp); | |
+ len = grapheme_decode_utf8(dec_test[i].arr, dec_test[i].len, | |
+ &cp); | |
- if (len != dec_test[i].exp_len || | |
- cp != dec_test[i].exp_cp) { | |
- fprintf(stderr, "%s: Failed test %zu: " | |
+ if (len != dec_test[i].exp_len || cp != dec_test[i].exp_cp) { | |
+ fprintf(stderr, | |
+ "%s: Failed test %zu: " | |
"Expected (%zx,%u), but got (%zx,%u).\n", | |
argv[0], i, dec_test[i].exp_len, | |
dec_test[i].exp_cp, len, cp); | |
diff --git a/test/utf8-encode.c b/test/utf8-encode.c | |
@@ -8,44 +8,44 @@ | |
#include "util.h" | |
static const struct { | |
- uint_least32_t cp; /* input codepoint */ | |
- char *exp_arr; /* expected UTF-8 byte sequence */ | |
- size_t exp_len; /* expected length of UTF-8 sequence */ | |
+ uint_least32_t cp; /* input codepoint */ | |
+ char *exp_arr; /* expected UTF-8 byte sequence */ | |
+ size_t exp_len; /* expected length of UTF-8 sequence */ | |
} enc_test[] = { | |
{ | |
/* invalid codepoint (UTF-16 surrogate half) */ | |
- .cp = UINT32_C(0xD800), | |
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD }, | |
+ .cp = UINT32_C(0xD800), | |
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, | |
.exp_len = 3, | |
}, | |
{ | |
/* invalid codepoint (UTF-16-unrepresentable) */ | |
- .cp = UINT32_C(0x110000), | |
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD }, | |
+ .cp = UINT32_C(0x110000), | |
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, | |
.exp_len = 3, | |
}, | |
{ | |
/* codepoint encoded to a 1-byte sequence */ | |
- .cp = 0x01, | |
- .exp_arr = (char *)(unsigned char[]){ 0x01 }, | |
+ .cp = 0x01, | |
+ .exp_arr = (char *)(unsigned char[]) { 0x01 }, | |
.exp_len = 1, | |
}, | |
{ | |
/* codepoint encoded to a 2-byte sequence */ | |
- .cp = 0xFF, | |
- .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF }, | |
+ .cp = 0xFF, | |
+ .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF }, | |
.exp_len = 2, | |
}, | |
{ | |
/* codepoint encoded to a 3-byte sequence */ | |
- .cp = 0xFFF, | |
- .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF }, | |
+ .cp = 0xFFF, | |
+ .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF }, | |
.exp_len = 3, | |
}, | |
{ | |
/* codepoint encoded to a 4-byte sequence */ | |
- .cp = UINT32_C(0xFFFFF), | |
- .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF }, | |
+ .cp = UINT32_C(0xFFFFF), | |
+ .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF … | |
.exp_len = 4, | |
}, | |
}; | |
@@ -66,11 +66,12 @@ main(int argc, char *argv[]) | |
if (len != enc_test[i].exp_len || | |
memcmp(arr, enc_test[i].exp_arr, len)) { | |
- fprintf(stderr, "%s, Failed test %zu: " | |
- "Expected (", argv[0], i); | |
+ fprintf(stderr, | |
+ "%s, Failed test %zu: " | |
+ "Expected (", | |
+ argv[0], i); | |
for (j = 0; j < enc_test[i].exp_len; j++) { | |
- fprintf(stderr, "0x%x", | |
- enc_test[i].exp_arr[j]); | |
+ fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]… | |
if (j + 1 < enc_test[i].exp_len) { | |
fprintf(stderr, " "); | |
} | |
diff --git a/test/util.c b/test/util.c | |
@@ -5,13 +5,14 @@ | |
#include <stdio.h> | |
#include <string.h> | |
-#include "../grapheme.h" | |
#include "../gen/types.h" | |
+#include "../grapheme.h" | |
#include "util.h" | |
int | |
run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t), | |
- const struct break_test *test, size_t testlen, const char *arg… | |
+ const struct break_test *test, size_t testlen, | |
+ const char *argv0) | |
{ | |
size_t i, j, off, res, failed; | |
@@ -21,11 +22,14 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *… | |
res = next_break(test[i].cp + off, test[i].cplen - off… | |
/* check if our resulting offset matches */ | |
- if (j == test[i].lenlen || | |
- res != test[i].len[j++]) { | |
- fprintf(stderr, "%s: Failed conformance test %… | |
+ if (j == test[i].lenlen || res != test[i].len[j++]) { | |
+ fprintf(stderr, | |
+ "%s: Failed conformance test %zu " | |
+ "\"%s\".\n", | |
argv0, i, test[i].descr); | |
- fprintf(stderr, "J=%zu: EXPECTED len %zu, got … | |
+ fprintf(stderr, | |
+ "J=%zu: EXPECTED len %zu, got %zu\n", | |
+ j - 1, test[i].len[j - 1], res); | |
failed++; | |
break; | |
} | |
@@ -39,13 +43,15 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *… | |
int | |
run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, | |
- const char *), const void *test, size_t testlen, const char *na… | |
+ const char *), | |
+ const void *test, size_t testlen, const char *name, | |
const char *argv0) | |
{ | |
size_t i, failed; | |
for (i = 0, failed = 0; i < testlen; i++) { | |
- failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 … | |
+ failed += | |
+ (unit_test_callback(test, i, name, argv0) == 0) ? 0 : … | |
} | |
printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name, | |
@@ -56,8 +62,9 @@ run_unit_tests(int (*unit_test_callback)(const void *, size_t… | |
int | |
unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off, | |
- size_t (*next_break)(const uint_least32_t *… | |
- const char *name, const char *argv0) | |
+ size_t (*next_break)(const uint_least32_t *, | |
+ size_t), | |
+ const char *name, const char *argv0) | |
{ | |
const struct unit_test_next_break *test = t + off; | |
@@ -69,16 +76,18 @@ unit_test_callback_next_break(const struct unit_test_next_b… | |
return 0; | |
err: | |
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
- "(returned %zu instead of %zu).\n", argv0, | |
- name, off, test->description, ret, test->output.ret); | |
+ fprintf(stderr, | |
+ "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned %zu instead of %zu).\n", | |
+ argv0, name, off, test->description, ret, test->output.ret); | |
return 1; | |
} | |
int | |
unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t, | |
size_t off, | |
- size_t (*next_break_utf8)(const char *, siz… | |
+ size_t (*next_break_utf8)(const char *, | |
+ size_t), | |
const char *name, const char *argv0) | |
{ | |
const struct unit_test_next_break_utf8 *test = t + off; | |
@@ -91,8 +100,9 @@ unit_test_callback_next_break_utf8(const struct unit_test_ne… | |
return 0; | |
err: | |
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
- "(returned %zu instead of %zu).\n", argv0, | |
- name, off, test->description, ret, test->output.ret); | |
+ fprintf(stderr, | |
+ "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned %zu instead of %zu).\n", | |
+ argv0, name, off, test->description, ret, test->output.ret); | |
return 1; | |
} | |
diff --git a/test/util.h b/test/util.h | |
@@ -6,16 +6,18 @@ | |
#include "../grapheme.h" | |
#undef MIN | |
-#define MIN(x,y) ((x) < (y) ? (x) : (y)) | |
+#define MIN(x, y) ((x) < (y) ? (x) : (y)) | |
#undef LEN | |
#define LEN(x) (sizeof(x) / sizeof(*(x))) | |
struct unit_test_next_break { | |
const char *description; | |
+ | |
struct { | |
const uint_least32_t *src; | |
size_t srclen; | |
} input; | |
+ | |
struct { | |
size_t ret; | |
} output; | |
@@ -23,10 +25,12 @@ struct unit_test_next_break { | |
struct unit_test_next_break_utf8 { | |
const char *description; | |
+ | |
struct { | |
const char *src; | |
size_t srclen; | |
} input; | |
+ | |
struct { | |
size_t ret; | |
} output; | |
@@ -36,14 +40,17 @@ int run_break_tests(size_t (*next_break)(const uint_least32… | |
const struct break_test *test, size_t testlen, | |
const char *); | |
int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char … | |
- const char *), const void *, size_t, const char *, const ch… | |
+ const char *), | |
+ const void *, size_t, const char *, const char *); | |
int unit_test_callback_next_break(const struct unit_test_next_break *, size_t, | |
- size_t (*next_break)(const uint_least32_t *,… | |
+ size_t (*next_break)(const uint_least32_t *, | |
+ size_t), | |
const char *, const char *); | |
int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 … | |
size_t, | |
- size_t (*next_break_utf8)(const char *,… | |
+ size_t (*next_break_utf8)(const char *, | |
+ size_t), | |
const char *, const char *); | |
#endif /* UTIL_H */ | |
diff --git a/test/word.c b/test/word.c | |
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_word_bre… | |
}; | |
static int | |
-unit_test_callback_next_word_break(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+unit_test_callback_next_word_break(const void *t, size_t off, const char *name, | |
+ const char *argv0) | |
{ | |
- return unit_test_callback_next_break(t, off, | |
- grapheme_next_word_break, | |
+ return unit_test_callback_next_break(t, off, grapheme_next_word_break, | |
name, argv0); | |
} | |
static int | |
unit_test_callback_next_word_break_utf8(const void *t, size_t off, | |
- const char *name, | |
- const char *argv0) | |
+ const char *name, const char *argv0) | |
{ | |
- return unit_test_callback_next_break_utf8(t, off, | |
- grapheme_next_word_break_utf… | |
- name, argv0); | |
+ return unit_test_callback_next_break_utf8( | |
+ t, off, grapheme_next_word_break_utf8, name, argv0); | |
} | |
int |