Introduction
Introduction Statistics Contact Development Disclaimer Help
Apply clang-format - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
commit abdc2ba0c764c527aaa2ed9fe42db27d71a10bc2
parent 50efb9a3396588e6e1266f51ec5446a9fa8013ea
Author: Laslo Hunhold <[email protected]>
Date: Tue, 15 Nov 2022 15:53:56 +0100
Apply clang-format
Even though this disrupts the backtrackability of the code a bit,
it's better to rip the band aid off now than to push it on into the
future.
With these changes, formatting is automatically governed and ensured by
a simple call to
make format
Signed-off-by: Laslo Hunhold <[email protected]>
Diffstat:
M benchmark/bidirectional.c | 2 +-
M benchmark/case.c | 5 +++--
M benchmark/character.c | 12 ++++++------
M benchmark/line.c | 4 ++--
M benchmark/sentence.c | 7 ++++---
M benchmark/utf8-decode.c | 24 +++++++++++-------------
M benchmark/util.c | 25 ++++++++++++-------------
M benchmark/util.h | 8 ++++----
M benchmark/word.c | 4 ++--
M gen/bidirectional-test.c | 150 +++++++++++++++++++----------…
M gen/bidirectional.c | 144 ++++++++++++++++-------------…
M gen/case.c | 79 ++++++++++++++++++-----------…
M gen/character.c | 64 ++++++++++++++++-------------…
M gen/line.c | 343 +++++++++++++++++------------…
M gen/sentence.c | 66 ++++++++++++++++-------------…
M gen/util.c | 202 +++++++++++++++++------------…
M gen/util.h | 39 ++++++++++++++++-------------…
M gen/word.c | 97 ++++++++++++++++-------------…
M grapheme.h | 24 ++++++++++++++----------
M src/bidirectional.c | 323 +++++++++++++++++++----------…
M src/case.c | 125 ++++++++++++++++++-----------…
M src/character.c | 160 ++++++++++++++++-------------…
M src/line.c | 108 +++++++++++++++++------------…
M src/sentence.c | 44 ++++++++++++++++-------------…
M src/utf8.c | 26 +++++++++++++-------------
M src/util.c | 51 ++++++++++++++++++-----------…
M src/util.h | 26 ++++++++++++++------------
M src/word.c | 95 +++++++++++++++++------------…
M test/bidirectional.c | 20 +++++++++++++-------
M test/case.c | 331 +++++++++++++++++++----------…
M test/character.c | 19 +++++++++----------
M test/line.c | 21 ++++++++-------------
M test/sentence.c | 23 ++++++++++-------------
M test/utf8-decode.c | 344 +++++++++++++++--------------…
M test/utf8-encode.c | 39 ++++++++++++++++-------------…
M test/util.c | 44 +++++++++++++++++++----------…
M test/util.h | 15 +++++++++++----
M test/word.c | 16 ++++++----------
38 files changed, 1736 insertions(+), 1393 deletions(-)
---
diff --git a/benchmark/bidirectional.c b/benchmark/bidirectional.c
@@ -5,8 +5,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/bidirectional-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 100000
diff --git a/benchmark/case.c b/benchmark/case.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/word-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -40,7 +40,8 @@ main(int argc, char *argv[])
&(p.srclen))) == NULL) {
return 1;
}
- if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) == NULL) {
+ if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) ==
+ NULL) {
fprintf(stderr, "calloc: Out of memory\n");
}
diff --git a/benchmark/character.c b/benchmark/character.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/character-test.h"
+#include "../grapheme.h"
#include "util.h"
#include <utf8proc.h>
@@ -28,7 +28,7 @@ libgrapheme(const void *payload)
size_t i;
for (i = 0; i + 1 < p->buflen; i++) {
- (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
+ (void)grapheme_is_character_break(p->buf[i], p->buf[i + 1],
&state);
}
}
@@ -41,9 +41,8 @@ libutf8proc(const void *payload)
size_t i;
for (i = 0; i + 1 < p->buflen; i++) {
- (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i],
- p->buf_utf8proc[i+1],
- &state);
+ (void)utf8proc_grapheme_break_stateful(
+ p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state);
}
}
@@ -61,7 +60,8 @@ main(int argc, char *argv[])
&(p.buflen))) == NULL) {
return 1;
}
- if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == NULL) {
+ if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==
+ NULL) {
fprintf(stderr, "malloc: %s\n", strerror(errno));
exit(1);
}
diff --git a/benchmark/line.c b/benchmark/line.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/line-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
+ for (off = 0; off < p->buflen;) {
off += grapheme_next_line_break(p->buf + off, p->buflen - off);
}
}
diff --git a/benchmark/sentence.c b/benchmark/sentence.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/sentence-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 100000
@@ -23,8 +23,9 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
- off += grapheme_next_sentence_break(p->buf + off, p->buflen - off);
+ for (off = 0; off < p->buflen;) {
+ off += grapheme_next_sentence_break(p->buf + off,
+ p->buflen - off);
}
}
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/character-test.h"
+#include "../grapheme.h"
#include "util.h"
#include <utf8proc.h>
@@ -28,9 +28,8 @@ libgrapheme(const void *payload)
size_t ret, off;
for (off = 0; off < p->buflen; off += ret) {
- if ((ret = grapheme_decode_utf8(p->buf + off,
- p->buflen - off, &cp)) >
- (p->buflen - off)) {
+ if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
+ &cp)) > (p->buflen - off)) {
break;
}
(void)cp;
@@ -48,7 +47,7 @@ libutf8proc(const void *payload)
for (off = 0; off < p->buflen; off += (size_t)ret) {
if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
(utf8proc_ssize_t)(p->buflen - off),
- &cp)) < 0) {
+ &cp)) < 0) {
break;
}
(void)cp;
@@ -64,9 +63,8 @@ main(int argc, char *argv[])
(void)argc;
- p.buf = generate_utf8_test_buffer(character_break_test,
- LEN(character_break_test),
- &(p.buflen));
+ p.buf = generate_utf8_test_buffer(
+ character_break_test, LEN(character_break_test), &(p.buflen));
/* convert cp-buffer to stupid custom libutf8proc-uint8-type */
if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
@@ -74,7 +72,7 @@ main(int argc, char *argv[])
exit(1);
}
for (i = 0; i < p.buflen; i++) {
- /*
+ /*
* even if char is larger than 8 bit, it will only have
* any of the first 8 bits set (by construction).
*/
@@ -82,11 +80,11 @@ main(int argc, char *argv[])
}
printf("%s\n", argv[0]);
- run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
- "byte", &baseline, NUM_ITERATIONS, p.buflen);
+ run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
+ NUM_ITERATIONS, p.buflen);
run_benchmark(libutf8proc, &p, "libutf8proc ",
- "but unsafe (does not detect overlong encodings)",
- "byte", &baseline, NUM_ITERATIONS, p.buflen);
+ "but unsafe (does not detect overlong encodings)", "byte",
+ &baseline, NUM_ITERATIONS, p.buflen);
free(p.buf);
free(p.buf_utf8proc);
diff --git a/benchmark/util.c b/benchmark/util.c
@@ -1,7 +1,7 @@
/* See LICENSE file for copyright and license details. */
#include <math.h>
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <time.h>
#include "../gen/types.h"
@@ -20,7 +20,8 @@ generate_cp_test_buffer(const struct break_test *test, size_t…
*buflen += test[i].cplen;
}
if (!(buf = calloc(*buflen, sizeof(*buf)))) {
- fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n");
+ fprintf(stderr,
+ "generate_test_buffer: calloc: Out of memory.\n");
exit(1);
}
for (i = 0, off = 0; i < testlen; i++) {
@@ -48,18 +49,18 @@ generate_utf8_test_buffer(const struct break_test *test, si…
}
(*buflen)++; /* terminating NUL-byte */
if (!(buf = malloc(*buflen))) {
- fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\n");
+ fprintf(stderr,
+ "generate_test_buffer: malloc: Out of memory.\n");
exit(1);
}
for (i = 0, off = 0; i < testlen; i++) {
for (j = 0; j < test[i].cplen; j++, off += ret) {
- if ((ret = grapheme_encode_utf8(test[i].cp[j],
- buf + off,
- *buflen - off)) >
+ if ((ret = grapheme_encode_utf8(
+ test[i].cp[j], buf + off, *buflen - off)) >
(*buflen - off)) {
/* shouldn't happen */
fprintf(stderr, "generate_utf8_test_buffer: "
- "Buffer too small.\n");
+ "Buffer too small.\n");
exit(1);
}
}
@@ -77,10 +78,9 @@ time_diff(struct timespec *a, struct timespec *b)
}
void
-run_benchmark(void (*func)(const void *), const void *payload,
- const char *name, const char *comment, const char *unit,
- double *baseline, size_t num_iterations,
- size_t units_per_iteration)
+run_benchmark(void (*func)(const void *), const void *payload, const char *name,
+ const char *comment, const char *unit, double *baseline,
+ size_t num_iterations, size_t units_per_iteration)
{
struct timespec start, end;
size_t i;
@@ -109,7 +109,6 @@ run_benchmark(void (*func)(const void *), const void *paylo…
printf(" avg. %.3es/%s (%.2f%% %s%s%s)\n", diff, unit,
fabs(1.0 - diff / *baseline) * 100,
(diff < *baseline) ? "faster" : "slower",
- comment ? ", " : "",
- comment ? comment : "");
+ comment ? ", " : "", comment ? comment : "");
}
}
diff --git a/benchmark/util.h b/benchmark/util.h
@@ -7,10 +7,10 @@
#define LEN(x) (sizeof(x) / sizeof(*(x)))
#ifdef __has_attribute
- #if __has_attribute(optnone)
- void libgrapheme(const void *) __attribute__((optnone));
- void libutf8proc(const void *) __attribute__((optnone));
- #endif
+#if __has_attribute(optnone)
+void libgrapheme(const void *) __attribute__((optnone));
+void libutf8proc(const void *) __attribute__((optnone));
+#endif
#endif
uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t,
diff --git a/benchmark/word.c b/benchmark/word.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/word-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
+ for (off = 0; off < p->buflen;) {
off += grapheme_next_word_break(p->buf + off, p->buflen - off);
}
}
diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c
@@ -3,8 +3,8 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
-#include <string.h>
#include <stdlib.h>
+#include <string.h>
#include "../grapheme.h"
#include "util.h"
@@ -23,29 +23,29 @@ static const struct {
const char *class;
const uint_least32_t cp;
} classcpmap[] = {
- { .class = "L", .cp = UINT32_C(0x0041) },
- { .class = "AL", .cp = UINT32_C(0x0608) },
- { .class = "AN", .cp = UINT32_C(0x0600) },
- { .class = "B", .cp = UINT32_C(0x000A) },
- { .class = "BN", .cp = UINT32_C(0x0000) },
- { .class = "CS", .cp = UINT32_C(0x002C) },
- { .class = "EN", .cp = UINT32_C(0x0030) },
- { .class = "ES", .cp = UINT32_C(0x002B) },
- { .class = "ET", .cp = UINT32_C(0x0023) },
+ { .class = "L", .cp = UINT32_C(0x0041) },
+ { .class = "AL", .cp = UINT32_C(0x0608) },
+ { .class = "AN", .cp = UINT32_C(0x0600) },
+ { .class = "B", .cp = UINT32_C(0x000A) },
+ { .class = "BN", .cp = UINT32_C(0x0000) },
+ { .class = "CS", .cp = UINT32_C(0x002C) },
+ { .class = "EN", .cp = UINT32_C(0x0030) },
+ { .class = "ES", .cp = UINT32_C(0x002B) },
+ { .class = "ET", .cp = UINT32_C(0x0023) },
{ .class = "FSI", .cp = UINT32_C(0x2068) },
{ .class = "LRE", .cp = UINT32_C(0x202A) },
{ .class = "LRI", .cp = UINT32_C(0x2066) },
{ .class = "LRO", .cp = UINT32_C(0x202D) },
{ .class = "NSM", .cp = UINT32_C(0x0300) },
- { .class = "ON", .cp = UINT32_C(0x0021) },
+ { .class = "ON", .cp = UINT32_C(0x0021) },
{ .class = "PDF", .cp = UINT32_C(0x202C) },
{ .class = "PDI", .cp = UINT32_C(0x2069) },
- { .class = "R", .cp = UINT32_C(0x05BE) },
+ { .class = "R", .cp = UINT32_C(0x05BE) },
{ .class = "RLE", .cp = UINT32_C(0x202B) },
{ .class = "RLI", .cp = UINT32_C(0x2067) },
{ .class = "RLO", .cp = UINT32_C(0x202E) },
- { .class = "S", .cp = UINT32_C(0x0009) },
- { .class = "WS", .cp = UINT32_C(0x000C) },
+ { .class = "S", .cp = UINT32_C(0x0009) },
+ { .class = "WS", .cp = UINT32_C(0x000C) },
};
static int
@@ -59,7 +59,8 @@ classtocp(const char *str, size_t len, uint_least32_t *cp)
return 0;
}
}
- fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, str);
+ fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
+ str);
return 1;
}
@@ -77,8 +78,10 @@ parse_class_list(const char *str, uint_least32_t **cp, size_…
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1)
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
@@ -89,7 +92,8 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t…
/* go through the string again, parsing the classes */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*cp)[i]))) {
+ if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*cp)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -135,12 +139,10 @@ strtolevel(const char *str, size_t len, int_least8_t *lev…
if (str[0] != '1') {
goto toolarge;
}
- *level = (str[0] - '0') * 100 +
- (str[1] - '0') * 10 +
- (str[2] - '0');
+ *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 +
+ (str[2] - '0');
} else if (len == 2) {
- *level = (str[0] - '0') * 10 +
- (str[1] - '0');
+ *level = (str[0] - '0') * 10 + (str[1] - '0');
} else if (len == 1) {
*level = (str[0] - '0');
} else { /* len == 0 */
@@ -149,8 +151,7 @@ strtolevel(const char *str, size_t len, int_least8_t *level)
return 0;
toolarge:
- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str);
return 1;
}
@@ -167,8 +168,10 @@ parse_level_list(const char *str, int_least8_t **level, si…
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1)
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*level = calloc((*levellen = count), sizeof(**level)))) {
@@ -179,7 +182,9 @@ parse_level_list(const char *str, int_least8_t **level, siz…
/* go through the string again, parsing the levels */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (strtolevel(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*level)[i]))) {
+ if (strtolevel(tmp1,
+ tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*level)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -199,7 +204,8 @@ bidirectional_test_list_print(const struct bidirectional_te…
printf("/* Automatically generated by %s */\n"
"#include <stdint.h>\n#include <stddef.h>\n\n"
- "#include \"../grapheme.h\"\n\n", progname);
+ "#include \"../grapheme.h\"\n\n",
+ progname);
printf("static const struct {\n"
"\tuint_least32_t *cp;\n"
@@ -208,7 +214,8 @@ bidirectional_test_list_print(const struct bidirectional_te…
"\tsize_t modelen;\n"
"\tint_least8_t *level;\n"
"\tint_least8_t *reorder;\n"
- "\tsize_t reorderlen;\n} %s[] = {\n", identifier);
+ "\tsize_t reorderlen;\n} %s[] = {\n",
+ identifier);
for (i = 0; i < testlen; i++) {
printf("\t{\n");
@@ -222,11 +229,13 @@ bidirectional_test_list_print(const struct bidirectional_…
printf(" },\n");
printf("\t\t.cplen = %zu,\n", test[i].cplen);
- printf("\t\t.mode = (enum grapheme_bidirectional_overrid…
+ printf("\t\t.mode = (enum "
+ "grapheme_bidirectional_override[]){");
for (j = 0; j < test[i].modelen; j++) {
if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL) {
- printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTR…
+ printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_"
+ "NEUTRAL");
} else if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR");
@@ -279,8 +288,8 @@ static int_least8_t *current_reorder;
static size_t current_reorder_len;
static int
-test_callback(const char *file, char **field, size_t nfields,
- char *comment, void *payload)
+test_callback(const char *file, char **field, size_t nfields, char *comment,
+ void *payload)
{
char *tmp;
@@ -292,23 +301,31 @@ test_callback(const char *file, char **field, size_t nfie…
if (nfields > 0 && field[0][0] == '@') {
if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
tmp = field[0] + sizeof("@Levels:") - 1;
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); …
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
+ tmp++) {
;
+ }
free(current_level);
- parse_level_list(tmp, &current_level, &current_level_l…
- } else if (!strncmp(field[0], "@Reorder:", sizeof("@Reorder:")…
+ parse_level_list(tmp, &current_level,
+ &current_level_len);
+ } else if (!strncmp(field[0],
+ "@Reorder:", sizeof("@Reorder:") - 1)) {
tmp = field[0] + sizeof("@Reorder:") - 1;
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); …
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
+ tmp++) {
;
+ }
free(current_reorder);
- parse_level_list(tmp, &current_reorder, &current_reord…
+ parse_level_list(tmp, &current_reorder,
+ &current_reorder_len);
} else {
fprintf(stderr, "Unknown @-input-line.\n");
exit(1);
}
} else {
if (nfields < 2) {
- /* discard any line that does not have at least 2 fiel…
+ /* discard any line that does not have at least 2 fiel…
+ */
return 0;
}
@@ -321,26 +338,33 @@ test_callback(const char *file, char **field, size_t nfie…
/* parse field data */
parse_class_list(field[0], &(test[testlen - 1].cp),
&(test[testlen - 1].cplen));
-
+
/* copy current level- and reorder-arrays */
- if (!(test[testlen - 1].level = calloc(current_level_len, size…
+ if (!(test[testlen - 1].level =
+ calloc(current_level_len,
+ sizeof(*(test[testlen - 1].level))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
- memcpy(test[testlen - 1].level, current_level, current_level_l…
+ memcpy(test[testlen - 1].level, current_level,
+ current_level_len * sizeof(*(test[testlen - 1].level)));
- if (!(test[testlen - 1].reorder = calloc(current_reorder_len, …
+ if (!(test[testlen - 1].reorder =
+ calloc(current_reorder_len,
+ sizeof(*(test[testlen - 1].reorder))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
if (current_reorder != NULL) {
memcpy(test[testlen - 1].reorder, current_reorder,
- current_reorder_len * sizeof(*(test[testlen - 1…
+ current_reorder_len *
+ sizeof(*(test[testlen - 1].reorder)));
}
test[testlen - 1].reorderlen = current_reorder_len;
-
+
if (current_level_len != test[testlen - 1].cplen) {
- fprintf(stderr, "mismatch between string and level lengths.\n");
+ fprintf(stderr,
+ "mismatch between string and level lengths.\n");
exit(1);
}
@@ -349,27 +373,38 @@ test_callback(const char *file, char **field, size_t nfie…
fprintf(stderr, "malformed paragraph-level-bitset.\n");
exit(1);
} else if (field[1][0] == '2') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
test[testlen - 1].modelen = 1;
} else if (field[1][0] == '3') {
/* auto=0 and LTR=1 */
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '4') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 1;
- } else if (field[1][0] == '5') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
+ } else if (field[1][0] == '5') {
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '7') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
- test[testlen - 1].mode[2] = GRAPHEME_BIDIRECTIONAL_OVE…
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
+ test[testlen - 1].mode[2] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 3;
} else {
- fprintf(stderr, "unhandled paragraph-level-bitset %s.\n", field[1]);
+ fprintf(stderr,
+ "unhandled paragraph-level-bitset %s.\n",
+ field[1]);
exit(1);
}
}
@@ -414,7 +449,8 @@ character_test_callback(const char *file, char **field, siz…
} else if (field[1][0] == '1') {
test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
} else if (field[1][0] == '2') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
} else {
fprintf(stderr, "unhandled paragraph-level-setting.\n");
exit(1);
diff --git a/gen/bidirectional.c b/gen/bidirectional.c
@@ -15,118 +15,118 @@ static const struct property_spec bidi_property[] = {
{
/* default */
.enumname = "L",
- .file = FILE_BIDI_CLASS,
- .ucdname = "L",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "L",
},
{
.enumname = "AL",
- .file = FILE_BIDI_CLASS,
- .ucdname = "AL",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "AL",
},
{
.enumname = "AN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "AN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "AN",
},
{
.enumname = "B",
- .file = FILE_BIDI_CLASS,
- .ucdname = "B",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "B",
},
{
.enumname = "BN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "BN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "BN",
},
{
.enumname = "CS",
- .file = FILE_BIDI_CLASS,
- .ucdname = "CS",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "CS",
},
{
.enumname = "EN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "EN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "EN",
},
{
.enumname = "ES",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ES",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ES",
},
{
.enumname = "ET",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ET",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ET",
},
{
.enumname = "FSI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "FSI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "FSI",
},
{
.enumname = "LRE",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRE",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRE",
},
{
.enumname = "LRI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRI",
},
{
.enumname = "LRO",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRO",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRO",
},
{
.enumname = "NSM",
- .file = FILE_BIDI_CLASS,
- .ucdname = "NSM",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "NSM",
},
{
.enumname = "ON",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ON",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ON",
},
{
.enumname = "PDF",
- .file = FILE_BIDI_CLASS,
- .ucdname = "PDF",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "PDF",
},
{
.enumname = "PDI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "PDI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "PDI",
},
{
.enumname = "R",
- .file = FILE_BIDI_CLASS,
- .ucdname = "R",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "R",
},
{
.enumname = "RLE",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLE",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLE",
},
{
.enumname = "RLI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLI",
},
{
.enumname = "RLO",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLO",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLO",
},
{
.enumname = "S",
- .file = FILE_BIDI_CLASS,
- .ucdname = "S",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "S",
},
{
.enumname = "WS",
- .file = FILE_BIDI_CLASS,
- .ucdname = "WS",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "WS",
},
};
@@ -135,11 +135,12 @@ static struct {
uint_least32_t cp_pair;
char type;
} *b = NULL;
+
static size_t blen;
static int
-bracket_callback(const char *file, char **field, size_t nfields,
- char *comment, void *payload)
+bracket_callback(const char *file, char **field, size_t nfields, char *comment,
+ void *payload)
{
(void)file;
(void)comment;
@@ -189,11 +190,12 @@ post_process(struct properties *prop)
}
static uint_least8_t
-fill_missing(uint_least32_t cp) {
+fill_missing(uint_least32_t cp)
+{
/* based on the @missing-properties in data/DerivedBidiClass.txt */
- if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) ||
- (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) ||
- (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) ||
+ if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) ||
+ (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) ||
+ (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) ||
(cp >= UINT32_C(0x10800) && cp <= UINT32_C(0x10CFF)) ||
(cp >= UINT32_C(0x10D40) && cp <= UINT32_C(0x10EBF)) ||
(cp >= UINT32_C(0x10F00) && cp <= UINT32_C(0x10F2F)) ||
@@ -203,22 +205,22 @@ fill_missing(uint_least32_t cp) {
(cp >= UINT32_C(0x1ED50) && cp <= UINT32_C(0x1EDFF)) ||
(cp >= UINT32_C(0x1EF00) && cp <= UINT32_C(0x1EFFF))) {
return 17; /* class R */
- } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) ||
- (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) ||
- (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) ||
- (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) ||
- (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) ||
+ } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) ||
+ (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) ||
+ (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) ||
+ (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) ||
+ (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) ||
(cp >= UINT32_C(0x10D00) && cp <= UINT32_C(0x10D3F)) ||
(cp >= UINT32_C(0x10EC0) && cp <= UINT32_C(0x10EFF)) ||
- (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) ||
+ (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) ||
(cp >= UINT32_C(0x1EC70) && cp <= UINT32_C(0x1ECBF)) ||
(cp >= UINT32_C(0x1ED00) && cp <= UINT32_C(0x1ED4F)) ||
(cp >= UINT32_C(0x1EE00) && cp <= UINT32_C(0x1EEFF))) {
- return 1; /* class AL */
+ return 1; /* class AL */
} else if (cp >= UINT32_C(0x20A0) && cp <= UINT32_C(0x20CF)) {
- return 8; /* class ET */
+ return 8; /* class ET */
} else {
- return 0; /* class L */
+ return 0; /* class L */
}
}
@@ -238,13 +240,11 @@ main(int argc, char *argv[])
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
- parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback,
- NULL);
+ parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL);
- properties_generate_break_property(bidi_property,
- LEN(bidi_property), fill_missing,
- NULL, post_process, "bidi",
- argv[0]);
+ properties_generate_break_property(bidi_property, LEN(bidi_property),
+ fill_missing, NULL, post_process,
+ "bidi", argv[0]);
printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t"
"BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n"
@@ -252,10 +252,12 @@ main(int argc, char *argv[])
"\tuint_least32_t pair;\n};\n\n"
"static const struct bracket bidi_bracket[] = {\n");
for (i = 0; i < blen; i++) {
- printf("\t{\n\t\t.type = %s,\n\t\t.pair = UINT32_C(0x%06X),\n\t},\n",
- (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
- (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : "BIDI_BRACKET_NONE",
- b[i].cp_pair);
+ printf("\t{\n\t\t.type = %s,\n\t\t.pair = "
+ "UINT32_C(0x%06X),\n\t},\n",
+ (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
+ (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" :
+ "BIDI_BRACKET_NONE",
+ b[i].cp_pair);
}
printf("};\n");
diff --git a/gen/case.c b/gen/case.c
@@ -12,28 +12,28 @@
static const struct property_spec case_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "BOTH_CASED_CASE_IGNORABLE",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
- {
+ {
.enumname = "CASED",
- .file = FILE_DCP,
- .ucdname = "Cased",
+ .file = FILE_DCP,
+ .ucdname = "Cased",
},
{
.enumname = "CASE_IGNORABLE",
- .file = FILE_DCP,
- .ucdname = "Case_Ignorable",
+ .file = FILE_DCP,
+ .ucdname = "Case_Ignorable",
},
{
.enumname = "UNCASED",
- .file = FILE_DCP,
- .ucdname = "Uncased",
+ .file = FILE_DCP,
+ .ucdname = "Uncased",
},
};
@@ -67,12 +67,14 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uin…
}
static struct properties *prop_upper = NULL, *prop_lower, *prop_title;
+
static struct special_case {
struct {
uint_least32_t *cp;
size_t cplen;
} upper, lower, title;
} *sc = NULL;
+
static size_t sclen = 0;
static int
@@ -89,9 +91,12 @@ unicodedata_callback(const char *file, char **field, size_t …
upper = lower = title = cp;
- if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &upper)) ||
- (strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &lower)) ||
- (nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strlen(field[14]), &title))) {
+ if ((strlen(field[12]) > 0 &&
+ hextocp(field[12], strlen(field[12]), &upper)) ||
+ (strlen(field[13]) > 0 &&
+ hextocp(field[13], strlen(field[13]), &lower)) ||
+ (nfields >= 15 && strlen(field[14]) > 0 &&
+ hextocp(field[14], strlen(field[14]), &title))) {
return 1;
}
@@ -126,7 +131,7 @@ specialcasing_callback(const char *file, char **field, size…
/* extend special case array */
if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
- exit(1);
+ exit(1);
}
/* parse field data */
@@ -142,9 +147,12 @@ specialcasing_callback(const char *file, char **field, siz…
* special value 0x110000 + (offset in special case array),
* even if the special case has length 1
*/
- prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
- prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
- prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_upper[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_lower[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_title[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
return 0;
}
@@ -165,9 +173,8 @@ main(int argc, char *argv[])
(void)argc;
/* generate case property table from the specification */
- properties_generate_break_property(case_property,
- LEN(case_property), NULL,
- handle_conflict, NULL, "case",
+ properties_generate_break_property(case_property, LEN(case_property),
+ NULL, handle_conflict, NULL, "case",
argv[0]);
/*
@@ -186,38 +193,46 @@ main(int argc, char *argv[])
}
parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback,
NULL);
- parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callback,
- NULL);
+ parse_file_with_callback("data/SpecialCasing.txt",
+ specialcasing_callback, NULL);
/* compress properties */
properties_compress(prop_upper, &comp_upper);
properties_compress(prop_lower, &comp_lower);
properties_compress(prop_title, &comp_title);
- fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, title=%.2f%%\n",
+ fprintf(stderr,
+ "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, "
+ "title=%.2f%%\n",
argv[0], properties_get_major_minor(&comp_upper, &mm_upper),
properties_get_major_minor(&comp_lower, &mm_lower),
properties_get_major_minor(&comp_title, &mm_title));
/* print tables */
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n#include <stddef.h>\n\n", argv[0]);
+ printf("/* Automatically generated by %s */\n#include "
+ "<stdint.h>\n#include <stddef.h>\n\n",
+ argv[0]);
- printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\n};\n\n");
+ printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t "
+ "cplen;\n};\n\n");
properties_print_lookup_table("upper_major", mm_upper.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("upper_minor", "int_least32_t", mm_upper.minor,
- mm_upper.minorlen, get_value, comp_upper.data);
+ properties_print_derived_lookup_table("upper_minor", "int_least32_t",
+ mm_upper.minor, mm_upper.minorlen,
+ get_value, comp_upper.data);
printf("\n");
properties_print_lookup_table("lower_major", mm_lower.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("lower_minor", "int_least32_t", mm_lower.minor,
- mm_lower.minorlen, get_value, comp_lower.data);
+ properties_print_derived_lookup_table("lower_minor", "int_least32_t",
+ mm_lower.minor, mm_lower.minorlen,
+ get_value, comp_lower.data);
printf("\n");
properties_print_lookup_table("title_major", mm_title.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("title_minor", "int_least32_t", mm_title.minor,
- mm_title.minorlen, get_value, comp_title.data);
+ properties_print_derived_lookup_table("title_minor", "int_least32_t",
+ mm_title.minor, mm_title.minorlen,
+ get_value, comp_title.data);
printf("\n");
printf("static const struct special_case upper_special[] = {\n");
diff --git a/gen/character.c b/gen/character.c
@@ -9,78 +9,78 @@
static const struct property_spec char_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CONTROL",
- .file = FILE_GRAPHEME,
- .ucdname = "Control",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Control",
},
{
.enumname = "CR",
- .file = FILE_GRAPHEME,
- .ucdname = "CR",
+ .file = FILE_GRAPHEME,
+ .ucdname = "CR",
},
{
.enumname = "EXTEND",
- .file = FILE_GRAPHEME,
- .ucdname = "Extend",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Extend",
},
{
.enumname = "EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
{
.enumname = "HANGUL_L",
- .file = FILE_GRAPHEME,
- .ucdname = "L",
+ .file = FILE_GRAPHEME,
+ .ucdname = "L",
},
{
.enumname = "HANGUL_V",
- .file = FILE_GRAPHEME,
- .ucdname = "V",
+ .file = FILE_GRAPHEME,
+ .ucdname = "V",
},
{
.enumname = "HANGUL_T",
- .file = FILE_GRAPHEME,
- .ucdname = "T",
+ .file = FILE_GRAPHEME,
+ .ucdname = "T",
},
{
.enumname = "HANGUL_LV",
- .file = FILE_GRAPHEME,
- .ucdname = "LV",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LV",
},
{
.enumname = "HANGUL_LVT",
- .file = FILE_GRAPHEME,
- .ucdname = "LVT",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LVT",
},
{
.enumname = "LF",
- .file = FILE_GRAPHEME,
- .ucdname = "LF",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LF",
},
{
.enumname = "PREPEND",
- .file = FILE_GRAPHEME,
- .ucdname = "Prepend",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Prepend",
},
{
.enumname = "REGIONAL_INDICATOR",
- .file = FILE_GRAPHEME,
- .ucdname = "Regional_Indicator",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Regional_Indicator",
},
{
.enumname = "SPACINGMARK",
- .file = FILE_GRAPHEME,
- .ucdname = "SpacingMark",
+ .file = FILE_GRAPHEME,
+ .ucdname = "SpacingMark",
},
{
.enumname = "ZWJ",
- .file = FILE_GRAPHEME,
- .ucdname = "ZWJ",
+ .file = FILE_GRAPHEME,
+ .ucdname = "ZWJ",
},
};
@@ -90,8 +90,8 @@ main(int argc, char *argv[])
(void)argc;
properties_generate_break_property(char_break_property,
- LEN(char_break_property), NULL,
- NULL, NULL, "char_break", argv[0]);
+ LEN(char_break_property), NULL, NUL…
+ NULL, "char_break", argv[0]);
return 0;
}
diff --git a/gen/line.c b/gen/line.c
@@ -12,8 +12,8 @@
static const struct property_spec line_break_property[] = {
{
.enumname = "AL",
- .file = FILE_LINE,
- .ucdname = "AL",
+ .file = FILE_LINE,
+ .ucdname = "AL",
},
/*
* Both extended pictographic and cn are large classes,
@@ -32,269 +32,269 @@ static const struct property_spec line_break_property[] =…
*/
{
.enumname = "TMP_CN",
- .file = FILE_LINE,
- .ucdname = "Cn",
+ .file = FILE_LINE,
+ .ucdname = "Cn",
},
{
.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
/* end of special block */
{
.enumname = "B2",
- .file = FILE_LINE,
- .ucdname = "B2",
+ .file = FILE_LINE,
+ .ucdname = "B2",
},
{
.enumname = "BA",
- .file = FILE_LINE,
- .ucdname = "BA",
+ .file = FILE_LINE,
+ .ucdname = "BA",
},
{
.enumname = "BB",
- .file = FILE_LINE,
- .ucdname = "BB",
+ .file = FILE_LINE,
+ .ucdname = "BB",
},
{
.enumname = "BK",
- .file = FILE_LINE,
- .ucdname = "BK",
+ .file = FILE_LINE,
+ .ucdname = "BK",
},
{
.enumname = "BOTH_CN_EXTPICT",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CB",
- .file = FILE_LINE,
- .ucdname = "CB",
+ .file = FILE_LINE,
+ .ucdname = "CB",
},
{
.enumname = "CL",
- .file = FILE_LINE,
- .ucdname = "CL",
+ .file = FILE_LINE,
+ .ucdname = "CL",
},
{
.enumname = "CM",
- .file = FILE_LINE,
- .ucdname = "CM",
+ .file = FILE_LINE,
+ .ucdname = "CM",
},
{
.enumname = "CP_WITHOUT_EAW_HWF",
- .file = FILE_LINE,
- .ucdname = "CP",
+ .file = FILE_LINE,
+ .ucdname = "CP",
},
{
.enumname = "CP_WITH_EAW_HWF",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_LINE,
- .ucdname = "CR",
+ .file = FILE_LINE,
+ .ucdname = "CR",
},
{
.enumname = "EB",
- .file = FILE_LINE,
- .ucdname = "EB",
+ .file = FILE_LINE,
+ .ucdname = "EB",
},
{
.enumname = "EM",
- .file = FILE_LINE,
- .ucdname = "EM",
+ .file = FILE_LINE,
+ .ucdname = "EM",
},
{
.enumname = "EX",
- .file = FILE_LINE,
- .ucdname = "EX",
+ .file = FILE_LINE,
+ .ucdname = "EX",
},
{
.enumname = "GL",
- .file = FILE_LINE,
- .ucdname = "GL",
+ .file = FILE_LINE,
+ .ucdname = "GL",
},
{
.enumname = "H2",
- .file = FILE_LINE,
- .ucdname = "H2",
+ .file = FILE_LINE,
+ .ucdname = "H2",
},
{
.enumname = "H3",
- .file = FILE_LINE,
- .ucdname = "H3",
+ .file = FILE_LINE,
+ .ucdname = "H3",
},
{
.enumname = "HL",
- .file = FILE_LINE,
- .ucdname = "HL",
+ .file = FILE_LINE,
+ .ucdname = "HL",
},
{
.enumname = "HY",
- .file = FILE_LINE,
- .ucdname = "HY",
+ .file = FILE_LINE,
+ .ucdname = "HY",
},
{
.enumname = "ID",
- .file = FILE_LINE,
- .ucdname = "ID",
+ .file = FILE_LINE,
+ .ucdname = "ID",
},
{
.enumname = "IN",
- .file = FILE_LINE,
- .ucdname = "IN",
+ .file = FILE_LINE,
+ .ucdname = "IN",
},
{
.enumname = "IS",
- .file = FILE_LINE,
- .ucdname = "IS",
+ .file = FILE_LINE,
+ .ucdname = "IS",
},
{
.enumname = "JL",
- .file = FILE_LINE,
- .ucdname = "JL",
+ .file = FILE_LINE,
+ .ucdname = "JL",
},
{
.enumname = "JT",
- .file = FILE_LINE,
- .ucdname = "JT",
+ .file = FILE_LINE,
+ .ucdname = "JT",
},
{
.enumname = "JV",
- .file = FILE_LINE,
- .ucdname = "JV",
+ .file = FILE_LINE,
+ .ucdname = "JV",
},
{
.enumname = "LF",
- .file = FILE_LINE,
- .ucdname = "LF",
+ .file = FILE_LINE,
+ .ucdname = "LF",
},
{
.enumname = "NL",
- .file = FILE_LINE,
- .ucdname = "NL",
+ .file = FILE_LINE,
+ .ucdname = "NL",
},
{
.enumname = "NS",
- .file = FILE_LINE,
- .ucdname = "NS",
+ .file = FILE_LINE,
+ .ucdname = "NS",
},
{
.enumname = "NU",
- .file = FILE_LINE,
- .ucdname = "NU",
+ .file = FILE_LINE,
+ .ucdname = "NU",
},
{
.enumname = "OP_WITHOUT_EAW_HWF",
- .file = FILE_LINE,
- .ucdname = "OP",
+ .file = FILE_LINE,
+ .ucdname = "OP",
},
{
.enumname = "OP_WITH_EAW_HWF",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "PO",
- .file = FILE_LINE,
- .ucdname = "PO",
+ .file = FILE_LINE,
+ .ucdname = "PO",
},
{
.enumname = "PR",
- .file = FILE_LINE,
- .ucdname = "PR",
+ .file = FILE_LINE,
+ .ucdname = "PR",
},
{
.enumname = "QU",
- .file = FILE_LINE,
- .ucdname = "QU",
+ .file = FILE_LINE,
+ .ucdname = "QU",
},
{
.enumname = "RI",
- .file = FILE_LINE,
- .ucdname = "RI",
+ .file = FILE_LINE,
+ .ucdname = "RI",
},
{
.enumname = "SP",
- .file = FILE_LINE,
- .ucdname = "SP",
+ .file = FILE_LINE,
+ .ucdname = "SP",
},
{
.enumname = "SY",
- .file = FILE_LINE,
- .ucdname = "SY",
+ .file = FILE_LINE,
+ .ucdname = "SY",
},
{
.enumname = "WJ",
- .file = FILE_LINE,
- .ucdname = "WJ",
+ .file = FILE_LINE,
+ .ucdname = "WJ",
},
{
.enumname = "ZW",
- .file = FILE_LINE,
- .ucdname = "ZW",
+ .file = FILE_LINE,
+ .ucdname = "ZW",
},
{
.enumname = "ZWJ",
- .file = FILE_LINE,
- .ucdname = "ZWJ",
+ .file = FILE_LINE,
+ .ucdname = "ZWJ",
},
{
.enumname = "TMP_AI",
- .file = FILE_LINE,
- .ucdname = "AI",
+ .file = FILE_LINE,
+ .ucdname = "AI",
},
{
.enumname = "TMP_CJ",
- .file = FILE_LINE,
- .ucdname = "CJ",
+ .file = FILE_LINE,
+ .ucdname = "CJ",
},
{
.enumname = "TMP_XX",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "TMP_MN",
- .file = FILE_LINE,
- .ucdname = "Mn",
+ .file = FILE_LINE,
+ .ucdname = "Mn",
},
{
.enumname = "TMP_MC",
- .file = FILE_LINE,
- .ucdname = "Mc",
+ .file = FILE_LINE,
+ .ucdname = "Mc",
},
{
.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
- .file = FILE_LINE,
- .ucdname = "SA",
+ .file = FILE_LINE,
+ .ucdname = "SA",
},
{
.enumname = "TMP_SA_WITH_MN_OR_MC",
- .file = FILE_LINE,
- .ucdname = "SA",
+ .file = FILE_LINE,
+ .ucdname = "SA",
},
{
.enumname = "TMP_SG",
- .file = FILE_LINE,
- .ucdname = "SG",
+ .file = FILE_LINE,
+ .ucdname = "SG",
},
{
.enumname = "TMP_EAW_H",
- .file = FILE_EAW,
- .ucdname = "H",
+ .file = FILE_EAW,
+ .ucdname = "H",
},
{
.enumname = "TMP_EAW_W",
- .file = FILE_EAW,
- .ucdname = "W",
+ .file = FILE_EAW,
+ .ucdname = "W",
},
{
.enumname = "TMP_EAW_F",
- .file = FILE_EAW,
- .ucdname = "F",
+ .file = FILE_EAW,
+ .ucdname = "F",
},
};
@@ -306,23 +306,30 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…
(void)cp;
- if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
+ if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
- if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_E…
- !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_E…
+ if (!strcmp(line_break_property[prop1].enumname,
+ "CP_WITHOUT_EAW_HWF") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "CP_WITHOUT_EAW_HWF")) {
target = "CP_WITH_EAW_HWF";
- } else if (!strcmp(line_break_property[prop1].enumname, "OP_WI…
- !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_E…
+ } else if (!strcmp(line_break_property[prop1].enumname,
+ "OP_WITHOUT_EAW_HWF") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "OP_WITHOUT_EAW_HWF")) {
target = "OP_WITH_EAW_HWF";
} else {
/* ignore EAW for the rest */
- if ((!strcmp(line_break_property[prop1].enumname, "TMP…
- !strcmp(line_break_property[prop1].enumname, "TMP…
- !strcmp(line_break_property[prop1].enumname, "TMP…
+ if ((!strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_W") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_F"))) {
result = prop2;
} else {
result = prop1;
@@ -330,15 +337,19 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…
}
} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
!strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
- (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
- !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
- if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_M…
- !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_M…
+ (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
+ !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
+ if (!strcmp(line_break_property[prop1].enumname,
+ "SA_WITHOUT_MN_OR_MC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "SA_WITHOUT_MN_OR_MC")) {
target = "SA_WITH_MN_OR_MC";
} else {
/* ignore Mn and Mc for the rest */
- if ((!strcmp(line_break_property[prop1].enumname, "TMP…
- !strcmp(line_break_property[prop1].enumname, "TMP…
+ if ((!strcmp(line_break_property[prop1].enumname,
+ "TMP_MN") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_MC"))) {
result = prop2;
} else {
result = prop1;
@@ -346,33 +357,42 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…
}
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
- if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED…
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED…
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
target = "BOTH_CN_EXTPICT";
} else {
/* ignore Cn for all the other properties */
- if (!strcmp(line_break_property[prop1].enumname, "TMP_…
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_CN")) {
result = prop2;
} else {
result = prop1;
}
}
- } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_…
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_…
+ } else if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
target = "BOTH_CN_EXTPICT";
} else {
- /* ignore Extended_Pictographic for all the other prop…
- if (!strcmp(line_break_property[prop1].enumname, "TMP_…
+ /* ignore Extended_Pictographic for all the other
+ * properties */
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
result = prop2;
} else {
result = prop1;
}
}
} else {
- fprintf(stderr, "handle_conflict: Cannot handle conflict %s <-…
- line_break_property[prop1].enumname, line_break_proper…
+ fprintf(stderr,
+ "handle_conflict: Cannot handle conflict %s <- %s.\n",
+ line_break_property[prop1].enumname,
+ line_break_property[prop2].enumname);
exit(1);
}
@@ -402,27 +422,44 @@ post_process(struct properties *prop)
/* post-mapping according to the line breaking algorithm */
for (i = 0; i < UINT32_C(0x110000); i++) {
/* LB1 */
- if (!strcmp(line_break_property[prop[i].property].enumname, "T…
- !strcmp(line_break_property[prop[i].property].enumname, "T…
- !strcmp(line_break_property[prop[i].property].enumname, "T…
+ if (!strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_AI") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_SG") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_XX")) {
/* map AI, SG and XX to AL */
target = "AL";
- } else if (!strcmp(line_break_property[prop[i].property].enumn…
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_SA_WITH_MN_OR_MC")) {
/* map SA (with General_Category Mn or Mc) to CM */
target = "CM";
- } else if (!strcmp(line_break_property[prop[i].property].enumn…
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_SA_WITHOUT_MN_OR_MC")) {
/* map SA (without General_Category Mn or Mc) to AL */
target = "AL";
- } else if (!strcmp(line_break_property[prop[i].property].enumn…
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_CJ")) {
/* map CJ to NS */
target = "NS";
- } else if (!strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
- !strcmp(line_break_property[prop[i].property].enumn…
+ } else if (
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_CN") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_MN") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_MC") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_W") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_F")) {
/* map all the temporary classes "residue" to AL */
target = "AL";
} else {
@@ -430,14 +467,17 @@ post_process(struct properties *prop)
}
if (target) {
- for (result = 0; result < LEN(line_break_property); re…
- if (!strcmp(line_break_property[result].enumna…
+ for (result = 0; result < LEN(line_break_property);
+ result++) {
+ if (!strcmp(line_break_property[result]
+ .enumname,
target)) {
break;
}
}
if (result == LEN(line_break_property)) {
- fprintf(stderr, "handle_conflict: Internal err…
+ fprintf(stderr,
+ "handle_conflict: Internal error.\n");
exit(1);
}
@@ -451,10 +491,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(line_break_property,
- LEN(line_break_property), NULL,
- handle_conflict, post_process,
- "line_break", argv[0]);
+ properties_generate_break_property(
+ line_break_property, LEN(line_break_property), NULL,
+ handle_conflict, post_process, "line_break", argv[0]);
return 0;
}
diff --git a/gen/sentence.c b/gen/sentence.c
@@ -6,78 +6,78 @@
static const struct property_spec sentence_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_SENTENCE,
- .ucdname = "CR",
+ .file = FILE_SENTENCE,
+ .ucdname = "CR",
},
{
.enumname = "LF",
- .file = FILE_SENTENCE,
- .ucdname = "LF",
+ .file = FILE_SENTENCE,
+ .ucdname = "LF",
},
{
.enumname = "EXTEND",
- .file = FILE_SENTENCE,
- .ucdname = "Extend",
+ .file = FILE_SENTENCE,
+ .ucdname = "Extend",
},
{
.enumname = "SEP",
- .file = FILE_SENTENCE,
- .ucdname = "Sep",
+ .file = FILE_SENTENCE,
+ .ucdname = "Sep",
},
{
.enumname = "FORMAT",
- .file = FILE_SENTENCE,
- .ucdname = "Format",
+ .file = FILE_SENTENCE,
+ .ucdname = "Format",
},
{
.enumname = "SP",
- .file = FILE_SENTENCE,
- .ucdname = "Sp",
+ .file = FILE_SENTENCE,
+ .ucdname = "Sp",
},
{
.enumname = "LOWER",
- .file = FILE_SENTENCE,
- .ucdname = "Lower",
+ .file = FILE_SENTENCE,
+ .ucdname = "Lower",
},
{
.enumname = "UPPER",
- .file = FILE_SENTENCE,
- .ucdname = "Upper",
+ .file = FILE_SENTENCE,
+ .ucdname = "Upper",
},
{
.enumname = "OLETTER",
- .file = FILE_SENTENCE,
- .ucdname = "OLetter",
+ .file = FILE_SENTENCE,
+ .ucdname = "OLetter",
},
{
.enumname = "NUMERIC",
- .file = FILE_SENTENCE,
- .ucdname = "Numeric",
+ .file = FILE_SENTENCE,
+ .ucdname = "Numeric",
},
{
.enumname = "ATERM",
- .file = FILE_SENTENCE,
- .ucdname = "ATerm",
+ .file = FILE_SENTENCE,
+ .ucdname = "ATerm",
},
{
.enumname = "SCONTINUE",
- .file = FILE_SENTENCE,
- .ucdname = "SContinue",
+ .file = FILE_SENTENCE,
+ .ucdname = "SContinue",
},
{
.enumname = "STERM",
- .file = FILE_SENTENCE,
- .ucdname = "STerm",
+ .file = FILE_SENTENCE,
+ .ucdname = "STerm",
},
{
.enumname = "CLOSE",
- .file = FILE_SENTENCE,
- .ucdname = "Close",
+ .file = FILE_SENTENCE,
+ .ucdname = "Close",
},
};
@@ -86,9 +86,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(sentence_break_property,
- LEN(sentence_break_property), NULL,
- NULL, NULL, "sentence_break", argv[…
+ properties_generate_break_property(
+ sentence_break_property, LEN(sentence_break_property), NULL,
+ NULL, NULL, "sentence_break", argv[0]);
return 0;
}
diff --git a/gen/util.c b/gen/util.c
@@ -1,13 +1,12 @@
/* See LICENSE file for copyright and license details. */
-#include <stdbool.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include "util.h"
@@ -21,12 +20,13 @@ struct properties_payload {
struct properties *prop;
const struct property_spec *spec;
uint_least8_t speclen;
- int (*set_value)(struct properties_payload *, uint_least32_t, int_leas…
- uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, uint_l…
+ int (*set_value)(struct properties_payload *, uint_least32_t,
+ int_least64_t);
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t);
};
-struct break_test_payload
-{
+struct break_test_payload {
struct break_test **test;
size_t *testlen;
};
@@ -51,8 +51,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
/* the maximum valid codepoint is 0x10FFFF */
if (len > 6) {
- fprintf(stderr, "hextocp: '%.*s' is too long.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len,
+ str);
return 1;
}
@@ -77,8 +77,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
}
if (*cp > UINT32_C(0x10FFFF)) {
- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len,
+ str);
return 1;
}
@@ -98,8 +98,10 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *…
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+…
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
@@ -110,7 +112,8 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t …
/* go through the string again, parsing the numbers */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),…
+ if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*cp)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -144,8 +147,10 @@ range_parse(const char *str, struct range *range)
}
void
-parse_file_with_callback(const char *fname, int (*callback)(const char *,
- char **, size_t, char *, void *), void *payload)
+parse_file_with_callback(const char *fname,
+ int (*callback)(const char *, char **, size_t, char *,
+ void *),
+ void *payload)
{
FILE *fp;
char *line = NULL, **field = NULL, *comment;
@@ -182,10 +187,15 @@ parse_file_with_callback(const char *fname, int (*callbac…
if (line[i] != '#') {
/* extend field buffer, if necessary */
if (++nfields > fieldbufsize) {
- if ((field = realloc(field, nfields *
- sizeof(*field))) == NULL) {
- fprintf(stderr, "parse_file_wi…
- "callback: realloc: %s…
+ if ((field = realloc(
+ field,
+ nfields *
+ sizeof(*field))) …
+ NULL) {
+ fprintf(stderr,
+ "parse_file_with_"
+ "callback: realloc: "
+ "%s.\n",
strerror(errno));
exit(1);
}
@@ -209,8 +219,9 @@ parse_file_with_callback(const char *fname, int (*callback)…
/* go back whitespace and terminate field there */
if (i > 0) {
- for (j = i - 1; line[j] == ' '; j--)
+ for (j = i - 1; line[j] == ' '; j--) {
;
+ }
line[j + 1] = '\0';
} else {
line[i] = '\0';
@@ -230,7 +241,7 @@ parse_file_with_callback(const char *fname, int (*callback)…
/* call callback function */
if (callback(fname, field, nfields, comment, payload)) {
fprintf(stderr, "parse_file_with_callback: "
- "Malformed input.\n");
+ "Malformed input.\n");
exit(1);
}
}
@@ -257,10 +268,11 @@ properties_callback(const char *file, char **field, size_…
for (i = 0; i < p->speclen; i++) {
/* identify fitting file and identifier */
- if (p->spec[i].file &&
- !strcmp(p->spec[i].file, file) &&
+ if (p->spec[i].file && !strcmp(p->spec[i].file, file) &&
(!strcmp(p->spec[i].ucdname, field[1]) ||
- (comment != NULL && !strncmp(p->spec[i].ucdname, comment,…
+ (comment != NULL &&
+ !strncmp(p->spec[i].ucdname, comment,
+ strlen(p->spec[i].ucdname)) &&
comment[strlen(p->spec[i].ucdname)] == ' '))) {
/* parse range in first field */
if (range_parse(field[0], &r)) {
@@ -287,7 +299,8 @@ properties_compress(const struct properties *prop,
uint_least32_t cp, i;
/* initialization */
- if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp…
+ if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) *
+ sizeof(*(comp->offset))))) {
fprintf(stderr, "malloc: %s\n", strerror(errno));
exit(1);
}
@@ -296,7 +309,8 @@ properties_compress(const struct properties *prop,
for (cp = 0; cp < UINT32_C(0x110000); cp++) {
for (i = 0; i < comp->datalen; i++) {
- if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*pro…
+ if (!memcmp(&(prop[cp]), &(comp->data[i]),
+ sizeof(*prop))) {
/* found a match! */
comp->offset[cp] = i;
break;
@@ -308,9 +322,9 @@ properties_compress(const struct properties *prop,
* add current properties to data and add the
* offset in the offset-table
*/
- if (!(comp->data = reallocate_array(comp->data,
- ++(comp->datalen),
- sizeof(*(comp->dat…
+ if (!(comp->data = reallocate_array(
+ comp->data, ++(comp->datalen),
+ sizeof(*(comp->data))))) {
fprintf(stderr, "reallocate_array: %s\n",
strerror(errno));
exit(1);
@@ -357,8 +371,7 @@ properties_get_major_minor(const struct properties_compress…
* and need less storage)
*/
for (j = 0; j + 0xFF < mm->minorlen; j++) {
- if (!memcmp(&(comp->offset[i << 8]),
- &(mm->minor[j]),
+ if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]),
sizeof(*(comp->offset)) * 0x100)) {
break;
}
@@ -373,9 +386,9 @@ properties_get_major_minor(const struct properties_compress…
* in major
*/
mm->minorlen += 0x100;
- if (!(mm->minor = reallocate_array(mm->minor,
- mm->minorlen,
- sizeof(*(mm->minor)…
+ if (!(mm->minor =
+ reallocate_array(mm->minor, mm->minorlen,
+ sizeof(*(mm->minor)))))…
fprintf(stderr, "reallocate_array: %s\n",
strerror(errno));
exit(1);
@@ -403,7 +416,7 @@ properties_print_lookup_table(char *name, size_t *data, siz…
}
}
- type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
+ type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
(maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
(maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
"uint_least64_t";
@@ -418,21 +431,21 @@ properties_print_lookup_table(char *name, size_t *data, s…
} else {
printf(",\n\t");
}
-
}
printf("};\n");
}
void
-properties_print_derived_lookup_table(char *name, char *type, size_t *offset, …
- int_least64_t (*get_value)(const struct …
- size_t), const void *payload)
+properties_print_derived_lookup_table(
+ char *name, char *type, size_t *offset, size_t offsetlen,
+ int_least64_t (*get_value)(const struct properties *, size_t),
+ const void *payload)
{
size_t i;
printf("static const %s %s[] = {\n\t", type, name);
for (i = 0; i < offsetlen; i++) {
- printf("%"PRIiLEAST64, get_value(payload, offset[i]));
+ printf("%" PRIiLEAST64, get_value(payload, offset[i]));
if (i + 1 == offsetlen) {
printf("\n");
} else if ((i + 1) % 8 != 0) {
@@ -440,7 +453,6 @@ properties_print_derived_lookup_table(char *name, char *typ…
} else {
printf(",\n\t");
}
-
}
printf("};\n");
}
@@ -464,17 +476,19 @@ set_value_bp(struct properties_payload *payload, uint_lea…
{
if (payload->prop[cp].property != payload->speclen) {
if (payload->handle_conflict == NULL) {
- fprintf(stderr, "set_value_bp: "
- "Unhandled character break property "
+ fprintf(stderr,
+ "set_value_bp: "
+ "Unhandled character break property "
"overwrite for 0x%06X (%s <- %s).\n",
- cp, payload->spec[payload->prop[cp].
- property].enumname,
+ cp,
+ payload->spec[payload->prop[cp].property]
+ .enumname,
payload->spec[value].enumname);
return 1;
} else {
- value = payload->handle_conflict(cp,
- (uint_least8_t)payload->prop[cp].property,
- (uint_least8_t)value);
+ value = payload->handle_conflict(
+ cp, (uint_least8_t)payload->prop[cp].property,
+ (uint_least8_t)value);
}
}
payload->prop[cp].property = value;
@@ -489,15 +503,13 @@ get_value_bp(const struct properties *prop, size_t offset)
}
void
-properties_generate_break_property(const struct property_spec *spec,
- uint_least8_t speclen,
- uint_least8_t (*fill_missing)(
- uint_least32_t),
- uint_least8_t (*handle_conflict)(
- uint_least32_t, uint_least8_t,
- uint_least8_t), void
- (*post_process)(struct properties *),
- const char *prefix, const char *argv0)
+properties_generate_break_property(
+ const struct property_spec *spec, uint_least8_t speclen,
+ uint_least8_t (*fill_missing)(uint_least32_t),
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t),
+ void (*post_process)(struct properties *), const char *prefix,
+ const char *argv0)
{
struct properties_compressed comp;
struct properties_major_minor mm;
@@ -537,8 +549,7 @@ properties_generate_break_property(const struct property_sp…
if (i == j && spec[i].file) {
/* file has not been processed yet */
parse_file_with_callback(spec[i].file,
- properties_callback,
- &payload);
+ properties_callback, &payload…
}
}
@@ -546,7 +557,8 @@ properties_generate_break_property(const struct property_sp…
for (i = 0; i < UINT32_C(0x110000); i++) {
if (payload.prop[i].property == speclen) {
if (fill_missing != NULL) {
- payload.prop[i].property = fill_missing((uint_…
+ payload.prop[i].property =
+ fill_missing((uint_least32_t)i);
} else {
payload.prop[i].property = 0;
}
@@ -559,14 +571,16 @@ properties_generate_break_property(const struct property_…
}
/* compress data */
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",…
+ printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",
+ argv0);
properties_compress(prop, &comp);
- fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
- prefix, properties_get_major_minor(&comp, &mm));
+ fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefi…
+ properties_get_major_minor(&comp, &mm));
/* prepare names */
- if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(bu…
+ if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >=
+ LEN(buf1)) {
fprintf(stderr, "snprintf: String truncated.\n");
exit(1);
}
@@ -578,9 +592,12 @@ properties_generate_break_property(const struct property_s…
prefix_uc[i] = (char)toupper(prefix[i]);
}
prefix_uc[prefixlen] = '\0';
- if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf…
- (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3)…
- (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)…
+ if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >=
+ LEN(buf2) ||
+ (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >=
+ LEN(buf3) ||
+ (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >=
+ LEN(buf4)) {
fprintf(stderr, "snprintf: String truncated.\n");
exit(1);
}
@@ -589,8 +606,9 @@ properties_generate_break_property(const struct property_sp…
properties_print_enum(spec, speclen, buf1, buf2);
properties_print_lookup_table(buf3, mm.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,…
- get_value_bp, comp.data);
+ properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,
+ mm.minorlen, get_value_bp,
+ comp.data);
/* free data */
free(prop);
@@ -625,42 +643,50 @@ break_test_callback(const char *fname, char **field, size…
memset(t, 0, sizeof(*t));
/* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
- for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
- token = strtok(NULL, " ")) {
+ for (token = strtok(field[0], " "), i = 0; token != NULL;
+ i++, token = strtok(NULL, " ")) {
if (i % 2 == 0) {
/* delimiter or start of sequence */
- if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF…
+ if (i == 0 ||
+ !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
/*
* '÷' indicates a breakpoint,
* the current length is done; allocate
* a new length field and set it to 0
*/
- if ((t->len = realloc(t->len,
- ++t->lenlen * sizeof(*t->len))) == NULL) {
- fprintf(stderr, "break_test_"
+ if ((t->len = realloc(
+ t->len,
+ ++t->lenlen * sizeof(*t->len))) ==
+ NULL) {
+ fprintf(stderr,
+ "break_test_"
"callback: realloc: %s.\n",
strerror(errno));
return 1;
}
t->len[t->lenlen - 1] = 0;
} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 …
- /*
- * '×' indicates a non-breakpoint, do nothing
- */
+ /* '×' indicates a non-breakpoint, do nothing…
} else {
- fprintf(stderr, "break_test_callback: "
- "Malformed delimiter '%s'.\n", token);
+ fprintf(stderr,
+ "break_test_callback: "
+ "Malformed delimiter '%s'.\n",
+ token);
return 1;
}
} else {
/* add codepoint to cp-array */
- if ((t->cp = realloc(t->cp, ++t->cplen *
- sizeof(*t->cp))) == NULL) {
- fprintf(stderr, "break_test_callback: "
- "realloc: %s.\n", strerror(errno));
+ if ((t->cp = realloc(t->cp,
+ ++t->cplen * sizeof(*t->cp))) ==
+ NULL) {
+ fprintf(stderr,
+ "break_test_callback: "
+ "realloc: %s.\n",
+ strerror(errno));
return 1;
}
- if (hextocp(token, strlen(token), &t->cp[t->cplen - 1]…
+ if (hextocp(token, strlen(token),
+ &t->cp[t->cplen - 1])) {
return 1;
}
if (t->lenlen > 0) {
@@ -688,8 +714,7 @@ break_test_callback(const char *fname, char **field, size_t…
}
void
-break_test_list_parse(char *fname, struct break_test **test,
- size_t *testlen)
+break_test_list_parse(char *fname, struct break_test **test, size_t *testlen)
{
struct break_test_payload pl = {
.test = test,
@@ -703,13 +728,14 @@ break_test_list_parse(char *fname, struct break_test **te…
void
break_test_list_print(const struct break_test *test, size_t testlen,
- const char *identifier, const char *progname)
+ const char *identifier, const char *progname)
{
size_t i, j;
printf("/* Automatically generated by %s */\n"
"#include <stdint.h>\n#include <stddef.h>\n\n"
- "#include \"../gen/types.h\"\n\n", progname);
+ "#include \"../gen/types.h\"\n\n",
+ progname);
printf("static const struct break_test %s[] = {\n", identifier);
for (i = 0; i < testlen; i++) {
diff --git a/gen/util.h b/gen/util.h
@@ -7,7 +7,7 @@
#include "types.h"
-#define LEN(x) (sizeof (x) / sizeof *(x))
+#define LEN(x) (sizeof(x) / sizeof *(x))
struct property_spec {
const char *enumname;
@@ -34,30 +34,31 @@ struct properties_major_minor {
int hextocp(const char *, size_t, uint_least32_t *cp);
int parse_cp_list(const char *, uint_least32_t **, size_t *);
-void parse_file_with_callback(const char *, int (*callback)(const char *,
- char **, size_t, char *, void *), void *payload);
+void parse_file_with_callback(const char *,
+ int (*callback)(const char *, char **, size_t,
+ char *, void *),
+ void *payload);
-void properties_compress(const struct properties *, struct properties_compress…
+void properties_compress(const struct properties *,
+ struct properties_compressed *comp);
double properties_get_major_minor(const struct properties_compressed *,
struct properties_major_minor *);
void properties_print_lookup_table(char *, size_t *, size_t);
-void properties_print_derived_lookup_table(char *, char *, size_t *, size_t,
- int_least64_t (*get_value)(const struct …
- size_t), const void *);
-
-void properties_generate_break_property(const struct property_spec *,
- uint_least8_t, uint_least8_t
- (*fill_missing)(uint_least32_t),
- uint_least8_t
- (*handle_conflict)(uint_least32_t,
- uint_least8_t, uint_least8_t),
- void (*post_process)
- (struct properties *),
- const char *, const char *);
+void properties_print_derived_lookup_table(
+ char *, char *, size_t *, size_t,
+ int_least64_t (*get_value)(const struct properties *, size_t),
+ const void *);
+
+void properties_generate_break_property(
+ const struct property_spec *, uint_least8_t,
+ uint_least8_t (*fill_missing)(uint_least32_t),
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t),
+ void (*post_process)(struct properties *), const char *, const char *);
void break_test_list_parse(char *, struct break_test **, size_t *);
-void break_test_list_print(const struct break_test *, size_t,
- const char *, const char *);
+void break_test_list_print(const struct break_test *, size_t, const char *,
+ const char *);
void break_test_list_free(struct break_test *, size_t);
#endif /* UTIL_H */
diff --git a/gen/word.c b/gen/word.c
@@ -11,108 +11,108 @@
static const struct property_spec word_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "ALETTER",
- .file = FILE_WORD,
- .ucdname = "ALetter",
+ .file = FILE_WORD,
+ .ucdname = "ALetter",
},
{
.enumname = "BOTH_ALETTER_EXTPICT",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_WORD,
- .ucdname = "CR",
+ .file = FILE_WORD,
+ .ucdname = "CR",
},
{
.enumname = "DOUBLE_QUOTE",
- .file = FILE_WORD,
- .ucdname = "Double_Quote",
+ .file = FILE_WORD,
+ .ucdname = "Double_Quote",
},
{
.enumname = "EXTEND",
- .file = FILE_WORD,
- .ucdname = "Extend",
+ .file = FILE_WORD,
+ .ucdname = "Extend",
},
{
.enumname = "EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
{
.enumname = "EXTENDNUMLET",
- .file = FILE_WORD,
- .ucdname = "ExtendNumLet",
+ .file = FILE_WORD,
+ .ucdname = "ExtendNumLet",
},
{
.enumname = "FORMAT",
- .file = FILE_WORD,
- .ucdname = "Format",
+ .file = FILE_WORD,
+ .ucdname = "Format",
},
{
.enumname = "HEBREW_LETTER",
- .file = FILE_WORD,
- .ucdname = "Hebrew_Letter",
+ .file = FILE_WORD,
+ .ucdname = "Hebrew_Letter",
},
{
.enumname = "KATAKANA",
- .file = FILE_WORD,
- .ucdname = "Katakana",
+ .file = FILE_WORD,
+ .ucdname = "Katakana",
},
{
.enumname = "LF",
- .file = FILE_WORD,
- .ucdname = "LF",
+ .file = FILE_WORD,
+ .ucdname = "LF",
},
{
.enumname = "MIDLETTER",
- .file = FILE_WORD,
- .ucdname = "MidLetter",
+ .file = FILE_WORD,
+ .ucdname = "MidLetter",
},
{
.enumname = "MIDNUM",
- .file = FILE_WORD,
- .ucdname = "MidNum",
+ .file = FILE_WORD,
+ .ucdname = "MidNum",
},
{
.enumname = "MIDNUMLET",
- .file = FILE_WORD,
- .ucdname = "MidNumLet",
+ .file = FILE_WORD,
+ .ucdname = "MidNumLet",
},
{
.enumname = "NEWLINE",
- .file = FILE_WORD,
- .ucdname = "Newline",
+ .file = FILE_WORD,
+ .ucdname = "Newline",
},
{
.enumname = "NUMERIC",
- .file = FILE_WORD,
- .ucdname = "Numeric",
+ .file = FILE_WORD,
+ .ucdname = "Numeric",
},
{
.enumname = "REGIONAL_INDICATOR",
- .file = FILE_WORD,
- .ucdname = "Regional_Indicator",
+ .file = FILE_WORD,
+ .ucdname = "Regional_Indicator",
},
{
.enumname = "SINGLE_QUOTE",
- .file = FILE_WORD,
- .ucdname = "Single_Quote",
+ .file = FILE_WORD,
+ .ucdname = "Single_Quote",
},
{
.enumname = "WSEGSPACE",
- .file = FILE_WORD,
- .ucdname = "WSegSpace",
+ .file = FILE_WORD,
+ .ucdname = "WSegSpace",
},
{
.enumname = "ZWJ",
- .file = FILE_WORD,
- .ucdname = "ZWJ",
+ .file = FILE_WORD,
+ .ucdname = "ZWJ",
},
};
@@ -124,8 +124,10 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, ui…
(void)cp;
if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
- !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPH…
- (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPH…
+ !strcmp(word_break_property[prop2].enumname,
+ "EXTENDED_PICTOGRAPHIC")) ||
+ (!strcmp(word_break_property[prop1].enumname,
+ "EXTENDED_PICTOGRAPHIC") &&
!strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
for (result = 0; result < LEN(word_break_property); result++) {
if (!strcmp(word_break_property[result].enumname,
@@ -150,10 +152,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(word_break_property,
- LEN(word_break_property), NULL,
- handle_conflict, NULL, "word_break",
- argv[0]);
+ properties_generate_break_property(
+ word_break_property, LEN(word_break_property), NULL,
+ handle_conflict, NULL, "word_break", argv[0]);
return 0;
}
diff --git a/grapheme.h b/grapheme.h
@@ -18,14 +18,15 @@ enum grapheme_bidirectional_override {
size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *);
size_t grapheme_encode_utf8(uint_least32_t, char *, size_t);
-size_t grapheme_get_bidirectional_embedding_levels(const uint_least32_t *, siz…
- enum grapheme_bidirectional…
- int_least32_t *, size_t);
-size_t grapheme_get_bidirectional_embedding_levels_utf8(const char *, size_t,
- enum grapheme_bidirect…
- int_least32_t *, size_…
+size_t grapheme_get_bidirectional_embedding_levels(
+ const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
+ int_least32_t *, size_t);
+size_t grapheme_get_bidirectional_embedding_levels_utf8(
+ const char *, size_t, enum grapheme_bidirectional_override,
+ int_least32_t *, size_t);
-bool grapheme_is_character_break(uint_least32_t, uint_least32_t, uint_least16_…
+bool grapheme_is_character_break(uint_least32_t, uint_least32_t,
+ uint_least16_t *);
bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *);
bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *);
@@ -45,9 +46,12 @@ size_t grapheme_next_line_break_utf8(const char *, size_t);
size_t grapheme_next_sentence_break_utf8(const char *, size_t);
size_t grapheme_next_word_break_utf8(const char *, size_t);
-size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,…
-size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,…
-size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,…
+size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
+size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
+size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t);
size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t);
diff --git a/src/bidirectional.c b/src/bidirectional.c
@@ -12,15 +12,18 @@ struct isolate_runner {
int_least32_t *buf;
size_t buflen;
enum bidi_property prev_prop;
+
struct {
size_t off;
enum bidi_property prop;
int_least8_t level;
} cur;
+
struct {
size_t off;
enum bidi_property prop;
} next;
+
uint_least8_t paragraph_level;
int_least8_t isolating_run_level;
enum bidi_property last_strong_type;
@@ -57,24 +60,42 @@ struct state {
static inline void
state_serialize(const struct state *s, int_least32_t *out)
{
- *out = (int_least32_t)(
- ((((uint_least32_t)(s->paragraph_level)) & 0x01 /* 00000…
- ((((uint_least32_t)(s->level + 1)) & 0x7F /* 01111…
- ((((uint_least32_t)(s->prop)) & 0x1F /* 00011…
- ((((uint_least32_t)(s->bracket - bidi_bracket)) & 0xFF /* 11111…
- ((((uint_least32_t)(s->visited)) & 0x01 /* 00000…
- ((((uint_least32_t)(s->rawprop)) & 0x1F /* 00011…
+ *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) &
+ 0x01 /* 00000001 */)
+ << 0) |
+ ((((uint_least32_t)(s->level + 1)) &
+ 0x7F /* 01111111 */)
+ << 1) |
+ ((((uint_least32_t)(s->prop)) &
+ 0x1F /* 00011111 */)
+ << 8) |
+ ((((uint_least32_t)(s->bracket - bidi_bracket))…
+ 0xFF /* 11111111 */)
+ << 13) |
+ ((((uint_least32_t)(s->visited)) &
+ 0x01 /* 00000001 */)
+ << 21) |
+ ((((uint_least32_t)(s->rawprop)) &
+ 0x1F /* 00011111 */)
+ << 22));
}
static inline void
state_deserialize(int_least32_t in, struct state *s)
{
- s->paragraph_level = (uint_least8_t)((((uint_least32_t)…
- s->level = (int_least8_t)((((uint_least32_t)…
- s->prop = (enum bidi_property)((((uint_least32_t)…
- s->bracket = bidi_bracket + (uint_least8_t)((((uint_least32_t)…
- s->visited = (bool)((((uint_least32_t)…
- s->rawprop = (enum bidi_property)((((uint_least32_t)…
+ s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) &
+ 0x01 /* 00000001 */);
+ s->level = (int_least8_t)((((uint_least32_t)in) >> 1) &
+ 0x7F /* 01111111 */) -
+ 1;
+ s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) &
+ 0x1F /* 00011111 */);
+ s->bracket =
+ bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) &
+ 0xFF /* 11111111 */);
+ s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */…
+ s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) &
+ 0x1F /* 00011111 */);
}
static void
@@ -171,7 +192,6 @@ isolate_runner_advance(struct isolate_runner *ir)
return 1;
}
-
/* shift in */
ir->prev_prop = ir->cur.prop;
ir->cur.off = ir->next.off;
@@ -188,13 +208,13 @@ isolate_runner_advance(struct isolate_runner *ir)
* on the first advancement as the prev_prop holds the sos type,
* which can only be either R or L, which are both strong types
*/
- if (ir->prev_prop == BIDI_PROP_R ||
- ir->prev_prop == BIDI_PROP_L ||
+ if (ir->prev_prop == BIDI_PROP_R || ir->prev_prop == BIDI_PROP_L ||
ir->prev_prop == BIDI_PROP_AL) {
ir->last_strong_type = ir->prev_prop;
}
- /* initialize next state by going to the next character in the sequenc…
+ /* initialize next state by going to the next character in the sequence
+ */
ir->next.off = SIZE_MAX;
ir->next.prop = NUM_BIDI_PROPS;
@@ -210,8 +230,7 @@ isolate_runner_advance(struct isolate_runner *ir)
}
/* follow BD8/BD9 and P2 to traverse the current sequence */
- if (s.prop == BIDI_PROP_LRI ||
- s.prop == BIDI_PROP_RLI ||
+ if (s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI ||
s.prop == BIDI_PROP_FSI) {
/*
* we encountered an isolate initiator, increment
@@ -224,8 +243,7 @@ isolate_runner_advance(struct isolate_runner *ir)
if (isolate_level != 1) {
continue;
}
- } else if (s.prop == BIDI_PROP_PDI &&
- isolate_level > 0) {
+ } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) {
isolate_level--;
/*
@@ -250,12 +268,14 @@ isolate_runner_advance(struct isolate_runner *ir)
/* we were in the first initializing round */
continue;
} else if (s.level == ir->isolating_run_level) {
- /* isolate_level-skips have been handled before, we're…
+ /* isolate_level-skips have been handled before, we're
+ * good */
/* still in the sequence */
ir->next.off = (size_t)i;
ir->next.prop = s.prop;
} else {
- /* out of sequence or isolated, compare levels via eos…
+ /* out of sequence or isolated, compare levels via eos
+ */
if (MAX(last_isolate_level, s.level) % 2 == 0) {
ir->next.prop = BIDI_PROP_L;
} else {
@@ -286,7 +306,8 @@ isolate_runner_advance(struct isolate_runner *ir)
}
static void
-isolate_runner_set_current_prop(struct isolate_runner *ir, enum bidi_property …
+isolate_runner_set_current_prop(struct isolate_runner *ir,
+ enum bidi_property prop)
{
struct state s;
@@ -301,9 +322,9 @@ static inline enum bidi_property
get_bidi_property(uint_least32_t cp)
{
if (likely(cp <= 0x10FFFF)) {
- return (enum bidi_property)
- ((bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
- 0x1F /* 00011111 */);
+ return (enum bidi_property)(
+ (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
+ 0x1F /* 00011111 */);
} else {
return BIDI_PROP_L;
}
@@ -320,8 +341,8 @@ get_bidi_bracket_off(uint_least32_t cp)
}
static size_t
-process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
- size_t off, uint_least8_t paragraph_level)
+process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
+ uint_least8_t paragraph_level)
{
enum bidi_property sequence_prop;
struct isolate_runner ir, tmp;
@@ -335,7 +356,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
ir.prev_prop == BIDI_PROP_RLI ||
ir.prev_prop == BIDI_PROP_FSI ||
ir.prev_prop == BIDI_PROP_PDI) {
- isolate_runner_set_current_prop(&ir, BIDI_PROP…
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_ON);
} else {
isolate_runner_set_current_prop(&ir,
ir.prev_prop);
@@ -371,7 +393,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
}
if (ir.prev_prop == BIDI_PROP_AN &&
- ir.cur.prop == BIDI_PROP_CS &&
+ ir.cur.prop == BIDI_PROP_CS &&
ir.next.prop == BIDI_PROP_AN) {
isolate_runner_set_current_prop(&ir, BIDI_PROP_AN);
}
@@ -389,14 +411,19 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
} else if (ir.cur.prop == BIDI_PROP_EN) {
/* set the preceding sequence */
if (runsince != SIZE_MAX) {
- isolate_runner_init(buf, buflen, runsince, par…
+ isolate_runner_init(buf, buflen, runsince,
+ paragraph_level,
+ (runsince > off), &tmp);
while (!isolate_runner_advance(&tmp) &&
tmp.cur.off < ir.cur.off) {
- isolate_runner_set_current_prop(&tmp, …
+ isolate_runner_set_current_prop(
+ &tmp, BIDI_PROP_EN);
}
runsince = SIZE_MAX;
} else {
- isolate_runner_init(buf, buflen, ir.cur.off, p…
+ isolate_runner_init(buf, buflen, ir.cur.off,
+ paragraph_level,
+ (ir.cur.off > off), &tmp);
isolate_runner_advance(&tmp);
}
/* follow the succeeding sequence */
@@ -404,7 +431,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
if (tmp.cur.prop != BIDI_PROP_ET) {
break;
}
- isolate_runner_set_current_prop(&tmp, BIDI_PRO…
+ isolate_runner_set_current_prop(&tmp,
+ BIDI_PROP_EN);
}
} else {
/* sequence ended */
@@ -439,23 +467,26 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
while (!isolate_runner_advance(&ir)) {
if (sequence_end == SIZE_MAX) {
- if (ir.cur.prop == BIDI_PROP_B ||
- ir.cur.prop == BIDI_PROP_S ||
- ir.cur.prop == BIDI_PROP_WS ||
- ir.cur.prop == BIDI_PROP_ON ||
+ if (ir.cur.prop == BIDI_PROP_B ||
+ ir.cur.prop == BIDI_PROP_S ||
+ ir.cur.prop == BIDI_PROP_WS ||
+ ir.cur.prop == BIDI_PROP_ON ||
ir.cur.prop == BIDI_PROP_FSI ||
ir.cur.prop == BIDI_PROP_LRI ||
ir.cur.prop == BIDI_PROP_RLI ||
ir.cur.prop == BIDI_PROP_PDI) {
- /* the current character is an NI (neutral or …
+ /* the current character is an NI (neutral or
+ * isolate) */
/* scan ahead to the end of the NI-sequence */
- isolate_runner_init(buf, buflen, ir.cur.off, p…
+ isolate_runner_init(buf, buflen, ir.cur.off,
+ paragraph_level,
+ (ir.cur.off > off), &tmp);
while (!isolate_runner_advance(&tmp)) {
- if (tmp.next.prop != BIDI_PROP_B &&
- tmp.next.prop != BIDI_PROP_S &&
- tmp.next.prop != BIDI_PROP_WS &&
- tmp.next.prop != BIDI_PROP_ON &&
+ if (tmp.next.prop != BIDI_PROP_B &&
+ tmp.next.prop != BIDI_PROP_S &&
+ tmp.next.prop != BIDI_PROP_WS &&
+ tmp.next.prop != BIDI_PROP_ON &&
tmp.next.prop != BIDI_PROP_FSI &&
tmp.next.prop != BIDI_PROP_LRI &&
tmp.next.prop != BIDI_PROP_RLI &&
@@ -465,17 +496,17 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
}
/*
- * check what follows and see if the text has …
- * same direction on both sides
+ * check what follows and see if the text has
+ * the same direction on both sides
*/
if (ir.prev_prop == BIDI_PROP_L &&
tmp.next.prop == BIDI_PROP_L) {
sequence_end = tmp.cur.off;
sequence_prop = BIDI_PROP_L;
- } else if ((ir.prev_prop == BIDI_PROP_R ||
+ } else if ((ir.prev_prop == BIDI_PROP_R ||
ir.prev_prop == BIDI_PROP_EN ||
ir.prev_prop == BIDI_PROP_AN) &&
- (tmp.next.prop == BIDI_PROP_R ||
+ (tmp.next.prop == BIDI_PROP_R ||
tmp.next.prop == BIDI_PROP_EN ||
tmp.next.prop == BIDI_PROP_AN)) {
sequence_end = tmp.cur.off;
@@ -486,7 +517,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
if (sequence_end != SIZE_MAX) {
if (ir.cur.off <= sequence_end) {
- isolate_runner_set_current_prop(&ir, sequence_…
+ isolate_runner_set_current_prop(&ir,
+ sequence_prop);
} else {
/* end of sequence, reset */
sequence_end = SIZE_MAX;
@@ -498,10 +530,9 @@ process_isolating_run_sequence(int_least32_t *buf, size_t …
/* N2 */
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
while (!isolate_runner_advance(&ir)) {
- if (ir.cur.prop == BIDI_PROP_B ||
- ir.cur.prop == BIDI_PROP_S ||
- ir.cur.prop == BIDI_PROP_WS ||
- ir.cur.prop == BIDI_PROP_ON ||
+ if (ir.cur.prop == BIDI_PROP_B || ir.cur.prop == BIDI_PROP_S ||
+ ir.cur.prop == BIDI_PROP_WS ||
+ ir.cur.prop == BIDI_PROP_ON ||
ir.cur.prop == BIDI_PROP_FSI ||
ir.cur.prop == BIDI_PROP_LRI ||
ir.cur.prop == BIDI_PROP_RLI ||
@@ -509,10 +540,12 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
/* N2 */
if (ir.cur.level % 2 == 0) {
/* even embedding level */
- isolate_runner_set_current_prop(&ir, BIDI_PROP…
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_L);
} else {
/* odd embedding level */
- isolate_runner_set_current_prop(&ir, BIDI_PROP…
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_R);
}
}
}
@@ -522,8 +555,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
static uint_least8_t
get_paragraph_level(enum grapheme_bidirectional_override override,
- bool terminate_on_pdi,
- const int_least32_t *buf, size_t buflen)
+ bool terminate_on_pdi, const int_least32_t *buf,
+ size_t buflen)
{
struct state s;
int_least8_t isolate_level;
@@ -541,8 +574,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
- if (s.prop == BIDI_PROP_PDI &&
- isolate_level == 0 &&
+ if (s.prop == BIDI_PROP_PDI && isolate_level == 0 &&
terminate_on_pdi) {
/*
* we are in a FSI-subsection of a paragraph and
@@ -552,8 +584,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
}
/* BD8/BD9 */
- if ((s.prop == BIDI_PROP_LRI ||
- s.prop == BIDI_PROP_RLI ||
+ if ((s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI ||
s.prop == BIDI_PROP_FSI) &&
isolate_level < MAX_DEPTH) {
/* we hit an isolate initiator, increment counter */
@@ -570,8 +601,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
/* P3 */
if (s.prop == BIDI_PROP_L) {
return 0;
- } else if (s.prop == BIDI_PROP_AL ||
- s.prop == BIDI_PROP_R) {
+ } else if (s.prop == BIDI_PROP_AL || s.prop == BIDI_PROP_R) {
return 1;
}
}
@@ -585,13 +615,15 @@ get_paragraph_embedding_levels(enum grapheme_bidirectiona…
{
enum bidi_property tmp_prop;
struct state s, t;
+
struct {
int_least8_t level;
enum grapheme_bidirectional_override override;
bool directional_isolate;
} directional_status[MAX_DEPTH + 2], *dirstat = directional_status;
+
size_t overflow_isolate_count, overflow_embedding_count,
- valid_isolate_count, bufoff, i, runsince;
+ valid_isolate_count, bufoff, i, runsince;
uint_least8_t paragraph_level;
paragraph_level = get_paragraph_level(override, false, buf, buflen);
@@ -600,7 +632,8 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_…
dirstat->level = (int_least8_t)paragraph_level;
dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = false;
- overflow_isolate_count = overflow_embedding_count = valid_isolate_coun…
+ overflow_isolate_count = overflow_embedding_count =
+ valid_isolate_count = 0;
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
@@ -608,79 +641,105 @@ get_paragraph_embedding_levels(enum grapheme_bidirection…
again:
if (tmp_prop == BIDI_PROP_RLE) {
/* X2 */
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLE */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
dirstat->directional_isolate = false;
} else {
/* overflow RLE */
- overflow_embedding_count += (overflow_isolate_…
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_LRE) {
/* X3 */
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRE */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
dirstat->directional_isolate = false;
} else {
/* overflow LRE */
- overflow_embedding_count += (overflow_isolate_…
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_RLO) {
/* X4 */
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLO */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
dirstat->directional_isolate = false;
} else {
/* overflow RLO */
- overflow_embedding_count += (overflow_isolate_…
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_LRO) {
/* X5 */
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRO */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
dirstat->directional_isolate = false;
} else {
/* overflow LRO */
- overflow_embedding_count += (overflow_isolate_…
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_RLI) {
/* X5a */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLI */
valid_isolate_count++;
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
dirstat->directional_isolate = true;
} else {
/* overflow RLI */
@@ -689,22 +748,28 @@ again:
} else if (tmp_prop == BIDI_PROP_LRI) {
/* X5b */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRI */
valid_isolate_count++;
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirs…
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
dirstat->directional_isolate = true;
} else {
/* overflow LRI */
@@ -712,23 +777,27 @@ again:
}
} else if (tmp_prop == BIDI_PROP_FSI) {
/* X5c */
- if (get_paragraph_level(GRAPHEME_BIDIRECTIONAL_OVERRID…
- buf + (bufoff + 1), buflen - (…
+ if (get_paragraph_level(
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL,
+ true, buf + (bufoff + 1),
+ buflen - (bufoff + 1)) == 1) {
tmp_prop = BIDI_PROP_RLI;
goto again;
} else { /* ... == 0 */
tmp_prop = BIDI_PROP_LRI;
goto again;
}
- } else if (tmp_prop != BIDI_PROP_B &&
- tmp_prop != BIDI_PROP_BN &&
+ } else if (tmp_prop != BIDI_PROP_B &&
+ tmp_prop != BIDI_PROP_BN &&
tmp_prop != BIDI_PROP_PDF &&
tmp_prop != BIDI_PROP_PDI) {
/* X6 */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
@@ -773,9 +842,11 @@ again:
}
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
@@ -796,12 +867,9 @@ again:
}
/* X9 */
- if (tmp_prop == BIDI_PROP_RLE ||
- tmp_prop == BIDI_PROP_LRE ||
- tmp_prop == BIDI_PROP_RLO ||
- tmp_prop == BIDI_PROP_LRO ||
- tmp_prop == BIDI_PROP_PDF ||
- tmp_prop == BIDI_PROP_BN) {
+ if (tmp_prop == BIDI_PROP_RLE || tmp_prop == BIDI_PROP_LRE ||
+ tmp_prop == BIDI_PROP_RLO || tmp_prop == BIDI_PROP_LRO ||
+ tmp_prop == BIDI_PROP_PDF || tmp_prop == BIDI_PROP_BN) {
s.level = -1;
state_serialize(&s, &(buf[bufoff]));
}
@@ -811,8 +879,8 @@ again:
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
if (!s.visited && s.level != -1) {
- bufoff += process_isolating_run_sequence(buf, buflen, …
- paragraph_lev…
+ bufoff += process_isolating_run_sequence(
+ buf, buflen, bufoff, paragraph_level);
}
}
@@ -823,7 +891,7 @@ again:
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
- if (s.level % 2 == 0 ) {
+ if (s.level % 2 == 0) {
/* even level */
if (s.prop == BIDI_PROP_R) {
s.level += 1;
@@ -833,8 +901,7 @@ again:
}
} else {
/* odd level */
- if (s.prop == BIDI_PROP_L ||
- s.prop == BIDI_PROP_EN ||
+ if (s.prop == BIDI_PROP_L || s.prop == BIDI_PROP_EN ||
s.prop == BIDI_PROP_AN) {
s.level += 1;
}
@@ -853,10 +920,8 @@ again:
continue;
}
- if (s.rawprop == BIDI_PROP_WS ||
- s.rawprop == BIDI_PROP_FSI ||
- s.rawprop == BIDI_PROP_LRI ||
- s.rawprop == BIDI_PROP_RLI ||
+ if (s.rawprop == BIDI_PROP_WS || s.rawprop == BIDI_PROP_FSI ||
+ s.rawprop == BIDI_PROP_LRI || s.rawprop == BIDI_PROP_RLI ||
s.rawprop == BIDI_PROP_PDI) {
if (runsince == SIZE_MAX) {
/* a new run has begun */
@@ -878,8 +943,7 @@ again:
runsince = SIZE_MAX;
}
- if (s.rawprop == BIDI_PROP_S ||
- s.rawprop == BIDI_PROP_B) {
+ if (s.rawprop == BIDI_PROP_S || s.rawprop == BIDI_PROP_B) {
s.level = (int_least8_t)paragraph_level;
state_serialize(&s, &(buf[bufoff]));
}
@@ -902,7 +966,8 @@ again:
}
static size_t
-get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override…
+get_embedding_levels(HERODOTUS_READER *r,
+ enum grapheme_bidirectional_override override,
int_least32_t *buf, size_t buflen)
{
struct state s;
@@ -911,8 +976,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid…
if (buf == NULL) {
for (; herodotus_read_codepoint(r, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS;)
+ HERODOTUS_STATUS_SUCCESS;) {
;
+ }
/* see below for return value reasoning */
return herodotus_reader_number_read(r);
@@ -922,8 +988,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid…
* the first step is to determine the bidirectional properties
* and store them in the buffer
*/
- for (bufoff = 0; herodotus_read_codepoint(r, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS; bufoff++) {
+ for (bufoff = 0;
+ herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCES…
+ bufoff++) {
if (bufoff < buflen) {
/*
* actually only do something when we have
@@ -974,9 +1041,10 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_b…
}
size_t
-grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t …
- enum grapheme_bidirectional_overri…
- int_least32_t *dest, size_t destle…
+grapheme_get_bidirectional_embedding_levels(
+ const uint_least32_t *src, size_t srclen,
+ enum grapheme_bidirectional_override override, int_least32_t *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
@@ -986,9 +1054,10 @@ grapheme_get_bidirectional_embedding_levels(const uint_le…
}
size_t
-grapheme_get_bidirectional_embedding_levels_utf8(const char *src, size_t srcle…
- enum grapheme_bidirectional_o…
- int_least32_t *dest, size_t d…
+grapheme_get_bidirectional_embedding_levels_utf8(
+ const char *src, size_t srclen,
+ enum grapheme_bidirectional_override override, int_least32_t *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
diff --git a/src/case.c b/src/case.c
@@ -2,8 +2,8 @@
#include <stddef.h>
#include <stdint.h>
-#include "../grapheme.h"
#include "../gen/case.h"
+#include "../grapheme.h"
#include "util.h"
static inline enum case_property
@@ -11,7 +11,7 @@ get_case_property(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum case_property)
- case_minor[case_major[cp >> 8] + (cp & 0xFF)];
+ case_minor[case_major[cp >> 8] + (cp & 0xFF)];
} else {
return CASE_PROP_OTHER;
}
@@ -45,58 +45,64 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
uint_least32_t cp, tmp_cp;
int_least32_t map;
- for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCC…
+ for (; herodotus_read_codepoint(r, true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
if (sc == lower_special) {
/*
- * For the special Final_Sigma-rule (see SpecialCasing…
- * which is the only non-localized case-dependent rule,
- * we apply a different mapping when a sigma is at the
- * end of a word.
+ * For the special Final_Sigma-rule (see
+ * SpecialCasing.txt), which is the only non-localized
+ * case-dependent rule, we apply a different mapping
+ * when a sigma is at the end of a word.
*
* Before: cased case-ignorable*
* After: not(case-ignorable* cased)
*
- * We check the after-condition on demand, but the bef…
- * condition is best checked using the "level"-heurist…
- * also used in the sentence and line breaking-impleme…
+ * We check the after-condition on demand, but the
+ * before- condition is best checked using the
+ * "level"-heuristic also used in the sentence and line
+ * breaking-implementations.
*/
- if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER …
+ if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER
+ SIGMA */
(final_sigma_level == 1 ||
final_sigma_level == 2)) {
/*
* check succeeding characters by first skippi…
- * all case-ignorable characters and then chec…
- * if the succeeding character is cased, inval…
- * the after-condition
+ * all case-ignorable characters and then
+ * checking if the succeeding character is
+ * cased, invalidating the after-condition
*/
herodotus_reader_copy(r, &tmp);
for (prop = NUM_CASE_PROPS;
- (s = herodotus_read_codepoint(&tmp, true,…
- HERODOTUS_STATUS_SUCCESS; ) {
+ (s = herodotus_read_codepoint(&tmp, true,
+ &tmp_cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
prop = get_case_property(tmp_cp);
if (prop != CASE_PROP_CASE_IGNORABLE &&
prop != CASE_PROP_BOTH_CASED_CASE_…
- break;
+ break;
}
}
/*
- * Now prop is something other than case-ignor…
- * the source-string ended.
- * If it is something other than cased, we know
+ * Now prop is something other than
+ * case-ignorable or the source-string ended. …
+ * it is something other than cased, we know
* that the after-condition holds
*/
if (s != HERODOTUS_STATUS_SUCCESS ||
(prop != CASE_PROP_CASED &&
prop != CASE_PROP_BOTH_CASED_CASE_IGNORAB…
/*
- * write GREEK SMALL LETTER FINAL SIGM…
- * destination
+ * write GREEK SMALL LETTER FINAL SIGMA
+ * to destination
+ */
+ herodotus_write_codepoint(
+ w, UINT32_C(0x03C2));
+
+ /* reset Final_Sigma-state and continue
*/
- herodotus_write_codepoint(w, UINT32_C(…
-
- /* reset Final_Sigma-state and continu…
final_sigma_level = 0;
continue;
}
@@ -110,11 +116,13 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
/* sequence has begun */
final_sigma_level = 1;
- } else if ((final_sigma_level == 1 ||
- final_sigma_level == 2) &&
- (prop == CASE_PROP_CASE_IGNORABLE ||
- prop == CASE_PROP_BOTH_CASED_CASE_IGNORABL…
- /* case-ignorable sequence begins or continued…
+ } else if (
+ (final_sigma_level == 1 ||
+ final_sigma_level == 2) &&
+ (prop == CASE_PROP_CASE_IGNORABLE ||
+ prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE))…
+ /* case-ignorable sequence begins or continued
+ */
final_sigma_level = 2;
} else {
/* sequence broke */
@@ -134,8 +142,8 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
}
} else {
/* we have a simple mapping */
- herodotus_write_codepoint(w, (uint_least32_t)
- ((int_least32_t)cp + map));
+ herodotus_write_codepoint(
+ w, (uint_least32_t)((int_least32_t)cp + map));
}
}
@@ -168,14 +176,16 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
for (; (nwb = herodotus_next_word_break(r)) > 0;) {
herodotus_reader_push_advance_limit(r, nwb);
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO…
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* check if we have a cased character */
prop = get_case_property(cp);
if (prop == CASE_PROP_CASED ||
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
break;
} else {
- /* write the data to the output verbatim, it i…
+ /* write the data to the output verbatim, it if
+ * permits */
herodotus_write_codepoint(w, cp);
/* increment reader */
@@ -199,9 +209,10 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
* we encountered a cased character before the word
* break, convert it to titlecase
*/
- herodotus_reader_push_advance_limit(r,
- herodotus_reader_next_codepoint_break(r));
- to_case(r, w, 0, title_major, title_minor, title_speci…
+ herodotus_reader_push_advance_limit(
+ r, herodotus_reader_next_codepoint_break(r));
+ to_case(r, w, 0, title_major, title_minor,
+ title_special);
herodotus_reader_pop_limit(r);
}
@@ -218,7 +229,8 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
}
size_t
-grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t…
+grapheme_to_uppercase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -230,7 +242,8 @@ grapheme_to_uppercase(const uint_least32_t *src, size_t src…
}
size_t
-grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t…
+grapheme_to_lowercase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -242,7 +255,8 @@ grapheme_to_lowercase(const uint_least32_t *src, size_t src…
}
size_t
-grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t…
+grapheme_to_titlecase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -254,7 +268,8 @@ grapheme_to_titlecase(const uint_least32_t *src, size_t src…
}
size_t
-grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t …
+grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -266,7 +281,8 @@ grapheme_to_uppercase_utf8(const char *src, size_t srclen, …
}
size_t
-grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t …
+grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -278,7 +294,8 @@ grapheme_to_lowercase_utf8(const char *src, size_t srclen, …
}
size_t
-grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t …
+grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -299,7 +316,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major,
uint_least32_t cp;
int_least32_t map;
- for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUC…
+ for (; herodotus_read_codepoint(r, false, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* get and handle case mapping */
if (unlikely((map = get_case_offset(cp, major, minor)) >=
INT32_C(0x110000))) {
@@ -315,7 +333,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major,
goto done;
} else {
/* move forward */
- herodotus_read_codepoint(r, tr…
+ herodotus_read_codepoint(
+ r, true, &cp);
}
} else {
/*
@@ -357,7 +376,8 @@ is_titlecase(HERODOTUS_READER *r, size_t *output)
for (; (nwb = herodotus_next_word_break(r)) > 0;) {
herodotus_reader_push_advance_limit(r, nwb);
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO…
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* check if we have a cased character */
prop = get_case_property(cp);
if (prop == CASE_PROP_CASED ||
@@ -384,17 +404,20 @@ is_titlecase(HERODOTUS_READER *r, size_t *output)
* we encountered a cased character before the word
* break, check if it's titlecase
*/
- herodotus_reader_push_advance_limit(r,
- herodotus_reader_next_codepoint_break(r));
- if (!is_case(r, title_major, title_minor, title_specia…
+ herodotus_reader_push_advance_limit(
+ r, herodotus_reader_next_codepoint_break(r));
+ if (!is_case(r, title_major, title_minor, title_specia…
+ NULL)) {
ret = false;
goto done;
}
herodotus_reader_pop_limit(r);
}
- /* check if the rest of the codepoints in the word are lowerca…
- if (!is_case(r, lower_major, lower_minor, lower_special, NULL)…
+ /* check if the rest of the codepoints in the word are lowerca…
+ */
+ if (!is_case(r, lower_major, lower_minor, lower_special,
+ NULL)) {
ret = false;
goto done;
}
diff --git a/src/character.c b/src/character.c
@@ -16,83 +16,80 @@ struct character_break_state {
static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_OTHER] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
- [CHAR_BREAK_PROP_CR] =
- UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ [CHAR_BREAK_PROP_CR] = UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
[CHAR_BREAK_PROP_EXTEND] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_L] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_V] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_T] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LV] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LVT] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_PREPEND] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
(UINT16_C(0xFFFF) &
- ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
- UINT16_C(1) << CHAR_BREAK_PROP_LF |
- UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
- )
- ), /* GB9b */
+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
+ UINT16_C(1) << CHAR_BREAK_PROP_LF |
+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL)), /* GB9b */
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_SPACINGMARK] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_ZWJ] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
};
static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
};
static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
@@ -113,7 +110,8 @@ get_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum char_break_property)
- char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF…
+ char_break_minor[char_break_major[cp >> 8] +
+ (cp & 0xFF)];
} else {
return CHAR_BREAK_PROP_OTHER;
}
@@ -122,23 +120,27 @@ get_break_prop(uint_least32_t cp)
static inline void
state_serialize(const struct character_break_state *in, uint_least16_t *out)
{
- *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | …
- (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8) | …
- (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9) | …
- (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10); …
+ *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */
+ (uint_least16_t)(((uint_least16_t)(in->prop_set))
+ << 8) | /* 9th bit */
+ (uint_least16_t)(((uint_least16_t)(in->gb11_flag))
+ << 9) | /* 10th bit */
+ (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag))
+ << 10); /* 11th bit */
}
static inline void
state_deserialize(uint_least16_t in, struct character_break_state *out)
{
- out->prop = in & UINT8_C(0xFF);
- out->prop_set = in & (UINT16_C(1) << 8);
- out->gb11_flag = in & (UINT16_C(1) << 9);
+ out->prop = in & UINT8_C(0xFF);
+ out->prop_set = in & (UINT16_C(1) << 8);
+ out->gb11_flag = in & (UINT16_C(1) << 9);
out->gb12_13_flag = in & (UINT16_C(1) << 10);
}
bool
-grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least…
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1,
+ uint_least16_t *s)
{
struct character_break_state state;
enum char_break_property cp0_prop, cp1_prop;
@@ -161,23 +163,26 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas…
/* update flags */
state.gb11_flag =
flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
- state.gb11_flag] &
+ state.gb11_flag] &
UINT16_C(1) << cp1_prop;
state.gb12_13_flag =
- flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
- state.gb12_13_flag] &
- UINT16_C(1) << cp1_prop;
+ flag_update_gb12_13[cp0_prop +
+ NUM_CHAR_BREAK_PROPS *
+ state.gb12_13_flag] &
+ UINT16_C(1) << cp1_prop;
/*
* Apply grapheme cluster breaking algorithm (UAX #29), see
* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_…
*/
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
- (dont_break_gb11[cp0_prop + state.gb11_flag *
- NUM_CHAR_BREAK_PROPS] &
+ (dont_break_gb11[cp0_prop +
+ state.gb11_flag *
+ NUM_CHAR_BREAK_PROPS] &
(UINT16_C(1) << cp1_prop)) ||
- (dont_break_gb12_13[cp0_prop + state.gb12_13_flag *
- NUM_CHAR_BREAK_PROPS] &
+ (dont_break_gb12_13[cp0_prop +
+ state.gb12_13_flag *
+ NUM_CHAR_BREAK_PROPS] &
(UINT16_C(1) << cp1_prop));
/* update or reset flags (when we have a break) */
@@ -198,8 +203,10 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least…
* were all set to false
*/
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
- (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_pr…
- (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1…
+ (dont_break_gb11[cp0_prop] &
+ (UINT16_C(1) << cp1_prop)) ||
+ (dont_break_gb12_13[cp0_prop] &
+ (UINT16_C(1) << cp1_prop));
}
return !notbreak;
@@ -212,7 +219,8 @@ next_character_break(HERODOTUS_READER *r)
uint_least32_t cp0 = 0, cp1 = 0;
for (herodotus_read_codepoint(r, true, &cp0);
- herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCC…
+ herodotus_read_codepoint(r, false, &cp1) ==
+ HERODOTUS_STATUS_SUCCESS;
herodotus_read_codepoint(r, true, &cp0)) {
if (grapheme_is_character_break(cp0, cp1, &state)) {
break;
diff --git a/src/line.c b/src/line.c
@@ -11,7 +11,8 @@ get_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum line_break_property)
- line_break_minor[line_break_major[cp >> 8] + (cp & 0xff…
+ line_break_minor[line_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return LINE_BREAK_PROP_AL;
}
@@ -22,7 +23,7 @@ next_line_break(HERODOTUS_READER *r)
{
HERODOTUS_READER tmp;
enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
- last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
+ last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
uint_least32_t cp;
uint_least8_t lb25_level = 0;
bool lb21a_flag = false, ri_even = true;
@@ -43,8 +44,10 @@ next_line_break(HERODOTUS_READER *r)
last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;
- for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop…
- herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCE…
+ for (herodotus_read_codepoint(r, true, &cp),
+ cp0_prop = get_break_prop(cp);
+ herodotus_read_codepoint(r, false, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;
herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) {
/* get property of the right codepoint */
cp1_prop = get_break_prop(cp);
@@ -59,10 +62,11 @@ next_line_break(HERODOTUS_READER *r)
cp0_prop != LINE_BREAK_PROP_ZWJ) {
/*
* check if the property we are overwriting now is an
- * HL. If so, we set the LB21a-flag which depends on t…
- * knowledge.
+ * HL. If so, we set the LB21a-flag which depends on
+ * this knowledge.
*/
- lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PR…
+ lb21a_flag =
+ (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL…
/* check regional indicator state */
if (cp0_prop == LINE_BREAK_PROP_RI) {
@@ -109,8 +113,7 @@ next_line_break(HERODOTUS_READER *r)
* and one (CL | CP) to the left of the middle
* spot
*/
- if ((lb25_level == 0 ||
- lb25_level == 1) &&
+ if ((lb25_level == 0 || lb25_level == 1) &&
cp0_prop == LINE_BREAK_PROP_NU) {
/* sequence has begun */
lb25_level = 1;
@@ -118,12 +121,15 @@ next_line_break(HERODOTUS_READER *r)
(cp0_prop == LINE_BREAK_PROP_NU ||
cp0_prop == LINE_BREAK_PROP_SY ||
cp0_prop == LINE_BREAK_PROP_IS)) {
- /* (NU | SY | IS) sequence begins or continued…
+ /* (NU | SY | IS) sequence begins or continued
+ */
lb25_level = 2;
- } else if ((lb25_level == 1 || lb25_level == 2) &&
- (cp0_prop == LINE_BREAK_PROP_CL …
- cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW…
- cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HW…
+ } else if (
+ (lb25_level == 1 || lb25_level == 2) &&
+ (cp0_prop == LINE_BREAK_PROP_CL ||
+ cp0_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+ cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF))…
/* CL or CP at the end of the sequence */
lb25_level = 3;
} else {
@@ -229,17 +235,19 @@ next_line_break(HERODOTUS_READER *r)
/* LB13 (affected by tailoring for LB25, see example 7) */
if (cp1_prop == LINE_BREAK_PROP_EX ||
(last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
- (cp1_prop == LINE_BREAK_PROP_CL ||
+ (cp1_prop == LINE_BREAK_PROP_CL ||
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_IS ||
+ cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF ||
+ cp1_prop == LINE_BREAK_PROP_IS ||
cp1_prop == LINE_BREAK_PROP_SY))) {
continue;
}
/* LB14 */
- if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_E…
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_…
+ if (last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
continue;
}
@@ -251,9 +259,11 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB16 */
- if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL …
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_…
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW…
+ if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
cp1_prop == LINE_BREAK_PROP_NS) {
continue;
}
@@ -308,7 +318,7 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB23 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
cp1_prop == LINE_BREAK_PROP_NU) {
continue;
@@ -336,11 +346,11 @@ next_line_break(HERODOTUS_READER *r)
/* LB24 */
if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
(cp1_prop == LINE_BREAK_PROP_PR ||
cp1_prop == LINE_BREAK_PROP_PO)) {
@@ -362,32 +372,33 @@ next_line_break(HERODOTUS_READER *r)
herodotus_reader_copy(r, &tmp);
herodotus_read_codepoint(&tmp, true, &cp);
if (herodotus_read_codepoint(&tmp, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS &&
+ HERODOTUS_STATUS_SUCCESS &&
(cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
+ cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
cp1_prop == LINE_BREAK_PROP_HY)) {
if (get_break_prop(cp) == LINE_BREAK_PROP_NU) {
continue;
}
}
}
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW…
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HW…
+ if ((last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+ last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
cp1_prop == LINE_BREAK_PROP_NU) {
continue;
}
- if (lb25_level == 1 &&
- (cp1_prop == LINE_BREAK_PROP_NU ||
- cp1_prop == LINE_BREAK_PROP_SY ||
- cp1_prop == LINE_BREAK_PROP_IS)) {
+ if (lb25_level == 1 && (cp1_prop == LINE_BREAK_PROP_NU ||
+ cp1_prop == LINE_BREAK_PROP_SY ||
+ cp1_prop == LINE_BREAK_PROP_IS)) {
continue;
}
if ((lb25_level == 1 || lb25_level == 2) &&
- (cp1_prop == LINE_BREAK_PROP_NU ||
- cp1_prop == LINE_BREAK_PROP_SY ||
- cp1_prop == LINE_BREAK_PROP_IS ||
- cp1_prop == LINE_BREAK_PROP_CL ||
+ (cp1_prop == LINE_BREAK_PROP_NU ||
+ cp1_prop == LINE_BREAK_PROP_SY ||
+ cp1_prop == LINE_BREAK_PROP_IS ||
+ cp1_prop == LINE_BREAK_PROP_CL ||
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
continue;
@@ -437,37 +448,37 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB28 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
/* LB29 */
if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
/* LB30 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
continue;
}
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_…
- (cp1_prop == LINE_BREAK_PROP_AL ||
- cp1_prop == LINE_BREAK_PROP_HL ||
+ if (last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
+ (cp1_prop == LINE_BREAK_PROP_AL ||
+ cp1_prop == LINE_BREAK_PROP_HL ||
cp1_prop == LINE_BREAK_PROP_NU)) {
continue;
}
/* LB30a */
- if (!ri_even &&
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
+ if (!ri_even && last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI …
cp1_prop == LINE_BREAK_PROP_RI) {
continue;
}
@@ -477,7 +488,8 @@ next_line_break(HERODOTUS_READER *r)
cp1_prop == LINE_BREAK_PROP_EM) {
continue;
}
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT…
+ if (last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
cp1_prop == LINE_BREAK_PROP_EM) {
continue;
}
diff --git a/src/sentence.c b/src/sentence.c
@@ -6,8 +6,7 @@
#include "../grapheme.h"
#include "util.h"
-struct sentence_break_state
-{
+struct sentence_break_state {
uint_least8_t aterm_close_sp_level;
uint_least8_t saterm_close_sp_parasep_level;
};
@@ -17,8 +16,8 @@ get_sentence_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (uint_least8_t)
- sentence_break_minor[sentence_break_major[cp >> 8] +
- (cp & 0xff)];
+ sentence_break_minor[sentence_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return SENTENCE_BREAK_PROP_OTHER;
}
@@ -80,7 +79,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->aterm_close_sp_level = 2;
} else if ((state->aterm_close_sp_level == 1 ||
state->aterm_close_sp_level == 2 ||
- state->aterm_close_sp_level == 3) &&
+ state->aterm_close_sp_level == 3) &&
prop == SENTENCE_BREAK_PROP_SP) {
/* sp-sequence begins or continued */
state->aterm_close_sp_level = 3;
@@ -102,7 +101,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->saterm_close_sp_parasep_level = 2;
} else if ((state->saterm_close_sp_parasep_level == 1 ||
state->saterm_close_sp_parasep_level == 2 ||
- state->saterm_close_sp_parasep_level == 3) &&
+ state->saterm_close_sp_parasep_level == 3) &&
prop == SENTENCE_BREAK_PROP_SP) {
/* sp-sequence begins or continued */
state->saterm_close_sp_parasep_level = 3;
@@ -110,7 +109,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->saterm_close_sp_parasep_level == 2 ||
state->saterm_close_sp_parasep_level == 3) &&
(prop == SENTENCE_BREAK_PROP_SEP ||
- prop == SENTENCE_BREAK_PROP_CR ||
+ prop == SENTENCE_BREAK_PROP_CR ||
prop == SENTENCE_BREAK_PROP_LF)) {
/* ParaSep at the end of the sequence */
state->saterm_close_sp_parasep_level = 4;
@@ -146,7 +145,7 @@ next_sentence_break(HERODOTUS_READER *r)
/* SB4 */
if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) {
break;
}
@@ -179,7 +178,8 @@ next_sentence_break(HERODOTUS_READER *r)
* This is the most complicated rule, requiring
* the right-hand-side to satisfy the regular expressi…
*
- * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )…
+ * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )*
+ * Lower
*
* which we simply check "manually" given LUT-lookups
* are very cheap by starting at the mid_reader.
@@ -198,12 +198,12 @@ next_sentence_break(HERODOTUS_READER *r)
* match the following condition
*/
if (prop == SENTENCE_BREAK_PROP_OLETTER ||
- prop == SENTENCE_BREAK_PROP_UPPER ||
- prop == SENTENCE_BREAK_PROP_LOWER ||
- prop == SENTENCE_BREAK_PROP_SEP ||
- prop == SENTENCE_BREAK_PROP_CR ||
- prop == SENTENCE_BREAK_PROP_LF ||
- prop == SENTENCE_BREAK_PROP_STERM ||
+ prop == SENTENCE_BREAK_PROP_UPPER ||
+ prop == SENTENCE_BREAK_PROP_LOWER ||
+ prop == SENTENCE_BREAK_PROP_SEP ||
+ prop == SENTENCE_BREAK_PROP_CR ||
+ prop == SENTENCE_BREAK_PROP_LF ||
+ prop == SENTENCE_BREAK_PROP_STERM ||
prop == SENTENCE_BREAK_PROP_ATERM) {
break;
}
@@ -219,8 +219,8 @@ next_sentence_break(HERODOTUS_READER *r)
state.saterm_close_sp_parasep_level == 2 ||
state.saterm_close_sp_parasep_level == 3) &&
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
continue;
}
@@ -228,9 +228,9 @@ next_sentence_break(HERODOTUS_READER *r)
if ((state.saterm_close_sp_parasep_level == 1 ||
state.saterm_close_sp_parasep_level == 2) &&
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
continue;
}
@@ -239,9 +239,9 @@ next_sentence_break(HERODOTUS_READER *r)
if ((state.saterm_close_sp_parasep_level == 1 ||
state.saterm_close_sp_parasep_level == 2 ||
state.saterm_close_sp_parasep_level == 3) &&
- (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
+ (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
continue;
}
diff --git a/src/utf8.c b/src/utf8.c
@@ -9,14 +9,14 @@
/* lookup-table for the types of sequence first bytes */
static const struct {
- uint_least8_t lower; /* lower bound of sequence first byte */
- uint_least8_t upper; /* upper bound of sequence first byte */
+ uint_least8_t lower; /* lower bound of sequence first byte */
+ uint_least8_t upper; /* upper bound of sequence first byte */
uint_least32_t mincp; /* smallest non-overlong encoded codepoint */
uint_least32_t maxcp; /* largest encodable codepoint */
- /*
- * implicit: table-offset represents the number of following
- * bytes of the form 10xxxxxx (6 bits capacity each)
- */
+ /*
+ * implicit: table-offset represents the number …
+ * bytes of the form 10xxxxxx (6 bits capacity e…
+ */
} lut[] = {
[0] = {
/* 0xxxxxxx */
@@ -104,8 +104,8 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas…
* sequence starter occurs right before a NUL-byte.
*/
for (i = 0; 1 + i < len; i++) {
- if(!BETWEEN(((const unsigned char *)str)[1 + i],
- 0x80, 0xBF)) {
+ if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80,
+ 0xBF)) {
break;
}
}
@@ -124,7 +124,7 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas…
* (i.e. between 0x80 (10000000) and 0xBF (10111111))
*/
for (i = 1; i <= off; i++) {
- if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
+ if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
/*
* byte does not match format; return
* number of bytes processed excluding the
@@ -201,8 +201,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l…
* We do not overwrite the mask because we guaranteed earlier
* that there are no bits higher than the mask allows.
*/
- ((unsigned char *)str)[0] = lut[off].lower |
- (uint_least8_t)(cp >> (6 * off));
+ ((unsigned char *)str)[0] =
+ lut[off].lower | (uint_least8_t)(cp >> (6 * off));
for (i = 1; i <= off; i++) {
/*
@@ -211,8 +211,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l…
* extract from the properly-shifted value using the
* mask 00111111 (0x3F)
*/
- ((unsigned char *)str)[i] = 0x80 |
- ((cp >> (6 * (off - i))) & 0x3F);
+ ((unsigned char *)str)[i] =
+ 0x80 | ((cp >> (6 * (off - i))) & 0x3F);
}
return 1 + off;
diff --git a/src/util.c b/src/util.c
@@ -37,16 +37,20 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTU…
*/
dest->type = src->type;
if (src->type == HERODOTUS_TYPE_CODEPOINT) {
- dest->src = (src->src == NULL) ? NULL :
- ((const uint_least32_t *)(src->src)) + src->off;
+ dest->src =
+ (src->src == NULL) ?
+ NULL :
+ ((const uint_least32_t *)(src->src)) + src->of…
} else { /* src->type == HERODOTUS_TYPE_UTF8 */
- dest->src = (src->src == NULL) ? NULL :
- ((const char *)(src->src)) + src->off;
+ dest->src = (src->src == NULL) ?
+ NULL :
+ ((const char *)(src->src)) + src->off;
}
if (src->srclen == SIZE_MAX) {
dest->srclen = SIZE_MAX;
} else {
- dest->srclen = (src->off < src->srclen) ? src->srclen - src->o…
+ dest->srclen =
+ (src->off < src->srclen) ? src->srclen - src->off : 0;
}
dest->off = 0;
dest->terminated_by_null = src->terminated_by_null;
@@ -62,8 +66,10 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS…
* to release the limit and, instead, we just
* prevent any more reads
*/
- dest->soft_limit[i] = (src->off < src->soft_limit[i]) ?
- src->soft_limit[i] - src->off : 0;
+ dest->soft_limit[i] =
+ (src->off < src->soft_limit[i]) ?
+ src->soft_limit[i] - src->off :
+ 0;
}
}
}
@@ -141,9 +147,9 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,…
*cp = ((const uint_least32_t *)(r->src))[r->off];
ret = 1;
} else { /* r->type == HERODOTUS_TYPE_UTF8 */
- ret = grapheme_decode_utf8((const char *)r->src + r->off,
- MIN(r->srclen, r->soft_limit[0]) -
- r->off, cp);
+ ret = grapheme_decode_utf8(
+ (const char *)r->src + r->off,
+ MIN(r->srclen, r->soft_limit[0]) - r->off, cp);
}
if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
@@ -176,8 +182,8 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,…
}
void
-herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
- void *dest, size_t destlen)
+herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, void *des…
+ size_t destlen)
{
w->type = type;
w->dest = dest;
@@ -212,8 +218,8 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
* (the last case meaning truncation).
*/
if (w->type == HERODOTUS_TYPE_CODEPOINT) {
- ((uint_least32_t *)(w->dest))
- [w->first_unwritable_offset] = 0;
+ ((uint_least32_t
+ *)(w->dest))[w->first_unwritable_offset] = 0;
} else { /* w->type == HERODOTUS_TYPE_UTF8 */
((char *)(w->dest))[w->first_unwritable_offset] = '\0';
}
@@ -226,8 +232,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
* byte.
*/
if (w->type == HERODOTUS_TYPE_CODEPOINT) {
- ((uint_least32_t *)(w->dest))
- [w->destlen - 1] = 0;
+ ((uint_least32_t *)(w->dest))[w->destlen - 1] = 0;
} else { /* w->type == HERODOTUS_TYPE_UTF8 */
((char *)(w->dest))[w->destlen - 1] = '\0';
}
@@ -267,8 +272,8 @@ herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32…
if (w->dest != NULL && w->off + ret < w->destlen) {
/* we still have enough room in the buffer */
- grapheme_encode_utf8(cp, (char *)(w->dest) +
- w->off, w->destlen - w->off);
+ grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
+ w->destlen - w->off);
} else if (w->first_unwritable_offset == SIZE_MAX) {
/*
* the first unwritable offset has not been
@@ -328,8 +333,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le…
/* fill in the two next raw properties (after no-initialization) */
p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, …
- HERODOTUS_STATUS_SUCCESS; ) {
+ for (i = 0;
+ i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
p->raw.next_prop[i++] = p->get_break_prop(cp);
}
@@ -338,8 +344,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le…
/* fill in the two next skip properties (after no-initialization) */
p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true,…
- HERODOTUS_STATUS_SUCCESS; ) {
+ for (i = 0;
+ i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
prop = p->get_break_prop(cp);
if (!p->is_skippable_prop(prop)) {
p->skip.next_prop[i++] = prop;
diff --git a/src/util.h b/src/util.h
@@ -10,25 +10,25 @@
#include "../grapheme.h"
#undef MIN
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
#undef MAX
-#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
#undef LEN
#define LEN(x) (sizeof(x) / sizeof(*(x)))
#undef likely
#undef unlikely
#ifdef __has_builtin
- #if __has_builtin(__builtin_expect)
- #define likely(expr) __builtin_expect(!!(expr), 1)
- #define unlikely(expr) __builtin_expect(!!(expr), 0)
- #else
- #define likely(expr) (expr)
- #define unlikely(expr) (expr)
- #endif
+#if __has_builtin(__builtin_expect)
+#define likely(expr) __builtin_expect(!!(expr), 1)
+#define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
- #define likely(expr) (expr)
- #define unlikely(expr) (expr)
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
+#endif
+#else
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
#endif
/*
@@ -84,6 +84,7 @@ struct proper {
uint_least8_t prev_prop[2];
uint_least8_t next_prop[2];
} raw, skip;
+
HERODOTUS_READER mid_reader, raw_reader, skip_reader;
void *state;
uint_least8_t no_prop;
@@ -100,7 +101,8 @@ void herodotus_reader_pop_limit(HERODOTUS_READER *);
size_t herodotus_reader_number_read(const HERODOTUS_READER *);
size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
-enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_…
+enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool,
+ uint_least32_t *);
void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
size_t);
diff --git a/src/word.c b/src/word.c
@@ -6,8 +6,7 @@
#include "../grapheme.h"
#include "util.h"
-struct word_break_state
-{
+struct word_break_state {
bool ri_even;
};
@@ -16,7 +15,8 @@ get_word_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (uint_least8_t)
- word_break_minor[word_break_major[cp >> 8] + (cp & 0xff…
+ word_break_minor[word_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return WORD_BREAK_PROP_OTHER;
}
@@ -26,8 +26,7 @@ static bool
is_skippable_word_prop(uint_least8_t prop)
{
return prop == WORD_BREAK_PROP_EXTEND ||
- prop == WORD_BREAK_PROP_FORMAT ||
- prop == WORD_BREAK_PROP_ZWJ;
+ prop == WORD_BREAK_PROP_FORMAT || prop == WORD_BREAK_PROP_ZWJ;
}
static void
@@ -79,22 +78,24 @@ next_word_break(HERODOTUS_READER *r)
/* WB3a */
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE ||
- p.raw.prev_prop[0] == WORD_BREAK_PROP_CR ||
+ p.raw.prev_prop[0] == WORD_BREAK_PROP_CR ||
p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) {
break;
}
/* WB3b */
if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE ||
- p.raw.next_prop[0] == WORD_BREAK_PROP_CR ||
+ p.raw.next_prop[0] == WORD_BREAK_PROP_CR ||
p.raw.next_prop[0] == WORD_BREAK_PROP_LF) {
break;
}
/* WB3c */
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ &&
- (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPH…
- p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPIC…
+ (p.raw.next_prop[0] ==
+ WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC ||
+ p.raw.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) {
continue;
}
@@ -112,37 +113,43 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB5 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB6 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
- (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER …
- p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[1] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB7 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER …
- p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[1] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
@@ -174,8 +181,9 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB9 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) {
continue;
@@ -183,15 +191,16 @@ next_word_break(HERODOTUS_READER *r)
/* WB10 */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB11 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC &&
p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) {
@@ -200,8 +209,8 @@ next_word_break(HERODOTUS_READER *r)
/* WB12 */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) {
continue;
@@ -214,11 +223,12 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB13a */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
- p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC …
- p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA …
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) {
continue;
@@ -226,10 +236,11 @@ next_word_break(HERODOTUS_READER *r)
/* WB13b */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
- p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER …
- p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC …
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC ||
p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) {
continue;
}
diff --git a/test/bidirectional.c b/test/bidirectional.c
@@ -25,14 +25,16 @@ main(int argc, char *argv[])
for (i = 0, failed = 0; i < LEN(bidirectional_test); i++) {
/*if (i != 490798)
- continue;*/
+ continue;*/
for (m = 0; m < bidirectional_test[i].modelen; m++) {
ret = grapheme_get_bidirectional_embedding_levels(
- bidirectional_test[i].cp, bidirectional_test[i…
+ bidirectional_test[i].cp,
+ bidirectional_test[i].cplen,
bidirectional_test[i].mode[m], lev, levlen);
- if (ret != bidirectional_test[i].cplen || ret > levlen…
+ if (ret != bidirectional_test[i].cplen ||
+ ret > levlen) {
goto err;
}
@@ -43,18 +45,22 @@ main(int argc, char *argv[])
}
continue;
err:
- fprintf(stderr, "%s: Failed conformance test %zu (mode…
+ fprintf(stderr,
+ "%s: Failed conformance test %zu (mode %i) [",
argv[0], i, bidirectional_test[i].mode[m]);
for (j = 0; j < bidirectional_test[i].cplen; j++) {
- fprintf(stderr, " 0x%04" PRIXLEAST32, bidirect…
+ fprintf(stderr, " 0x%04" PRIXLEAST32,
+ bidirectional_test[i].cp[j]);
}
fprintf(stderr, " ],\n\tgot (");
for (j = 0; j < ret; j++) {
- fprintf(stderr, " %" PRIdLEAST8, (int_least8_t…
+ fprintf(stderr, " %" PRIdLEAST8,
+ (int_least8_t)lev[j]);
}
fprintf(stderr, " ),\n\texpected (");
for (j = 0; j < ret; j++) {
- fprintf(stderr, " %" PRIdLEAST8, bidirectional…
+ fprintf(stderr, " %" PRIdLEAST8,
+ bidirectional_test[i].level[j]);
}
fprintf(stderr, " ).\n");
failed++;
diff --git a/test/case.c b/test/case.c
@@ -9,10 +9,12 @@
struct unit_test_is_case_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
} input;
+
struct {
bool ret;
size_t caselen;
@@ -21,11 +23,13 @@ struct unit_test_is_case_utf8 {
struct unit_test_to_case_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
size_t destlen;
} input;
+
struct {
const char *dest;
size_t ret;
@@ -35,57 +39,69 @@ struct unit_test_to_case_utf8 {
static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { true, 2 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { true, 2 },
},
{
.description = "one word, violation",
- .input = { "Hello", 5 },
+ .input = { "Hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "gru" "\xC3\x9F" "fOrmel", 11 },
+ .input = { "gru"
+ "\xC3\x9F"
+ "fOrmel",
+ 11 },
.output = { false, 6 },
},
{
.description = "one word, full confirmation",
- .input = { "gru" "\xC3\x9F" "formel", 11 },
+ .input = { "gru"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { true, 11 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "Hello", SIZE_MAX },
+ .input = { "Hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated…
- .input = { "gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
+ .input = { "gru"
+ "\xC3\x9F"
+ "fOrmel",
+ SIZE_MAX },
.output = { false, 6 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "gru" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "gru"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { true, 11 },
},
};
@@ -93,57 +109,63 @@ static const struct unit_test_is_case_utf8 is_lowercase_ut…
static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { true, 1 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { true, 1 },
},
{
.description = "one word, violation",
- .input = { "hello", 5 },
+ .input = { "hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "GRU" "\xC3\x9F" "formel", 11 },
+ .input = { "GRU"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { false, 3 },
},
{
.description = "one word, full confirmation",
- .input = { "HELLO", 5 },
+ .input = { "HELLO", 5 },
.output = { true, 5 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "hello", SIZE_MAX },
+ .input = { "hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated…
- .input = { "GRU" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "GRU"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { false, 3 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "HELLO", SIZE_MAX },
+ .input = { "HELLO", SIZE_MAX },
.output = { true, 5 },
},
};
@@ -151,77 +173,103 @@ static const struct unit_test_is_case_utf8 is_uppercase_…
static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { true, 1 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { true, 1 },
},
{
.description = "one word, violation",
- .input = { "hello", 5 },
+ .input = { "hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "Gru" "\xC3\x9F" "fOrmel", 11 },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "fOrmel",
+ 11 },
.output = { false, 6 },
},
{
.description = "one word, full confirmation",
- .input = { "Gru" "\xC3\x9F" "formel", 11 },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { true, 11 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "hello", SIZE_MAX },
+ .input = { "hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated…
- .input = { "Gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "fOrmel",
+ SIZE_MAX },
.output = { false, 6 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "Gru" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { true, 11 },
},
{
.description = "multiple words, partial confirmation",
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", 18 },
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "fOrmel!",
+ 18 },
.output = { false, 12 },
},
{
.description = "multiple words, full confirmation",
- .input = { "Hello Gru" "\xC3\x9F" "formel!", 18 },
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "formel!",
+ 18 },
.output = { true, 18 },
},
{
- .description = "multiple words, partial confirmation, NUL-term…
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", SIZE_MAX },
+ .description =
+ "multiple words, partial confirmation, NUL-terminated",
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "fOrmel!",
+ SIZE_MAX },
.output = { false, 12 },
},
{
- .description = "multiple words, full confirmation, NUL-termina…
- .input = { "Hello Gru" "\xC3\x9F" "formel!", SIZE_MAX },
+ .description =
+ "multiple words, full confirmation, NUL-terminated",
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "formel!",
+ SIZE_MAX },
.output = { true, 18 },
},
};
@@ -229,72 +277,74 @@ static const struct unit_test_is_case_utf8 is_titlecase_u…
static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "a", 1 },
},
{
.description = "one character, no conversion",
- .input = { "\xC3\x9F", 2, 10 },
+ .input = { "\xC3\x9F", 2, 10 },
.output = { "\xC3\x9F", 2 },
},
{
.description = "one character, conversion, truncation",
- .input = { "A", 1, 0 },
+ .input = { "A", 1, 0 },
.output = { "", 1 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "a", 1 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX, 10 },
+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
.output = { "\xC3\x9F", 2 },
},
{
- .description = "one character, conversion, NUL-terminated, tru…
- .input = { "A", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation…
+ .input = { "A", SIZE_MAX, 0 },
.output = { "", 1 },
},
{
.description = "one word, conversion",
- .input = { "wOrD", 4, 10 },
+ .input = { "wOrD", 4, 10 },
.output = { "word", 4 },
},
{
.description = "one word, no conversion",
- .input = { "word", 4, 10 },
+ .input = { "word", 4, 10 },
.output = { "word", 4 },
},
{
.description = "one word, conversion, truncation",
- .input = { "wOrD", 4, 3 },
+ .input = { "wOrD", 4, 3 },
.output = { "wo", 4 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "wOrD", SIZE_MAX, 10 },
+ .input = { "wOrD", SIZE_MAX, 10 },
.output = { "word", 4 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "word", SIZE_MAX, 10 },
+ .input = { "word", SIZE_MAX, 10 },
.output = { "word", 4 },
},
{
- .description = "one word, conversion, NUL-terminated, truncati…
- .input = { "wOrD", SIZE_MAX, 3 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "wOrD", SIZE_MAX, 3 },
.output = { "wo", 4 },
},
};
@@ -302,72 +352,86 @@ static const struct unit_test_to_case_utf8 to_lowercase_u…
static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "\xC3\x9F", 2, 10 },
+ .input = { "\xC3\x9F", 2, 10 },
.output = { "SS", 2 },
},
{
.description = "one character, no conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, conversion, truncation",
- .input = { "\xC3\x9F", 2, 0 },
+ .input = { "\xC3\x9F", 2, 0 },
.output = { "", 2 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX, 10 },
+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
.output = { "SS", 2 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
- .description = "one character, conversion, NUL-terminated, tru…
- .input = { "\xC3\x9F", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation…
+ .input = { "\xC3\x9F", SIZE_MAX, 0 },
.output = { "", 2 },
},
{
.description = "one word, conversion",
- .input = { "gRu" "\xC3\x9F" "fOrMel", 11, 15 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "fOrMel",
+ 11, 15 },
.output = { "GRUSSFORMEL", 11 },
},
{
.description = "one word, no conversion",
- .input = { "WORD", 4, 10 },
+ .input = { "WORD", 4, 10 },
.output = { "WORD", 4 },
},
{
.description = "one word, conversion, truncation",
- .input = { "gRu" "\xC3\x9F" "formel", 11, 5 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ 11, 5 },
.output = { "GRUS", 11 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 15 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX, 15 },
.output = { "GRUSSFORMEL", 11 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "WORD", SIZE_MAX, 10 },
+ .input = { "WORD", SIZE_MAX, 10 },
.output = { "WORD", 4 },
},
{
- .description = "one word, conversion, NUL-terminated, truncati…
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 5 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX, 5 },
.output = { "GRUS", 11 },
},
};
@@ -375,102 +439,105 @@ static const struct unit_test_to_case_utf8 to_uppercase…
static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "a", 1, 10 },
+ .input = { "a", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, no conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, conversion, truncation",
- .input = { "a", 1, 0 },
+ .input = { "a", 1, 0 },
.output = { "", 1 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "a", SIZE_MAX, 10 },
+ .input = { "a", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
- .description = "one character, conversion, NUL-terminated, tru…
- .input = { "a", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation…
+ .input = { "a", SIZE_MAX, 0 },
.output = { "", 1 },
},
{
.description = "one word, conversion",
- .input = { "heLlo", 5, 10 },
+ .input = { "heLlo", 5, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, no conversion",
- .input = { "Hello", 5, 10 },
+ .input = { "Hello", 5, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, conversion, truncation",
- .input = { "heLlo", 5, 2 },
+ .input = { "heLlo", 5, 2 },
.output = { "H", 5 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "heLlo", SIZE_MAX, 10 },
+ .input = { "heLlo", SIZE_MAX, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "Hello", SIZE_MAX, 10 },
+ .input = { "Hello", SIZE_MAX, 10 },
.output = { "Hello", 5 },
},
{
- .description = "one word, conversion, NUL-terminated, truncati…
- .input = { "heLlo", SIZE_MAX, 3 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "heLlo", SIZE_MAX, 3 },
.output = { "He", 5 },
},
{
.description = "two words, conversion",
- .input = { "heLlo wORLd!", 12, 20 },
+ .input = { "heLlo wORLd!", 12, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, no conversion",
- .input = { "Hello World!", 12, 20 },
+ .input = { "Hello World!", 12, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, conversion, truncation",
- .input = { "heLlo wORLd!", 12, 8 },
+ .input = { "heLlo wORLd!", 12, 8 },
.output = { "Hello W", 12 },
},
{
.description = "two words, conversion, NUL-terminated",
- .input = { "heLlo wORLd!", SIZE_MAX, 20 },
+ .input = { "heLlo wORLd!", SIZE_MAX, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, no conversion, NUL-terminated",
- .input = { "Hello World!", SIZE_MAX, 20 },
+ .input = { "Hello World!", SIZE_MAX, 20 },
.output = { "Hello World!", 12 },
},
{
- .description = "two words, conversion, NUL-terminated, truncat…
- .input = { "heLlo wORLd!", SIZE_MAX, 4 },
+ .description =
+ "two words, conversion, NUL-terminated, truncation",
+ .input = { "heLlo wORLd!", SIZE_MAX, 4 },
.output = { "Hel", 12 },
},
};
@@ -485,14 +552,14 @@ unit_test_callback_is_case_utf8(const void *t, size_t off…
size_t caselen = 0x7f;
if (t == is_lowercase_utf8) {
- ret = grapheme_is_lowercase_utf8(test->input.src, test->input.…
- &caselen);
+ ret = grapheme_is_lowercase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else if (t == is_uppercase_utf8) {
- ret = grapheme_is_uppercase_utf8(test->input.src, test->input.…
- &caselen);
+ ret = grapheme_is_uppercase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else if (t == is_titlecase_utf8) {
- ret = grapheme_is_titlecase_utf8(test->input.src, test->input.…
- &caselen);
+ ret = grapheme_is_titlecase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else {
goto err;
@@ -505,10 +572,11 @@ unit_test_callback_is_case_utf8(const void *t, size_t off…
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
- name, off, test->description, ret ? "true" : "false",
- caselen, test->output.ret ? "true" : "false",
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned (%s, %zu) instead of (%s, %zu)).\n",
+ argv0, name, off, test->description, ret ? "true" : "false",
+ caselen, test->output.ret ? "true" : "false",
test->output.caselen);
return 1;
}
@@ -526,21 +594,25 @@ unit_test_callback_to_case_utf8(const void *t, size_t off…
memset(buf, 0x7f, LEN(buf));
if (t == to_lowercase_utf8) {
- ret = grapheme_to_lowercase_utf8(test->input.src, test->input.…
- buf, test->input.destlen);
+ ret = grapheme_to_lowercase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else if (t == to_uppercase_utf8) {
- ret = grapheme_to_uppercase_utf8(test->input.src, test->input.…
- buf, test->input.destlen);
+ ret = grapheme_to_uppercase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else if (t == to_titlecase_utf8) {
- ret = grapheme_to_titlecase_utf8(test->input.src, test->input.…
- buf, test->input.destlen);
+ ret = grapheme_to_titlecase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else {
goto err;
}
/* check results */
if (ret != test->output.ret ||
- memcmp(buf, test->output.dest, MIN(test->input.destlen, test->outp…
+ memcmp(buf, test->output.dest,
+ MIN(test->input.destlen, test->output.ret))) {
goto err;
}
@@ -553,9 +625,10 @@ unit_test_callback_to_case_utf8(const void *t, size_t off,…
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", ar…
- name, off, test->description, (int)ret, buf, ret,
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
+ argv0, name, off, test->description, (int)ret, buf, ret,
(int)test->output.ret, test->output.dest, test->output.ret);
return 1;
}
@@ -565,16 +638,22 @@ main(int argc, char *argv[])
{
(void)argc;
- return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_ut…
- LEN(is_lowercase_utf8), "grapheme_is_lowercase_u…
- run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_ut…
- LEN(is_uppercase_utf8), "grapheme_is_uppercase_u…
- run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_ut…
- LEN(is_titlecase_utf8), "grapheme_is_titlecase_u…
- run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_ut…
- LEN(to_lowercase_utf8), "grapheme_to_lowercase_u…
- run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_ut…
- LEN(to_uppercase_utf8), "grapheme_to_uppercase_u…
- run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_ut…
- LEN(to_titlecase_utf8), "grapheme_to_titlecase_u…
+ return run_unit_tests(unit_test_callback_is_case_utf8,
+ is_lowercase_utf8, LEN(is_lowercase_utf8),
+ "grapheme_is_lowercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_is_case_utf8,
+ is_uppercase_utf8, LEN(is_uppercase_utf8),
+ "grapheme_is_uppercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_is_case_utf8,
+ is_titlecase_utf8, LEN(is_titlecase_utf8),
+ "grapheme_is_titlecase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_lowercase_utf8, LEN(to_lowercase_utf8),
+ "grapheme_to_lowercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_uppercase_utf8, LEN(to_uppercase_utf8),
+ "grapheme_to_uppercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_titlecase_utf8, LEN(to_titlecase_utf8),
+ "grapheme_to_titlecase_utf8", argv[0]);
}
diff --git a/test/character.c b/test/character.c
@@ -92,12 +92,10 @@ static const struct unit_test_next_break_utf8 next_characte…
static int
unit_test_callback_next_character_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_character_break,
- name, argv0);
+ return unit_test_callback_next_break(
+ t, off, grapheme_next_character_break, name, argv0);
}
static int
@@ -105,9 +103,8 @@ unit_test_callback_next_character_break_utf8(const void *t,…
const char *name,
const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_character_brea…
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_character_break_utf8, name, argv0);
}
int
@@ -116,11 +113,13 @@ main(int argc, char *argv[])
(void)argc;
return run_break_tests(grapheme_next_character_break,
- character_break_test, LEN(character_break_test)…
+ character_break_test, LEN(character_break_test),
+ argv[0]) +
run_unit_tests(unit_test_callback_next_character_break,
next_character_break, LEN(next_character_break),
"grapheme_next_character_break", argv[0]) +
run_unit_tests(unit_test_callback_next_character_break_utf8,
- next_character_break_utf8, LEN(next_character_br…
+ next_character_break_utf8,
+ LEN(next_character_break_utf8),
"grapheme_next_character_break_utf8", argv[0]);
}
diff --git a/test/line.c b/test/line.c
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_line_bre…
};
static int
-unit_test_callback_next_line_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+unit_test_callback_next_line_break(const void *t, size_t off, const char *name,
+ const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_line_break,
+ return unit_test_callback_next_break(t, off, grapheme_next_line_break,
name, argv0);
}
static int
unit_test_callback_next_line_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_line_break_utf…
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_line_break_utf8, name, argv0);
}
int
@@ -115,9 +111,8 @@ main(int argc, char *argv[])
{
(void)argc;
- return run_break_tests(grapheme_next_line_break,
- line_break_test, LEN(line_break_test),
- argv[0]) +
+ return run_break_tests(grapheme_next_line_break, line_break_test,
+ LEN(line_break_test), argv[0]) +
run_unit_tests(unit_test_callback_next_line_break,
next_line_break, LEN(next_line_break),
"grapheme_next_line_break", argv[0]) +
diff --git a/test/sentence.c b/test/sentence.c
@@ -92,22 +92,18 @@ static const struct unit_test_next_break_utf8 next_sentence…
static int
unit_test_callback_next_sentence_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_sentence_break,
- name, argv0);
+ return unit_test_callback_next_break(
+ t, off, grapheme_next_sentence_break, name, argv0);
}
static int
unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv…
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_sentence_break…
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_sentence_break_utf8, name, argv0);
}
int
@@ -116,12 +112,13 @@ main(int argc, char *argv[])
(void)argc;
return run_break_tests(grapheme_next_sentence_break,
- sentence_break_test,
- LEN(sentence_break_test), argv[0]) +
+ sentence_break_test, LEN(sentence_break_test),
+ argv[0]) +
run_unit_tests(unit_test_callback_next_sentence_break,
next_sentence_break, LEN(next_sentence_break),
"grapheme_next_sentence_break", argv[0]) +
run_unit_tests(unit_test_callback_next_sentence_break_utf8,
- next_sentence_break_utf8, LEN(next_sentence_brea…
+ next_sentence_break_utf8,
+ LEN(next_sentence_break_utf8),
"grapheme_next_character_break_utf8", argv[0]);
}
diff --git a/test/utf8-decode.c b/test/utf8-decode.c
@@ -8,281 +8,279 @@
#include "util.h"
static const struct {
- char *arr; /* UTF-8 byte sequence */
- size_t len; /* length of UTF-8 byte sequence */
- size_t exp_len; /* expected length returned */
- uint_least32_t exp_cp; /* expected codepoint returned */
+ char *arr; /* UTF-8 byte sequence */
+ size_t len; /* length of UTF-8 byte sequence */
+ size_t exp_len; /* expected length returned */
+ uint_least32_t exp_cp; /* expected codepoint returned */
} dec_test[] = {
{
/* empty sequence
- * [ ] ->
- * INVALID
- */
- .arr = NULL,
- .len = 0,
+ * [ ] ->
+ * INVALID
+ */
+ .arr = NULL,
+ .len = 0,
.exp_len = 0,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid lead byte
- * [ 11111101 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xFD },
- .len = 1,
+ * [ 11111101 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xFD },
+ .len = 1,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 1-byte sequence
- * [ 00000001 ] ->
- * 0000001
- */
- .arr = (char *)(unsigned char[]){ 0x01 },
- .len = 1,
+ * [ 00000001 ] ->
+ * 0000001
+ */
+ .arr = (char *)(unsigned char[]) { 0x01 },
+ .len = 1,
.exp_len = 1,
- .exp_cp = 0x1,
+ .exp_cp = 0x1,
},
{
/* valid 2-byte sequence
- * [ 11000011 10111111 ] ->
- * 00011111111
- */
- .arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
- .len = 2,
+ * [ 11000011 10111111 ] ->
+ * 00011111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
+ .len = 2,
.exp_len = 2,
- .exp_cp = 0xFF,
+ .exp_cp = 0xFF,
},
{
/* invalid 2-byte sequence (second byte missing)
- * [ 11000011 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC3 },
- .len = 1,
+ * [ 11000011 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3 },
+ .len = 1,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 2-byte sequence (second byte malformed)
- * [ 11000011 11111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC3, 0xFF },
- .len = 2,
+ * [ 11000011 11111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xFF },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 2-byte sequence (overlong encoded)
- * [ 11000001 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC1, 0xBF },
- .len = 2,
+ * [ 11000001 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC1, 0xBF },
+ .len = 2,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 3-byte sequence
- * [ 11100000 10111111 10111111 ] ->
- * 0000111111111111
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
- .len = 3,
+ * [ 11100000 10111111 10111111 ] ->
+ * 0000111111111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
+ .len = 3,
.exp_len = 3,
- .exp_cp = 0xFFF,
+ .exp_cp = 0xFFF,
},
{
/* invalid 3-byte sequence (second byte missing)
- * [ 11100000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0 },
- .len = 1,
+ * [ 11100000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0 },
+ .len = 1,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (second byte malformed)
- * [ 11100000 01111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
- .len = 3,
+ * [ 11100000 01111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F, 0xBF },
+ .len = 3,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (short string, second byte malforme…
- * [ 11100000 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F },
- .len = 2,
+ * [ 11100000 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (third byte missing)
- * [ 11100000 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF },
- .len = 2,
+ * [ 11100000 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF },
+ .len = 2,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (third byte malformed)
- * [ 11100000 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
- .len = 3,
+ * [ 11100000 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0x7F },
+ .len = 3,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (overlong encoded)
- * [ 11100000 10011111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
- .len = 3,
+ * [ 11100000 10011111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x9F, 0xBF },
+ .len = 3,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (UTF-16 surrogate half)
- * [ 11101101 10100000 10000000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
- .len = 3,
+ * [ 11101101 10100000 10000000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xED, 0xA0, 0x80 },
+ .len = 3,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 4-byte sequence
- * [ 11110011 10111111 10111111 10111111 ] ->
- * 011111111111111111111
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
- .len = 4,
+ * [ 11110011 10111111 10111111 10111111 ] ->
+ * 011111111111111111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
+ .len = 4,
.exp_len = 4,
- .exp_cp = UINT32_C(0xFFFFF),
+ .exp_cp = UINT32_C(0xFFFFF),
},
{
/* invalid 4-byte sequence (second byte missing)
- * [ 11110011 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3 },
- .len = 1,
+ * [ 11110011 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3 },
+ .len = 1,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (second byte malformed)
- * [ 11110011 01111111 10111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
- .len = 4,
+ * [ 11110011 01111111 10111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF, 0xBF },
+ .len = 4,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
- /* invalid 4-byte sequence (short string 1, second byte malfor…
- * [ 11110011 011111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F },
- .len = 2,
+ /* invalid 4-byte sequence (short string 1, second byte
+ * malformed) [ 11110011 011111111 ] -> INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
- /* invalid 4-byte sequence (short string 2, second byte malfor…
- * [ 11110011 011111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
- .len = 3,
+ /* invalid 4-byte sequence (short string 2, second byte
+ * malformed) [ 11110011 011111111 10111111 ] -> INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF },
+ .len = 3,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (third byte missing)
- * [ 11110011 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF },
- .len = 2,
+ * [ 11110011 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF },
+ .len = 2,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (third byte malformed)
- * [ 11110011 10111111 01111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
- .len = 4,
+ * [ 11110011 10111111 01111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F, 0xBF },
+ .len = 4,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (short string, third byte malformed)
- * [ 11110011 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
- .len = 3,
+ * [ 11110011 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F },
+ .len = 3,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (fourth byte missing)
- * [ 11110011 10111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
- .len = 3,
+ * [ 11110011 10111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF },
+ .len = 3,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (fourth byte malformed)
- * [ 11110011 10111111 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
- .len = 4,
+ * [ 11110011 10111111 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0x7F },
+ .len = 4,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (overlong encoded)
- * [ 11110000 10000000 10000001 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
- .len = 4,
+ * [ 11110000 10000000 10000001 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF0, 0x80, 0x81, 0xBF },
+ .len = 4,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (UTF-16-unrepresentable)
- * [ 11110100 10010000 10000000 10000000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
- .len = 4,
+ * [ 11110100 10010000 10000000 10000000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF4, 0x90, 0x80, 0x80 },
+ .len = 4,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
};
@@ -298,12 +296,12 @@ main(int argc, char *argv[])
size_t len;
uint_least32_t cp;
- len = grapheme_decode_utf8(dec_test[i].arr,
- dec_test[i].len, &cp);
+ len = grapheme_decode_utf8(dec_test[i].arr, dec_test[i].len,
+ &cp);
- if (len != dec_test[i].exp_len ||
- cp != dec_test[i].exp_cp) {
- fprintf(stderr, "%s: Failed test %zu: "
+ if (len != dec_test[i].exp_len || cp != dec_test[i].exp_cp) {
+ fprintf(stderr,
+ "%s: Failed test %zu: "
"Expected (%zx,%u), but got (%zx,%u).\n",
argv[0], i, dec_test[i].exp_len,
dec_test[i].exp_cp, len, cp);
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
@@ -8,44 +8,44 @@
#include "util.h"
static const struct {
- uint_least32_t cp; /* input codepoint */
- char *exp_arr; /* expected UTF-8 byte sequence */
- size_t exp_len; /* expected length of UTF-8 sequence */
+ uint_least32_t cp; /* input codepoint */
+ char *exp_arr; /* expected UTF-8 byte sequence */
+ size_t exp_len; /* expected length of UTF-8 sequence */
} enc_test[] = {
{
/* invalid codepoint (UTF-16 surrogate half) */
- .cp = UINT32_C(0xD800),
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
+ .cp = UINT32_C(0xD800),
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
.exp_len = 3,
},
{
/* invalid codepoint (UTF-16-unrepresentable) */
- .cp = UINT32_C(0x110000),
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
+ .cp = UINT32_C(0x110000),
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
.exp_len = 3,
},
{
/* codepoint encoded to a 1-byte sequence */
- .cp = 0x01,
- .exp_arr = (char *)(unsigned char[]){ 0x01 },
+ .cp = 0x01,
+ .exp_arr = (char *)(unsigned char[]) { 0x01 },
.exp_len = 1,
},
{
/* codepoint encoded to a 2-byte sequence */
- .cp = 0xFF,
- .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
+ .cp = 0xFF,
+ .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
.exp_len = 2,
},
{
/* codepoint encoded to a 3-byte sequence */
- .cp = 0xFFF,
- .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
+ .cp = 0xFFF,
+ .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
.exp_len = 3,
},
{
/* codepoint encoded to a 4-byte sequence */
- .cp = UINT32_C(0xFFFFF),
- .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
+ .cp = UINT32_C(0xFFFFF),
+ .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF …
.exp_len = 4,
},
};
@@ -66,11 +66,12 @@ main(int argc, char *argv[])
if (len != enc_test[i].exp_len ||
memcmp(arr, enc_test[i].exp_arr, len)) {
- fprintf(stderr, "%s, Failed test %zu: "
- "Expected (", argv[0], i);
+ fprintf(stderr,
+ "%s, Failed test %zu: "
+ "Expected (",
+ argv[0], i);
for (j = 0; j < enc_test[i].exp_len; j++) {
- fprintf(stderr, "0x%x",
- enc_test[i].exp_arr[j]);
+ fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]…
if (j + 1 < enc_test[i].exp_len) {
fprintf(stderr, " ");
}
diff --git a/test/util.c b/test/util.c
@@ -5,13 +5,14 @@
#include <stdio.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/types.h"
+#include "../grapheme.h"
#include "util.h"
int
run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
- const struct break_test *test, size_t testlen, const char *arg…
+ const struct break_test *test, size_t testlen,
+ const char *argv0)
{
size_t i, j, off, res, failed;
@@ -21,11 +22,14 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *…
res = next_break(test[i].cp + off, test[i].cplen - off…
/* check if our resulting offset matches */
- if (j == test[i].lenlen ||
- res != test[i].len[j++]) {
- fprintf(stderr, "%s: Failed conformance test %…
+ if (j == test[i].lenlen || res != test[i].len[j++]) {
+ fprintf(stderr,
+ "%s: Failed conformance test %zu "
+ "\"%s\".\n",
argv0, i, test[i].descr);
- fprintf(stderr, "J=%zu: EXPECTED len %zu, got …
+ fprintf(stderr,
+ "J=%zu: EXPECTED len %zu, got %zu\n",
+ j - 1, test[i].len[j - 1], res);
failed++;
break;
}
@@ -39,13 +43,15 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *…
int
run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
- const char *), const void *test, size_t testlen, const char *na…
+ const char *),
+ const void *test, size_t testlen, const char *name,
const char *argv0)
{
size_t i, failed;
for (i = 0, failed = 0; i < testlen; i++) {
- failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 …
+ failed +=
+ (unit_test_callback(test, i, name, argv0) == 0) ? 0 : …
}
printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
@@ -56,8 +62,9 @@ run_unit_tests(int (*unit_test_callback)(const void *, size_t…
int
unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
- size_t (*next_break)(const uint_least32_t *…
- const char *name, const char *argv0)
+ size_t (*next_break)(const uint_least32_t *,
+ size_t),
+ const char *name, const char *argv0)
{
const struct unit_test_next_break *test = t + off;
@@ -69,16 +76,18 @@ unit_test_callback_next_break(const struct unit_test_next_b…
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned %zu instead of %zu).\n", argv0,
- name, off, test->description, ret, test->output.ret);
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n",
+ argv0, name, off, test->description, ret, test->output.ret);
return 1;
}
int
unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
size_t off,
- size_t (*next_break_utf8)(const char *, siz…
+ size_t (*next_break_utf8)(const char *,
+ size_t),
const char *name, const char *argv0)
{
const struct unit_test_next_break_utf8 *test = t + off;
@@ -91,8 +100,9 @@ unit_test_callback_next_break_utf8(const struct unit_test_ne…
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned %zu instead of %zu).\n", argv0,
- name, off, test->description, ret, test->output.ret);
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n",
+ argv0, name, off, test->description, ret, test->output.ret);
return 1;
}
diff --git a/test/util.h b/test/util.h
@@ -6,16 +6,18 @@
#include "../grapheme.h"
#undef MIN
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
#undef LEN
#define LEN(x) (sizeof(x) / sizeof(*(x)))
struct unit_test_next_break {
const char *description;
+
struct {
const uint_least32_t *src;
size_t srclen;
} input;
+
struct {
size_t ret;
} output;
@@ -23,10 +25,12 @@ struct unit_test_next_break {
struct unit_test_next_break_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
} input;
+
struct {
size_t ret;
} output;
@@ -36,14 +40,17 @@ int run_break_tests(size_t (*next_break)(const uint_least32…
const struct break_test *test, size_t testlen,
const char *);
int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char …
- const char *), const void *, size_t, const char *, const ch…
+ const char *),
+ const void *, size_t, const char *, const char *);
int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
- size_t (*next_break)(const uint_least32_t *,…
+ size_t (*next_break)(const uint_least32_t *,
+ size_t),
const char *, const char *);
int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 …
size_t,
- size_t (*next_break_utf8)(const char *,…
+ size_t (*next_break_utf8)(const char *,
+ size_t),
const char *, const char *);
#endif /* UTIL_H */
diff --git a/test/word.c b/test/word.c
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_word_bre…
};
static int
-unit_test_callback_next_word_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+unit_test_callback_next_word_break(const void *t, size_t off, const char *name,
+ const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_word_break,
+ return unit_test_callback_next_break(t, off, grapheme_next_word_break,
name, argv0);
}
static int
unit_test_callback_next_word_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_word_break_utf…
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_word_break_utf8, name, argv0);
}
int
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.